From bc0c38d139ec7fcd5c030aea16b008f3732e42ac Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: latency tracer infrastructure This patch adds the latency tracer infrastructure. This patch does not add anything that will select and turn it on, but will be used by later patches. If it were to be compiled, it would add the following files to the debugfs: The root tracing directory: /debugfs/tracing/ This patch also adds the following files: available_tracers list of available tracers. Currently no tracers are available. Looking into this file only shows "none" which is used to unregister all tracers. current_tracer The trace that is currently active. Empty on start up. To switch to a tracer simply echo one of the tracers that are listed in available_tracers: example: (used with later patches) echo function > /debugfs/tracing/current_tracer To disable the tracer: echo disable > /debugfs/tracing/current_tracer tracing_enabled echoing "1" into this file starts the ftrace function tracing (if sysctl kernel.ftrace_enabled=1) echoing "0" turns it off. latency_trace This file is readonly and holds the result of the trace. trace This file outputs a easier to read version of the trace. iter_ctrl Controls the way the output of traces look. So far there's two controls: echoing in "symonly" will only show the kallsyms variables without the addresses (if kallsyms was configured) echoing in "verbose" will change the output to show a lot more data, but not very easy to understand by humans. echoing in "nosymonly" turns off symonly. echoing in "noverbose" turns off verbose. Signed-off-by: Steven Rostedt Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 1547 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1547 insertions(+) create mode 100644 kernel/trace/trace.c (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c new file mode 100644 index 00000000000..1b8eca7650d --- /dev/null +++ b/kernel/trace/trace.c @@ -0,0 +1,1547 @@ +/* + * ring buffer based function tracer + * + * Copyright (C) 2007-2008 Steven Rostedt + * Copyright (C) 2008 Ingo Molnar + * + * Originally taken from the RT patch by: + * Arnaldo Carvalho de Melo + * + * Based on code from the latency_tracer, that is: + * Copyright (C) 2004-2006 Ingo Molnar + * Copyright (C) 2004 William Lee Irwin III + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "trace.h" + +unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; +unsigned long __read_mostly tracing_thresh; + +static long notrace +ns2usecs(cycle_t nsec) +{ + nsec += 500; + do_div(nsec, 1000); + return nsec; +} + +static atomic_t tracer_counter; +static struct trace_array global_trace; + +static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu); + +static struct trace_array max_tr; + +static DEFINE_PER_CPU(struct trace_array_cpu, max_data); + +static int tracer_enabled; +static unsigned long trace_nr_entries = 4096UL; + +static struct tracer *trace_types __read_mostly; +static struct tracer *current_trace __read_mostly; +static int max_tracer_type_len; + +static DEFINE_MUTEX(trace_types_lock); + +static int __init set_nr_entries(char *str) +{ + if (!str) + return 0; + trace_nr_entries = simple_strtoul(str, &str, 0); + return 1; +} +__setup("trace_entries=", set_nr_entries); + +enum 
trace_type { + __TRACE_FIRST_TYPE = 0, + + TRACE_FN, + TRACE_CTX, + + __TRACE_LAST_TYPE +}; + +enum trace_flag_type { + TRACE_FLAG_IRQS_OFF = 0x01, + TRACE_FLAG_NEED_RESCHED = 0x02, + TRACE_FLAG_HARDIRQ = 0x04, + TRACE_FLAG_SOFTIRQ = 0x08, +}; + +enum trace_iterator_flags { + TRACE_ITER_PRINT_PARENT = 0x01, + TRACE_ITER_SYM_OFFSET = 0x02, + TRACE_ITER_SYM_ADDR = 0x04, + TRACE_ITER_VERBOSE = 0x08, +}; + +#define TRACE_ITER_SYM_MASK \ + (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR) + +/* These must match the bit postions above */ +static const char *trace_options[] = { + "print-parent", + "sym-offset", + "sym-addr", + "verbose", + NULL +}; + +static unsigned trace_flags; + + +/* + * Copy the new maximum trace into the separate maximum-trace + * structure. (this way the maximum trace is permanently saved, + * for later retrieval via /debugfs/tracing/latency_trace) + */ +static void notrace +__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) +{ + struct trace_array_cpu *data = tr->data[cpu]; + + max_tr.cpu = cpu; + max_tr.time_start = data->preempt_timestamp; + + data = max_tr.data[cpu]; + data->saved_latency = tracing_max_latency; + + memcpy(data->comm, tsk->comm, TASK_COMM_LEN); + data->pid = tsk->pid; + data->uid = tsk->uid; + data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; + data->policy = tsk->policy; + data->rt_priority = tsk->rt_priority; + + /* record this tasks comm */ + tracing_record_cmdline(current); +} + +notrace void +update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) +{ + struct trace_array_cpu *data; + void *save_trace; + int i; + + /* clear out all the previous traces */ + for_each_possible_cpu(i) { + data = tr->data[i]; + save_trace = max_tr.data[i]->trace; + memcpy(max_tr.data[i], data, sizeof(*data)); + data->trace = save_trace; + } + + __update_max_tr(tr, tsk, cpu); +} + +/** + * update_max_tr_single - only copy one trace over, and reset the rest + * @tr - tracer + * @tsk - task with the latency + * @cpu - the cpu of the buffer to copy. 
+ */ +notrace void +update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) +{ + struct trace_array_cpu *data = tr->data[cpu]; + void *save_trace; + int i; + + for_each_possible_cpu(i) + tracing_reset(max_tr.data[i]); + + save_trace = max_tr.data[cpu]->trace; + memcpy(max_tr.data[cpu], data, sizeof(*data)); + data->trace = save_trace; + + __update_max_tr(tr, tsk, cpu); +} + +int register_tracer(struct tracer *type) +{ + struct tracer *t; + int len; + int ret = 0; + + if (!type->name) { + pr_info("Tracer must have a name\n"); + return -1; + } + + mutex_lock(&trace_types_lock); + for (t = trace_types; t; t = t->next) { + if (strcmp(type->name, t->name) == 0) { + /* already found */ + pr_info("Trace %s already registered\n", + type->name); + ret = -1; + goto out; + } + } + + type->next = trace_types; + trace_types = type; + len = strlen(type->name); + if (len > max_tracer_type_len) + max_tracer_type_len = len; + out: + mutex_unlock(&trace_types_lock); + + return ret; +} + +void unregister_tracer(struct tracer *type) +{ + struct tracer **t; + int len; + + mutex_lock(&trace_types_lock); + for (t = &trace_types; *t; t = &(*t)->next) { + if (*t == type) + goto found; + } + pr_info("Trace %s not registered\n", type->name); + goto out; + + found: + *t = (*t)->next; + if (strlen(type->name) != max_tracer_type_len) + goto out; + + max_tracer_type_len = 0; + for (t = &trace_types; *t; t = &(*t)->next) { + len = strlen((*t)->name); + if (len > max_tracer_type_len) + max_tracer_type_len = len; + } + out: + mutex_unlock(&trace_types_lock); +} + +void notrace tracing_reset(struct trace_array_cpu *data) +{ + data->trace_idx = 0; + atomic_set(&data->underrun, 0); +} + +#ifdef CONFIG_FTRACE +static void notrace +function_trace_call(unsigned long ip, unsigned long parent_ip) +{ + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + + if (unlikely(!tracer_enabled)) + return; + + raw_local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) + ftrace(tr, data, ip, parent_ip, flags); + + atomic_dec(&data->disabled); + raw_local_irq_restore(flags); +} + +static struct ftrace_ops trace_ops __read_mostly = +{ + .func = function_trace_call, +}; +#endif + +notrace void tracing_start_function_trace(void) +{ + register_ftrace_function(&trace_ops); +} + +notrace void tracing_stop_function_trace(void) +{ + unregister_ftrace_function(&trace_ops); +} + +#define SAVED_CMDLINES 128 +static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; +static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; +static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; +static int cmdline_idx; +static DEFINE_SPINLOCK(trace_cmdline_lock); +atomic_t trace_record_cmdline_disabled; + +static void trace_init_cmdlines(void) +{ + memset(&map_pid_to_cmdline, -1, sizeof(map_pid_to_cmdline)); + memset(&map_cmdline_to_pid, -1, sizeof(map_cmdline_to_pid)); + cmdline_idx = 0; +} + +notrace void trace_stop_cmdline_recording(void); + +static void notrace trace_save_cmdline(struct task_struct *tsk) +{ + unsigned map; + unsigned idx; + + if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT)) + return; + + /* + * It's not the end of the world if we don't get + * the lock, but we also don't want to spin + * nor do we want to disable interrupts, + * so if we miss here, then better luck next time. 
+ */ + if (!spin_trylock(&trace_cmdline_lock)) + return; + + idx = map_pid_to_cmdline[tsk->pid]; + if (idx >= SAVED_CMDLINES) { + idx = (cmdline_idx + 1) % SAVED_CMDLINES; + + map = map_cmdline_to_pid[idx]; + if (map <= PID_MAX_DEFAULT) + map_pid_to_cmdline[map] = (unsigned)-1; + + map_pid_to_cmdline[tsk->pid] = idx; + + cmdline_idx = idx; + } + + memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN); + + spin_unlock(&trace_cmdline_lock); +} + +static notrace char *trace_find_cmdline(int pid) +{ + char *cmdline = "<...>"; + unsigned map; + + if (!pid) + return ""; + + if (pid > PID_MAX_DEFAULT) + goto out; + + map = map_pid_to_cmdline[pid]; + if (map >= SAVED_CMDLINES) + goto out; + + cmdline = saved_cmdlines[map]; + + out: + return cmdline; +} + +notrace void tracing_record_cmdline(struct task_struct *tsk) +{ + if (atomic_read(&trace_record_cmdline_disabled)) + return; + + trace_save_cmdline(tsk); +} + +static inline notrace struct trace_entry * +tracing_get_trace_entry(struct trace_array *tr, + struct trace_array_cpu *data) +{ + unsigned long idx, idx_next; + struct trace_entry *entry; + + idx = data->trace_idx; + idx_next = idx + 1; + + if (unlikely(idx_next >= tr->entries)) { + atomic_inc(&data->underrun); + idx_next = 0; + } + + data->trace_idx = idx_next; + + if (unlikely(idx_next != 0 && atomic_read(&data->underrun))) + atomic_inc(&data->underrun); + + entry = data->trace + idx * TRACE_ENTRY_SIZE; + + return entry; +} + +static inline notrace void +tracing_generic_entry_update(struct trace_entry *entry, + unsigned long flags) +{ + struct task_struct *tsk = current; + unsigned long pc; + + pc = preempt_count(); + + entry->idx = atomic_inc_return(&tracer_counter); + entry->preempt_count = pc & 0xff; + entry->pid = tsk->pid; + entry->t = now(raw_smp_processor_id()); + entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | + ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | + ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | + (need_resched() ? 
TRACE_FLAG_NEED_RESCHED : 0); +} + +notrace void +ftrace(struct trace_array *tr, struct trace_array_cpu *data, + unsigned long ip, unsigned long parent_ip, + unsigned long flags) +{ + struct trace_entry *entry; + + entry = tracing_get_trace_entry(tr, data); + tracing_generic_entry_update(entry, flags); + entry->type = TRACE_FN; + entry->fn.ip = ip; + entry->fn.parent_ip = parent_ip; +} + +notrace void +tracing_sched_switch_trace(struct trace_array *tr, + struct trace_array_cpu *data, + struct task_struct *prev, struct task_struct *next, + unsigned long flags) +{ + struct trace_entry *entry; + + entry = tracing_get_trace_entry(tr, data); + tracing_generic_entry_update(entry, flags); + entry->type = TRACE_CTX; + entry->ctx.prev_pid = prev->pid; + entry->ctx.prev_prio = prev->prio; + entry->ctx.prev_state = prev->state; + entry->ctx.next_pid = next->pid; + entry->ctx.next_prio = next->prio; +} + +enum trace_file_type { + TRACE_FILE_LAT_FMT = 1, +}; + +static struct trace_entry * +trace_entry_idx(struct trace_array *tr, unsigned long idx, int cpu) +{ + struct trace_entry *array = tr->data[cpu]->trace; + unsigned long underrun; + + if (idx >= tr->entries) + return NULL; + + underrun = atomic_read(&tr->data[cpu]->underrun); + if (underrun) + idx = ((underrun - 1) + idx) % tr->entries; + else if (idx >= tr->data[cpu]->trace_idx) + return NULL; + + return &array[idx]; +} + +static struct notrace trace_entry * +find_next_entry(struct trace_iterator *iter, int *ent_cpu) +{ + struct trace_array *tr = iter->tr; + struct trace_entry *ent, *next = NULL; + int next_cpu = -1; + int cpu; + + for_each_possible_cpu(cpu) { + if (!tr->data[cpu]->trace) + continue; + ent = trace_entry_idx(tr, iter->next_idx[cpu], cpu); + if (ent && + (!next || (long)(next->idx - ent->idx) > 0)) { + next = ent; + next_cpu = cpu; + } + } + + if (ent_cpu) + *ent_cpu = next_cpu; + + return next; +} + +static void *find_next_entry_inc(struct trace_iterator *iter) +{ + struct trace_entry *next; + int next_cpu = -1; + + next = find_next_entry(iter, &next_cpu); + + if (next) { + iter->next_idx[next_cpu]++; + iter->idx++; + } + iter->ent = next; + iter->cpu = next_cpu; + + return next ? 
iter : NULL; +} + +static void notrace * +s_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct trace_iterator *iter = m->private; + void *ent; + void *last_ent = iter->ent; + int i = (int)*pos; + + (*pos)++; + + /* can't go backwards */ + if (iter->idx > i) + return NULL; + + if (iter->idx < 0) + ent = find_next_entry_inc(iter); + else + ent = iter; + + while (ent && iter->idx < i) + ent = find_next_entry_inc(iter); + + iter->pos = *pos; + + if (last_ent && !ent) + seq_puts(m, "\n\nvim:ft=help\n"); + + return ent; +} + +static void *s_start(struct seq_file *m, loff_t *pos) +{ + struct trace_iterator *iter = m->private; + void *p = NULL; + loff_t l = 0; + int i; + + mutex_lock(&trace_types_lock); + + if (!current_trace || current_trace != iter->trace) + return NULL; + + atomic_inc(&trace_record_cmdline_disabled); + + /* let the tracer grab locks here if needed */ + if (current_trace->start) + current_trace->start(iter); + + if (*pos != iter->pos) { + iter->ent = NULL; + iter->cpu = 0; + iter->idx = -1; + + for (i = 0; i < NR_CPUS; i++) + iter->next_idx[i] = 0; + + for (p = iter; p && l < *pos; p = s_next(m, p, &l)) + ; + + } else { + l = *pos; + p = s_next(m, p, &l); + } + + return p; +} + +static void s_stop(struct seq_file *m, void *p) +{ + struct trace_iterator *iter = m->private; + + atomic_dec(&trace_record_cmdline_disabled); + + /* let the tracer release locks here if needed */ + if (current_trace && current_trace == iter->trace && iter->trace->stop) + iter->trace->stop(iter); + + mutex_unlock(&trace_types_lock); +} + +static void +seq_print_sym_short(struct seq_file *m, const char *fmt, unsigned long address) +{ +#ifdef CONFIG_KALLSYMS + char str[KSYM_SYMBOL_LEN]; + + kallsyms_lookup(address, NULL, NULL, NULL, str); + + seq_printf(m, fmt, str); +#endif +} + +static void +seq_print_sym_offset(struct seq_file *m, const char *fmt, unsigned long address) +{ +#ifdef CONFIG_KALLSYMS + char str[KSYM_SYMBOL_LEN]; + + sprint_symbol(str, address); + seq_printf(m, fmt, str); +#endif +} + +#ifndef CONFIG_64BIT +# define IP_FMT "%08lx" +#else +# define IP_FMT "%016lx" +#endif + +static void notrace +seq_print_ip_sym(struct seq_file *m, unsigned long ip, unsigned long sym_flags) +{ + if (!ip) { + seq_printf(m, "0"); + return; + } + + if (sym_flags & TRACE_ITER_SYM_OFFSET) + seq_print_sym_offset(m, "%s", ip); + else + seq_print_sym_short(m, "%s", ip); + + if (sym_flags & TRACE_ITER_SYM_ADDR) + seq_printf(m, " <" IP_FMT ">", ip); +} + +static void notrace print_lat_help_header(struct seq_file *m) +{ + seq_puts(m, "# _------=> CPU# \n"); + seq_puts(m, "# / _-----=> irqs-off \n"); + seq_puts(m, "# | / _----=> need-resched \n"); + seq_puts(m, "# || / _---=> hardirq/softirq \n"); + seq_puts(m, "# ||| / _--=> preempt-depth \n"); + seq_puts(m, "# |||| / \n"); + seq_puts(m, "# ||||| delay \n"); + seq_puts(m, "# cmd pid ||||| time | caller \n"); + seq_puts(m, "# \\ / ||||| \\ | / \n"); +} + +static void notrace print_func_help_header(struct seq_file *m) +{ + seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"); + seq_puts(m, "# | | | | |\n"); +} + + +static void notrace +print_trace_header(struct seq_file *m, struct trace_iterator *iter) +{ + unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); + struct trace_array *tr = iter->tr; + struct trace_array_cpu *data = tr->data[tr->cpu]; + struct tracer *type = current_trace; + unsigned long underruns = 0; + unsigned long underrun; + unsigned long entries = 0; + int cpu; + const char *name = "preemption"; + + if (type) + name = type->name; + + 
for_each_possible_cpu(cpu) { + if (tr->data[cpu]->trace) { + underrun = atomic_read(&tr->data[cpu]->underrun); + if (underrun) { + underruns += underrun; + entries += tr->entries; + } else + entries += tr->data[cpu]->trace_idx; + } + } + + seq_printf(m, "%s latency trace v1.1.5 on %s\n", + name, UTS_RELEASE); + seq_puts(m, "-----------------------------------" + "---------------------------------\n"); + seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d |" + " (M:%s VP:%d, KP:%d, SP:%d HP:%d", + data->saved_latency, + entries, + (entries + underruns), + tr->cpu, +#if defined(CONFIG_PREEMPT_NONE) + "server", +#elif defined(CONFIG_PREEMPT_VOLUNTARY) + "desktop", +#elif defined(CONFIG_PREEMPT_DESKTOP) + "preempt", +#else + "unknown", +#endif + /* These are reserved for later use */ + 0, 0, 0, 0); +#ifdef CONFIG_SMP + seq_printf(m, " #P:%d)\n", num_online_cpus()); +#else + seq_puts(m, ")\n"); +#endif + seq_puts(m, " -----------------\n"); + seq_printf(m, " | task: %.16s-%d " + "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n", + data->comm, data->pid, data->uid, data->nice, + data->policy, data->rt_priority); + seq_puts(m, " -----------------\n"); + + if (data->critical_start) { + seq_puts(m, " => started at: "); + seq_print_ip_sym(m, data->critical_start, sym_flags); + seq_puts(m, "\n => ended at: "); + seq_print_ip_sym(m, data->critical_end, sym_flags); + seq_puts(m, "\n"); + } + + seq_puts(m, "\n"); +} + +unsigned long nsecs_to_usecs(unsigned long nsecs) +{ + return nsecs / 1000; +} + +static void notrace +lat_print_generic(struct seq_file *m, struct trace_entry *entry, int cpu) +{ + int hardirq, softirq; + char *comm; + + comm = trace_find_cmdline(entry->pid); + + seq_printf(m, "%8.8s-%-5d ", comm, entry->pid); + seq_printf(m, "%d", cpu); + seq_printf(m, "%c%c", + (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.', + ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 
'N' : '.')); + + hardirq = entry->flags & TRACE_FLAG_HARDIRQ; + softirq = entry->flags & TRACE_FLAG_SOFTIRQ; + if (hardirq && softirq) + seq_putc(m, 'H'); + else { + if (hardirq) + seq_putc(m, 'h'); + else { + if (softirq) + seq_putc(m, 's'); + else + seq_putc(m, '.'); + } + } + + if (entry->preempt_count) + seq_printf(m, "%x", entry->preempt_count); + else + seq_puts(m, "."); +} + +unsigned long preempt_mark_thresh = 100; + +static void notrace +lat_print_timestamp(struct seq_file *m, unsigned long long abs_usecs, + unsigned long rel_usecs) +{ + seq_printf(m, " %4lldus", abs_usecs); + if (rel_usecs > preempt_mark_thresh) + seq_puts(m, "!: "); + else if (rel_usecs > 1) + seq_puts(m, "+: "); + else + seq_puts(m, " : "); +} + +static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; + +static void notrace +print_lat_fmt(struct seq_file *m, struct trace_iterator *iter, + unsigned int trace_idx, int cpu) +{ + unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); + struct trace_entry *next_entry = find_next_entry(iter, NULL); + unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE); + struct trace_entry *entry = iter->ent; + unsigned long abs_usecs; + unsigned long rel_usecs; + char *comm; + int S; + + if (!next_entry) + next_entry = entry; + rel_usecs = ns2usecs(next_entry->t - entry->t); + abs_usecs = ns2usecs(entry->t - iter->tr->time_start); + + if (verbose) { + comm = trace_find_cmdline(entry->pid); + seq_printf(m, "%16s %5d %d %d %08x %08x [%08lx]" + " %ld.%03ldms (+%ld.%03ldms): ", + comm, + entry->pid, cpu, entry->flags, + entry->preempt_count, trace_idx, + ns2usecs(entry->t), + abs_usecs/1000, + abs_usecs % 1000, rel_usecs/1000, rel_usecs % 1000); + } else { + lat_print_generic(m, entry, cpu); + lat_print_timestamp(m, abs_usecs, rel_usecs); + } + switch (entry->type) { + case TRACE_FN: + seq_print_ip_sym(m, entry->fn.ip, sym_flags); + seq_puts(m, " ("); + seq_print_ip_sym(m, entry->fn.parent_ip, sym_flags); + seq_puts(m, ")\n"); + break; + case TRACE_CTX: + S = entry->ctx.prev_state < sizeof(state_to_char) ? + state_to_char[entry->ctx.prev_state] : 'X'; + comm = trace_find_cmdline(entry->ctx.next_pid); + seq_printf(m, " %d:%d:%c --> %d:%d %s\n", + entry->ctx.prev_pid, + entry->ctx.prev_prio, + S, + entry->ctx.next_pid, + entry->ctx.next_prio, + comm); + break; + } +} + +static void notrace +print_trace_fmt(struct seq_file *m, struct trace_iterator *iter) +{ + unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); + struct trace_entry *entry = iter->ent; + unsigned long usec_rem; + unsigned long long t; + unsigned long secs; + char *comm; + int S; + + comm = trace_find_cmdline(iter->ent->pid); + + t = ns2usecs(entry->t); + usec_rem = do_div(t, 1000000ULL); + secs = (unsigned long)t; + + seq_printf(m, "%16s-%-5d ", comm, entry->pid); + seq_printf(m, "[%02d] ", iter->cpu); + seq_printf(m, "%5lu.%06lu: ", secs, usec_rem); + + switch (entry->type) { + case TRACE_FN: + seq_print_ip_sym(m, entry->fn.ip, sym_flags); + if ((sym_flags & TRACE_ITER_PRINT_PARENT) && + entry->fn.parent_ip) { + seq_printf(m, " <-"); + seq_print_ip_sym(m, entry->fn.parent_ip, sym_flags); + } + break; + case TRACE_CTX: + S = entry->ctx.prev_state < sizeof(state_to_char) ? 
+ state_to_char[entry->ctx.prev_state] : 'X'; + seq_printf(m, " %d:%d:%c ==> %d:%d\n", + entry->ctx.prev_pid, + entry->ctx.prev_prio, + S, + entry->ctx.next_pid, + entry->ctx.next_prio); + break; + } + seq_printf(m, "\n"); +} + +static int trace_empty(struct trace_iterator *iter) +{ + struct trace_array_cpu *data; + int cpu; + + for_each_possible_cpu(cpu) { + data = iter->tr->data[cpu]; + + if (data->trace && + (data->trace_idx || + atomic_read(&data->underrun))) + return 0; + } + return 1; +} + +static int s_show(struct seq_file *m, void *v) +{ + struct trace_iterator *iter = v; + + if (iter->ent == NULL) { + if (iter->tr) { + seq_printf(m, "# tracer: %s\n", iter->trace->name); + seq_puts(m, "#\n"); + } + if (iter->iter_flags & TRACE_FILE_LAT_FMT) { + /* print nothing if the buffers are empty */ + if (trace_empty(iter)) + return 0; + print_trace_header(m, iter); + if (!(trace_flags & TRACE_ITER_VERBOSE)) + print_lat_help_header(m); + } else { + if (!(trace_flags & TRACE_ITER_VERBOSE)) + print_func_help_header(m); + } + } else { + if (iter->iter_flags & TRACE_FILE_LAT_FMT) + print_lat_fmt(m, iter, iter->idx, iter->cpu); + else + print_trace_fmt(m, iter); + } + + return 0; +} + +static struct seq_operations tracer_seq_ops = { + .start = s_start, + .next = s_next, + .stop = s_stop, + .show = s_show, +}; + +static struct trace_iterator notrace * +__tracing_open(struct inode *inode, struct file *file, int *ret) +{ + struct trace_iterator *iter; + + iter = kzalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) { + *ret = -ENOMEM; + goto out; + } + + mutex_lock(&trace_types_lock); + if (current_trace && current_trace->print_max) + iter->tr = &max_tr; + else + iter->tr = inode->i_private; + iter->trace = current_trace; + iter->pos = -1; + + /* TODO stop tracer */ + *ret = seq_open(file, &tracer_seq_ops); + if (!*ret) { + struct seq_file *m = file->private_data; + m->private = iter; + + /* stop the trace while dumping */ + if (iter->tr->ctrl) + tracer_enabled = 0; + + if (iter->trace && iter->trace->open) + iter->trace->open(iter); + } else { + kfree(iter); + iter = NULL; + } + mutex_unlock(&trace_types_lock); + + out: + return iter; +} + +int tracing_open_generic(struct inode *inode, struct file *filp) +{ + filp->private_data = inode->i_private; + return 0; +} + +int tracing_release(struct inode *inode, struct file *file) +{ + struct seq_file *m = (struct seq_file *)file->private_data; + struct trace_iterator *iter = m->private; + + mutex_lock(&trace_types_lock); + if (iter->trace && iter->trace->close) + iter->trace->close(iter); + + /* reenable tracing if it was previously enabled */ + if (iter->tr->ctrl) + tracer_enabled = 1; + mutex_unlock(&trace_types_lock); + + seq_release(inode, file); + kfree(iter); + return 0; +} + +static int tracing_open(struct inode *inode, struct file *file) +{ + int ret; + + __tracing_open(inode, file, &ret); + + return ret; +} + +static int tracing_lt_open(struct inode *inode, struct file *file) +{ + struct trace_iterator *iter; + int ret; + + iter = __tracing_open(inode, file, &ret); + + if (!ret) + iter->iter_flags |= TRACE_FILE_LAT_FMT; + + return ret; +} + + +static void notrace * +t_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct tracer *t = m->private; + + (*pos)++; + + if (t) + t = t->next; + + m->private = t; + + return t; +} + +static void *t_start(struct seq_file *m, loff_t *pos) +{ + struct tracer *t = m->private; + loff_t l = 0; + + mutex_lock(&trace_types_lock); + for (; t && l < *pos; t = t_next(m, t, &l)) + ; + + return t; +} + +static void 
t_stop(struct seq_file *m, void *p) +{ + mutex_unlock(&trace_types_lock); +} + +static int t_show(struct seq_file *m, void *v) +{ + struct tracer *t = v; + + if (!t) + return 0; + + seq_printf(m, "%s", t->name); + if (t->next) + seq_putc(m, ' '); + else + seq_putc(m, '\n'); + + return 0; +} + +static struct seq_operations show_traces_seq_ops = { + .start = t_start, + .next = t_next, + .stop = t_stop, + .show = t_show, +}; + +static int show_traces_open(struct inode *inode, struct file *file) +{ + int ret; + + ret = seq_open(file, &show_traces_seq_ops); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = trace_types; + } + + return ret; +} + +static struct file_operations tracing_fops = { + .open = tracing_open, + .read = seq_read, + .llseek = seq_lseek, + .release = tracing_release, +}; + +static struct file_operations tracing_lt_fops = { + .open = tracing_lt_open, + .read = seq_read, + .llseek = seq_lseek, + .release = tracing_release, +}; + +static struct file_operations show_traces_fops = { + .open = show_traces_open, + .read = seq_read, + .release = seq_release, +}; + +static ssize_t +tracing_iter_ctrl_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char *buf; + int r = 0; + int len = 0; + int i; + + /* calulate max size */ + for (i = 0; trace_options[i]; i++) { + len += strlen(trace_options[i]); + len += 3; /* "no" and space */ + } + + /* +2 for \n and \0 */ + buf = kmalloc(len + 2, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + for (i = 0; trace_options[i]; i++) { + if (trace_flags & (1 << i)) + r += sprintf(buf + r, "%s ", trace_options[i]); + else + r += sprintf(buf + r, "no%s ", trace_options[i]); + } + + r += sprintf(buf + r, "\n"); + WARN_ON(r >= len + 2); + + r = simple_read_from_buffer(ubuf, cnt, ppos, + buf, r); + + kfree(buf); + + return r; +} + +static ssize_t +tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[64]; + char *cmp = buf; + int neg = 0; + int i; + + if (cnt > 63) + cnt = 63; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + if (strncmp(buf, "no", 2) == 0) { + neg = 1; + cmp += 2; + } + + for (i = 0; trace_options[i]; i++) { + int len = strlen(trace_options[i]); + + if (strncmp(cmp, trace_options[i], len) == 0) { + if (neg) + trace_flags &= ~(1 << i); + else + trace_flags |= (1 << i); + break; + } + } + + filp->f_pos += cnt; + + return cnt; +} + +static struct file_operations tracing_iter_fops = { + .open = tracing_open_generic, + .read = tracing_iter_ctrl_read, + .write = tracing_iter_ctrl_write, +}; + +static ssize_t +tracing_ctrl_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct trace_array *tr = filp->private_data; + char buf[64]; + int r; + + r = sprintf(buf, "%ld\n", tr->ctrl); + return simple_read_from_buffer(ubuf, cnt, ppos, + buf, r); +} + +static ssize_t +tracing_ctrl_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct trace_array *tr = filp->private_data; + long val; + char buf[64]; + + if (cnt > 63) + cnt = 63; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + val = simple_strtoul(buf, NULL, 10); + + val = !!val; + + mutex_lock(&trace_types_lock); + if (tr->ctrl ^ val) { + if (val) + tracer_enabled = 1; + else + tracer_enabled = 0; + + tr->ctrl = val; + + if (current_trace && current_trace->ctrl_update) + current_trace->ctrl_update(tr); + } + mutex_unlock(&trace_types_lock); + + filp->f_pos += cnt; + + return cnt; 
+} + +static ssize_t +tracing_set_trace_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[max_tracer_type_len+2]; + int r; + + mutex_lock(&trace_types_lock); + if (current_trace) + r = sprintf(buf, "%s\n", current_trace->name); + else + r = sprintf(buf, "\n"); + mutex_unlock(&trace_types_lock); + + return simple_read_from_buffer(ubuf, cnt, ppos, + buf, r); +} + +static ssize_t +tracing_set_trace_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct trace_array *tr = &global_trace; + struct tracer *t; + char buf[max_tracer_type_len+1]; + int i; + + if (cnt > max_tracer_type_len) + cnt = max_tracer_type_len; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + /* strip ending whitespace. */ + for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) + buf[i] = 0; + + mutex_lock(&trace_types_lock); + for (t = trace_types; t; t = t->next) { + if (strcmp(t->name, buf) == 0) + break; + } + if (!t || t == current_trace) + goto out; + + if (current_trace && current_trace->reset) + current_trace->reset(tr); + + current_trace = t; + if (t->init) + t->init(tr); + + out: + mutex_unlock(&trace_types_lock); + + filp->f_pos += cnt; + + return cnt; +} + +static ssize_t +tracing_max_lat_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + unsigned long *ptr = filp->private_data; + char buf[64]; + int r; + + r = snprintf(buf, 64, "%ld\n", + *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr)); + if (r > 64) + r = 64; + return simple_read_from_buffer(ubuf, cnt, ppos, + buf, r); +} + +static ssize_t +tracing_max_lat_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + long *ptr = filp->private_data; + long val; + char buf[64]; + + if (cnt > 63) + cnt = 63; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + val = simple_strtoul(buf, NULL, 10); + + *ptr = val * 1000; + + return cnt; +} + +static struct file_operations tracing_max_lat_fops = { + .open = tracing_open_generic, + .read = tracing_max_lat_read, + .write = tracing_max_lat_write, +}; + +static struct file_operations tracing_ctrl_fops = { + .open = tracing_open_generic, + .read = tracing_ctrl_read, + .write = tracing_ctrl_write, +}; + +static struct file_operations set_tracer_fops = { + .open = tracing_open_generic, + .read = tracing_set_trace_read, + .write = tracing_set_trace_write, +}; + +#ifdef CONFIG_DYNAMIC_FTRACE + +static ssize_t +tracing_read_long(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + unsigned long *p = filp->private_data; + char buf[64]; + int r; + + r = sprintf(buf, "%ld\n", *p); + return simple_read_from_buffer(ubuf, cnt, ppos, + buf, r); +} + +static struct file_operations tracing_read_long_fops = { + .open = tracing_open_generic, + .read = tracing_read_long, +}; +#endif + +static struct dentry *d_tracer; + +struct dentry *tracing_init_dentry(void) +{ + static int once; + + if (d_tracer) + return d_tracer; + + d_tracer = debugfs_create_dir("tracing", NULL); + + if (!d_tracer && !once) { + once = 1; + pr_warning("Could not create debugfs directory 'tracing'\n"); + return NULL; + } + + return d_tracer; +} + +static __init void tracer_init_debugfs(void) +{ + struct dentry *d_tracer; + struct dentry *entry; + + d_tracer = tracing_init_dentry(); + + entry = debugfs_create_file("tracing_enabled", 0644, d_tracer, + &global_trace, &tracing_ctrl_fops); + if (!entry) + pr_warning("Could not create debugfs 'tracing_enabled' entry\n"); + + entry = 
debugfs_create_file("iter_ctrl", 0644, d_tracer, + NULL, &tracing_iter_fops); + if (!entry) + pr_warning("Could not create debugfs 'iter_ctrl' entry\n"); + + entry = debugfs_create_file("latency_trace", 0444, d_tracer, + &global_trace, &tracing_lt_fops); + if (!entry) + pr_warning("Could not create debugfs 'latency_trace' entry\n"); + + entry = debugfs_create_file("trace", 0444, d_tracer, + &global_trace, &tracing_fops); + if (!entry) + pr_warning("Could not create debugfs 'trace' entry\n"); + + entry = debugfs_create_file("available_tracers", 0444, d_tracer, + &global_trace, &show_traces_fops); + if (!entry) + pr_warning("Could not create debugfs 'trace' entry\n"); + + entry = debugfs_create_file("current_tracer", 0444, d_tracer, + &global_trace, &set_tracer_fops); + if (!entry) + pr_warning("Could not create debugfs 'trace' entry\n"); + + entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer, + &tracing_max_latency, + &tracing_max_lat_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'tracing_max_latency' entry\n"); + + entry = debugfs_create_file("tracing_thresh", 0644, d_tracer, + &tracing_thresh, &tracing_max_lat_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'tracing_threash' entry\n"); + +#ifdef CONFIG_DYNAMIC_FTRACE + entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, + &ftrace_update_tot_cnt, + &tracing_read_long_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'dyn_ftrace_total_info' entry\n"); +#endif +} + +/* dummy trace to disable tracing */ +static struct tracer no_tracer __read_mostly = +{ + .name = "none", +}; + +static inline notrace int page_order(const unsigned long size) +{ + const unsigned long nr_pages = DIV_ROUND_UP(size, PAGE_SIZE); + return ilog2(roundup_pow_of_two(nr_pages)); +} + +__init static int tracer_alloc_buffers(void) +{ + const int order = page_order(trace_nr_entries * TRACE_ENTRY_SIZE); + const unsigned long size = (1UL << order) << PAGE_SHIFT; + struct trace_entry *array; + int i; + + for_each_possible_cpu(i) { + global_trace.data[i] = &per_cpu(global_trace_cpu, i); + max_tr.data[i] = &per_cpu(max_data, i); + + array = (struct trace_entry *) + __get_free_pages(GFP_KERNEL, order); + if (array == NULL) { + printk(KERN_ERR "tracer: failed to allocate" + " %ld bytes for trace buffer!\n", size); + goto free_buffers; + } + global_trace.data[i]->trace = array; + +/* Only allocate if we are actually using the max trace */ +#ifdef CONFIG_TRACER_MAX_TRACE + array = (struct trace_entry *) + __get_free_pages(GFP_KERNEL, order); + if (array == NULL) { + printk(KERN_ERR "wakeup tracer: failed to allocate" + " %ld bytes for trace buffer!\n", size); + goto free_buffers; + } + max_tr.data[i]->trace = array; +#endif + } + + /* + * Since we allocate by orders of pages, we may be able to + * round up a bit. 
+ */ + global_trace.entries = size / TRACE_ENTRY_SIZE; + max_tr.entries = global_trace.entries; + + pr_info("tracer: %ld bytes allocated for %ld", + size, trace_nr_entries); + pr_info(" entries of %ld bytes\n", (long)TRACE_ENTRY_SIZE); + pr_info(" actual entries %ld\n", global_trace.entries); + + tracer_init_debugfs(); + + trace_init_cmdlines(); + + register_tracer(&no_tracer); + current_trace = &no_tracer; + + return 0; + + free_buffers: + for (i-- ; i >= 0; i--) { + struct trace_array_cpu *data = global_trace.data[i]; + + if (data && data->trace) { + free_pages((unsigned long)data->trace, order); + data->trace = NULL; + } + +#ifdef CONFIG_TRACER_MAX_TRACE + data = max_tr.data[i]; + if (data && data->trace) { + free_pages((unsigned long)data->trace, order); + data->trace = NULL; + } +#endif + } + return -ENOMEM; +} + +device_initcall(tracer_alloc_buffers); -- cgit v1.2.3-70-g09d2 From 4c11d7aed389375253b59e2b1865eec96663c65d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: convert single large buffer into single pages. Allocating large buffers for the tracer may fail easily. This patch converts the buffer from a large ordered allocation to single pages. It uses the struct page LRU field to link the pages together. Later patches may also implement dynamic increasing and decreasing of the trace buffers. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 245 +++++++++++++++++++++++++++++++++++++++------------ kernel/trace/trace.h | 8 +- 2 files changed, 195 insertions(+), 58 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1b8eca7650d..d7ad030a4c4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -49,7 +50,7 @@ static struct trace_array max_tr; static DEFINE_PER_CPU(struct trace_array_cpu, max_data); static int tracer_enabled; -static unsigned long trace_nr_entries = 4096UL; +static unsigned long trace_nr_entries = 16384UL; static struct tracer *trace_types __read_mostly; static struct tracer *current_trace __read_mostly; @@ -57,6 +58,8 @@ static int max_tracer_type_len; static DEFINE_MUTEX(trace_types_lock); +#define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry)) + static int __init set_nr_entries(char *str) { if (!str) @@ -103,6 +106,7 @@ static const char *trace_options[] = { static unsigned trace_flags; +static DEFINE_SPINLOCK(ftrace_max_lock); /* * Copy the new maximum trace into the separate maximum-trace @@ -136,17 +140,23 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) { struct trace_array_cpu *data; void *save_trace; + struct list_head save_pages; int i; + WARN_ON_ONCE(!irqs_disabled()); + spin_lock(&ftrace_max_lock); /* clear out all the previous traces */ for_each_possible_cpu(i) { data = tr->data[i]; save_trace = max_tr.data[i]->trace; + save_pages = max_tr.data[i]->trace_pages; memcpy(max_tr.data[i], data, sizeof(*data)); data->trace = save_trace; + data->trace_pages = save_pages; } __update_max_tr(tr, tsk, cpu); + spin_unlock(&ftrace_max_lock); } /** @@ -160,16 +170,22 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) { struct trace_array_cpu *data = tr->data[cpu]; void *save_trace; + struct list_head save_pages; int i; + WARN_ON_ONCE(!irqs_disabled()); + spin_lock(&ftrace_max_lock); for_each_possible_cpu(i) tracing_reset(max_tr.data[i]); 
save_trace = max_tr.data[cpu]->trace; + save_pages = max_tr.data[cpu]->trace_pages; memcpy(max_tr.data[cpu], data, sizeof(*data)); data->trace = save_trace; + data->trace_pages = save_pages; __update_max_tr(tr, tsk, cpu); + spin_unlock(&ftrace_max_lock); } int register_tracer(struct tracer *type) @@ -236,7 +252,8 @@ void unregister_tracer(struct tracer *type) void notrace tracing_reset(struct trace_array_cpu *data) { data->trace_idx = 0; - atomic_set(&data->underrun, 0); + data->trace_current = data->trace; + data->trace_current_idx = 0; } #ifdef CONFIG_FTRACE @@ -367,21 +384,27 @@ tracing_get_trace_entry(struct trace_array *tr, { unsigned long idx, idx_next; struct trace_entry *entry; + struct page *page; + struct list_head *next; - idx = data->trace_idx; + data->trace_idx++; + idx = data->trace_current_idx; idx_next = idx + 1; - if (unlikely(idx_next >= tr->entries)) { - atomic_inc(&data->underrun); + entry = data->trace_current + idx * TRACE_ENTRY_SIZE; + + if (unlikely(idx_next >= ENTRIES_PER_PAGE)) { + page = virt_to_page(data->trace_current); + if (unlikely(&page->lru == data->trace_pages.prev)) + next = data->trace_pages.next; + else + next = page->lru.next; + page = list_entry(next, struct page, lru); + data->trace_current = page_address(page); idx_next = 0; } - data->trace_idx = idx_next; - - if (unlikely(idx_next != 0 && atomic_read(&data->underrun))) - atomic_inc(&data->underrun); - - entry = data->trace + idx * TRACE_ENTRY_SIZE; + data->trace_current_idx = idx_next; return entry; } @@ -442,21 +465,38 @@ enum trace_file_type { }; static struct trace_entry * -trace_entry_idx(struct trace_array *tr, unsigned long idx, int cpu) +trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data, + struct trace_iterator *iter, int cpu) { - struct trace_entry *array = tr->data[cpu]->trace; - unsigned long underrun; + struct page *page; + struct trace_entry *array; - if (idx >= tr->entries) + if (iter->next_idx[cpu] >= tr->entries || + iter->next_idx[cpu] >= data->trace_idx) return NULL; - underrun = atomic_read(&tr->data[cpu]->underrun); - if (underrun) - idx = ((underrun - 1) + idx) % tr->entries; - else if (idx >= tr->data[cpu]->trace_idx) - return NULL; + if (!iter->next_page[cpu]) { + /* + * Initialize. If the count of elements in + * this buffer is greater than the max entries + * we had an underrun. Which means we looped around. + * We can simply use the current pointer as our + * starting point. 
+ */ + if (data->trace_idx >= tr->entries) { + page = virt_to_page(data->trace_current); + iter->next_page[cpu] = &page->lru; + iter->next_page_idx[cpu] = data->trace_current_idx; + } else { + iter->next_page[cpu] = data->trace_pages.next; + iter->next_page_idx[cpu] = 0; + } + } - return &array[idx]; + page = list_entry(iter->next_page[cpu], struct page, lru); + array = page_address(page); + + return &array[iter->next_page_idx[cpu]]; } static struct notrace trace_entry * @@ -470,7 +510,7 @@ find_next_entry(struct trace_iterator *iter, int *ent_cpu) for_each_possible_cpu(cpu) { if (!tr->data[cpu]->trace) continue; - ent = trace_entry_idx(tr, iter->next_idx[cpu], cpu); + ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu); if (ent && (!next || (long)(next->idx - ent->idx) > 0)) { next = ent; @@ -492,8 +532,19 @@ static void *find_next_entry_inc(struct trace_iterator *iter) next = find_next_entry(iter, &next_cpu); if (next) { - iter->next_idx[next_cpu]++; iter->idx++; + iter->next_idx[next_cpu]++; + iter->next_page_idx[next_cpu]++; + if (iter->next_page_idx[next_cpu] >= ENTRIES_PER_PAGE) { + struct trace_array_cpu *data = iter->tr->data[next_cpu]; + + iter->next_page_idx[next_cpu] = 0; + iter->next_page[next_cpu] = + iter->next_page[next_cpu]->next; + if (iter->next_page[next_cpu] == &data->trace_pages) + iter->next_page[next_cpu] = + data->trace_pages.next; + } } iter->ent = next; iter->cpu = next_cpu; @@ -554,14 +605,16 @@ static void *s_start(struct seq_file *m, loff_t *pos) iter->cpu = 0; iter->idx = -1; - for (i = 0; i < NR_CPUS; i++) + for_each_possible_cpu(i) { iter->next_idx[i] = 0; + iter->next_page[i] = NULL; + } for (p = iter; p && l < *pos; p = s_next(m, p, &l)) ; } else { - l = *pos; + l = *pos - 1; p = s_next(m, p, &l); } @@ -654,9 +707,8 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) struct trace_array *tr = iter->tr; struct trace_array_cpu *data = tr->data[tr->cpu]; struct tracer *type = current_trace; - unsigned long underruns = 0; - unsigned long underrun; - unsigned long entries = 0; + unsigned long total = 0; + unsigned long entries = 0; int cpu; const char *name = "preemption"; @@ -665,11 +717,10 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) for_each_possible_cpu(cpu) { if (tr->data[cpu]->trace) { - underrun = atomic_read(&tr->data[cpu]->underrun); - if (underrun) { - underruns += underrun; + total += tr->data[cpu]->trace_idx; + if (tr->data[cpu]->trace_idx > tr->entries) entries += tr->entries; - } else + else entries += tr->data[cpu]->trace_idx; } } @@ -682,7 +733,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) " (M:%s VP:%d, KP:%d, SP:%d HP:%d", data->saved_latency, entries, - (entries + underruns), + total, tr->cpu, #if defined(CONFIG_PREEMPT_NONE) "server", @@ -882,8 +933,7 @@ static int trace_empty(struct trace_iterator *iter) data = iter->tr->data[cpu]; if (data->trace && - (data->trace_idx || - atomic_read(&data->underrun))) + data->trace_idx) return 0; } return 1; @@ -1464,42 +1514,109 @@ static struct tracer no_tracer __read_mostly = .name = "none", }; -static inline notrace int page_order(const unsigned long size) +static int trace_alloc_page(void) { - const unsigned long nr_pages = DIV_ROUND_UP(size, PAGE_SIZE); - return ilog2(roundup_pow_of_two(nr_pages)); + struct trace_array_cpu *data; + void *array; + struct page *page, *tmp; + LIST_HEAD(pages); + int i; + + /* first allocate a page for each CPU */ + for_each_possible_cpu(i) { + array = (void *)__get_free_page(GFP_KERNEL); + if 
(array == NULL) { + printk(KERN_ERR "tracer: failed to allocate page" + "for trace buffer!\n"); + goto free_pages; + } + + page = virt_to_page(array); + list_add(&page->lru, &pages); + +/* Only allocate if we are actually using the max trace */ +#ifdef CONFIG_TRACER_MAX_TRACE + array = (void *)__get_free_page(GFP_KERNEL); + if (array == NULL) { + printk(KERN_ERR "tracer: failed to allocate page" + "for trace buffer!\n"); + goto free_pages; + } + page = virt_to_page(array); + list_add(&page->lru, &pages); +#endif + } + + /* Now that we successfully allocate a page per CPU, add them */ + for_each_possible_cpu(i) { + data = global_trace.data[i]; + page = list_entry(pages.next, struct page, lru); + list_del(&page->lru); + list_add_tail(&page->lru, &data->trace_pages); + ClearPageLRU(page); + +#ifdef CONFIG_TRACER_MAX_TRACE + data = max_tr.data[i]; + page = list_entry(pages.next, struct page, lru); + list_del(&page->lru); + list_add_tail(&page->lru, &data->trace_pages); + SetPageLRU(page); +#endif + } + global_trace.entries += ENTRIES_PER_PAGE; + + return 0; + + free_pages: + list_for_each_entry_safe(page, tmp, &pages, lru) { + list_del(&page->lru); + __free_page(page); + } + return -ENOMEM; } __init static int tracer_alloc_buffers(void) { - const int order = page_order(trace_nr_entries * TRACE_ENTRY_SIZE); - const unsigned long size = (1UL << order) << PAGE_SHIFT; - struct trace_entry *array; + struct trace_array_cpu *data; + void *array; + struct page *page; + int pages = 0; int i; + /* Allocate the first page for all buffers */ for_each_possible_cpu(i) { - global_trace.data[i] = &per_cpu(global_trace_cpu, i); + data = global_trace.data[i] = &per_cpu(global_trace_cpu, i); max_tr.data[i] = &per_cpu(max_data, i); - array = (struct trace_entry *) - __get_free_pages(GFP_KERNEL, order); + array = (void *)__get_free_page(GFP_KERNEL); if (array == NULL) { - printk(KERN_ERR "tracer: failed to allocate" - " %ld bytes for trace buffer!\n", size); + printk(KERN_ERR "tracer: failed to allocate page" + "for trace buffer!\n"); goto free_buffers; } - global_trace.data[i]->trace = array; + data->trace = array; + + /* set the array to the list */ + INIT_LIST_HEAD(&data->trace_pages); + page = virt_to_page(array); + list_add(&page->lru, &data->trace_pages); + /* use the LRU flag to differentiate the two buffers */ + ClearPageLRU(page); /* Only allocate if we are actually using the max trace */ #ifdef CONFIG_TRACER_MAX_TRACE - array = (struct trace_entry *) - __get_free_pages(GFP_KERNEL, order); + array = (void *)__get_free_page(GFP_KERNEL); if (array == NULL) { - printk(KERN_ERR "wakeup tracer: failed to allocate" - " %ld bytes for trace buffer!\n", size); + printk(KERN_ERR "tracer: failed to allocate page" + "for trace buffer!\n"); goto free_buffers; } max_tr.data[i]->trace = array; + + INIT_LIST_HEAD(&max_tr.data[i]->trace_pages); + page = virt_to_page(array); + list_add(&page->lru, &max_tr.data[i]->trace_pages); + SetPageLRU(page); #endif } @@ -1507,11 +1624,18 @@ __init static int tracer_alloc_buffers(void) * Since we allocate by orders of pages, we may be able to * round up a bit. 
*/ - global_trace.entries = size / TRACE_ENTRY_SIZE; + global_trace.entries = ENTRIES_PER_PAGE; max_tr.entries = global_trace.entries; + pages++; + + while (global_trace.entries < trace_nr_entries) { + if (trace_alloc_page()) + break; + pages++; + } - pr_info("tracer: %ld bytes allocated for %ld", - size, trace_nr_entries); + pr_info("tracer: %d pages allocated for %ld", + pages, trace_nr_entries); pr_info(" entries of %ld bytes\n", (long)TRACE_ENTRY_SIZE); pr_info(" actual entries %ld\n", global_trace.entries); @@ -1526,17 +1650,26 @@ __init static int tracer_alloc_buffers(void) free_buffers: for (i-- ; i >= 0; i--) { + struct page *page, *tmp; struct trace_array_cpu *data = global_trace.data[i]; if (data && data->trace) { - free_pages((unsigned long)data->trace, order); + list_for_each_entry_safe(page, tmp, + &data->trace_pages, lru) { + list_del(&page->lru); + __free_page(page); + } data->trace = NULL; } #ifdef CONFIG_TRACER_MAX_TRACE data = max_tr.data[i]; if (data && data->trace) { - free_pages((unsigned long)data->trace, order); + list_for_each_entry_safe(page, tmp, + &data->trace_pages, lru) { + list_del(&page->lru); + __free_page(page); + } data->trace = NULL; } #endif diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 3173a93561d..83e257e3808 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -54,9 +54,11 @@ struct trace_entry { */ struct trace_array_cpu { void *trace; + void *trace_current; + unsigned trace_current_idx; + struct list_head trace_pages; unsigned long trace_idx; atomic_t disabled; - atomic_t underrun; unsigned long saved_latency; unsigned long critical_start; unsigned long critical_end; @@ -112,8 +114,10 @@ struct trace_iterator { unsigned long iter_flags; loff_t pos; unsigned long next_idx[NR_CPUS]; + struct list_head *next_page[NR_CPUS]; + unsigned next_page_idx[NR_CPUS]; + long idx; int cpu; - int idx; }; void notrace tracing_reset(struct trace_array_cpu *data); -- cgit v1.2.3-70-g09d2 From 18cef379d30f5ded20cc31d7f2d342639d39919d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:44 +0200 Subject: ftrace: don't use raw_local_irq_save/restore Using raw_local_irq_save/restore confuses lockdep. It's fine to use the normal ones. 
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 4 ++-- kernel/trace/trace_sched_switch.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d7ad030a4c4..9175ce91b8f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -269,7 +269,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) if (unlikely(!tracer_enabled)) return; - raw_local_irq_save(flags); + local_irq_save(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); @@ -278,7 +278,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) ftrace(tr, data, ip, parent_ip, flags); atomic_dec(&data->disabled); - raw_local_irq_restore(flags); + local_irq_restore(flags); } static struct ftrace_ops trace_ops __read_mostly = diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 3e4771d3b89..2715267be46 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -29,7 +29,7 @@ ctx_switch_func(struct task_struct *prev, struct task_struct *next) if (!tracer_enabled) return; - raw_local_irq_save(flags); + local_irq_save(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); @@ -38,7 +38,7 @@ ctx_switch_func(struct task_struct *prev, struct task_struct *next) tracing_sched_switch_trace(tr, data, prev, next, flags); atomic_dec(&data->disabled); - raw_local_irq_restore(flags); + local_irq_restore(flags); } void ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) -- cgit v1.2.3-70-g09d2 From 89b2f97819dd074297bbe3e19eaa4afcc98845ad Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:44 +0200 Subject: ftrace: fix updates to max trace This patch fixes some bugs to the updating of the max trace that was caused by implementing the new buffering. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 6 +++++- kernel/trace/trace_irqsoff.c | 27 +++++++++++++++------------ 2 files changed, 20 insertions(+), 13 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9175ce91b8f..95966561ba3 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -153,6 +153,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) memcpy(max_tr.data[i], data, sizeof(*data)); data->trace = save_trace; data->trace_pages = save_pages; + tracing_reset(data); } __update_max_tr(tr, tsk, cpu); @@ -183,6 +184,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) memcpy(max_tr.data[cpu], data, sizeof(*data)); data->trace = save_trace; data->trace_pages = save_pages; + tracing_reset(data); __update_max_tr(tr, tsk, cpu); spin_unlock(&ftrace_max_lock); @@ -877,6 +879,8 @@ print_lat_fmt(struct seq_file *m, struct trace_iterator *iter, entry->ctx.next_prio, comm); break; + default: + seq_printf(m, "Unknown type %d\n", entry->type); } } @@ -1625,7 +1629,6 @@ __init static int tracer_alloc_buffers(void) * round up a bit. 
*/ global_trace.entries = ENTRIES_PER_PAGE; - max_tr.entries = global_trace.entries; pages++; while (global_trace.entries < trace_nr_entries) { @@ -1633,6 +1636,7 @@ __init static int tracer_alloc_buffers(void) break; pages++; } + max_tr.entries = global_trace.entries; pr_info("tracer: %d pages allocated for %ld", pages, trace_nr_entries); diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index bd3f8819830..74165f611f3 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -23,6 +23,8 @@ static int tracer_enabled __read_mostly; static DEFINE_PER_CPU(int, tracing_cpu); +static DEFINE_SPINLOCK(max_trace_lock); + enum { TRACER_IRQS_OFF = (1 << 1), TRACER_PREEMPT_OFF = (1 << 2), @@ -126,7 +128,7 @@ check_critical_timing(struct trace_array *tr, int cpu) { unsigned long latency, t0, t1; - cycle_t T0, T1, T2, delta; + cycle_t T0, T1, delta; unsigned long flags; /* @@ -142,20 +144,18 @@ check_critical_timing(struct trace_array *tr, if (!report_latency(delta)) goto out; - ftrace(tr, data, CALLER_ADDR0, parent_ip, flags); - /* - * Update the timestamp, because the trace entry above - * might change it (it can only get larger so the latency - * is fair to be reported): - */ - T2 = now(cpu); + spin_lock(&max_trace_lock); - delta = T2-T0; + /* check if we are still the max latency */ + if (!report_latency(delta)) + goto out_unlock; + + ftrace(tr, data, CALLER_ADDR0, parent_ip, flags); latency = nsecs_to_usecs(delta); if (data->critical_sequence != max_sequence) - goto out; + goto out_unlock; tracing_max_latency = delta; t0 = nsecs_to_usecs(T0); @@ -189,6 +189,9 @@ check_critical_timing(struct trace_array *tr, max_sequence++; +out_unlock: + spin_unlock(&max_trace_lock); + out: data->critical_sequence = max_sequence; data->preempt_timestamp = now(cpu); @@ -366,14 +369,14 @@ void notrace trace_preempt_off(unsigned long a0, unsigned long a1) static void start_irqsoff_tracer(struct trace_array *tr) { - tracer_enabled = 1; register_ftrace_function(&trace_ops); + tracer_enabled = 1; } static void stop_irqsoff_tracer(struct trace_array *tr) { - unregister_ftrace_function(&trace_ops); tracer_enabled = 0; + unregister_ftrace_function(&trace_ops); } static void __irqsoff_tracer_init(struct trace_array *tr) -- cgit v1.2.3-70-g09d2 From 57f50be14d57b0dbf88dd019e7bb0ff3a3dc7b81 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:44 +0200 Subject: ftrace: fix max latency Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 95966561ba3..9bad2379115 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -69,6 +69,11 @@ static int __init set_nr_entries(char *str) } __setup("trace_entries=", set_nr_entries); +unsigned long nsecs_to_usecs(unsigned long nsecs) +{ + return nsecs / 1000; +} + enum trace_type { __TRACE_FIRST_TYPE = 0, @@ -733,7 +738,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) "---------------------------------\n"); seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d |" " (M:%s VP:%d, KP:%d, SP:%d HP:%d", - data->saved_latency, + nsecs_to_usecs(data->saved_latency), entries, total, tr->cpu, @@ -771,11 +776,6 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) seq_puts(m, "\n"); } -unsigned long nsecs_to_usecs(unsigned long nsecs) -{ - return nsecs / 1000; -} - static void notrace 
lat_print_generic(struct seq_file *m, struct trace_entry *entry, int cpu) { -- cgit v1.2.3-70-g09d2 From 60a11774b38fef1ab90b18c5353bd1c7c4d311c8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:44 +0200 Subject: ftrace: add self-tests Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/Kconfig | 13 ++ kernel/trace/trace.c | 63 +++++- kernel/trace/trace.h | 31 +++ kernel/trace/trace_functions.c | 3 + kernel/trace/trace_irqsoff.c | 9 + kernel/trace/trace_sched_switch.c | 3 + kernel/trace/trace_sched_wakeup.c | 3 + kernel/trace/trace_selftest.c | 415 ++++++++++++++++++++++++++++++++++++++ 8 files changed, 538 insertions(+), 2 deletions(-) create mode 100644 kernel/trace/trace_selftest.c (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index cad9db1dee0..3f73a171024 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -105,3 +105,16 @@ config DYNAMIC_FTRACE wakes up once a second and checks to see if any ftrace calls were made. If so, it runs stop_machine (stops all CPUS) and modifies the code to jump over the call to ftrace. + +config FTRACE_SELFTEST + bool + +config FTRACE_STARTUP_TEST + bool "Perform a startup test on ftrace" + depends on TRACING + select FTRACE_SELFTEST + help + This option performs a series of startup tests on ftrace. On bootup + a series of tests are made to verify that the tracer is + functioning properly. It will do tests on all the configured + tracers of ftrace. diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9bad2379115..f6d026f17db 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -32,6 +32,8 @@ unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; unsigned long __read_mostly tracing_thresh; +static int tracing_disabled = 1; + static long notrace ns2usecs(cycle_t nsec) { @@ -217,11 +219,48 @@ int register_tracer(struct tracer *type) } } +#ifdef CONFIG_FTRACE_STARTUP_TEST + if (type->selftest) { + struct tracer *saved_tracer = current_trace; + struct trace_array_cpu *data; + struct trace_array *tr = &global_trace; + int saved_ctrl = tr->ctrl; + int i; + /* + * Run a selftest on this tracer. + * Here we reset the trace buffer, and set the current + * tracer to be this tracer. The tracer can then run some + * internal tracing to verify that everything is in order. + * If we fail, we do not register this tracer. 
+ */ + for_each_possible_cpu(i) { + if (!data->trace) + continue; + data = tr->data[i]; + tracing_reset(data); + } + current_trace = type; + tr->ctrl = 0; + /* the test is responsible for initializing and enabling */ + pr_info("Testing tracer %s: ", type->name); + ret = type->selftest(type, tr); + /* the test is responsible for resetting too */ + current_trace = saved_tracer; + tr->ctrl = saved_ctrl; + if (ret) { + printk(KERN_CONT "FAILED!\n"); + goto out; + } + printk(KERN_CONT "PASSED\n"); + } +#endif + type->next = trace_types; trace_types = type; len = strlen(type->name); if (len > max_tracer_type_len) max_tracer_type_len = len; + out: mutex_unlock(&trace_types_lock); @@ -985,6 +1024,11 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) { struct trace_iterator *iter; + if (tracing_disabled) { + *ret = -ENODEV; + return NULL; + } + iter = kzalloc(sizeof(*iter), GFP_KERNEL); if (!iter) { *ret = -ENOMEM; @@ -1023,6 +1067,9 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) int tracing_open_generic(struct inode *inode, struct file *filp) { + if (tracing_disabled) + return -ENODEV; + filp->private_data = inode->i_private; return 0; } @@ -1128,6 +1175,9 @@ static int show_traces_open(struct inode *inode, struct file *file) { int ret; + if (tracing_disabled) + return -ENODEV; + ret = seq_open(file, &show_traces_seq_ops); if (!ret) { struct seq_file *m = file->private_data; @@ -1452,6 +1502,11 @@ struct dentry *tracing_init_dentry(void) return d_tracer; } +#ifdef CONFIG_FTRACE_SELFTEST +/* Let selftest have access to static functions in this file */ +#include "trace_selftest.c" +#endif + static __init void tracer_init_debugfs(void) { struct dentry *d_tracer; @@ -1585,6 +1640,7 @@ __init static int tracer_alloc_buffers(void) void *array; struct page *page; int pages = 0; + int ret = -ENOMEM; int i; /* Allocate the first page for all buffers */ @@ -1650,6 +1706,9 @@ __init static int tracer_alloc_buffers(void) register_tracer(&no_tracer); current_trace = &no_tracer; + /* All seems OK, enable tracing */ + tracing_disabled = 0; + return 0; free_buffers: @@ -1678,7 +1737,7 @@ __init static int tracer_alloc_buffers(void) } #endif } - return -ENOMEM; + return ret; } -device_initcall(tracer_alloc_buffers); +fs_initcall(tracer_alloc_buffers); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 83e257e3808..88edbf1f678 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -99,6 +99,10 @@ struct tracer { void (*start)(struct trace_iterator *iter); void (*stop)(struct trace_iterator *iter); void (*ctrl_update)(struct trace_array *tr); +#ifdef CONFIG_FTRACE_STARTUP_TEST + int (*selftest)(struct tracer *trace, + struct trace_array *tr); +#endif struct tracer *next; int print_max; }; @@ -185,4 +189,31 @@ extern int unregister_tracer_switch(struct tracer_switch_ops *ops); extern unsigned long ftrace_update_tot_cnt; #endif +#ifdef CONFIG_FTRACE_STARTUP_TEST +#ifdef CONFIG_FTRACE +extern int trace_selftest_startup_function(struct tracer *trace, + struct trace_array *tr); +#endif +#ifdef CONFIG_IRQSOFF_TRACER +extern int trace_selftest_startup_irqsoff(struct tracer *trace, + struct trace_array *tr); +#endif +#ifdef CONFIG_PREEMPT_TRACER +extern int trace_selftest_startup_preemptoff(struct tracer *trace, + struct trace_array *tr); +#endif +#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER) +extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace, + struct trace_array *tr); +#endif +#ifdef CONFIG_SCHED_TRACER +extern int 
trace_selftest_startup_wakeup(struct tracer *trace, + struct trace_array *tr); +#endif +#ifdef CONFIG_CONTEXT_SWITCH_TRACER +extern int trace_selftest_startup_sched_switch(struct tracer *trace, + struct trace_array *tr); +#endif +#endif /* CONFIG_FTRACE_STARTUP_TEST */ + #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 82988c5336e..5d8ad7a0960 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -63,6 +63,9 @@ static struct tracer function_trace __read_mostly = .init = function_trace_init, .reset = function_trace_reset, .ctrl_update = function_trace_ctrl_update, +#ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_function, +#endif }; static __init int init_function_trace(void) diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 74165f611f3..14183b8f79c 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -432,6 +432,9 @@ static struct tracer irqsoff_tracer __read_mostly = .close = irqsoff_tracer_close, .ctrl_update = irqsoff_tracer_ctrl_update, .print_max = 1, +#ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_irqsoff, +#endif }; # define register_irqsoff(trace) register_tracer(&trace) #else @@ -455,6 +458,9 @@ static struct tracer preemptoff_tracer __read_mostly = .close = irqsoff_tracer_close, .ctrl_update = irqsoff_tracer_ctrl_update, .print_max = 1, +#ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_preemptoff, +#endif }; # define register_preemptoff(trace) register_tracer(&trace) #else @@ -480,6 +486,9 @@ static struct tracer preemptirqsoff_tracer __read_mostly = .close = irqsoff_tracer_close, .ctrl_update = irqsoff_tracer_ctrl_update, .print_max = 1, +#ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_preemptirqsoff, +#endif }; # define register_preemptirqsoff(trace) register_tracer(&trace) diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 2715267be46..6c9284103a6 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -107,6 +107,9 @@ static struct tracer sched_switch_trace __read_mostly = .init = sched_switch_trace_init, .reset = sched_switch_trace_reset, .ctrl_update = sched_switch_trace_ctrl_update, +#ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_sched_switch, +#endif }; __init static int init_sched_switch_trace(void) diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 7c3ccefcf4c..3d10ff01f80 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -295,6 +295,9 @@ static struct tracer wakeup_tracer __read_mostly = .close = wakeup_tracer_close, .ctrl_update = wakeup_tracer_ctrl_update, .print_max = 1, +#ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_wakeup, +#endif }; __init static int init_wakeup_tracer(void) diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c new file mode 100644 index 00000000000..ef4d3cc009f --- /dev/null +++ b/kernel/trace/trace_selftest.c @@ -0,0 +1,415 @@ +/* Include in trace.c */ + +#include + +static inline int trace_valid_entry(struct trace_entry *entry) +{ + switch (entry->type) { + case TRACE_FN: + case TRACE_CTX: + return 1; + } + return 0; +} + +static int +trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data) +{ + struct page *page; + struct trace_entry *entries; + int idx = 0; + int i; + + page = 
list_entry(data->trace_pages.next, struct page, lru); + entries = page_address(page); + + if (data->trace != entries) + goto failed; + + /* + * The starting trace buffer always has valid elements, + * if any element exits. + */ + entries = data->trace; + + for (i = 0; i < tr->entries; i++) { + + if (i < data->trace_idx && + !trace_valid_entry(&entries[idx])) { + printk(KERN_CONT ".. invalid entry %d ", entries[idx].type); + goto failed; + } + + idx++; + if (idx >= ENTRIES_PER_PAGE) { + page = virt_to_page(entries); + if (page->lru.next == &data->trace_pages) { + if (i != tr->entries - 1) { + printk(KERN_CONT ".. entries buffer mismatch"); + goto failed; + } + } else { + page = list_entry(page->lru.next, struct page, lru); + entries = page_address(page); + } + idx = 0; + } + } + + page = virt_to_page(entries); + if (page->lru.next != &data->trace_pages) { + printk(KERN_CONT ".. too many entries"); + goto failed; + } + + return 0; + + failed: + printk(KERN_CONT ".. corrupted trace buffer .. "); + return -1; +} + +/* + * Test the trace buffer to see if all the elements + * are still sane. + */ +static int trace_test_buffer(struct trace_array *tr, unsigned long *count) +{ + unsigned long cnt = 0; + int cpu; + int ret = 0; + + for_each_possible_cpu(cpu) { + if (!tr->data[cpu]->trace) + continue; + + cnt += tr->data[cpu]->trace_idx; + printk("%d: count = %ld\n", cpu, cnt); + + ret = trace_test_buffer_cpu(tr, tr->data[cpu]); + if (ret) + break; + } + + if (count) + *count = cnt; + + return ret; +} + +#ifdef CONFIG_FTRACE +/* + * Simple verification test of ftrace function tracer. + * Enable ftrace, sleep 1/10 second, and then read the trace + * buffer to see if all is in order. + */ +int +trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) +{ + unsigned long count; + int ret; + + /* make sure functions have been recorded */ + ret = ftrace_force_update(); + if (ret) { + printk(KERN_CONT ".. ftraced failed .. "); + return ret; + } + + /* start the tracing */ + tr->ctrl = 1; + trace->init(tr); + /* Sleep for a 1/10 of a second */ + msleep(100); + /* stop the tracing. */ + tr->ctrl = 0; + trace->ctrl_update(tr); + /* check the trace buffer */ + ret = trace_test_buffer(tr, &count); + trace->reset(tr); + + if (!ret && !count) { + printk(KERN_CONT ".. no entries found .."); + ret = -1; + } + + return ret; +} +#endif /* CONFIG_FTRACE */ + +#ifdef CONFIG_IRQSOFF_TRACER +int +trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) +{ + unsigned long save_max = tracing_max_latency; + unsigned long count; + int ret; + + /* start the tracing */ + tr->ctrl = 1; + trace->init(tr); + /* reset the max latency */ + tracing_max_latency = 0; + /* disable interrupts for a bit */ + local_irq_disable(); + udelay(100); + local_irq_enable(); + /* stop the tracing. */ + tr->ctrl = 0; + trace->ctrl_update(tr); + /* check both trace buffers */ + ret = trace_test_buffer(tr, NULL); + if (!ret) + ret = trace_test_buffer(&max_tr, &count); + trace->reset(tr); + + if (!ret && !count) { + printk(KERN_CONT ".. 
no entries found .."); + ret = -1; + } + + tracing_max_latency = save_max; + + return ret; +} +#endif /* CONFIG_IRQSOFF_TRACER */ + +#ifdef CONFIG_PREEMPT_TRACER +int +trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr) +{ + unsigned long save_max = tracing_max_latency; + unsigned long count; + int ret; + + /* start the tracing */ + tr->ctrl = 1; + trace->init(tr); + /* reset the max latency */ + tracing_max_latency = 0; + /* disable preemption for a bit */ + preempt_disable(); + udelay(100); + preempt_enable(); + /* stop the tracing. */ + tr->ctrl = 0; + trace->ctrl_update(tr); + /* check both trace buffers */ + ret = trace_test_buffer(tr, NULL); + if (!ret) + ret = trace_test_buffer(&max_tr, &count); + trace->reset(tr); + + if (!ret && !count) { + printk(KERN_CONT ".. no entries found .."); + ret = -1; + } + + tracing_max_latency = save_max; + + return ret; +} +#endif /* CONFIG_PREEMPT_TRACER */ + +#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER) +int +trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *tr) +{ + unsigned long save_max = tracing_max_latency; + unsigned long count; + int ret; + + /* start the tracing */ + tr->ctrl = 1; + trace->init(tr); + + /* reset the max latency */ + tracing_max_latency = 0; + + /* disable preemption and interrupts for a bit */ + preempt_disable(); + local_irq_disable(); + udelay(100); + preempt_enable(); + /* reverse the order of preempt vs irqs */ + local_irq_enable(); + + /* stop the tracing. */ + tr->ctrl = 0; + trace->ctrl_update(tr); + /* check both trace buffers */ + ret = trace_test_buffer(tr, NULL); + if (ret) + goto out; + + ret = trace_test_buffer(&max_tr, &count); + if (ret) + goto out; + + if (!ret && !count) { + printk(KERN_CONT ".. no entries found .."); + ret = -1; + goto out; + } + + /* do the test by disabling interrupts first this time */ + tracing_max_latency = 0; + tr->ctrl = 1; + trace->ctrl_update(tr); + preempt_disable(); + local_irq_disable(); + udelay(100); + preempt_enable(); + /* reverse the order of preempt vs irqs */ + local_irq_enable(); + + /* stop the tracing. */ + tr->ctrl = 0; + trace->ctrl_update(tr); + /* check both trace buffers */ + ret = trace_test_buffer(tr, NULL); + if (ret) + goto out; + + ret = trace_test_buffer(&max_tr, &count); + + if (!ret && !count) { + printk(KERN_CONT ".. no entries found .."); + ret = -1; + goto out; + } + + out: + trace->reset(tr); + tracing_max_latency = save_max; + + return ret; +} +#endif /* CONFIG_IRQSOFF_TRACER && CONFIG_PREEMPT_TRACER */ + +#ifdef CONFIG_SCHED_TRACER +static int trace_wakeup_test_thread(void *data) +{ + struct completion *x = data; + + /* Make this a RT thread, doesn't need to be too high */ + + rt_mutex_setprio(current, MAX_RT_PRIO - 5); + + /* Make it know we have a new prio */ + complete(x); + + /* now go to sleep and let the test wake us up */ + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + + /* we are awake, now wait to disappear */ + while (!kthread_should_stop()) { + /* + * This is an RT task, do short sleeps to let + * others run. 
+ */ + msleep(100); + } + + return 0; +} + +int +trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) +{ + unsigned long save_max = tracing_max_latency; + struct task_struct *p; + struct completion isrt; + unsigned long count; + int ret; + + init_completion(&isrt); + + /* create a high prio thread */ + p = kthread_run(trace_wakeup_test_thread, &isrt, "ftrace-test"); + if (!IS_ERR(p)) { + printk(KERN_CONT "Failed to create ftrace wakeup test thread "); + return -1; + } + + /* make sure the thread is running at an RT prio */ + wait_for_completion(&isrt); + + /* start the tracing */ + tr->ctrl = 1; + trace->init(tr); + /* reset the max latency */ + tracing_max_latency = 0; + + /* sleep to let the RT thread sleep too */ + msleep(100); + + /* + * Yes this is slightly racy. It is possible that for some + * strange reason that the RT thread we created, did not + * call schedule for 100ms after doing the completion, + * and we do a wakeup on a task that already is awake. + * But that is extremely unlikely, and the worst thing that + * happens in such a case, is that we disable tracing. + * Honestly, if this race does happen something is horrible + * wrong with the system. + */ + + wake_up_process(p); + + /* stop the tracing. */ + tr->ctrl = 0; + trace->ctrl_update(tr); + /* check both trace buffers */ + ret = trace_test_buffer(tr, NULL); + if (!ret) + ret = trace_test_buffer(&max_tr, &count); + + + trace->reset(tr); + + tracing_max_latency = save_max; + + /* kill the thread */ + kthread_stop(p); + + if (!ret && !count) { + printk(KERN_CONT ".. no entries found .."); + ret = -1; + } + + return ret; +} +#endif /* CONFIG_SCHED_TRACER */ + +#ifdef CONFIG_CONTEXT_SWITCH_TRACER +int +trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr) +{ + unsigned long count; + int ret; + + /* start the tracing */ + tr->ctrl = 1; + trace->init(tr); + /* Sleep for a 1/10 of a second */ + msleep(100); + /* stop the tracing. */ + tr->ctrl = 0; + trace->ctrl_update(tr); + /* check the trace buffer */ + ret = trace_test_buffer(tr, &count); + trace->reset(tr); + + if (!ret && !count) { + printk(KERN_CONT ".. no entries found .."); + ret = -1; + } + + return ret; +} +#endif /* CONFIG_CONTEXT_SWITCH_TRACER */ + +#ifdef CONFIG_DYNAMIC_FTRACE +#endif /* CONFIG_DYNAMIC_FTRACE */ -- cgit v1.2.3-70-g09d2 From c7aafc549766b87819285d3480648fc652a47bc4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:45 +0200 Subject: ftrace: cleanups factor out code and clean it up. 
Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 2 +- kernel/trace/ftrace.c | 8 +-- kernel/trace/trace.c | 144 ++++++++++++++++++++++++-------------- kernel/trace/trace.h | 8 ++- kernel/trace/trace_irqsoff.c | 32 ++++----- kernel/trace/trace_sched_wakeup.c | 18 ++--- kernel/trace/trace_selftest.c | 25 ++++--- 7 files changed, 134 insertions(+), 103 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2c1670c6523..953a36d6a19 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -69,7 +69,7 @@ extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); #else -# define ftrace_force_update() do { } while (0) +# define ftrace_force_update() ({ 0; }) #endif static inline void tracer_disable(void) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4facf5ceeb8..6d4d2e86deb 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1152,10 +1152,10 @@ static int __init notrace ftrace_dynamic_init(void) core_initcall(ftrace_dynamic_init); #else -# define ftrace_startup() do { } while (0) -# define ftrace_shutdown() do { } while (0) -# define ftrace_startup_sysctl() do { } while (0) -# define ftrace_shutdown_sysctl() do { } while (0) +# define ftrace_startup() do { } while (0) +# define ftrace_shutdown() do { } while (0) +# define ftrace_startup_sysctl() do { } while (0) +# define ftrace_shutdown_sysctl() do { } while (0) #endif /* CONFIG_DYNAMIC_FTRACE */ /** diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f6d026f17db..61d2f022886 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -142,12 +142,59 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) tracing_record_cmdline(current); } +void check_pages(struct trace_array_cpu *data) +{ + struct page *page, *tmp; + + BUG_ON(data->trace_pages.next->prev != &data->trace_pages); + BUG_ON(data->trace_pages.prev->next != &data->trace_pages); + + list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) { + BUG_ON(page->lru.next->prev != &page->lru); + BUG_ON(page->lru.prev->next != &page->lru); + } +} + +void *head_page(struct trace_array_cpu *data) +{ + struct page *page; + + check_pages(data); + if (list_empty(&data->trace_pages)) + return NULL; + + page = list_entry(data->trace_pages.next, struct page, lru); + BUG_ON(&page->lru == &data->trace_pages); + + return page_address(page); +} + +notrace static void +flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2) +{ + struct list_head flip_pages; + + INIT_LIST_HEAD(&flip_pages); + + tr1->trace_current = NULL; + memcpy(&tr1->trace_current_idx, &tr2->trace_current_idx, + sizeof(struct trace_array_cpu) - + offsetof(struct trace_array_cpu, trace_current_idx)); + + check_pages(tr1); + check_pages(tr2); + list_splice_init(&tr1->trace_pages, &flip_pages); + list_splice_init(&tr2->trace_pages, &tr1->trace_pages); + list_splice_init(&flip_pages, &tr2->trace_pages); + BUG_ON(!list_empty(&flip_pages)); + check_pages(tr1); + check_pages(tr2); +} + notrace void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) { struct trace_array_cpu *data; - void *save_trace; - struct list_head save_pages; int i; WARN_ON_ONCE(!irqs_disabled()); @@ -155,11 +202,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) /* clear out all the previous traces */ for_each_possible_cpu(i) { data = tr->data[i]; - save_trace = max_tr.data[i]->trace; - save_pages = 
max_tr.data[i]->trace_pages; - memcpy(max_tr.data[i], data, sizeof(*data)); - data->trace = save_trace; - data->trace_pages = save_pages; + flip_trace(max_tr.data[i], data); tracing_reset(data); } @@ -177,8 +220,6 @@ notrace void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) { struct trace_array_cpu *data = tr->data[cpu]; - void *save_trace; - struct list_head save_pages; int i; WARN_ON_ONCE(!irqs_disabled()); @@ -186,11 +227,8 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) for_each_possible_cpu(i) tracing_reset(max_tr.data[i]); - save_trace = max_tr.data[cpu]->trace; - save_pages = max_tr.data[cpu]->trace_pages; - memcpy(max_tr.data[cpu], data, sizeof(*data)); - data->trace = save_trace; - data->trace_pages = save_pages; + flip_trace(max_tr.data[cpu], data); + tracing_reset(data); __update_max_tr(tr, tsk, cpu); @@ -234,9 +272,9 @@ int register_tracer(struct tracer *type) * If we fail, we do not register this tracer. */ for_each_possible_cpu(i) { - if (!data->trace) - continue; data = tr->data[i]; + if (!head_page(data)) + continue; tracing_reset(data); } current_trace = type; @@ -298,7 +336,7 @@ void unregister_tracer(struct tracer *type) void notrace tracing_reset(struct trace_array_cpu *data) { data->trace_idx = 0; - data->trace_current = data->trace; + data->trace_current = head_page(data); data->trace_current_idx = 0; } @@ -425,26 +463,31 @@ notrace void tracing_record_cmdline(struct task_struct *tsk) } static inline notrace struct trace_entry * -tracing_get_trace_entry(struct trace_array *tr, - struct trace_array_cpu *data) +tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data) { unsigned long idx, idx_next; struct trace_entry *entry; - struct page *page; struct list_head *next; + struct page *page; data->trace_idx++; idx = data->trace_current_idx; idx_next = idx + 1; + BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE); + entry = data->trace_current + idx * TRACE_ENTRY_SIZE; if (unlikely(idx_next >= ENTRIES_PER_PAGE)) { page = virt_to_page(data->trace_current); - if (unlikely(&page->lru == data->trace_pages.prev)) - next = data->trace_pages.next; - else - next = page->lru.next; + /* + * Roundrobin - but skip the head (which is not a real page): + */ + next = page->lru.next; + if (unlikely(next == &data->trace_pages)) + next = next->next; + BUG_ON(next == &data->trace_pages); + page = list_entry(next, struct page, lru); data->trace_current = page_address(page); idx_next = 0; @@ -456,18 +499,17 @@ tracing_get_trace_entry(struct trace_array *tr, } static inline notrace void -tracing_generic_entry_update(struct trace_entry *entry, - unsigned long flags) +tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags) { struct task_struct *tsk = current; unsigned long pc; pc = preempt_count(); - entry->idx = atomic_inc_return(&tracer_counter); - entry->preempt_count = pc & 0xff; - entry->pid = tsk->pid; - entry->t = now(raw_smp_processor_id()); + entry->idx = atomic_inc_return(&tracer_counter); + entry->preempt_count = pc & 0xff; + entry->pid = tsk->pid; + entry->t = now(raw_smp_processor_id()); entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | ((pc & SOFTIRQ_MASK) ? 
TRACE_FLAG_SOFTIRQ : 0) | @@ -476,16 +518,15 @@ tracing_generic_entry_update(struct trace_entry *entry, notrace void ftrace(struct trace_array *tr, struct trace_array_cpu *data, - unsigned long ip, unsigned long parent_ip, - unsigned long flags) + unsigned long ip, unsigned long parent_ip, unsigned long flags) { struct trace_entry *entry; - entry = tracing_get_trace_entry(tr, data); + entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, flags); - entry->type = TRACE_FN; - entry->fn.ip = ip; - entry->fn.parent_ip = parent_ip; + entry->type = TRACE_FN; + entry->fn.ip = ip; + entry->fn.parent_ip = parent_ip; } notrace void @@ -496,7 +537,7 @@ tracing_sched_switch_trace(struct trace_array *tr, { struct trace_entry *entry; - entry = tracing_get_trace_entry(tr, data); + entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, flags); entry->type = TRACE_CTX; entry->ctx.prev_pid = prev->pid; @@ -540,6 +581,8 @@ trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data, } page = list_entry(iter->next_page[cpu], struct page, lru); + BUG_ON(&data->trace_pages == &page->lru); + array = page_address(page); return &array[iter->next_page_idx[cpu]]; @@ -554,7 +597,7 @@ find_next_entry(struct trace_iterator *iter, int *ent_cpu) int cpu; for_each_possible_cpu(cpu) { - if (!tr->data[cpu]->trace) + if (!head_page(tr->data[cpu])) continue; ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu); if (ent && @@ -762,7 +805,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) name = type->name; for_each_possible_cpu(cpu) { - if (tr->data[cpu]->trace) { + if (head_page(tr->data[cpu])) { total += tr->data[cpu]->trace_idx; if (tr->data[cpu]->trace_idx > tr->entries) entries += tr->entries; @@ -975,8 +1018,7 @@ static int trace_empty(struct trace_iterator *iter) for_each_possible_cpu(cpu) { data = iter->tr->data[cpu]; - if (data->trace && - data->trace_idx) + if (head_page(data) && data->trace_idx) return 0; } return 1; @@ -1576,9 +1618,9 @@ static struct tracer no_tracer __read_mostly = static int trace_alloc_page(void) { struct trace_array_cpu *data; - void *array; struct page *page, *tmp; LIST_HEAD(pages); + void *array; int i; /* first allocate a page for each CPU */ @@ -1610,14 +1652,14 @@ static int trace_alloc_page(void) for_each_possible_cpu(i) { data = global_trace.data[i]; page = list_entry(pages.next, struct page, lru); - list_del(&page->lru); + list_del_init(&page->lru); list_add_tail(&page->lru, &data->trace_pages); ClearPageLRU(page); #ifdef CONFIG_TRACER_MAX_TRACE data = max_tr.data[i]; page = list_entry(pages.next, struct page, lru); - list_del(&page->lru); + list_del_init(&page->lru); list_add_tail(&page->lru, &data->trace_pages); SetPageLRU(page); #endif @@ -1628,7 +1670,7 @@ static int trace_alloc_page(void) free_pages: list_for_each_entry_safe(page, tmp, &pages, lru) { - list_del(&page->lru); + list_del_init(&page->lru); __free_page(page); } return -ENOMEM; @@ -1654,7 +1696,6 @@ __init static int tracer_alloc_buffers(void) "for trace buffer!\n"); goto free_buffers; } - data->trace = array; /* set the array to the list */ INIT_LIST_HEAD(&data->trace_pages); @@ -1671,7 +1712,6 @@ __init static int tracer_alloc_buffers(void) "for trace buffer!\n"); goto free_buffers; } - max_tr.data[i]->trace = array; INIT_LIST_HEAD(&max_tr.data[i]->trace_pages); page = virt_to_page(array); @@ -1716,24 +1756,22 @@ __init static int tracer_alloc_buffers(void) struct page *page, *tmp; struct trace_array_cpu *data = global_trace.data[i]; - if 
(data && data->trace) { + if (data) { list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) { - list_del(&page->lru); + list_del_init(&page->lru); __free_page(page); } - data->trace = NULL; } #ifdef CONFIG_TRACER_MAX_TRACE data = max_tr.data[i]; - if (data && data->trace) { + if (data) { list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) { - list_del(&page->lru); + list_del_init(&page->lru); __free_page(page); } - data->trace = NULL; } #endif } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 88edbf1f678..cc1d34b8b77 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -53,12 +53,12 @@ struct trace_entry { * the trace, etc.) */ struct trace_array_cpu { - void *trace; void *trace_current; - unsigned trace_current_idx; struct list_head trace_pages; - unsigned long trace_idx; atomic_t disabled; + /* these fields get copied into max-trace: */ + unsigned trace_current_idx; + unsigned long trace_idx; unsigned long saved_latency; unsigned long critical_start; unsigned long critical_end; @@ -216,4 +216,6 @@ extern int trace_selftest_startup_sched_switch(struct tracer *trace, #endif #endif /* CONFIG_FTRACE_STARTUP_TEST */ +extern void *head_page(struct trace_array_cpu *data); + #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 14183b8f79c..2dfebb67fdf 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -144,7 +144,7 @@ check_critical_timing(struct trace_array *tr, if (!report_latency(delta)) goto out; - spin_lock(&max_trace_lock); + spin_lock_irqsave(&max_trace_lock, flags); /* check if we are still the max latency */ if (!report_latency(delta)) @@ -165,32 +165,24 @@ check_critical_timing(struct trace_array *tr, update_max_tr_single(tr, current, cpu); - if (tracing_thresh) - printk(KERN_INFO "(%16s-%-5d|#%d): %lu us critical section " - "violates %lu us threshold.\n" - " => started at timestamp %lu: ", + if (tracing_thresh) { + printk(KERN_INFO "(%16s-%-5d|#%d):" + " %lu us critical section violates %lu us threshold.\n", current->comm, current->pid, raw_smp_processor_id(), - latency, nsecs_to_usecs(tracing_thresh), t0); - else + latency, nsecs_to_usecs(tracing_thresh)); + } else { printk(KERN_INFO "(%16s-%-5d|#%d):" - " new %lu us maximum-latency " - "critical section.\n => started at timestamp %lu: ", + " new %lu us maximum-latency critical section.\n", current->comm, current->pid, raw_smp_processor_id(), - latency, t0); - - print_symbol(KERN_CONT "<%s>\n", data->critical_start); - printk(KERN_CONT " => ended at timestamp %lu: ", t1); - print_symbol(KERN_CONT "<%s>\n", data->critical_end); - dump_stack(); - t1 = nsecs_to_usecs(now(cpu)); - printk(KERN_CONT " => dump-end timestamp %lu\n\n", t1); + latency); + } max_sequence++; out_unlock: - spin_unlock(&max_trace_lock); + spin_unlock_irqrestore(&max_trace_lock, flags); out: data->critical_sequence = max_sequence; @@ -216,7 +208,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) cpu = raw_smp_processor_id(); data = tr->data[cpu]; - if (unlikely(!data) || unlikely(!data->trace) || + if (unlikely(!data) || unlikely(!head_page(data)) || atomic_read(&data->disabled)) return; @@ -256,7 +248,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) cpu = raw_smp_processor_id(); data = tr->data[cpu]; - if (unlikely(!data) || unlikely(!data->trace) || + if (unlikely(!data) || unlikely(!head_page(data)) || !data->critical_start || atomic_read(&data->disabled)) return; diff --git 
a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 3d10ff01f80..688df965f3f 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -107,24 +107,18 @@ wakeup_sched_switch(struct task_struct *prev, struct task_struct *next) update_max_tr(tr, wakeup_task, wakeup_cpu); if (tracing_thresh) { - printk(KERN_INFO "(%16s-%-5d|#%d): %lu us wakeup latency " - "violates %lu us threshold.\n" - " => started at timestamp %lu: ", + printk(KERN_INFO "(%16s-%-5d|#%d):" + " %lu us wakeup latency violates %lu us threshold.\n", wakeup_task->comm, wakeup_task->pid, raw_smp_processor_id(), - latency, nsecs_to_usecs(tracing_thresh), t0); + latency, nsecs_to_usecs(tracing_thresh)); } else { - printk(KERN_INFO "(%16s-%-5d|#%d): new %lu us maximum " - "wakeup latency.\n => started at timestamp %lu: ", + printk(KERN_INFO "(%16s-%-5d|#%d):" + " new %lu us maximum wakeup latency.\n", wakeup_task->comm, wakeup_task->pid, - cpu, latency, t0); + cpu, latency); } - printk(KERN_CONT " ended at timestamp %lu: ", t1); - dump_stack(); - t1 = nsecs_to_usecs(now(cpu)); - printk(KERN_CONT " dump-end timestamp %lu\n\n", t1); - out_unlock: __wakeup_reset(tr); spin_unlock_irqrestore(&wakeup_lock, flags); diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index ef4d3cc009f..c01874c3b1f 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -1,6 +1,7 @@ /* Include in trace.c */ #include +#include static inline int trace_valid_entry(struct trace_entry *entry) { @@ -15,28 +16,29 @@ static inline int trace_valid_entry(struct trace_entry *entry) static int trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data) { - struct page *page; struct trace_entry *entries; + struct page *page; int idx = 0; int i; + BUG_ON(list_empty(&data->trace_pages)); page = list_entry(data->trace_pages.next, struct page, lru); entries = page_address(page); - if (data->trace != entries) + if (head_page(data) != entries) goto failed; /* * The starting trace buffer always has valid elements, - * if any element exits. + * if any element exists. */ - entries = data->trace; + entries = head_page(data); for (i = 0; i < tr->entries; i++) { - if (i < data->trace_idx && - !trace_valid_entry(&entries[idx])) { - printk(KERN_CONT ".. invalid entry %d ", entries[idx].type); + if (i < data->trace_idx && !trace_valid_entry(&entries[idx])) { + printk(KERN_CONT ".. invalid entry %d ", + entries[idx].type); goto failed; } @@ -80,11 +82,10 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) int ret = 0; for_each_possible_cpu(cpu) { - if (!tr->data[cpu]->trace) + if (!head_page(tr->data[cpu])) continue; cnt += tr->data[cpu]->trace_idx; - printk("%d: count = %ld\n", cpu, cnt); ret = trace_test_buffer_cpu(tr, tr->data[cpu]); if (ret) @@ -117,6 +118,8 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) } /* start the tracing */ + ftrace_enabled = 1; + tr->ctrl = 1; trace->init(tr); /* Sleep for a 1/10 of a second */ @@ -124,6 +127,8 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) /* stop the tracing. 
*/ tr->ctrl = 0; trace->ctrl_update(tr); + ftrace_enabled = 0; + /* check the trace buffer */ ret = trace_test_buffer(tr, &count); trace->reset(tr); @@ -328,7 +333,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) /* create a high prio thread */ p = kthread_run(trace_wakeup_test_thread, &isrt, "ftrace-test"); - if (!IS_ERR(p)) { + if (IS_ERR(p)) { printk(KERN_CONT "Failed to create ftrace wakeup test thread "); return -1; } -- cgit v1.2.3-70-g09d2 From 7bd2f24c2f769e3f8f1d4fc8b9fddf689825f6a7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:45 +0200 Subject: ftrace: add README make it easier for newbies to find their way around. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 61d2f022886..c736dd2e068 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -995,6 +995,7 @@ print_trace_fmt(struct seq_file *m, struct trace_iterator *iter) seq_printf(m, " <-"); seq_print_ip_sym(m, entry->fn.parent_ip, sym_flags); } + seq_printf(m, "\n"); break; case TRACE_CTX: S = entry->ctx.prev_state < sizeof(state_to_char) ? @@ -1007,7 +1008,6 @@ print_trace_fmt(struct seq_file *m, struct trace_iterator *iter) entry->ctx.next_prio); break; } - seq_printf(m, "\n"); } static int trace_empty(struct trace_iterator *iter) @@ -1332,6 +1332,39 @@ static struct file_operations tracing_iter_fops = { .write = tracing_iter_ctrl_write, }; +static const char readme_msg[] = + "tracing mini-HOWTO:\n\n" + "# mkdir /debug\n" + "# mount -t debugfs nodev /debug\n\n" + "# cat /debug/tracing/available_tracers\n" + "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n" + "# cat /debug/tracing/current_tracer\n" + "none\n" + "# echo sched_switch > /debug/tracing/current_tracer\n" + "# cat /debug/tracing/current_tracer\n" + "sched_switch\n" + "# cat /debug/tracing/iter_ctrl\n" + "noprint-parent nosym-offset nosym-addr noverbose\n" + "# echo print-parent > /debug/tracing/iter_ctrl\n" + "# echo 1 > /debug/tracing/tracing_enabled\n" + "# cat /debug/tracing/trace > /tmp/trace.txt\n" + "echo 0 > /debug/tracing/tracing_enabled\n" +; + +static ssize_t +tracing_readme_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return simple_read_from_buffer(ubuf, cnt, ppos, + readme_msg, strlen(readme_msg)); +} + +static struct file_operations tracing_readme_fops = { + .open = tracing_open_generic, + .read = tracing_readme_read, +}; + + static ssize_t tracing_ctrl_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) @@ -1598,6 +1631,11 @@ static __init void tracer_init_debugfs(void) if (!entry) pr_warning("Could not create debugfs " "'tracing_threash' entry\n"); + entry = debugfs_create_file("README", 0644, d_tracer, + NULL, &tracing_readme_fops); + if (!entry) + pr_warning("Could not create debugfs 'README' entry\n"); + #ifdef CONFIG_DYNAMIC_FTRACE entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, -- cgit v1.2.3-70-g09d2 From 4e3c3333f3bd7eedfd21b1155b3c7cd24fc7f754 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:45 +0200 Subject: ftrace: fix time offset fix time offset calculations and ordering, plus make code more consistent. 
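(Editorial note, not part of the patch: the changelog below is terse; the mechanism added further down in this diff keeps a per-CPU time offset so that merged output never appears to run backwards across CPUs. A minimal sketch of that idea, with illustrative names and a plain array standing in for the per-CPU trace data:)

	#define NR_CPUS_EXAMPLE 8

	/* prev_t: adjusted timestamp of the entry that was printed last */
	static unsigned long long cpu_time_offset[NR_CPUS_EXAMPLE];

	static unsigned long long adjust_stamp(int cpu, unsigned long long t,
					       unsigned long long prev_t)
	{
		t += cpu_time_offset[cpu];
		if (t < prev_t) {
			/* time appeared to go backwards between CPUs:
			 * grow this CPU's offset instead of warping back */
			cpu_time_offset[cpu] += prev_t - t;
			t = prev_t;
		}
		return t;
	}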
Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 75 ++++++++++++++++++++++++++++++++++++++-------------- kernel/trace/trace.h | 9 ++++++- 2 files changed, 63 insertions(+), 21 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c736dd2e068..8755a437048 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -120,7 +120,7 @@ static DEFINE_SPINLOCK(ftrace_max_lock); * structure. (this way the maximum trace is permanently saved, * for later retrieval via /debugfs/tracing/latency_trace) */ -static void notrace +static notrace void __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) { struct trace_array_cpu *data = tr->data[cpu]; @@ -333,15 +333,16 @@ void unregister_tracer(struct tracer *type) mutex_unlock(&trace_types_lock); } -void notrace tracing_reset(struct trace_array_cpu *data) +notrace void tracing_reset(struct trace_array_cpu *data) { data->trace_idx = 0; data->trace_current = head_page(data); data->trace_current_idx = 0; + data->time_offset = 0; } #ifdef CONFIG_FTRACE -static void notrace +static notrace void function_trace_call(unsigned long ip, unsigned long parent_ip) { struct trace_array *tr = &global_trace; @@ -398,7 +399,7 @@ static void trace_init_cmdlines(void) notrace void trace_stop_cmdline_recording(void); -static void notrace trace_save_cmdline(struct task_struct *tsk) +static notrace void trace_save_cmdline(struct task_struct *tsk) { unsigned map; unsigned idx; @@ -624,6 +625,7 @@ static void *find_next_entry_inc(struct trace_iterator *iter) iter->idx++; iter->next_idx[next_cpu]++; iter->next_page_idx[next_cpu]++; + if (iter->next_page_idx[next_cpu] >= ENTRIES_PER_PAGE) { struct trace_array_cpu *data = iter->tr->data[next_cpu]; @@ -635,19 +637,21 @@ static void *find_next_entry_inc(struct trace_iterator *iter) data->trace_pages.next; } } + iter->prev_ent = iter->ent; + iter->prev_cpu = iter->cpu; + iter->ent = next; iter->cpu = next_cpu; return next ? 
iter : NULL; } -static void notrace * -s_next(struct seq_file *m, void *v, loff_t *pos) +static notrace void *s_next(struct seq_file *m, void *v, loff_t *pos) { struct trace_iterator *iter = m->private; - void *ent; void *last_ent = iter->ent; int i = (int)*pos; + void *ent; (*pos)++; @@ -693,6 +697,8 @@ static void *s_start(struct seq_file *m, loff_t *pos) iter->ent = NULL; iter->cpu = 0; iter->idx = -1; + iter->prev_ent = NULL; + iter->prev_cpu = -1; for_each_possible_cpu(i) { iter->next_idx[i] = 0; @@ -752,7 +758,7 @@ seq_print_sym_offset(struct seq_file *m, const char *fmt, unsigned long address) # define IP_FMT "%016lx" #endif -static void notrace +static notrace void seq_print_ip_sym(struct seq_file *m, unsigned long ip, unsigned long sym_flags) { if (!ip) { @@ -769,7 +775,7 @@ seq_print_ip_sym(struct seq_file *m, unsigned long ip, unsigned long sym_flags) seq_printf(m, " <" IP_FMT ">", ip); } -static void notrace print_lat_help_header(struct seq_file *m) +static notrace void print_lat_help_header(struct seq_file *m) { seq_puts(m, "# _------=> CPU# \n"); seq_puts(m, "# / _-----=> irqs-off \n"); @@ -782,14 +788,14 @@ static void notrace print_lat_help_header(struct seq_file *m) seq_puts(m, "# \\ / ||||| \\ | / \n"); } -static void notrace print_func_help_header(struct seq_file *m) +static notrace void print_func_help_header(struct seq_file *m) { seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"); seq_puts(m, "# | | | | |\n"); } -static void notrace +static notrace void print_trace_header(struct seq_file *m, struct trace_iterator *iter) { unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); @@ -858,7 +864,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) seq_puts(m, "\n"); } -static void notrace +static notrace void lat_print_generic(struct seq_file *m, struct trace_entry *entry, int cpu) { int hardirq, softirq; @@ -895,7 +901,7 @@ lat_print_generic(struct seq_file *m, struct trace_entry *entry, int cpu) unsigned long preempt_mark_thresh = 100; -static void notrace +static notrace void lat_print_timestamp(struct seq_file *m, unsigned long long abs_usecs, unsigned long rel_usecs) { @@ -910,7 +916,7 @@ lat_print_timestamp(struct seq_file *m, unsigned long long abs_usecs, static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; -static void notrace +static notrace void print_lat_fmt(struct seq_file *m, struct trace_iterator *iter, unsigned int trace_idx, int cpu) { @@ -966,20 +972,50 @@ print_lat_fmt(struct seq_file *m, struct trace_iterator *iter, } } -static void notrace +static notrace void sync_time_offset(struct trace_iterator *iter) +{ + struct trace_array_cpu *prev_array, *array; + struct trace_entry *prev_entry, *entry; + cycle_t prev_t, t; + + entry = iter->ent; + prev_entry = iter->prev_ent; + if (!prev_entry) + return; + + prev_array = iter->tr->data[iter->prev_cpu]; + array = iter->tr->data[iter->cpu]; + + prev_t = prev_entry->t + prev_array->time_offset; + t = entry->t + array->time_offset; + + /* + * If time goes backwards we increase the offset of + * the current array, to not have observable time warps. 
+ * This will quickly synchronize the time offsets of + * multiple CPUs: + */ + if (t < prev_t) + array->time_offset += prev_t - t; +} + +static notrace void print_trace_fmt(struct seq_file *m, struct trace_iterator *iter) { unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); - struct trace_entry *entry = iter->ent; + struct trace_entry *entry; unsigned long usec_rem; unsigned long long t; unsigned long secs; char *comm; int S; + sync_time_offset(iter); + entry = iter->ent; + comm = trace_find_cmdline(iter->ent->pid); - t = ns2usecs(entry->t); + t = ns2usecs(entry->t + iter->tr->data[iter->cpu]->time_offset); usec_rem = do_div(t, 1000000ULL); secs = (unsigned long)t; @@ -1158,7 +1194,7 @@ static int tracing_lt_open(struct inode *inode, struct file *file) } -static void notrace * +static notrace void * t_next(struct seq_file *m, void *v, loff_t *pos) { struct tracer *t = m->private; @@ -1374,8 +1410,7 @@ tracing_ctrl_read(struct file *filp, char __user *ubuf, int r; r = sprintf(buf, "%ld\n", tr->ctrl); - return simple_read_from_buffer(ubuf, cnt, ppos, - buf, r); + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } static ssize_t diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cc1d34b8b77..5df8ff2b84a 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -56,6 +56,8 @@ struct trace_array_cpu { void *trace_current; struct list_head trace_pages; atomic_t disabled; + cycle_t time_offset; + /* these fields get copied into max-trace: */ unsigned trace_current_idx; unsigned long trace_idx; @@ -114,14 +116,19 @@ struct tracer { struct trace_iterator { struct trace_array *tr; struct tracer *trace; + struct trace_entry *ent; + int cpu; + + struct trace_entry *prev_ent; + int prev_cpu; + unsigned long iter_flags; loff_t pos; unsigned long next_idx[NR_CPUS]; struct list_head *next_page[NR_CPUS]; unsigned next_page_idx[NR_CPUS]; long idx; - int cpu; }; void notrace tracing_reset(struct trace_array_cpu *data); -- cgit v1.2.3-70-g09d2 From 1d4db00a5e30c7b8f8dc2a1b19e886fd942be143 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:45 +0200 Subject: ftrace: reset selftests The tests may leave stuff in the buffers. This resets the buffers after each test is run. If a test fails, it does not reset the buffer to avoid touching a buffer that is corrupted. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8755a437048..6580e7ed04b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -289,6 +289,13 @@ int register_tracer(struct tracer *type) printk(KERN_CONT "FAILED!\n"); goto out; } + /* Only reset on passing, to avoid touching corrupted buffers */ + for_each_possible_cpu(i) { + data = tr->data[i]; + if (!head_page(data)) + continue; + tracing_reset(data); + } printk(KERN_CONT "PASSED\n"); } #endif -- cgit v1.2.3-70-g09d2 From 93a588f459da134be6ab17c4104e28441beb0d22 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:45 +0200 Subject: ftrace: change buffers to producer consumer This patch changes the way the CPU trace buffers are handled. Instead of always starting from the trace page head, the logic is changed to a producer consumer logic. This allows for the buffers to be drained while they are alive. 
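(Editorial note, not part of the patch: a minimal sketch of the producer/consumer discipline described above, using a flat array where the real code walks the per-CPU page lists with trace_head/trace_tail; all names here are illustrative:)

	#define RING_ENTRIES 4096

	struct ring {
		unsigned head;	/* producer: next slot to write */
		unsigned tail;	/* consumer: next slot to read  */
		unsigned long slot[RING_ENTRIES];
	};

	static void ring_write(struct ring *r, unsigned long val)
	{
		r->slot[r->head] = val;
		r->head = (r->head + 1) % RING_ENTRIES;
		if (r->head == r->tail)		/* overrun: drop the oldest entry */
			r->tail = (r->tail + 1) % RING_ENTRIES;
	}

	static int ring_read(struct ring *r, unsigned long *val)
	{
		if (r->tail == r->head)		/* nothing left to drain */
			return 0;
		*val = r->slot[r->tail];
		r->tail = (r->tail + 1) % RING_ENTRIES;
		return 1;			/* consumed: will not be seen again */
	}

Because the reader only advances the tail, the buffer can be drained while the tracer keeps writing at the head, which is the property the later run-time reading patch relies on.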
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 103 ++++++++++++++++++++++++++++++--------------------- kernel/trace/trace.h | 6 ++- 2 files changed, 65 insertions(+), 44 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6580e7ed04b..777b859e1c2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -176,10 +176,9 @@ flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2) INIT_LIST_HEAD(&flip_pages); - tr1->trace_current = NULL; - memcpy(&tr1->trace_current_idx, &tr2->trace_current_idx, + memcpy(&tr1->trace_head_idx, &tr2->trace_head_idx, sizeof(struct trace_array_cpu) - - offsetof(struct trace_array_cpu, trace_current_idx)); + offsetof(struct trace_array_cpu, trace_head_idx)); check_pages(tr1); check_pages(tr2); @@ -228,7 +227,6 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) tracing_reset(max_tr.data[i]); flip_trace(max_tr.data[cpu], data); - tracing_reset(data); __update_max_tr(tr, tsk, cpu); @@ -343,9 +341,9 @@ void unregister_tracer(struct tracer *type) notrace void tracing_reset(struct trace_array_cpu *data) { data->trace_idx = 0; - data->trace_current = head_page(data); - data->trace_current_idx = 0; - data->time_offset = 0; + data->trace_head = data->trace_tail = head_page(data); + data->trace_head_idx = 0; + data->trace_tail_idx = 0; } #ifdef CONFIG_FTRACE @@ -470,38 +468,65 @@ notrace void tracing_record_cmdline(struct task_struct *tsk) trace_save_cmdline(tsk); } +static inline notrace struct list_head * +trace_next_list(struct trace_array_cpu *data, struct list_head *next) +{ + /* + * Roundrobin - but skip the head (which is not a real page): + */ + next = next->next; + if (unlikely(next == &data->trace_pages)) + next = next->next; + BUG_ON(next == &data->trace_pages); + + return next; +} + +static inline notrace void * +trace_next_page(struct trace_array_cpu *data, void *addr) +{ + struct list_head *next; + struct page *page; + + page = virt_to_page(addr); + + next = trace_next_list(data, &page->lru); + page = list_entry(next, struct page, lru); + + return page_address(page); +} + static inline notrace struct trace_entry * tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data) { unsigned long idx, idx_next; struct trace_entry *entry; - struct list_head *next; - struct page *page; data->trace_idx++; - idx = data->trace_current_idx; + idx = data->trace_head_idx; idx_next = idx + 1; BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE); - entry = data->trace_current + idx * TRACE_ENTRY_SIZE; + entry = data->trace_head + idx * TRACE_ENTRY_SIZE; if (unlikely(idx_next >= ENTRIES_PER_PAGE)) { - page = virt_to_page(data->trace_current); - /* - * Roundrobin - but skip the head (which is not a real page): - */ - next = page->lru.next; - if (unlikely(next == &data->trace_pages)) - next = next->next; - BUG_ON(next == &data->trace_pages); - - page = list_entry(next, struct page, lru); - data->trace_current = page_address(page); + data->trace_head = trace_next_page(data, data->trace_head); idx_next = 0; } - data->trace_current_idx = idx_next; + if (data->trace_head == data->trace_tail && + idx_next == data->trace_tail_idx) { + /* overrun */ + data->trace_tail_idx++; + if (data->trace_tail_idx >= ENTRIES_PER_PAGE) { + data->trace_tail = + trace_next_page(data, data->trace_tail); + data->trace_tail_idx = 0; + } + } + + data->trace_head_idx = idx_next; return entry; } @@ -571,21 +596,11 @@ 
trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data, return NULL; if (!iter->next_page[cpu]) { - /* - * Initialize. If the count of elements in - * this buffer is greater than the max entries - * we had an underrun. Which means we looped around. - * We can simply use the current pointer as our - * starting point. - */ - if (data->trace_idx >= tr->entries) { - page = virt_to_page(data->trace_current); - iter->next_page[cpu] = &page->lru; - iter->next_page_idx[cpu] = data->trace_current_idx; - } else { - iter->next_page[cpu] = data->trace_pages.next; - iter->next_page_idx[cpu] = 0; - } + /* Initialize the iterator for this cpu trace buffer */ + WARN_ON(!data->trace_tail); + page = virt_to_page(data->trace_tail); + iter->next_page[cpu] = &page->lru; + iter->next_page_idx[cpu] = data->trace_tail_idx; } page = list_entry(iter->next_page[cpu], struct page, lru); @@ -593,6 +608,12 @@ trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data, array = page_address(page); + /* Still possible to catch up to the tail */ + if (iter->next_idx[cpu] && array == data->trace_tail && + iter->next_page_idx[cpu] == data->trace_tail_idx) + return NULL; + + WARN_ON(iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE); return &array[iter->next_page_idx[cpu]]; } @@ -638,10 +659,8 @@ static void *find_next_entry_inc(struct trace_iterator *iter) iter->next_page_idx[next_cpu] = 0; iter->next_page[next_cpu] = - iter->next_page[next_cpu]->next; - if (iter->next_page[next_cpu] == &data->trace_pages) - iter->next_page[next_cpu] = - data->trace_pages.next; + trace_next_list(data, iter->next_page[next_cpu]); + } } iter->prev_ent = iter->ent; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 5df8ff2b84a..0ce127455b4 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -53,13 +53,15 @@ struct trace_entry { * the trace, etc.) */ struct trace_array_cpu { - void *trace_current; struct list_head trace_pages; atomic_t disabled; cycle_t time_offset; /* these fields get copied into max-trace: */ - unsigned trace_current_idx; + unsigned trace_head_idx; + unsigned trace_tail_idx; + void *trace_head; /* producer */ + void *trace_tail; /* consumer */ unsigned long trace_idx; unsigned long saved_latency; unsigned long critical_start; -- cgit v1.2.3-70-g09d2 From 214023c3d13a71525e463b5e54e360b926b4dc90 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:46 +0200 Subject: ftrace: add a buffer for output Later patches will need to print the same things as the seq output does. But those outputs will not use the seq utility. This patch adds a buffer to the iterator, that can be used by either the seq utility or other output. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 202 ++++++++++++++++++++++++++++++++++----------------- kernel/trace/trace.h | 6 ++ 2 files changed, 140 insertions(+), 68 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 777b859e1c2..d39f4faec7c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -169,6 +169,66 @@ void *head_page(struct trace_array_cpu *data) return page_address(page); } +static notrace int +trace_seq_printf(struct trace_seq *s, const char *fmt, ...) 
+{ + int len = (PAGE_SIZE - 1) - s->len; + va_list ap; + + if (!len) + return 0; + + va_start(ap, fmt); + len = vsnprintf(s->buffer + s->len, len, fmt, ap); + va_end(ap); + + s->len += len; + + return len; +} + +static notrace int +trace_seq_puts(struct trace_seq *s, const char *str) +{ + int len = strlen(str); + + if (len > ((PAGE_SIZE - 1) - s->len)) + len = (PAGE_SIZE - 1) - s->len; + + memcpy(s->buffer + s->len, str, len); + s->len += len; + + return len; +} + +static notrace int +trace_seq_putc(struct trace_seq *s, unsigned char c) +{ + if (s->len >= (PAGE_SIZE - 1)) + return 0; + + s->buffer[s->len++] = c; + + return 1; +} + +static notrace void +trace_seq_reset(struct trace_seq *s) +{ + s->len = 0; +} + +static notrace void +trace_print_seq(struct seq_file *m, struct trace_seq *s) +{ + int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; + + s->buffer[len] = 0; + seq_puts(m, s->buffer); + + trace_seq_reset(s); +} + notrace static void flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2) { @@ -756,25 +816,26 @@ static void s_stop(struct seq_file *m, void *p) } static void -seq_print_sym_short(struct seq_file *m, const char *fmt, unsigned long address) +seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address) { #ifdef CONFIG_KALLSYMS char str[KSYM_SYMBOL_LEN]; kallsyms_lookup(address, NULL, NULL, NULL, str); - seq_printf(m, fmt, str); + trace_seq_printf(s, fmt, str); #endif } static void -seq_print_sym_offset(struct seq_file *m, const char *fmt, unsigned long address) +seq_print_sym_offset(struct trace_seq *s, const char *fmt, + unsigned long address) { #ifdef CONFIG_KALLSYMS char str[KSYM_SYMBOL_LEN]; sprint_symbol(str, address); - seq_printf(m, fmt, str); + trace_seq_printf(s, fmt, str); #endif } @@ -785,20 +846,20 @@ seq_print_sym_offset(struct seq_file *m, const char *fmt, unsigned long address) #endif static notrace void -seq_print_ip_sym(struct seq_file *m, unsigned long ip, unsigned long sym_flags) +seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) { if (!ip) { - seq_printf(m, "0"); + trace_seq_printf(s, "0"); return; } if (sym_flags & TRACE_ITER_SYM_OFFSET) - seq_print_sym_offset(m, "%s", ip); + seq_print_sym_offset(s, "%s", ip); else - seq_print_sym_short(m, "%s", ip); + seq_print_sym_short(s, "%s", ip); if (sym_flags & TRACE_ITER_SYM_ADDR) - seq_printf(m, " <" IP_FMT ">", ip); + trace_seq_printf(s, " <" IP_FMT ">", ip); } static notrace void print_lat_help_header(struct seq_file *m) @@ -881,9 +942,11 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) if (data->critical_start) { seq_puts(m, " => started at: "); - seq_print_ip_sym(m, data->critical_start, sym_flags); + seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags); + trace_print_seq(m, &iter->seq); seq_puts(m, "\n => ended at: "); - seq_print_ip_sym(m, data->critical_end, sym_flags); + seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags); + trace_print_seq(m, &iter->seq); seq_puts(m, "\n"); } @@ -891,61 +954,61 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) } static notrace void -lat_print_generic(struct seq_file *m, struct trace_entry *entry, int cpu) +lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) { int hardirq, softirq; char *comm; comm = trace_find_cmdline(entry->pid); - seq_printf(m, "%8.8s-%-5d ", comm, entry->pid); - seq_printf(m, "%d", cpu); - seq_printf(m, "%c%c", - (entry->flags & TRACE_FLAG_IRQS_OFF) ? 
'd' : '.', - ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.')); + trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid); + trace_seq_printf(s, "%d", cpu); + trace_seq_printf(s, "%c%c", + (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.', + ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.')); hardirq = entry->flags & TRACE_FLAG_HARDIRQ; softirq = entry->flags & TRACE_FLAG_SOFTIRQ; if (hardirq && softirq) - seq_putc(m, 'H'); + trace_seq_putc(s, 'H'); else { if (hardirq) - seq_putc(m, 'h'); + trace_seq_putc(s, 'h'); else { if (softirq) - seq_putc(m, 's'); + trace_seq_putc(s, 's'); else - seq_putc(m, '.'); + trace_seq_putc(s, '.'); } } if (entry->preempt_count) - seq_printf(m, "%x", entry->preempt_count); + trace_seq_printf(s, "%x", entry->preempt_count); else - seq_puts(m, "."); + trace_seq_puts(s, "."); } unsigned long preempt_mark_thresh = 100; static notrace void -lat_print_timestamp(struct seq_file *m, unsigned long long abs_usecs, +lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs, unsigned long rel_usecs) { - seq_printf(m, " %4lldus", abs_usecs); + trace_seq_printf(s, " %4lldus", abs_usecs); if (rel_usecs > preempt_mark_thresh) - seq_puts(m, "!: "); + trace_seq_puts(s, "!: "); else if (rel_usecs > 1) - seq_puts(m, "+: "); + trace_seq_puts(s, "+: "); else - seq_puts(m, " : "); + trace_seq_puts(s, " : "); } static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; static notrace void -print_lat_fmt(struct seq_file *m, struct trace_iterator *iter, - unsigned int trace_idx, int cpu) +print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) { + struct trace_seq *s = &iter->seq; unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); struct trace_entry *next_entry = find_next_entry(iter, NULL); unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE); @@ -962,39 +1025,40 @@ print_lat_fmt(struct seq_file *m, struct trace_iterator *iter, if (verbose) { comm = trace_find_cmdline(entry->pid); - seq_printf(m, "%16s %5d %d %d %08x %08x [%08lx]" - " %ld.%03ldms (+%ld.%03ldms): ", - comm, - entry->pid, cpu, entry->flags, - entry->preempt_count, trace_idx, - ns2usecs(entry->t), - abs_usecs/1000, - abs_usecs % 1000, rel_usecs/1000, rel_usecs % 1000); + trace_seq_printf(s, "%16s %5d %d %d %08x %08x [%08lx]" + " %ld.%03ldms (+%ld.%03ldms): ", + comm, + entry->pid, cpu, entry->flags, + entry->preempt_count, trace_idx, + ns2usecs(entry->t), + abs_usecs/1000, + abs_usecs % 1000, rel_usecs/1000, + rel_usecs % 1000); } else { - lat_print_generic(m, entry, cpu); - lat_print_timestamp(m, abs_usecs, rel_usecs); + lat_print_generic(s, entry, cpu); + lat_print_timestamp(s, abs_usecs, rel_usecs); } switch (entry->type) { case TRACE_FN: - seq_print_ip_sym(m, entry->fn.ip, sym_flags); - seq_puts(m, " ("); - seq_print_ip_sym(m, entry->fn.parent_ip, sym_flags); - seq_puts(m, ")\n"); + seq_print_ip_sym(s, entry->fn.ip, sym_flags); + trace_seq_puts(s, " ("); + seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags); + trace_seq_puts(s, ")\n"); break; case TRACE_CTX: S = entry->ctx.prev_state < sizeof(state_to_char) ? 
state_to_char[entry->ctx.prev_state] : 'X'; comm = trace_find_cmdline(entry->ctx.next_pid); - seq_printf(m, " %d:%d:%c --> %d:%d %s\n", - entry->ctx.prev_pid, - entry->ctx.prev_prio, - S, - entry->ctx.next_pid, - entry->ctx.next_prio, - comm); + trace_seq_printf(s, " %d:%d:%c --> %d:%d %s\n", + entry->ctx.prev_pid, + entry->ctx.prev_prio, + S, + entry->ctx.next_pid, + entry->ctx.next_prio, + comm); break; default: - seq_printf(m, "Unknown type %d\n", entry->type); + trace_seq_printf(s, "Unknown type %d\n", entry->type); } } @@ -1026,8 +1090,9 @@ static notrace void sync_time_offset(struct trace_iterator *iter) } static notrace void -print_trace_fmt(struct seq_file *m, struct trace_iterator *iter) +print_trace_fmt(struct trace_iterator *iter) { + struct trace_seq *s = &iter->seq; unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); struct trace_entry *entry; unsigned long usec_rem; @@ -1045,29 +1110,29 @@ print_trace_fmt(struct seq_file *m, struct trace_iterator *iter) usec_rem = do_div(t, 1000000ULL); secs = (unsigned long)t; - seq_printf(m, "%16s-%-5d ", comm, entry->pid); - seq_printf(m, "[%02d] ", iter->cpu); - seq_printf(m, "%5lu.%06lu: ", secs, usec_rem); + trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); + trace_seq_printf(s, "[%02d] ", iter->cpu); + trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem); switch (entry->type) { case TRACE_FN: - seq_print_ip_sym(m, entry->fn.ip, sym_flags); + seq_print_ip_sym(s, entry->fn.ip, sym_flags); if ((sym_flags & TRACE_ITER_PRINT_PARENT) && entry->fn.parent_ip) { - seq_printf(m, " <-"); - seq_print_ip_sym(m, entry->fn.parent_ip, sym_flags); + trace_seq_printf(s, " <-"); + seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags); } - seq_printf(m, "\n"); + trace_seq_printf(s, "\n"); break; case TRACE_CTX: S = entry->ctx.prev_state < sizeof(state_to_char) ? state_to_char[entry->ctx.prev_state] : 'X'; - seq_printf(m, " %d:%d:%c ==> %d:%d\n", - entry->ctx.prev_pid, - entry->ctx.prev_prio, - S, - entry->ctx.next_pid, - entry->ctx.next_prio); + trace_seq_printf(s, " %d:%d:%c ==> %d:%d\n", + entry->ctx.prev_pid, + entry->ctx.prev_prio, + S, + entry->ctx.next_pid, + entry->ctx.next_prio); break; } } @@ -1108,9 +1173,10 @@ static int s_show(struct seq_file *m, void *v) } } else { if (iter->iter_flags & TRACE_FILE_LAT_FMT) - print_lat_fmt(m, iter, iter->idx, iter->cpu); + print_lat_fmt(iter, iter->idx, iter->cpu); else - print_trace_fmt(m, iter); + print_trace_fmt(iter); + trace_print_seq(m, &iter->seq); } return 0; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 0ce127455b4..f5b32ca0b45 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -111,11 +111,17 @@ struct tracer { int print_max; }; +struct trace_seq { + unsigned char buffer[PAGE_SIZE]; + unsigned int len; +}; + /* * Trace iterator - used by printout routines who present trace * results to users and which routines might sleep, etc: */ struct trace_iterator { + struct trace_seq seq; struct trace_array *tr; struct tracer *trace; -- cgit v1.2.3-70-g09d2 From b3806b4316306dc9c542eff6c23d7d42918f3504 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:46 +0200 Subject: ftrace: user run time file reading This patch creates a file called trace_pipe in the tracing debug directory. This file is a consumer of the trace buffers. This means that reads of this file consume the entries from the trace buffers so that they will not be read a second time, in contrast to the static buffers latency_trace and trace.
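As a rough illustration only (not part of this patch), a hypothetical userspace consumer might look like the sketch below; it assumes debugfs is mounted so that the file shows up at /debugfs/tracing/trace_pipe, matching the paths used elsewhere in this series:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[4096];
		ssize_t n;
		/* trace_pipe is a consuming reader: entries read here are
		 * removed from the trace and latency_trace views as well */
		int fd = open("/debugfs/tracing/trace_pipe", O_RDONLY);

		if (fd < 0)
			return 1;
		/* read() blocks while the buffers are empty and returns 0 (EOF)
		 * only once something has been read and tracing is disabled */
		while ((n = read(fd, buf, sizeof(buf))) > 0)
			fwrite(buf, 1, n, stdout);
		close(fd);
		return 0;
	}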
Reading from the trace_pipe will remove the entries from trace and latency_trace too. The advantage that trace_pipe has is that it can record live traces. It will block when there is nothing in the buffer, and read the entries as they are entered. An EOF happens when tracing is disabled (tracing_enabled = 0). Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 353 ++++++++++++++++++++++++++++++++++++++++++++------- kernel/trace/trace.h | 2 + 2 files changed, 309 insertions(+), 46 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d39f4faec7c..a40687a4413 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -174,15 +174,20 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) { int len = (PAGE_SIZE - 1) - s->len; va_list ap; + int ret; if (!len) return 0; va_start(ap, fmt); - len = vsnprintf(s->buffer + s->len, len, fmt, ap); + ret = vsnprintf(s->buffer + s->len, len, fmt, ap); va_end(ap); - s->len += len; + /* If we can't write it all, don't bother writing anything */ + if (ret > len) + return 0; + + s->len += ret; return len; } @@ -193,7 +198,7 @@ trace_seq_puts(struct trace_seq *s, const char *str) int len = strlen(str); if (len > ((PAGE_SIZE - 1) - s->len)) - len = (PAGE_SIZE - 1) - s->len; + return 0; memcpy(s->buffer + s->len, str, len); s->len += len; @@ -615,11 +620,13 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, { struct trace_entry *entry; + spin_lock(&data->lock); entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, flags); entry->type = TRACE_FN; entry->fn.ip = ip; entry->fn.parent_ip = parent_ip; + spin_unlock(&data->lock); } notrace void @@ -630,6 +637,7 @@ tracing_sched_switch_trace(struct trace_array *tr, { struct trace_entry *entry; + spin_lock(&data->lock); entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, flags); entry->type = TRACE_CTX; @@ -638,6 +646,7 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->ctx.prev_state = prev->state; entry->ctx.next_pid = next->pid; entry->ctx.next_prio = next->prio; + spin_unlock(&data->lock); } enum trace_file_type { @@ -652,7 +661,9 @@ trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data, struct trace_entry *array; if (iter->next_idx[cpu] >= tr->entries || - iter->next_idx[cpu] >= data->trace_idx) + iter->next_idx[cpu] >= data->trace_idx || + (data->trace_head == data->trace_tail && + data->trace_head_idx == data->trace_tail_idx)) return NULL; if (!iter->next_page[cpu]) { @@ -702,33 +713,57 @@ find_next_entry(struct trace_iterator *iter, int *ent_cpu) return next; } -static void *find_next_entry_inc(struct trace_iterator *iter) +static notrace void +trace_iterator_increment(struct trace_iterator *iter) { - struct trace_entry *next; - int next_cpu = -1; + iter->idx++; + iter->next_idx[iter->cpu]++; + iter->next_page_idx[iter->cpu]++; + if (iter->next_page_idx[iter->cpu] >= ENTRIES_PER_PAGE) { + struct trace_array_cpu *data = iter->tr->data[iter->cpu]; - next = find_next_entry(iter, &next_cpu); + iter->next_page_idx[iter->cpu] = 0; + iter->next_page[iter->cpu] = + trace_next_list(data, iter->next_page[iter->cpu]); + } +} - if (next) { - iter->idx++; - iter->next_idx[next_cpu]++; - iter->next_page_idx[next_cpu]++; +static notrace void +trace_consume(struct trace_iterator *iter) +{ + struct trace_array_cpu *data = iter->tr->data[iter->cpu]; + + data->trace_tail_idx++; + if (data->trace_tail_idx >= 
ENTRIES_PER_PAGE) { + data->trace_tail = trace_next_page(data, data->trace_tail); + data->trace_tail_idx = 0; + } - if (iter->next_page_idx[next_cpu] >= ENTRIES_PER_PAGE) { - struct trace_array_cpu *data = iter->tr->data[next_cpu]; + /* Check if we empty it, then reset the index */ + if (data->trace_head == data->trace_tail && + data->trace_head_idx == data->trace_tail_idx) + data->trace_idx = 0; - iter->next_page_idx[next_cpu] = 0; - iter->next_page[next_cpu] = - trace_next_list(data, iter->next_page[next_cpu]); + trace_iterator_increment(iter); +} + +static notrace void * +find_next_entry_inc(struct trace_iterator *iter) +{ + struct trace_entry *next; + int next_cpu = -1; + + next = find_next_entry(iter, &next_cpu); - } - } iter->prev_ent = iter->ent; iter->prev_cpu = iter->cpu; iter->ent = next; iter->cpu = next_cpu; + if (next) + trace_iterator_increment(iter); + return next ? iter : NULL; } @@ -815,7 +850,7 @@ static void s_stop(struct seq_file *m, void *p) mutex_unlock(&trace_types_lock); } -static void +static int seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address) { #ifdef CONFIG_KALLSYMS @@ -823,11 +858,12 @@ seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address) kallsyms_lookup(address, NULL, NULL, NULL, str); - trace_seq_printf(s, fmt, str); + return trace_seq_printf(s, fmt, str); #endif + return 1; } -static void +static int seq_print_sym_offset(struct trace_seq *s, const char *fmt, unsigned long address) { @@ -835,8 +871,9 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt, char str[KSYM_SYMBOL_LEN]; sprint_symbol(str, address); - trace_seq_printf(s, fmt, str); + return trace_seq_printf(s, fmt, str); #endif + return 1; } #ifndef CONFIG_64BIT @@ -845,21 +882,25 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt, # define IP_FMT "%016lx" #endif -static notrace void +static notrace int seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) { - if (!ip) { - trace_seq_printf(s, "0"); - return; - } + int ret; + + if (!ip) + return trace_seq_printf(s, "0"); if (sym_flags & TRACE_ITER_SYM_OFFSET) - seq_print_sym_offset(s, "%s", ip); + ret = seq_print_sym_offset(s, "%s", ip); else - seq_print_sym_short(s, "%s", ip); + ret = seq_print_sym_short(s, "%s", ip); + + if (!ret) + return 0; if (sym_flags & TRACE_ITER_SYM_ADDR) - trace_seq_printf(s, " <" IP_FMT ">", ip); + ret = trace_seq_printf(s, " <" IP_FMT ">", ip); + return ret; } static notrace void print_lat_help_header(struct seq_file *m) @@ -1089,7 +1130,7 @@ static notrace void sync_time_offset(struct trace_iterator *iter) array->time_offset += prev_t - t; } -static notrace void +static notrace int print_trace_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; @@ -1100,6 +1141,7 @@ print_trace_fmt(struct trace_iterator *iter) unsigned long secs; char *comm; int S; + int ret; sync_time_offset(iter); entry = iter->ent; @@ -1110,31 +1152,49 @@ print_trace_fmt(struct trace_iterator *iter) usec_rem = do_div(t, 1000000ULL); secs = (unsigned long)t; - trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); - trace_seq_printf(s, "[%02d] ", iter->cpu); - trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem); + ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); + if (!ret) + return 0; + ret = trace_seq_printf(s, "[%02d] ", iter->cpu); + if (!ret) + return 0; + ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem); + if (!ret) + return 0; switch (entry->type) { case TRACE_FN: - seq_print_ip_sym(s, entry->fn.ip, 
sym_flags); + ret = seq_print_ip_sym(s, entry->fn.ip, sym_flags); + if (!ret) + return 0; if ((sym_flags & TRACE_ITER_PRINT_PARENT) && entry->fn.parent_ip) { - trace_seq_printf(s, " <-"); - seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags); + ret = trace_seq_printf(s, " <-"); + if (!ret) + return 0; + ret = seq_print_ip_sym(s, entry->fn.parent_ip, + sym_flags); + if (!ret) + return 0; } - trace_seq_printf(s, "\n"); + ret = trace_seq_printf(s, "\n"); + if (!ret) + return 0; break; case TRACE_CTX: S = entry->ctx.prev_state < sizeof(state_to_char) ? state_to_char[entry->ctx.prev_state] : 'X'; - trace_seq_printf(s, " %d:%d:%c ==> %d:%d\n", - entry->ctx.prev_pid, - entry->ctx.prev_prio, - S, - entry->ctx.next_pid, - entry->ctx.next_prio); + ret = trace_seq_printf(s, " %d:%d:%c ==> %d:%d\n", + entry->ctx.prev_pid, + entry->ctx.prev_prio, + S, + entry->ctx.next_pid, + entry->ctx.next_prio); + if (!ret) + return 0; break; } + return 1; } static int trace_empty(struct trace_iterator *iter) @@ -1145,7 +1205,9 @@ print_trace_fmt(struct trace_iterator *iter) for_each_possible_cpu(cpu) { data = iter->tr->data[cpu]; - if (head_page(data) && data->trace_idx) + if (head_page(data) && data->trace_idx && + (data->trace_tail != data->trace_head || + data->trace_tail_idx != data->trace_head_idx)) return 0; } return 1; @@ -1645,6 +1707,192 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf, return cnt; } +static atomic_t tracing_reader; + +static int tracing_open_pipe(struct inode *inode, struct file *filp) +{ + struct trace_iterator *iter; + + if (tracing_disabled) + return -ENODEV; + + /* We only allow one reader of the pipe */ + if (atomic_inc_return(&tracing_reader) != 1) { + atomic_dec(&tracing_reader); + return -EBUSY; + } + + /* create a buffer to store the information to pass to userspace */ + iter = kzalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + + iter->tr = &global_trace; + + filp->private_data = iter; + + return 0; +} + +static int tracing_release_pipe(struct inode *inode, struct file *file) +{ + struct trace_iterator *iter = file->private_data; + + kfree(iter); + atomic_dec(&tracing_reader); + + return 0; +} + +/* + * Consumer reader. + */ +static ssize_t +tracing_read_pipe(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct trace_iterator *iter = filp->private_data; + struct trace_array_cpu *data; + static cpumask_t mask; + struct trace_entry *entry; + static int start; + unsigned long flags; + int read = 0; + int cpu; + int len; + int ret; + + /* return any leftover data */ + if (iter->seq.len > start) { + len = iter->seq.len - start; + if (cnt > len) + cnt = len; + ret = copy_to_user(ubuf, iter->seq.buffer + start, cnt); + if (ret) + cnt = -EFAULT; + + start += len; + + return cnt; + } + + trace_seq_reset(&iter->seq); + start = 0; + + while (trace_empty(iter)) { + /* + * This is a make-shift waitqueue. The reason we don't use + * an actual wait queue is because: + * 1) we only ever have one waiter + * 2) tracing traces all functions, and we don't want + * the overhead of calling wake_up and friends + * (and tracing them too) + * Anyway, this is really a very primitive wakeup. + */ + set_current_state(TASK_INTERRUPTIBLE); + iter->tr->waiter = current; + + /* sleep for one second, and try again. */ + schedule_timeout(HZ); + + iter->tr->waiter = NULL; + + if (signal_pending(current)) + return -EINTR; + + /* + * We block until we read something and tracing is disabled.
+ * We still block if tracing is disabled, but we have never + * read anything. This allows a user to cat this file, and + * then enable tracing. But after we have read something, + * we give an EOF when tracing is again disabled. + * + * iter->pos will be 0 if we haven't read anything. + */ + if (!tracer_enabled && iter->pos) + break; + + continue; + } + + /* stop when tracing is finished */ + if (trace_empty(iter)) + return 0; + + if (cnt >= PAGE_SIZE) + cnt = PAGE_SIZE - 1; + + memset(iter, 0, sizeof(*iter)); + iter->tr = &global_trace; + iter->pos = -1; + + /* + * We need to stop all tracing on all CPUs to read + * the next buffer. This is a bit expensive, but is + * not done often. We read all that we can, + * and then release the locks again. + */ + + cpus_clear(mask); + local_irq_save(flags); + for_each_possible_cpu(cpu) { + data = iter->tr->data[cpu]; + + if (!head_page(data) || !data->trace_idx) + continue; + + atomic_inc(&data->disabled); + spin_lock(&data->lock); + cpu_set(cpu, mask); + } + + while ((entry = find_next_entry(iter, &cpu))) { + + if (!entry) + break; + + iter->ent = entry; + iter->cpu = cpu; + + ret = print_trace_fmt(iter); + if (!ret) + break; + + trace_consume(iter); + + if (iter->seq.len >= cnt) + break; + + } + + for_each_possible_cpu(cpu) { + data = iter->tr->data[cpu]; + + if (!cpu_isset(cpu, mask)) + continue; + spin_unlock(&data->lock); + atomic_dec(&data->disabled); + } + local_irq_restore(flags); + + /* Now copy what we have to the user */ + read = iter->seq.len; + if (read > cnt) + read = cnt; + + ret = copy_to_user(ubuf, iter->seq.buffer, read); + + if (read < iter->seq.len) + start = read; + else + trace_seq_reset(&iter->seq); + + if (ret) + read = -EFAULT; + + return read; +} + static struct file_operations tracing_max_lat_fops = { .open = tracing_open_generic, .read = tracing_max_lat_read, @@ -1663,6 +1911,12 @@ static struct file_operations set_tracer_fops = { .write = tracing_set_trace_write, }; +static struct file_operations tracing_pipe_fops = { + .open = tracing_open_pipe, + .read = tracing_read_pipe, + .release = tracing_release_pipe, +}; + #ifdef CONFIG_DYNAMIC_FTRACE static ssize_t @@ -1763,6 +2017,11 @@ static __init void tracer_init_debugfs(void) if (!entry) pr_warning("Could not create debugfs 'README' entry\n"); + entry = debugfs_create_file("trace_pipe", 0644, d_tracer, + NULL, &tracing_pipe_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'trace_pipe' entry\n"); #ifdef CONFIG_DYNAMIC_FTRACE entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, @@ -1816,6 +2075,7 @@ static int trace_alloc_page(void) /* Now that we successfully allocate a page per CPU, add them */ for_each_possible_cpu(i) { data = global_trace.data[i]; + spin_lock_init(&data->lock); page = list_entry(pages.next, struct page, lru); list_del_init(&page->lru); list_add_tail(&page->lru, &data->trace_pages); @@ -1823,6 +2083,7 @@ static int trace_alloc_page(void) #ifdef CONFIG_TRACER_MAX_TRACE data = max_tr.data[i]; + spin_lock_init(&data->lock); page = list_entry(pages.next, struct page, lru); list_del_init(&page->lru); list_add_tail(&page->lru, &data->trace_pages); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f5b32ca0b45..29a7ea59de5 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -55,6 +55,7 @@ struct trace_entry { struct trace_array_cpu { struct list_head trace_pages; atomic_t disabled; + spinlock_t lock; cycle_t time_offset; /* these fields get copied into max-trace: */ @@ -88,6 +89,7 @@ struct
trace_array { long ctrl; int cpu; cycle_t time_start; + struct task_struct *waiter; struct trace_array_cpu *data[NR_CPUS]; }; -- cgit v1.2.3-70-g09d2 From d4c5a2f5870939d837293de87b41dda0012a4572 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:46 +0200 Subject: ftrace: fix locking we can hold all cpu trace buffer locks at once - put each into a separate lock class. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 8 +++----- kernel/trace/trace.h | 1 + 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a40687a4413..b3811ca7407 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1865,11 +1865,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, } - for_each_possible_cpu(cpu) { + for_each_cpu_mask(cpu, mask) { data = iter->tr->data[cpu]; - - if (!cpu_isset(cpu, mask)) - continue; spin_unlock(&data->lock); atomic_dec(&data->disabled); } @@ -2076,6 +2073,7 @@ static int trace_alloc_page(void) for_each_possible_cpu(i) { data = global_trace.data[i]; spin_lock_init(&data->lock); + lockdep_set_class(&data->lock, &data->lock_key); page = list_entry(pages.next, struct page, lru); list_del_init(&page->lru); list_add_tail(&page->lru, &data->trace_pages); @@ -2084,6 +2082,7 @@ static int trace_alloc_page(void) #ifdef CONFIG_TRACER_MAX_TRACE data = max_tr.data[i]; spin_lock_init(&data->lock); + lockdep_set_class(&data->lock, &data->lock_key); page = list_entry(pages.next, struct page, lru); list_del_init(&page->lru); list_add_tail(&page->lru, &data->trace_pages); @@ -2203,5 +2202,4 @@ __init static int tracer_alloc_buffers(void) } return ret; } - fs_initcall(tracer_alloc_buffers); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 29a7ea59de5..b0408356f0e 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -56,6 +56,7 @@ struct trace_array_cpu { struct list_head trace_pages; atomic_t disabled; spinlock_t lock; + struct lock_class_key lock_key; cycle_t time_offset; /* these fields get copied into max-trace: */ -- cgit v1.2.3-70-g09d2 From 4bf39a9411a4ce8712954e03a9bd1592ee345919 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:46 +0200 Subject: ftrace: cleanups no code changed. 
Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/ftrace.c | 5 +++- kernel/trace/trace.c | 72 +++++++++++++++++++++++++-------------------------- 2 files changed, 39 insertions(+), 38 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5e9389faaf7..97c40865a93 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -756,9 +756,11 @@ ftrace_avail_open(struct inode *inode, struct file *file) ret = seq_open(file, &show_ftrace_seq_ops); if (!ret) { struct seq_file *m = file->private_data; + m->private = iter; - } else + } else { kfree(iter); + } return ret; } @@ -770,6 +772,7 @@ int ftrace_avail_release(struct inode *inode, struct file *file) seq_release(inode, file); kfree(iter); + return 0; } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b3811ca7407..4550afda960 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1245,10 +1245,10 @@ static int s_show(struct seq_file *m, void *v) } static struct seq_operations tracer_seq_ops = { - .start = s_start, - .next = s_next, - .stop = s_stop, - .show = s_show, + .start = s_start, + .next = s_next, + .stop = s_stop, + .show = s_show, }; static struct trace_iterator notrace * @@ -1397,10 +1397,10 @@ static int t_show(struct seq_file *m, void *v) } static struct seq_operations show_traces_seq_ops = { - .start = t_start, - .next = t_next, - .stop = t_stop, - .show = t_show, + .start = t_start, + .next = t_next, + .stop = t_stop, + .show = t_show, }; static int show_traces_open(struct inode *inode, struct file *file) @@ -1420,17 +1420,17 @@ static int show_traces_open(struct inode *inode, struct file *file) } static struct file_operations tracing_fops = { - .open = tracing_open, - .read = seq_read, - .llseek = seq_lseek, - .release = tracing_release, + .open = tracing_open, + .read = seq_read, + .llseek = seq_lseek, + .release = tracing_release, }; static struct file_operations tracing_lt_fops = { - .open = tracing_lt_open, - .read = seq_read, - .llseek = seq_lseek, - .release = tracing_release, + .open = tracing_lt_open, + .read = seq_read, + .llseek = seq_lseek, + .release = tracing_release, }; static struct file_operations show_traces_fops = { @@ -1620,8 +1620,7 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf, r = sprintf(buf, "\n"); mutex_unlock(&trace_types_lock); - return simple_read_from_buffer(ubuf, cnt, ppos, - buf, r); + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } static ssize_t @@ -1680,8 +1679,7 @@ tracing_max_lat_read(struct file *filp, char __user *ubuf, *ptr == (unsigned long)-1 ? 
-1 : nsecs_to_usecs(*ptr)); if (r > 64) r = 64; - return simple_read_from_buffer(ubuf, cnt, ppos, - buf, r); + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } static ssize_t @@ -1891,27 +1889,27 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, } static struct file_operations tracing_max_lat_fops = { - .open = tracing_open_generic, - .read = tracing_max_lat_read, - .write = tracing_max_lat_write, + .open = tracing_open_generic, + .read = tracing_max_lat_read, + .write = tracing_max_lat_write, }; static struct file_operations tracing_ctrl_fops = { - .open = tracing_open_generic, - .read = tracing_ctrl_read, - .write = tracing_ctrl_write, + .open = tracing_open_generic, + .read = tracing_ctrl_read, + .write = tracing_ctrl_write, }; static struct file_operations set_tracer_fops = { - .open = tracing_open_generic, - .read = tracing_set_trace_read, - .write = tracing_set_trace_write, + .open = tracing_open_generic, + .read = tracing_set_trace_read, + .write = tracing_set_trace_write, }; static struct file_operations tracing_pipe_fops = { - .open = tracing_open_pipe, - .read = tracing_read_pipe, - .release = tracing_release_pipe, + .open = tracing_open_pipe, + .read = tracing_read_pipe, + .release = tracing_release_pipe, }; #ifdef CONFIG_DYNAMIC_FTRACE @@ -1925,13 +1923,13 @@ tracing_read_long(struct file *filp, char __user *ubuf, int r; r = sprintf(buf, "%ld\n", *p); - return simple_read_from_buffer(ubuf, cnt, ppos, - buf, r); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } static struct file_operations tracing_read_long_fops = { - .open = tracing_open_generic, - .read = tracing_read_long, + .open = tracing_open_generic, + .read = tracing_read_long, }; #endif @@ -2033,7 +2031,7 @@ static __init void tracer_init_debugfs(void) /* dummy trace to disable tracing */ static struct tracer no_tracer __read_mostly = { - .name = "none", + .name = "none", }; static int trace_alloc_page(void) -- cgit v1.2.3-70-g09d2 From 750ed1a40783432d0dcb0e6c2e813a12615d7664 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:46 +0200 Subject: ftrace: timestamp syncing, prepare rename and uninline now() to ftrace_now(). Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/ftrace.c | 4 ++-- kernel/trace/trace.c | 7 ++++++- kernel/trace/trace.h | 5 +---- kernel/trace/trace_functions.c | 2 +- kernel/trace/trace_irqsoff.c | 6 +++--- kernel/trace/trace_sched_switch.c | 2 +- kernel/trace/trace_sched_wakeup.c | 4 ++-- 7 files changed, 16 insertions(+), 14 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 97c40865a93..a15e068535f 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -531,7 +531,7 @@ static int notrace __ftrace_update_code(void *ignore) save_ftrace_enabled = ftrace_enabled; ftrace_enabled = 0; - start = now(raw_smp_processor_id()); + start = ftrace_now(raw_smp_processor_id()); ftrace_update_cnt = 0; /* No locks needed, the machine is stopped! 
*/ @@ -550,7 +550,7 @@ static int notrace __ftrace_update_code(void *ignore) } - stop = now(raw_smp_processor_id()); + stop = ftrace_now(raw_smp_processor_id()); ftrace_update_time = stop - start; ftrace_update_tot_cnt += ftrace_update_cnt; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4550afda960..e3778ab0d3f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -42,6 +42,11 @@ ns2usecs(cycle_t nsec) return nsec; } +notrace cycle_t ftrace_now(int cpu) +{ + return cpu_clock(cpu); +} + static atomic_t tracer_counter; static struct trace_array global_trace; @@ -607,7 +612,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags) entry->idx = atomic_inc_return(&tracer_counter); entry->preempt_count = pc & 0xff; entry->pid = tsk->pid; - entry->t = now(raw_smp_processor_id()); + entry->t = ftrace_now(raw_smp_processor_id()); entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b0408356f0e..30cad677e9d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -171,10 +171,7 @@ void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu); void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu); -static inline notrace cycle_t now(int cpu) -{ - return cpu_clock(cpu); -} +extern notrace cycle_t ftrace_now(int cpu); #ifdef CONFIG_SCHED_TRACER extern void notrace diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 5d8ad7a0960..e5d34b78fc9 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -20,7 +20,7 @@ static notrace void function_reset(struct trace_array *tr) { int cpu; - tr->time_start = now(tr->cpu); + tr->time_start = ftrace_now(tr->cpu); for_each_online_cpu(cpu) tracing_reset(tr->data[cpu]); diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 2dfebb67fdf..d2a6e6f1ad2 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -136,7 +136,7 @@ check_critical_timing(struct trace_array *tr, * as long as possible: */ T0 = data->preempt_timestamp; - T1 = now(cpu); + T1 = ftrace_now(cpu); delta = T1-T0; local_save_flags(flags); @@ -186,7 +186,7 @@ out_unlock: out: data->critical_sequence = max_sequence; - data->preempt_timestamp = now(cpu); + data->preempt_timestamp = ftrace_now(cpu); tracing_reset(data); ftrace(tr, data, CALLER_ADDR0, parent_ip, flags); } @@ -215,7 +215,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) atomic_inc(&data->disabled); data->critical_sequence = max_sequence; - data->preempt_timestamp = now(cpu); + data->preempt_timestamp = ftrace_now(cpu); data->critical_start = parent_ip ? 
: ip; tracing_reset(data); diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 6c9284103a6..8d656672da9 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -61,7 +61,7 @@ static notrace void sched_switch_reset(struct trace_array *tr) { int cpu; - tr->time_start = now(tr->cpu); + tr->time_start = ftrace_now(tr->cpu); for_each_online_cpu(cpu) tracing_reset(tr->data[cpu]); diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 688df965f3f..b7df825c3af 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -92,7 +92,7 @@ wakeup_sched_switch(struct task_struct *prev, struct task_struct *next) * as long as possible: */ T0 = data->preempt_timestamp; - T1 = now(cpu); + T1 = ftrace_now(cpu); delta = T1-T0; if (!report_latency(delta)) @@ -191,7 +191,7 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p, local_save_flags(flags); - tr->data[wakeup_cpu]->preempt_timestamp = now(cpu); + tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu); ftrace(tr, tr->data[wakeup_cpu], CALLER_ADDR1, CALLER_ADDR2, flags); out_locked: -- cgit v1.2.3-70-g09d2 From 53c37c17aafcf50f7c6fddaf01dda8f9d7e31ddf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:46 +0200 Subject: ftrace: fast, scalable, synchronized timestamps implement globally synchronized, fast and scalable time source for tracing. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e3778ab0d3f..9a931c7c2da 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -42,9 +42,61 @@ ns2usecs(cycle_t nsec) return nsec; } +static const int time_sync_freq_max = 128; +static const cycle_t time_sync_thresh = 100000; + +static DEFINE_PER_CPU(cycle_t, time_offset); +static DEFINE_PER_CPU(cycle_t, prev_cpu_time); +static DEFINE_PER_CPU(int, time_sync_count); +static DEFINE_PER_CPU(int, time_sync_freq); + +/* + * Global lock which we take every now and then to synchronize + * the CPUs time. 
This method is not warp-safe, but it's good + * enough to synchronize slowly diverging time sources and thus + * it's good enough for tracing: + */ +static DEFINE_SPINLOCK(time_sync_lock); +static cycle_t prev_global_time; + +static notrace cycle_t __ftrace_now_sync(cycles_t time, int cpu) +{ + unsigned long flags; + + spin_lock_irqsave(&time_sync_lock, flags); + + /* + * Update the synchronization frequency: + */ + if (per_cpu(time_sync_freq, cpu) < time_sync_freq_max) + per_cpu(time_sync_freq, cpu) *= 2; + per_cpu(time_sync_count, cpu) = per_cpu(time_sync_freq, cpu); + + if (time < prev_global_time) { + per_cpu(time_offset, cpu) += prev_global_time - time; + time = prev_global_time; + } else { + prev_global_time = time; + } + + spin_unlock_irqrestore(&time_sync_lock, flags); + + return time; +} + notrace cycle_t ftrace_now(int cpu) { - return cpu_clock(cpu); + cycle_t prev_cpu_time, time, delta_time; + + prev_cpu_time = per_cpu(prev_cpu_time, cpu); + time = sched_clock() + per_cpu(time_offset, cpu); + delta_time = time-prev_cpu_time; + + if (unlikely(delta_time > time_sync_thresh || + --per_cpu(time_sync_count, cpu) <= 0)) + time = __ftrace_now_sync(time, cpu); + + return time; } static atomic_t tracer_counter; -- cgit v1.2.3-70-g09d2 From cdd31cd2d7a0dcbec2cce3974f7129dd4fc8c879 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:46 +0200 Subject: ftrace: remove-idx-sync remove idx syncing - it's expensive on SMP. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 40 +++++----------------------------------- kernel/trace/trace.h | 2 -- 2 files changed, 5 insertions(+), 37 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9a931c7c2da..ce8ceb8aea6 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -99,7 +99,6 @@ notrace cycle_t ftrace_now(int cpu) return time; } -static atomic_t tracer_counter; static struct trace_array global_trace; static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu); @@ -661,7 +660,6 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags) pc = preempt_count(); - entry->idx = atomic_inc_return(&tracer_counter); entry->preempt_count = pc & 0xff; entry->pid = tsk->pid; entry->t = ftrace_now(raw_smp_processor_id()); @@ -757,8 +755,10 @@ find_next_entry(struct trace_iterator *iter, int *ent_cpu) if (!head_page(tr->data[cpu])) continue; ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu); - if (ent && - (!next || (long)(next->idx - ent->idx) > 0)) { + /* + * Pick the entry with the smallest timestamp: + */ + if (ent && (!next || ent->t < next->t)) { next = ent; next_cpu = cpu; } @@ -800,8 +800,6 @@ trace_consume(struct trace_iterator *iter) if (data->trace_head == data->trace_tail && data->trace_head_idx == data->trace_tail_idx) data->trace_idx = 0; - - trace_iterator_increment(iter); } static notrace void * @@ -1160,33 +1158,6 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) } } -static notrace void sync_time_offset(struct trace_iterator *iter) -{ - struct trace_array_cpu *prev_array, *array; - struct trace_entry *prev_entry, *entry; - cycle_t prev_t, t; - - entry = iter->ent; - prev_entry = iter->prev_ent; - if (!prev_entry) - return; - - prev_array = iter->tr->data[iter->prev_cpu]; - array = iter->tr->data[iter->cpu]; - - prev_t = prev_entry->t + prev_array->time_offset; - t = entry->t + array->time_offset; - - /* - * If time goes backwards we increase the offset of - * the current 
array, to not have observable time warps. - * This will quickly synchronize the time offsets of - * multiple CPUs: - */ - if (t < prev_t) - array->time_offset += prev_t - t; -} - static notrace int print_trace_fmt(struct trace_iterator *iter) { @@ -1200,12 +1171,11 @@ print_trace_fmt(struct trace_iterator *iter) int S; int ret; - sync_time_offset(iter); entry = iter->ent; comm = trace_find_cmdline(iter->ent->pid); - t = ns2usecs(entry->t + iter->tr->data[iter->cpu]->time_offset); + t = ns2usecs(entry->t); usec_rem = do_div(t, 1000000ULL); secs = (unsigned long)t; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 30cad677e9d..27fa2d06f49 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -38,7 +38,6 @@ struct trace_entry { char preempt_count; int pid; cycle_t t; - unsigned long idx; union { struct ftrace_entry fn; struct ctx_switch_entry ctx; @@ -57,7 +56,6 @@ struct trace_array_cpu { atomic_t disabled; spinlock_t lock; struct lock_class_key lock_key; - cycle_t time_offset; /* these fields get copied into max-trace: */ unsigned trace_head_idx; -- cgit v1.2.3-70-g09d2 From 8c523a9d82dbc4f3f7d972df8c0f1eacd83d0d55 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:46 +0200 Subject: ftrace: clean-up-pipe-iteration Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ce8ceb8aea6..42f1926acf7 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -770,12 +770,12 @@ find_next_entry(struct trace_iterator *iter, int *ent_cpu) return next; } -static notrace void -trace_iterator_increment(struct trace_iterator *iter) +static notrace void trace_iterator_increment(struct trace_iterator *iter) { iter->idx++; iter->next_idx[iter->cpu]++; iter->next_page_idx[iter->cpu]++; + if (iter->next_page_idx[iter->cpu] >= ENTRIES_PER_PAGE) { struct trace_array_cpu *data = iter->tr->data[iter->cpu]; @@ -785,8 +785,7 @@ trace_iterator_increment(struct trace_iterator *iter) } } -static notrace void -trace_consume(struct trace_iterator *iter) +static notrace void trace_consume(struct trace_iterator *iter) { struct trace_array_cpu *data = iter->tr->data[iter->cpu]; @@ -802,8 +801,7 @@ trace_consume(struct trace_iterator *iter) data->trace_idx = 0; } -static notrace void * -find_next_entry_inc(struct trace_iterator *iter) +static notrace void *find_next_entry_inc(struct trace_iterator *iter) { struct trace_entry *next; int next_cpu = -1; @@ -1871,14 +1869,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, cpu_set(cpu, mask); } - while ((entry = find_next_entry(iter, &cpu))) { - - if (!entry) - break; - - iter->ent = entry; - iter->cpu = cpu; - + while ((entry = find_next_entry_inc(iter)) != NULL) { ret = print_trace_fmt(iter); if (!ret) break; @@ -1887,7 +1878,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, if (iter->seq.len >= cnt) break; - } for_each_cpu_mask(cpu, mask) { -- cgit v1.2.3-70-g09d2 From f9896bf30928922a3913a3810a4ab7908da6cfe7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:47 +0200 Subject: ftrace: add raw output Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 63 +++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 55 insertions(+), 8 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 
42f1926acf7..bebd263f582 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -153,6 +153,7 @@ enum trace_iterator_flags { TRACE_ITER_SYM_OFFSET = 0x02, TRACE_ITER_SYM_ADDR = 0x04, TRACE_ITER_VERBOSE = 0x08, + TRACE_ITER_RAW = 0x10, }; #define TRACE_ITER_SYM_MASK \ @@ -164,6 +165,7 @@ static const char *trace_options[] = { "sym-offset", "sym-addr", "verbose", + "raw", NULL }; @@ -1099,7 +1101,7 @@ lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs, static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; -static notrace void +static notrace int print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) { struct trace_seq *s = &iter->seq; @@ -1154,10 +1156,10 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) default: trace_seq_printf(s, "Unknown type %d\n", entry->type); } + return 1; } -static notrace int -print_trace_fmt(struct trace_iterator *iter) +static notrace int print_trace_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); @@ -1222,6 +1224,43 @@ print_trace_fmt(struct trace_iterator *iter) return 1; } +static notrace int print_raw_fmt(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *entry; + int ret; + int S; + + entry = iter->ent; + + ret = trace_seq_printf(s, "%d %d %llu ", + entry->pid, iter->cpu, entry->t); + if (!ret) + return 0; + + switch (entry->type) { + case TRACE_FN: + ret = trace_seq_printf(s, "%x %x\n", + entry->fn.ip, entry->fn.parent_ip); + if (!ret) + return 0; + break; + case TRACE_CTX: + S = entry->ctx.prev_state < sizeof(state_to_char) ? + state_to_char[entry->ctx.prev_state] : 'X'; + ret = trace_seq_printf(s, "%d %d %c %d %d\n", + entry->ctx.prev_pid, + entry->ctx.prev_prio, + S, + entry->ctx.next_pid, + entry->ctx.next_prio); + if (!ret) + return 0; + break; + } + return 1; +} + static int trace_empty(struct trace_iterator *iter) { struct trace_array_cpu *data; @@ -1238,6 +1277,17 @@ static int trace_empty(struct trace_iterator *iter) return 1; } +static int print_trace_line(struct trace_iterator *iter) +{ + if (trace_flags & TRACE_ITER_RAW) + return print_raw_fmt(iter); + + if (iter->iter_flags & TRACE_FILE_LAT_FMT) + return print_lat_fmt(iter, iter->idx, iter->cpu); + + return print_trace_fmt(iter); +} + static int s_show(struct seq_file *m, void *v) { struct trace_iterator *iter = v; @@ -1259,10 +1309,7 @@ static int s_show(struct seq_file *m, void *v) print_func_help_header(m); } } else { - if (iter->iter_flags & TRACE_FILE_LAT_FMT) - print_lat_fmt(iter, iter->idx, iter->cpu); - else - print_trace_fmt(iter); + print_trace_line(iter); trace_print_seq(m, &iter->seq); } @@ -1870,7 +1917,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, } while ((entry = find_next_entry_inc(iter)) != NULL) { - ret = print_trace_fmt(iter); + ret = print_trace_line(iter); if (!ret) break; -- cgit v1.2.3-70-g09d2 From cb0f12aae8d085140d37ada351aa5a8e76c3f9b0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:47 +0200 Subject: ftrace: bin-output Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bebd263f582..d78cbc4fc51 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -154,6 +154,7 @@ enum trace_iterator_flags { 
TRACE_ITER_SYM_ADDR = 0x04, TRACE_ITER_VERBOSE = 0x08, TRACE_ITER_RAW = 0x10, + TRACE_ITER_BIN = 0x20, }; #define TRACE_ITER_SYM_MASK \ @@ -166,6 +167,7 @@ static const char *trace_options[] = { "sym-addr", "verbose", "raw", + "bin", NULL }; @@ -275,6 +277,18 @@ trace_seq_putc(struct trace_seq *s, unsigned char c) return 1; } +static notrace int +trace_seq_putmem(struct trace_seq *s, void *mem, size_t len) +{ + if (len > ((PAGE_SIZE - 1) - s->len)) + return 0; + + memcpy(s->buffer + s->len, mem, len); + s->len += len; + + return len; +} + static notrace void trace_seq_reset(struct trace_seq *s) { @@ -1261,6 +1275,39 @@ static notrace int print_raw_fmt(struct trace_iterator *iter) return 1; } +#define SEQ_PUT_FIELD_RET(s, x) \ +do { \ + if (!trace_seq_putmem(s, &(x), sizeof(x))) \ + return 0; \ +} while (0) + +static notrace int print_bin_fmt(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *entry; + + entry = iter->ent; + + SEQ_PUT_FIELD_RET(s, entry->pid); + SEQ_PUT_FIELD_RET(s, entry->cpu); + SEQ_PUT_FIELD_RET(s, entry->t); + + switch (entry->type) { + case TRACE_FN: + SEQ_PUT_FIELD_RET(s, entry->fn.ip); + SEQ_PUT_FIELD_RET(s, entry->fn.parent_ip); + break; + case TRACE_CTX: + SEQ_PUT_FIELD_RET(s, entry->ctx.prev_pid); + SEQ_PUT_FIELD_RET(s, entry->ctx.prev_prio); + SEQ_PUT_FIELD_RET(s, entry->ctx.prev_state); + SEQ_PUT_FIELD_RET(s, entry->ctx.next_pid); + SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio); + break; + } + return 1; +} + static int trace_empty(struct trace_iterator *iter) { struct trace_array_cpu *data; @@ -1279,6 +1326,9 @@ static int trace_empty(struct trace_iterator *iter) static int print_trace_line(struct trace_iterator *iter) { + if (trace_flags & TRACE_ITER_BIN) + return print_bin_fmt(iter); + if (trace_flags & TRACE_ITER_RAW) return print_raw_fmt(iter); -- cgit v1.2.3-70-g09d2 From f0a920d5752e1788c0cba2add103076bcc0f7a49 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:47 +0200 Subject: ftrace: add trace_special() for ad-hoc tracing. 
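A minimal sketch of the kind of ad-hoc call site this enables (hypothetical, not part of this patch; it assumes the caller already has the trace_array and its per-CPU data in hand, as the built-in tracers do):

	/* hypothetical helper: record three scalars of interest as one
	 * TRACE_SPECIAL entry on the current CPU */
	static notrace void
	my_trace_values(struct trace_array *tr, unsigned long a,
			unsigned long b, unsigned long c)
	{
		struct trace_array_cpu *data = tr->data[raw_smp_processor_id()];

		if (likely(!atomic_read(&data->disabled)))
			trace_special(tr, data, a, b, c);
	}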
Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ kernel/trace/trace.h | 15 +++++++++++++++ 2 files changed, 59 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d78cbc4fc51..fa13059eb46 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -137,6 +137,7 @@ enum trace_type { TRACE_FN, TRACE_CTX, + TRACE_SPECIAL, __TRACE_LAST_TYPE }; @@ -700,6 +701,22 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, spin_unlock(&data->lock); } +notrace void +trace_special(struct trace_array *tr, struct trace_array_cpu *data, + unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ + struct trace_entry *entry; + + spin_lock(&data->lock); + entry = tracing_get_trace_entry(tr, data); + tracing_generic_entry_update(entry, 0); + entry->type = TRACE_SPECIAL; + entry->special.arg1 = arg1; + entry->special.arg2 = arg2; + entry->special.arg3 = arg3; + spin_unlock(&data->lock); +} + notrace void tracing_sched_switch_trace(struct trace_array *tr, struct trace_array_cpu *data, @@ -1167,6 +1184,12 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) entry->ctx.next_prio, comm); break; + case TRACE_SPECIAL: + trace_seq_printf(s, " %lx %lx %lx\n", + entry->special.arg1, + entry->special.arg2, + entry->special.arg3); + break; default: trace_seq_printf(s, "Unknown type %d\n", entry->type); } @@ -1234,6 +1257,14 @@ static notrace int print_trace_fmt(struct trace_iterator *iter) if (!ret) return 0; break; + case TRACE_SPECIAL: + ret = trace_seq_printf(s, " %lx %lx %lx\n", + entry->special.arg1, + entry->special.arg2, + entry->special.arg3); + if (!ret) + return 0; + break; } return 1; } @@ -1271,6 +1302,14 @@ static notrace int print_raw_fmt(struct trace_iterator *iter) if (!ret) return 0; break; + case TRACE_SPECIAL: + ret = trace_seq_printf(s, " %lx %lx %lx\n", + entry->special.arg1, + entry->special.arg2, + entry->special.arg3); + if (!ret) + return 0; + break; } return 1; } @@ -1304,6 +1343,11 @@ static notrace int print_bin_fmt(struct trace_iterator *iter) SEQ_PUT_FIELD_RET(s, entry->ctx.next_pid); SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio); break; + case TRACE_SPECIAL: + SEQ_PUT_FIELD_RET(s, entry->special.arg1); + SEQ_PUT_FIELD_RET(s, entry->special.arg2); + SEQ_PUT_FIELD_RET(s, entry->special.arg3); + break; } return 1; } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 27fa2d06f49..7bdfef35c05 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -25,6 +25,15 @@ struct ctx_switch_entry { unsigned char next_prio; }; +/* + * Special (free-form) trace entry: + */ +struct special_entry { + unsigned long arg1; + unsigned long arg2; + unsigned long arg3; +}; + /* * The trace entry - the most basic unit of tracing. 
This is what * is printed in the end as a single line in the trace output, such as: @@ -41,6 +50,7 @@ struct trace_entry { union { struct ftrace_entry fn; struct ctx_switch_entry ctx; + struct special_entry special; }; }; @@ -154,6 +164,11 @@ void tracing_sched_switch_trace(struct trace_array *tr, struct task_struct *next, unsigned long flags); void tracing_record_cmdline(struct task_struct *tsk); +void trace_special(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long arg1, + unsigned long arg2, + unsigned long arg3); void tracing_start_function_trace(void); void tracing_stop_function_trace(void); -- cgit v1.2.3-70-g09d2 From dcb6308f2b56720599f6b9d5a01c33e67e69bde4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:48 +0200 Subject: ftrace, locking fix should be an irq-safe lock ... Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index fa13059eb46..70f94fa92c1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -691,14 +691,15 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, unsigned long ip, unsigned long parent_ip, unsigned long flags) { struct trace_entry *entry; + unsigned long irq_flags; - spin_lock(&data->lock); + spin_lock_irqsave(&data->lock, irq_flags); entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, flags); entry->type = TRACE_FN; entry->fn.ip = ip; entry->fn.parent_ip = parent_ip; - spin_unlock(&data->lock); + spin_unlock_irqrestore(&data->lock, irq_flags); } notrace void @@ -706,15 +707,16 @@ trace_special(struct trace_array *tr, struct trace_array_cpu *data, unsigned long arg1, unsigned long arg2, unsigned long arg3) { struct trace_entry *entry; + unsigned long irq_flags; - spin_lock(&data->lock); + spin_lock_irqsave(&data->lock, irq_flags); entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, 0); entry->type = TRACE_SPECIAL; entry->special.arg1 = arg1; entry->special.arg2 = arg2; entry->special.arg3 = arg3; - spin_unlock(&data->lock); + spin_unlock_irqrestore(&data->lock, irq_flags); } notrace void @@ -724,8 +726,9 @@ tracing_sched_switch_trace(struct trace_array *tr, unsigned long flags) { struct trace_entry *entry; + unsigned long irq_flags; - spin_lock(&data->lock); + spin_lock_irqsave(&data->lock, irq_flags); entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, flags); entry->type = TRACE_CTX; @@ -734,7 +737,7 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->ctx.prev_state = prev->state; entry->ctx.next_pid = next->pid; entry->ctx.next_prio = next->prio; - spin_unlock(&data->lock); + spin_unlock_irqrestore(&data->lock, irq_flags); } enum trace_file_type { -- cgit v1.2.3-70-g09d2 From 088b1e427dbba2af93cb6a7d39258c10ff58dd27 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:48 +0200 Subject: ftrace: pipe fixes Some fixes for better output with the trace pipe. 
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 70f94fa92c1..c56fc5e6013 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -770,11 +770,6 @@ trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data, array = page_address(page); - /* Still possible to catch up to the tail */ - if (iter->next_idx[cpu] && array == data->trace_tail && - iter->next_page_idx[cpu] == data->trace_tail_idx) - return NULL; - WARN_ON(iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE); return &array[iter->next_page_idx[cpu]]; } @@ -1921,7 +1916,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, struct trace_iterator *iter = filp->private_data; struct trace_array_cpu *data; static cpumask_t mask; - struct trace_entry *entry; static int start; unsigned long flags; int read = 0; @@ -2013,10 +2007,15 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, cpu_set(cpu, mask); } - while ((entry = find_next_entry_inc(iter)) != NULL) { + while (find_next_entry_inc(iter) != NULL) { + int len = iter->seq.len; + ret = print_trace_line(iter); - if (!ret) + if (!ret) { + /* don't print partial lines */ + iter->seq.len = len; break; + } trace_consume(iter); -- cgit v1.2.3-70-g09d2 From 26994ead1fc8cced63f17e9848edc1771036664e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:48 +0200 Subject: ftrace: enabled tracing by default This patch is the correct way to have tracing enabled by default. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c56fc5e6013..3dc6eac85b1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -107,7 +107,7 @@ static struct trace_array max_tr; static DEFINE_PER_CPU(struct trace_array_cpu, max_data); -static int tracer_enabled; +static int tracer_enabled = 1; static unsigned long trace_nr_entries = 16384UL; static struct tracer *trace_types __read_mostly; @@ -2268,6 +2268,8 @@ __init static int tracer_alloc_buffers(void) int ret = -ENOMEM; int i; + global_trace.ctrl = tracer_enabled; + /* Allocate the first page for all buffers */ for_each_possible_cpu(i) { data = global_trace.data[i] = &per_cpu(global_trace_cpu, i); -- cgit v1.2.3-70-g09d2 From 0fd9e0dac9026df09986a4b201518ae015814aef Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:48 +0200 Subject: ftrace: use cpu clock again Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 54 +--------------------------------------------------- 1 file changed, 1 insertion(+), 53 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3dc6eac85b1..d74c039305a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -42,61 +42,9 @@ ns2usecs(cycle_t nsec) return nsec; } -static const int time_sync_freq_max = 128; -static const cycle_t time_sync_thresh = 100000; - -static DEFINE_PER_CPU(cycle_t, time_offset); -static DEFINE_PER_CPU(cycle_t, prev_cpu_time); -static DEFINE_PER_CPU(int, time_sync_count); -static DEFINE_PER_CPU(int, time_sync_freq); - -/* - * Global lock which we take every now and then to synchronize - * the CPUs time. 
This method is not warp-safe, but it's good - * enough to synchronize slowly diverging time sources and thus - * it's good enough for tracing: - */ -static DEFINE_SPINLOCK(time_sync_lock); -static cycle_t prev_global_time; - -static notrace cycle_t __ftrace_now_sync(cycles_t time, int cpu) -{ - unsigned long flags; - - spin_lock_irqsave(&time_sync_lock, flags); - - /* - * Update the synchronization frequency: - */ - if (per_cpu(time_sync_freq, cpu) < time_sync_freq_max) - per_cpu(time_sync_freq, cpu) *= 2; - per_cpu(time_sync_count, cpu) = per_cpu(time_sync_freq, cpu); - - if (time < prev_global_time) { - per_cpu(time_offset, cpu) += prev_global_time - time; - time = prev_global_time; - } else { - prev_global_time = time; - } - - spin_unlock_irqrestore(&time_sync_lock, flags); - - return time; -} - notrace cycle_t ftrace_now(int cpu) { - cycle_t prev_cpu_time, time, delta_time; - - prev_cpu_time = per_cpu(prev_cpu_time, cpu); - time = sched_clock() + per_cpu(time_offset, cpu); - delta_time = time-prev_cpu_time; - - if (unlikely(delta_time > time_sync_thresh || - --per_cpu(time_sync_count, cpu) <= 0)) - time = __ftrace_now_sync(time, cpu); - - return time; + return cpu_clock(cpu); } static struct trace_array global_trace; -- cgit v1.2.3-70-g09d2 From 2e0f57618529a2739a5e1570e6c445c9c966b595 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:49 +0200 Subject: ftrace: build fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 111 ++++++++++++++++++++++++----------------- kernel/trace/trace_functions.c | 2 +- 2 files changed, 67 insertions(+), 46 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d74c039305a..71b25b79b3d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -432,47 +432,6 @@ notrace void tracing_reset(struct trace_array_cpu *data) data->trace_tail_idx = 0; } -#ifdef CONFIG_FTRACE -static notrace void -function_trace_call(unsigned long ip, unsigned long parent_ip) -{ - struct trace_array *tr = &global_trace; - struct trace_array_cpu *data; - unsigned long flags; - long disabled; - int cpu; - - if (unlikely(!tracer_enabled)) - return; - - local_irq_save(flags); - cpu = raw_smp_processor_id(); - data = tr->data[cpu]; - disabled = atomic_inc_return(&data->disabled); - - if (likely(disabled == 1)) - ftrace(tr, data, ip, parent_ip, flags); - - atomic_dec(&data->disabled); - local_irq_restore(flags); -} - -static struct ftrace_ops trace_ops __read_mostly = -{ - .func = function_trace_call, -}; -#endif - -notrace void tracing_start_function_trace(void) -{ - register_ftrace_function(&trace_ops); -} - -notrace void tracing_stop_function_trace(void) -{ - unregister_ftrace_function(&trace_ops); -} - #define SAVED_CMDLINES 128 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; @@ -635,8 +594,8 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags) } notrace void -ftrace(struct trace_array *tr, struct trace_array_cpu *data, - unsigned long ip, unsigned long parent_ip, unsigned long flags) +__ftrace(struct trace_array *tr, struct trace_array_cpu *data, + unsigned long ip, unsigned long parent_ip, unsigned long flags) { struct trace_entry *entry; unsigned long irq_flags; @@ -650,6 +609,14 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, spin_unlock_irqrestore(&data->lock, irq_flags); } +notrace void +ftrace(struct trace_array *tr, struct trace_array_cpu *data, + unsigned 
long ip, unsigned long parent_ip, unsigned long flags) +{ + if (likely(!atomic_read(&data->disabled))) + __ftrace(tr, data, ip, parent_ip, flags); +} + notrace void trace_special(struct trace_array *tr, struct trace_array_cpu *data, unsigned long arg1, unsigned long arg2, unsigned long arg3) @@ -688,6 +655,47 @@ tracing_sched_switch_trace(struct trace_array *tr, spin_unlock_irqrestore(&data->lock, irq_flags); } +#ifdef CONFIG_FTRACE +static notrace void +function_trace_call(unsigned long ip, unsigned long parent_ip) +{ + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + + if (unlikely(!tracer_enabled)) + return; + + local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) + __ftrace(tr, data, ip, parent_ip, flags); + + atomic_dec(&data->disabled); + local_irq_restore(flags); +} + +static struct ftrace_ops trace_ops __read_mostly = +{ + .func = function_trace_call, +}; + +notrace void tracing_start_function_trace(void) +{ + register_ftrace_function(&trace_ops); +} + +notrace void tracing_stop_function_trace(void) +{ + unregister_ftrace_function(&trace_ops); +} +#endif + enum trace_file_type { TRACE_FILE_LAT_FMT = 1, }; @@ -722,7 +730,7 @@ trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data, return &array[iter->next_page_idx[cpu]]; } -static struct notrace trace_entry * +static struct trace_entry * notrace find_next_entry(struct trace_iterator *iter, int *ent_cpu) { struct trace_array *tr = iter->tr; @@ -1866,6 +1874,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, static cpumask_t mask; static int start; unsigned long flags; + int ftrace_save; int read = 0; int cpu; int len; @@ -1944,6 +1953,9 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, cpus_clear(mask); local_irq_save(flags); + ftrace_save = ftrace_enabled; + ftrace_enabled = 0; + smp_wmb(); for_each_possible_cpu(cpu) { data = iter->tr->data[cpu]; @@ -1951,10 +1963,14 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, continue; atomic_inc(&data->disabled); - spin_lock(&data->lock); cpu_set(cpu, mask); } + for_each_cpu_mask(cpu, mask) { + data = iter->tr->data[cpu]; + spin_lock(&data->lock); + } + while (find_next_entry_inc(iter) != NULL) { int len = iter->seq.len; @@ -1974,8 +1990,13 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, for_each_cpu_mask(cpu, mask) { data = iter->tr->data[cpu]; spin_unlock(&data->lock); + } + + for_each_cpu_mask(cpu, mask) { + data = iter->tr->data[cpu]; atomic_dec(&data->disabled); } + ftrace_enabled = ftrace_save; local_irq_restore(flags); /* Now copy what we have to the user */ diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index e5d34b78fc9..69a0eb00a0a 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -9,10 +9,10 @@ * Copyright (C) 2004-2006 Ingo Molnar * Copyright (C) 2004 William Lee Irwin III */ -#include #include #include #include +#include #include "trace.h" -- cgit v1.2.3-70-g09d2 From 5e3ca0ec76fce92daa4eed0d02de9c79b1fe3920 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:49 +0200 Subject: ftrace: introduce the "hex" output method Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 92 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git 
a/kernel/trace/trace.c b/kernel/trace/trace.c index 71b25b79b3d..6974b212e93 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -103,7 +103,8 @@ enum trace_iterator_flags { TRACE_ITER_SYM_ADDR = 0x04, TRACE_ITER_VERBOSE = 0x08, TRACE_ITER_RAW = 0x10, - TRACE_ITER_BIN = 0x20, + TRACE_ITER_HEX = 0x20, + TRACE_ITER_BIN = 0x40, }; #define TRACE_ITER_SYM_MASK \ @@ -116,6 +117,7 @@ static const char *trace_options[] = { "sym-addr", "verbose", "raw", + "hex", "bin", NULL }; @@ -238,6 +240,47 @@ trace_seq_putmem(struct trace_seq *s, void *mem, size_t len) return len; } +#define HEX_CHARS 17 + +static notrace int +trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) +{ + unsigned char hex[HEX_CHARS]; + unsigned char *data; + unsigned char byte; + int i, j; + + BUG_ON(len >= HEX_CHARS); + + data = mem; + +#ifdef __BIG_ENDIAN + for (i = 0, j = 0; i < len; i++) { +#else + for (i = len-1, j = 0; i >= 0; i--) { +#endif + byte = data[i]; + + hex[j] = byte & 0x0f; + if (hex[j] >= 10) + hex[j] += 'a' - 10; + else + hex[j] += '0'; + j++; + + hex[j] = byte >> 4; + if (hex[j] >= 10) + hex[j] += 'a' - 10; + else + hex[j] += '0'; + j++; + } + hex[j] = ' '; + j++; + + return trace_seq_putmem(s, hex, j); +} + static notrace void trace_seq_reset(struct trace_seq *s) { @@ -1274,6 +1317,51 @@ do { \ return 0; \ } while (0) +#define SEQ_PUT_HEX_FIELD_RET(s, x) \ +do { \ + if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \ + return 0; \ +} while (0) + +static notrace int print_hex_fmt(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + unsigned char newline = '\n'; + struct trace_entry *entry; + int S; + + entry = iter->ent; + + SEQ_PUT_HEX_FIELD_RET(s, entry->pid); + SEQ_PUT_HEX_FIELD_RET(s, iter->cpu); + SEQ_PUT_HEX_FIELD_RET(s, entry->t); + + switch (entry->type) { + case TRACE_FN: + SEQ_PUT_HEX_FIELD_RET(s, entry->fn.ip); + SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip); + break; + case TRACE_CTX: + S = entry->ctx.prev_state < sizeof(state_to_char) ? + state_to_char[entry->ctx.prev_state] : 'X'; + SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_pid); + SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_prio); + SEQ_PUT_HEX_FIELD_RET(s, S); + SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_pid); + SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_prio); + SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip); + break; + case TRACE_SPECIAL: + SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg1); + SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg2); + SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg3); + break; + } + SEQ_PUT_FIELD_RET(s, newline); + + return 1; +} + static notrace int print_bin_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; @@ -1327,6 +1415,9 @@ static int print_trace_line(struct trace_iterator *iter) if (trace_flags & TRACE_ITER_BIN) return print_bin_fmt(iter); + if (trace_flags & TRACE_ITER_HEX) + return print_hex_fmt(iter); + if (trace_flags & TRACE_ITER_RAW) return print_raw_fmt(iter); -- cgit v1.2.3-70-g09d2 From 2577046740fe6d77864128c6187c11125c2449ea Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:49 +0200 Subject: ftrace: build fix no need to backmerge, only affects ftrace-enabled kernels. 
(which is not the default) Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6974b212e93..958c4d77a67 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1965,7 +1965,9 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, static cpumask_t mask; static int start; unsigned long flags; +#ifdef CONFIG_FTRACE int ftrace_save; +#endif int read = 0; int cpu; int len; @@ -2044,8 +2046,10 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, cpus_clear(mask); local_irq_save(flags); +#ifdef CONFIG_FTRACE ftrace_save = ftrace_enabled; ftrace_enabled = 0; +#endif smp_wmb(); for_each_possible_cpu(cpu) { data = iter->tr->data[cpu]; @@ -2087,7 +2091,9 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, data = iter->tr->data[cpu]; atomic_dec(&data->disabled); } +#ifdef CONFIG_FTRACE ftrace_enabled = ftrace_save; +#endif local_irq_restore(flags); /* Now copy what we have to the user */ -- cgit v1.2.3-70-g09d2 From 2a2cc8f7c4d0dfd75720867f7dc58d24f075edfc Mon Sep 17 00:00:00 2001 From: Soeren Sandmann Pedersen Date: Mon, 12 May 2008 21:20:49 +0200 Subject: ftrace: allow the event pipe to be polled Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 958c4d77a67..d041578affd 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -63,6 +64,7 @@ static struct tracer *current_trace __read_mostly; static int max_tracer_type_len; static DEFINE_MUTEX(trace_types_lock); +static DECLARE_WAIT_QUEUE_HEAD (trace_wait); #define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry)) @@ -105,6 +107,7 @@ enum trace_iterator_flags { TRACE_ITER_RAW = 0x10, TRACE_ITER_HEX = 0x20, TRACE_ITER_BIN = 0x40, + TRACE_ITER_BLOCK = 0x80, }; #define TRACE_ITER_SYM_MASK \ @@ -119,6 +122,7 @@ static const char *trace_options[] = { "raw", "hex", "bin", + "block", NULL }; @@ -650,6 +654,9 @@ __ftrace(struct trace_array *tr, struct trace_array_cpu *data, entry->fn.ip = ip; entry->fn.parent_ip = parent_ip; spin_unlock_irqrestore(&data->lock, irq_flags); + + if (!(trace_flags & TRACE_ITER_BLOCK)) + wake_up (&trace_wait); } notrace void @@ -675,6 +682,9 @@ trace_special(struct trace_array *tr, struct trace_array_cpu *data, entry->special.arg2 = arg2; entry->special.arg3 = arg3; spin_unlock_irqrestore(&data->lock, irq_flags); + + if (!(trace_flags & TRACE_ITER_BLOCK)) + wake_up (&trace_wait); } notrace void @@ -696,6 +706,9 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->ctx.next_pid = next->pid; entry->ctx.next_prio = next->prio; spin_unlock_irqrestore(&data->lock, irq_flags); + + if (!(trace_flags & TRACE_ITER_BLOCK)) + wake_up (&trace_wait); } #ifdef CONFIG_FTRACE @@ -1765,7 +1778,6 @@ static struct file_operations tracing_readme_fops = { .read = tracing_readme_read, }; - static ssize_t tracing_ctrl_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) @@ -1953,6 +1965,28 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) return 0; } +static unsigned int +tracing_poll_pipe(struct file *filp, poll_table *poll_table) +{ + struct trace_iterator *iter = filp->private_data; + + if 
(trace_flags & TRACE_ITER_BLOCK) { + /* + * Always select as readable when in blocking mode + */ + return POLLIN | POLLRDNORM; + } + else { + if (!trace_empty(iter)) + return POLLIN | POLLRDNORM; + poll_wait(filp, &trace_wait, poll_table); + if (!trace_empty(iter)) + return POLLIN | POLLRDNORM; + + return 0; + } +} + /* * Consumer reader. */ @@ -1991,6 +2025,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, start = 0; while (trace_empty(iter)) { + if (!(trace_flags & TRACE_ITER_BLOCK)) + return -EWOULDBLOCK; /* * This is a make-shift waitqueue. The reason we don't use * an actual wait queue is because: @@ -2134,6 +2170,7 @@ static struct file_operations set_tracer_fops = { static struct file_operations tracing_pipe_fops = { .open = tracing_open_pipe, + .poll = tracing_poll_pipe, .read = tracing_read_pipe, .release = tracing_release_pipe, }; -- cgit v1.2.3-70-g09d2 From 6fb44b717c10ecf37beaaebd312f3afa93fed714 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:49 +0200 Subject: ftrace: add trace_function api for other tracers to use A new check was added in the ftrace function that wont trace if the CPU trace buffer is disabled. Unfortunately, other tracers used ftrace() to write to the buffer after they disabled it. The new disable check makes these calls into a nop. This patch changes the __ftrace that is called without the check into a new api for the other tracers to use, called "trace_function". The other tracers use this interface instead when the trace CPU buffer is already disabled. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 8 ++++---- kernel/trace/trace.h | 5 +++++ kernel/trace/trace_irqsoff.c | 10 +++++----- kernel/trace/trace_sched_wakeup.c | 5 +++-- 4 files changed, 17 insertions(+), 11 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d041578affd..9022c357032 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -641,8 +641,8 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags) } notrace void -__ftrace(struct trace_array *tr, struct trace_array_cpu *data, - unsigned long ip, unsigned long parent_ip, unsigned long flags) +trace_function(struct trace_array *tr, struct trace_array_cpu *data, + unsigned long ip, unsigned long parent_ip, unsigned long flags) { struct trace_entry *entry; unsigned long irq_flags; @@ -664,7 +664,7 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, unsigned long ip, unsigned long parent_ip, unsigned long flags) { if (likely(!atomic_read(&data->disabled))) - __ftrace(tr, data, ip, parent_ip, flags); + trace_function(tr, data, ip, parent_ip, flags); } notrace void @@ -730,7 +730,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) - __ftrace(tr, data, ip, parent_ip, flags); + trace_function(tr, data, ip, parent_ip, flags); atomic_dec(&data->disabled); local_irq_restore(flags); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 7bdfef35c05..faf9f67246a 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -169,6 +169,11 @@ void trace_special(struct trace_array *tr, unsigned long arg1, unsigned long arg2, unsigned long arg3); +void trace_function(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long ip, + unsigned long parent_ip, + unsigned long flags); void tracing_start_function_trace(void); void 
tracing_stop_function_trace(void); diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index d2a6e6f1ad2..3269f4ff517 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -95,7 +95,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) - ftrace(tr, data, ip, parent_ip, flags); + trace_function(tr, data, ip, parent_ip, flags); atomic_dec(&data->disabled); } @@ -150,7 +150,7 @@ check_critical_timing(struct trace_array *tr, if (!report_latency(delta)) goto out_unlock; - ftrace(tr, data, CALLER_ADDR0, parent_ip, flags); + trace_function(tr, data, CALLER_ADDR0, parent_ip, flags); latency = nsecs_to_usecs(delta); @@ -188,7 +188,7 @@ out: data->critical_sequence = max_sequence; data->preempt_timestamp = ftrace_now(cpu); tracing_reset(data); - ftrace(tr, data, CALLER_ADDR0, parent_ip, flags); + trace_function(tr, data, CALLER_ADDR0, parent_ip, flags); } static inline void notrace @@ -221,7 +221,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) local_save_flags(flags); - ftrace(tr, data, ip, parent_ip, flags); + trace_function(tr, data, ip, parent_ip, flags); __get_cpu_var(tracing_cpu) = 1; @@ -254,7 +254,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) atomic_inc(&data->disabled); local_save_flags(flags); - ftrace(tr, data, ip, parent_ip, flags); + trace_function(tr, data, ip, parent_ip, flags); check_critical_timing(tr, data, parent_ip ? : ip, cpu); data->critical_start = 0; atomic_dec(&data->disabled); diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index b7df825c3af..3549e4154f1 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -85,7 +85,7 @@ wakeup_sched_switch(struct task_struct *prev, struct task_struct *next) if (unlikely(!tracer_enabled || next != wakeup_task)) goto out_unlock; - ftrace(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags); + trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags); /* * usecs conversion is slow so we try to delay the conversion @@ -192,7 +192,8 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p, local_save_flags(flags); tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu); - ftrace(tr, tr->data[wakeup_cpu], CALLER_ADDR1, CALLER_ADDR2, flags); + trace_function(tr, tr->data[wakeup_cpu], + CALLER_ADDR1, CALLER_ADDR2, flags); out_locked: spin_unlock(&wakeup_lock); -- cgit v1.2.3-70-g09d2 From e309b41dd65aa953f86765eeeecc941d8e1e8b8f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:51 +0200 Subject: ftrace: remove notrace now that we have a kbuild method for notrace, no need to pollute the C code with the annotations. 
Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/ftrace.c | 66 +++++++++++++-------------- kernel/trace/trace.c | 94 +++++++++++++++++++-------------------- kernel/trace/trace.h | 6 +-- kernel/trace/trace_functions.c | 12 ++--- kernel/trace/trace_irqsoff.c | 40 ++++++++--------- kernel/trace/trace_sched_switch.c | 12 ++--- kernel/trace/trace_sched_wakeup.c | 28 ++++++------ kernel/trace/trace_selftest.c | 2 +- 8 files changed, 130 insertions(+), 130 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 57350cbd1f6..281d97a3208 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -53,7 +53,7 @@ ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; /* mcount is defined per arch in assembly */ EXPORT_SYMBOL(mcount); -notrace void ftrace_list_func(unsigned long ip, unsigned long parent_ip) +void ftrace_list_func(unsigned long ip, unsigned long parent_ip) { struct ftrace_ops *op = ftrace_list; @@ -79,7 +79,7 @@ void clear_ftrace_function(void) ftrace_trace_function = ftrace_stub; } -static int notrace __register_ftrace_function(struct ftrace_ops *ops) +static int __register_ftrace_function(struct ftrace_ops *ops) { /* Should never be called by interrupts */ spin_lock(&ftrace_lock); @@ -110,7 +110,7 @@ static int notrace __register_ftrace_function(struct ftrace_ops *ops) return 0; } -static int notrace __unregister_ftrace_function(struct ftrace_ops *ops) +static int __unregister_ftrace_function(struct ftrace_ops *ops) { struct ftrace_ops **p; int ret = 0; @@ -197,7 +197,7 @@ static int ftrace_record_suspend; static struct dyn_ftrace *ftrace_free_records; -static inline int notrace +static inline int ftrace_ip_in_hash(unsigned long ip, unsigned long key) { struct dyn_ftrace *p; @@ -214,13 +214,13 @@ ftrace_ip_in_hash(unsigned long ip, unsigned long key) return found; } -static inline void notrace +static inline void ftrace_add_hash(struct dyn_ftrace *node, unsigned long key) { hlist_add_head(&node->node, &ftrace_hash[key]); } -static notrace void ftrace_free_rec(struct dyn_ftrace *rec) +static void ftrace_free_rec(struct dyn_ftrace *rec) { /* no locking, only called from kstop_machine */ @@ -229,7 +229,7 @@ static notrace void ftrace_free_rec(struct dyn_ftrace *rec) rec->flags |= FTRACE_FL_FREE; } -static notrace struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) +static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) { struct dyn_ftrace *rec; @@ -259,7 +259,7 @@ static notrace struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) return &ftrace_pages->records[ftrace_pages->index++]; } -static void notrace +static void ftrace_record_ip(unsigned long ip) { struct dyn_ftrace *node; @@ -329,7 +329,7 @@ ftrace_record_ip(unsigned long ip) #define FTRACE_ADDR ((long)(ftrace_caller)) #define MCOUNT_ADDR ((long)(mcount)) -static void notrace +static void __ftrace_replace_code(struct dyn_ftrace *rec, unsigned char *old, unsigned char *new, int enable) { @@ -405,7 +405,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, } } -static void notrace ftrace_replace_code(int enable) +static void ftrace_replace_code(int enable) { unsigned char *new = NULL, *old = NULL; struct dyn_ftrace *rec; @@ -430,7 +430,7 @@ static void notrace ftrace_replace_code(int enable) } } -static notrace void ftrace_shutdown_replenish(void) +static void ftrace_shutdown_replenish(void) { if (ftrace_pages->next) return; @@ -439,7 +439,7 @@ static notrace void ftrace_shutdown_replenish(void) 
ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL); } -static notrace void +static void ftrace_code_disable(struct dyn_ftrace *rec) { unsigned long ip; @@ -458,7 +458,7 @@ ftrace_code_disable(struct dyn_ftrace *rec) } } -static int notrace __ftrace_modify_code(void *data) +static int __ftrace_modify_code(void *data) { unsigned long addr; int *command = data; @@ -482,14 +482,14 @@ static int notrace __ftrace_modify_code(void *data) return 0; } -static void notrace ftrace_run_update_code(int command) +static void ftrace_run_update_code(int command) { stop_machine_run(__ftrace_modify_code, &command, NR_CPUS); } static ftrace_func_t saved_ftrace_func; -static void notrace ftrace_startup(void) +static void ftrace_startup(void) { int command = 0; @@ -514,7 +514,7 @@ static void notrace ftrace_startup(void) mutex_unlock(&ftraced_lock); } -static void notrace ftrace_shutdown(void) +static void ftrace_shutdown(void) { int command = 0; @@ -539,7 +539,7 @@ static void notrace ftrace_shutdown(void) mutex_unlock(&ftraced_lock); } -static void notrace ftrace_startup_sysctl(void) +static void ftrace_startup_sysctl(void) { int command = FTRACE_ENABLE_MCOUNT; @@ -557,7 +557,7 @@ static void notrace ftrace_startup_sysctl(void) mutex_unlock(&ftraced_lock); } -static void notrace ftrace_shutdown_sysctl(void) +static void ftrace_shutdown_sysctl(void) { int command = FTRACE_DISABLE_MCOUNT; @@ -577,7 +577,7 @@ static cycle_t ftrace_update_time; static unsigned long ftrace_update_cnt; unsigned long ftrace_update_tot_cnt; -static int notrace __ftrace_update_code(void *ignore) +static int __ftrace_update_code(void *ignore) { struct dyn_ftrace *p; struct hlist_head head; @@ -618,7 +618,7 @@ static int notrace __ftrace_update_code(void *ignore) return 0; } -static void notrace ftrace_update_code(void) +static void ftrace_update_code(void) { if (unlikely(ftrace_disabled)) return; @@ -626,7 +626,7 @@ static void notrace ftrace_update_code(void) stop_machine_run(__ftrace_update_code, NULL, NR_CPUS); } -static int notrace ftraced(void *ignore) +static int ftraced(void *ignore) { unsigned long usecs; @@ -733,7 +733,7 @@ struct ftrace_iterator { unsigned filtered; }; -static void notrace * +static void * t_next(struct seq_file *m, void *v, loff_t *pos) { struct ftrace_iterator *iter = m->private; @@ -806,7 +806,7 @@ static struct seq_operations show_ftrace_seq_ops = { .show = t_show, }; -static int notrace +static int ftrace_avail_open(struct inode *inode, struct file *file) { struct ftrace_iterator *iter; @@ -845,7 +845,7 @@ int ftrace_avail_release(struct inode *inode, struct file *file) return 0; } -static void notrace ftrace_filter_reset(void) +static void ftrace_filter_reset(void) { struct ftrace_page *pg; struct dyn_ftrace *rec; @@ -867,7 +867,7 @@ static void notrace ftrace_filter_reset(void) preempt_enable(); } -static int notrace +static int ftrace_filter_open(struct inode *inode, struct file *file) { struct ftrace_iterator *iter; @@ -903,7 +903,7 @@ ftrace_filter_open(struct inode *inode, struct file *file) return ret; } -static ssize_t notrace +static ssize_t ftrace_filter_read(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos) { @@ -913,7 +913,7 @@ ftrace_filter_read(struct file *file, char __user *ubuf, return -EPERM; } -static loff_t notrace +static loff_t ftrace_filter_lseek(struct file *file, loff_t offset, int origin) { loff_t ret; @@ -933,7 +933,7 @@ enum { MATCH_END_ONLY, }; -static void notrace +static void ftrace_match(unsigned char *buff, int len) { char str[KSYM_SYMBOL_LEN]; @@ 
-1002,7 +1002,7 @@ ftrace_match(unsigned char *buff, int len) preempt_enable(); } -static ssize_t notrace +static ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) { @@ -1094,7 +1094,7 @@ ftrace_filter_write(struct file *file, const char __user *ubuf, * Filters denote which functions should be enabled when tracing is enabled. * If @buf is NULL and reset is set, all functions will be enabled for tracing. */ -notrace void ftrace_set_filter(unsigned char *buf, int len, int reset) +void ftrace_set_filter(unsigned char *buf, int len, int reset) { if (unlikely(ftrace_disabled)) return; @@ -1107,7 +1107,7 @@ notrace void ftrace_set_filter(unsigned char *buf, int len, int reset) mutex_unlock(&ftrace_filter_lock); } -static int notrace +static int ftrace_filter_release(struct inode *inode, struct file *file) { struct seq_file *m = (struct seq_file *)file->private_data; @@ -1242,7 +1242,7 @@ static __init int ftrace_init_debugfs(void) fs_initcall(ftrace_init_debugfs); -static int __init notrace ftrace_dynamic_init(void) +static int __init ftrace_dynamic_init(void) { struct task_struct *p; unsigned long addr; @@ -1352,7 +1352,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops) return ret; } -notrace int +int ftrace_enable_sysctl(struct ctl_table *table, int write, struct file *file, void __user *buffer, size_t *lenp, loff_t *ppos) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9022c357032..f5898051fdd 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -35,7 +35,7 @@ unsigned long __read_mostly tracing_thresh; static int tracing_disabled = 1; -static long notrace +static long ns2usecs(cycle_t nsec) { nsec += 500; @@ -43,7 +43,7 @@ ns2usecs(cycle_t nsec) return nsec; } -notrace cycle_t ftrace_now(int cpu) +cycle_t ftrace_now(int cpu) { return cpu_clock(cpu); } @@ -135,7 +135,7 @@ static DEFINE_SPINLOCK(ftrace_max_lock); * structure. (this way the maximum trace is permanently saved, * for later retrieval via /debugfs/tracing/latency_trace) */ -static notrace void +static void __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) { struct trace_array_cpu *data = tr->data[cpu]; @@ -184,7 +184,7 @@ void *head_page(struct trace_array_cpu *data) return page_address(page); } -static notrace int +static int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) { int len = (PAGE_SIZE - 1) - s->len; @@ -207,7 +207,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) 
return len; } -static notrace int +static int trace_seq_puts(struct trace_seq *s, const char *str) { int len = strlen(str); @@ -221,7 +221,7 @@ trace_seq_puts(struct trace_seq *s, const char *str) return len; } -static notrace int +static int trace_seq_putc(struct trace_seq *s, unsigned char c) { if (s->len >= (PAGE_SIZE - 1)) @@ -232,7 +232,7 @@ trace_seq_putc(struct trace_seq *s, unsigned char c) return 1; } -static notrace int +static int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len) { if (len > ((PAGE_SIZE - 1) - s->len)) @@ -246,7 +246,7 @@ trace_seq_putmem(struct trace_seq *s, void *mem, size_t len) #define HEX_CHARS 17 -static notrace int +static int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) { unsigned char hex[HEX_CHARS]; @@ -285,13 +285,13 @@ trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) return trace_seq_putmem(s, hex, j); } -static notrace void +static void trace_seq_reset(struct trace_seq *s) { s->len = 0; } -static notrace void +static void trace_print_seq(struct seq_file *m, struct trace_seq *s) { int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; @@ -302,7 +302,7 @@ trace_print_seq(struct seq_file *m, struct trace_seq *s) trace_seq_reset(s); } -notrace static void +static void flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2) { struct list_head flip_pages; @@ -323,7 +323,7 @@ flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2) check_pages(tr2); } -notrace void +void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) { struct trace_array_cpu *data; @@ -348,7 +348,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) * @tsk - task with the latency * @cpu - the cpu of the buffer to copy. */ -notrace void +void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) { struct trace_array_cpu *data = tr->data[cpu]; @@ -471,7 +471,7 @@ void unregister_tracer(struct tracer *type) mutex_unlock(&trace_types_lock); } -notrace void tracing_reset(struct trace_array_cpu *data) +void tracing_reset(struct trace_array_cpu *data) { data->trace_idx = 0; data->trace_head = data->trace_tail = head_page(data); @@ -494,9 +494,9 @@ static void trace_init_cmdlines(void) cmdline_idx = 0; } -notrace void trace_stop_cmdline_recording(void); +void trace_stop_cmdline_recording(void); -static notrace void trace_save_cmdline(struct task_struct *tsk) +static void trace_save_cmdline(struct task_struct *tsk) { unsigned map; unsigned idx; @@ -531,7 +531,7 @@ static notrace void trace_save_cmdline(struct task_struct *tsk) spin_unlock(&trace_cmdline_lock); } -static notrace char *trace_find_cmdline(int pid) +static char *trace_find_cmdline(int pid) { char *cmdline = "<...>"; unsigned map; @@ -552,7 +552,7 @@ static notrace char *trace_find_cmdline(int pid) return cmdline; } -notrace void tracing_record_cmdline(struct task_struct *tsk) +void tracing_record_cmdline(struct task_struct *tsk) { if (atomic_read(&trace_record_cmdline_disabled)) return; @@ -560,7 +560,7 @@ notrace void tracing_record_cmdline(struct task_struct *tsk) trace_save_cmdline(tsk); } -static inline notrace struct list_head * +static inline struct list_head * trace_next_list(struct trace_array_cpu *data, struct list_head *next) { /* @@ -574,7 +574,7 @@ trace_next_list(struct trace_array_cpu *data, struct list_head *next) return next; } -static inline notrace void * +static inline void * trace_next_page(struct trace_array_cpu *data, void *addr) { struct list_head *next; @@ -588,7 
+588,7 @@ trace_next_page(struct trace_array_cpu *data, void *addr) return page_address(page); } -static inline notrace struct trace_entry * +static inline struct trace_entry * tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data) { unsigned long idx, idx_next; @@ -623,7 +623,7 @@ tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data) return entry; } -static inline notrace void +static inline void tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags) { struct task_struct *tsk = current; @@ -640,7 +640,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags) (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); } -notrace void +void trace_function(struct trace_array *tr, struct trace_array_cpu *data, unsigned long ip, unsigned long parent_ip, unsigned long flags) { @@ -659,7 +659,7 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, wake_up (&trace_wait); } -notrace void +void ftrace(struct trace_array *tr, struct trace_array_cpu *data, unsigned long ip, unsigned long parent_ip, unsigned long flags) { @@ -667,7 +667,7 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, trace_function(tr, data, ip, parent_ip, flags); } -notrace void +void trace_special(struct trace_array *tr, struct trace_array_cpu *data, unsigned long arg1, unsigned long arg2, unsigned long arg3) { @@ -687,7 +687,7 @@ trace_special(struct trace_array *tr, struct trace_array_cpu *data, wake_up (&trace_wait); } -notrace void +void tracing_sched_switch_trace(struct trace_array *tr, struct trace_array_cpu *data, struct task_struct *prev, struct task_struct *next, @@ -712,7 +712,7 @@ tracing_sched_switch_trace(struct trace_array *tr, } #ifdef CONFIG_FTRACE -static notrace void +static void function_trace_call(unsigned long ip, unsigned long parent_ip) { struct trace_array *tr = &global_trace; @@ -741,12 +741,12 @@ static struct ftrace_ops trace_ops __read_mostly = .func = function_trace_call, }; -notrace void tracing_start_function_trace(void) +void tracing_start_function_trace(void) { register_ftrace_function(&trace_ops); } -notrace void tracing_stop_function_trace(void) +void tracing_stop_function_trace(void) { unregister_ftrace_function(&trace_ops); } @@ -786,7 +786,7 @@ trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data, return &array[iter->next_page_idx[cpu]]; } -static struct trace_entry * notrace +static struct trace_entry * find_next_entry(struct trace_iterator *iter, int *ent_cpu) { struct trace_array *tr = iter->tr; @@ -813,7 +813,7 @@ find_next_entry(struct trace_iterator *iter, int *ent_cpu) return next; } -static notrace void trace_iterator_increment(struct trace_iterator *iter) +static void trace_iterator_increment(struct trace_iterator *iter) { iter->idx++; iter->next_idx[iter->cpu]++; @@ -828,7 +828,7 @@ static notrace void trace_iterator_increment(struct trace_iterator *iter) } } -static notrace void trace_consume(struct trace_iterator *iter) +static void trace_consume(struct trace_iterator *iter) { struct trace_array_cpu *data = iter->tr->data[iter->cpu]; @@ -844,7 +844,7 @@ static notrace void trace_consume(struct trace_iterator *iter) data->trace_idx = 0; } -static notrace void *find_next_entry_inc(struct trace_iterator *iter) +static void *find_next_entry_inc(struct trace_iterator *iter) { struct trace_entry *next; int next_cpu = -1; @@ -863,7 +863,7 @@ static notrace void *find_next_entry_inc(struct trace_iterator *iter) return next ? 
iter : NULL; } -static notrace void *s_next(struct seq_file *m, void *v, loff_t *pos) +static void *s_next(struct seq_file *m, void *v, loff_t *pos) { struct trace_iterator *iter = m->private; void *last_ent = iter->ent; @@ -978,7 +978,7 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt, # define IP_FMT "%016lx" #endif -static notrace int +static int seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) { int ret; @@ -999,7 +999,7 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) return ret; } -static notrace void print_lat_help_header(struct seq_file *m) +static void print_lat_help_header(struct seq_file *m) { seq_puts(m, "# _------=> CPU# \n"); seq_puts(m, "# / _-----=> irqs-off \n"); @@ -1012,14 +1012,14 @@ static notrace void print_lat_help_header(struct seq_file *m) seq_puts(m, "# \\ / ||||| \\ | / \n"); } -static notrace void print_func_help_header(struct seq_file *m) +static void print_func_help_header(struct seq_file *m) { seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"); seq_puts(m, "# | | | | |\n"); } -static notrace void +static void print_trace_header(struct seq_file *m, struct trace_iterator *iter) { unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); @@ -1090,7 +1090,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) seq_puts(m, "\n"); } -static notrace void +static void lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) { int hardirq, softirq; @@ -1127,7 +1127,7 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) unsigned long preempt_mark_thresh = 100; -static notrace void +static void lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs, unsigned long rel_usecs) { @@ -1142,7 +1142,7 @@ lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs, static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; -static notrace int +static int print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) { struct trace_seq *s = &iter->seq; @@ -1206,7 +1206,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) return 1; } -static notrace int print_trace_fmt(struct trace_iterator *iter) +static int print_trace_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); @@ -1279,7 +1279,7 @@ static notrace int print_trace_fmt(struct trace_iterator *iter) return 1; } -static notrace int print_raw_fmt(struct trace_iterator *iter) +static int print_raw_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; struct trace_entry *entry; @@ -1336,7 +1336,7 @@ do { \ return 0; \ } while (0) -static notrace int print_hex_fmt(struct trace_iterator *iter) +static int print_hex_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; unsigned char newline = '\n'; @@ -1375,7 +1375,7 @@ static notrace int print_hex_fmt(struct trace_iterator *iter) return 1; } -static notrace int print_bin_fmt(struct trace_iterator *iter) +static int print_bin_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; struct trace_entry *entry; @@ -1475,7 +1475,7 @@ static struct seq_operations tracer_seq_ops = { .show = s_show, }; -static struct trace_iterator notrace * +static struct trace_iterator * __tracing_open(struct inode *inode, struct file *file, int *ret) { struct trace_iterator *iter; @@ -1572,7 +1572,7 @@ static int tracing_lt_open(struct inode *inode, struct file *file) } 
-static notrace void * +static void * t_next(struct seq_file *m, void *v, loff_t *pos) { struct tracer *t = m->private; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index faf9f67246a..2b7352bf1ce 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -150,7 +150,7 @@ struct trace_iterator { long idx; }; -void notrace tracing_reset(struct trace_array_cpu *data); +void tracing_reset(struct trace_array_cpu *data); int tracing_open_generic(struct inode *inode, struct file *filp); struct dentry *tracing_init_dentry(void); void ftrace(struct trace_array *tr, @@ -189,10 +189,10 @@ void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu); void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu); -extern notrace cycle_t ftrace_now(int cpu); +extern cycle_t ftrace_now(int cpu); #ifdef CONFIG_SCHED_TRACER -extern void notrace +extern void wakeup_sched_switch(struct task_struct *prev, struct task_struct *next); #else static inline void diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 69a0eb00a0a..4165d34bd28 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -16,7 +16,7 @@ #include "trace.h" -static notrace void function_reset(struct trace_array *tr) +static void function_reset(struct trace_array *tr) { int cpu; @@ -26,30 +26,30 @@ static notrace void function_reset(struct trace_array *tr) tracing_reset(tr->data[cpu]); } -static notrace void start_function_trace(struct trace_array *tr) +static void start_function_trace(struct trace_array *tr) { function_reset(tr); tracing_start_function_trace(); } -static notrace void stop_function_trace(struct trace_array *tr) +static void stop_function_trace(struct trace_array *tr) { tracing_stop_function_trace(); } -static notrace void function_trace_init(struct trace_array *tr) +static void function_trace_init(struct trace_array *tr) { if (tr->ctrl) start_function_trace(tr); } -static notrace void function_trace_reset(struct trace_array *tr) +static void function_trace_reset(struct trace_array *tr) { if (tr->ctrl) stop_function_trace(tr); } -static notrace void function_trace_ctrl_update(struct trace_array *tr) +static void function_trace_ctrl_update(struct trace_array *tr) { if (tr->ctrl) start_function_trace(tr); diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 2ac0d09db6f..7a4dc014b8a 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -33,7 +33,7 @@ enum { static int trace_type __read_mostly; #ifdef CONFIG_PREEMPT_TRACER -static inline int notrace +static inline int preempt_trace(void) { return ((trace_type & TRACER_PREEMPT_OFF) && preempt_count()); @@ -43,7 +43,7 @@ preempt_trace(void) #endif #ifdef CONFIG_IRQSOFF_TRACER -static inline int notrace +static inline int irq_trace(void) { return ((trace_type & TRACER_IRQS_OFF) && @@ -67,7 +67,7 @@ static __cacheline_aligned_in_smp unsigned long max_sequence; /* * irqsoff uses its own tracer function to keep the overhead down: */ -static void notrace +static void irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) { struct trace_array *tr = irqsoff_trace; @@ -109,7 +109,7 @@ static struct ftrace_ops trace_ops __read_mostly = /* * Should this new latency be reported/recorded? 
*/ -static int notrace report_latency(cycle_t delta) +static int report_latency(cycle_t delta) { if (tracing_thresh) { if (delta < tracing_thresh) @@ -121,7 +121,7 @@ static int notrace report_latency(cycle_t delta) return 1; } -static void notrace +static void check_critical_timing(struct trace_array *tr, struct trace_array_cpu *data, unsigned long parent_ip, @@ -191,7 +191,7 @@ out: trace_function(tr, data, CALLER_ADDR0, parent_ip, flags); } -static inline void notrace +static inline void start_critical_timing(unsigned long ip, unsigned long parent_ip) { int cpu; @@ -228,7 +228,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) atomic_dec(&data->disabled); } -static inline void notrace +static inline void stop_critical_timing(unsigned long ip, unsigned long parent_ip) { int cpu; @@ -261,13 +261,13 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) } /* start and stop critical timings used to for stoppage (in idle) */ -void notrace start_critical_timings(void) +void start_critical_timings(void) { if (preempt_trace() || irq_trace()) start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } -void notrace stop_critical_timings(void) +void stop_critical_timings(void) { if (preempt_trace() || irq_trace()) stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); @@ -275,13 +275,13 @@ void notrace stop_critical_timings(void) #ifdef CONFIG_IRQSOFF_TRACER #ifdef CONFIG_PROVE_LOCKING -void notrace time_hardirqs_on(unsigned long a0, unsigned long a1) +void time_hardirqs_on(unsigned long a0, unsigned long a1) { if (!preempt_trace() && irq_trace()) stop_critical_timing(a0, a1); } -void notrace time_hardirqs_off(unsigned long a0, unsigned long a1) +void time_hardirqs_off(unsigned long a0, unsigned long a1) { if (!preempt_trace() && irq_trace()) start_critical_timing(a0, a1); @@ -309,35 +309,35 @@ void trace_softirqs_off(unsigned long ip) { } -inline notrace void print_irqtrace_events(struct task_struct *curr) +inline void print_irqtrace_events(struct task_struct *curr) { } /* * We are only interested in hardirq on/off events: */ -void notrace trace_hardirqs_on(void) +void trace_hardirqs_on(void) { if (!preempt_trace() && irq_trace()) stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } EXPORT_SYMBOL(trace_hardirqs_on); -void notrace trace_hardirqs_off(void) +void trace_hardirqs_off(void) { if (!preempt_trace() && irq_trace()) start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } EXPORT_SYMBOL(trace_hardirqs_off); -void notrace trace_hardirqs_on_caller(unsigned long caller_addr) +void trace_hardirqs_on_caller(unsigned long caller_addr) { if (!preempt_trace() && irq_trace()) stop_critical_timing(CALLER_ADDR0, caller_addr); } EXPORT_SYMBOL(trace_hardirqs_on_caller); -void notrace trace_hardirqs_off_caller(unsigned long caller_addr) +void trace_hardirqs_off_caller(unsigned long caller_addr) { if (!preempt_trace() && irq_trace()) start_critical_timing(CALLER_ADDR0, caller_addr); @@ -348,12 +348,12 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller); #endif /* CONFIG_IRQSOFF_TRACER */ #ifdef CONFIG_PREEMPT_TRACER -void notrace trace_preempt_on(unsigned long a0, unsigned long a1) +void trace_preempt_on(unsigned long a0, unsigned long a1) { stop_critical_timing(a0, a1); } -void notrace trace_preempt_off(unsigned long a0, unsigned long a1) +void trace_preempt_off(unsigned long a0, unsigned long a1) { start_critical_timing(a0, a1); } @@ -395,14 +395,14 @@ static void irqsoff_tracer_ctrl_update(struct trace_array *tr) stop_irqsoff_tracer(tr); } -static void notrace irqsoff_tracer_open(struct 
trace_iterator *iter) +static void irqsoff_tracer_open(struct trace_iterator *iter) { /* stop the trace while dumping */ if (iter->tr->ctrl) stop_irqsoff_tracer(iter->tr); } -static void notrace irqsoff_tracer_close(struct trace_iterator *iter) +static void irqsoff_tracer_close(struct trace_iterator *iter) { if (iter->tr->ctrl) start_irqsoff_tracer(iter->tr); diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 8d656672da9..b738eaca1db 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -17,7 +17,7 @@ static struct trace_array *ctx_trace; static int __read_mostly tracer_enabled; -static void notrace +static void ctx_switch_func(struct task_struct *prev, struct task_struct *next) { struct trace_array *tr = ctx_trace; @@ -57,7 +57,7 @@ void ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) wakeup_sched_switch(prev, next); } -static notrace void sched_switch_reset(struct trace_array *tr) +static void sched_switch_reset(struct trace_array *tr) { int cpu; @@ -67,18 +67,18 @@ static notrace void sched_switch_reset(struct trace_array *tr) tracing_reset(tr->data[cpu]); } -static notrace void start_sched_trace(struct trace_array *tr) +static void start_sched_trace(struct trace_array *tr) { sched_switch_reset(tr); tracer_enabled = 1; } -static notrace void stop_sched_trace(struct trace_array *tr) +static void stop_sched_trace(struct trace_array *tr) { tracer_enabled = 0; } -static notrace void sched_switch_trace_init(struct trace_array *tr) +static void sched_switch_trace_init(struct trace_array *tr) { ctx_trace = tr; @@ -86,7 +86,7 @@ static notrace void sched_switch_trace_init(struct trace_array *tr) start_sched_trace(tr); } -static notrace void sched_switch_trace_reset(struct trace_array *tr) +static void sched_switch_trace_reset(struct trace_array *tr) { if (tr->ctrl) stop_sched_trace(tr); diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 3549e4154f1..662679c78b6 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -27,12 +27,12 @@ static unsigned wakeup_prio = -1; static DEFINE_SPINLOCK(wakeup_lock); -static void notrace __wakeup_reset(struct trace_array *tr); +static void __wakeup_reset(struct trace_array *tr); /* * Should this new latency be reported/recorded? 
*/ -static int notrace report_latency(cycle_t delta) +static int report_latency(cycle_t delta) { if (tracing_thresh) { if (delta < tracing_thresh) @@ -44,7 +44,7 @@ static int notrace report_latency(cycle_t delta) return 1; } -void notrace +void wakeup_sched_switch(struct task_struct *prev, struct task_struct *next) { unsigned long latency = 0, t0 = 0, t1 = 0; @@ -126,7 +126,7 @@ out: atomic_dec(&tr->data[cpu]->disabled); } -static void notrace __wakeup_reset(struct trace_array *tr) +static void __wakeup_reset(struct trace_array *tr) { struct trace_array_cpu *data; int cpu; @@ -147,7 +147,7 @@ static void notrace __wakeup_reset(struct trace_array *tr) wakeup_task = NULL; } -static void notrace wakeup_reset(struct trace_array *tr) +static void wakeup_reset(struct trace_array *tr) { unsigned long flags; @@ -156,7 +156,7 @@ static void notrace wakeup_reset(struct trace_array *tr) spin_unlock_irqrestore(&wakeup_lock, flags); } -static notrace void +static void wakeup_check_start(struct trace_array *tr, struct task_struct *p, struct task_struct *curr) { @@ -201,7 +201,7 @@ out: atomic_dec(&tr->data[cpu]->disabled); } -notrace void +void ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) { if (likely(!tracer_enabled)) @@ -210,7 +210,7 @@ ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) wakeup_check_start(wakeup_trace, wakee, curr); } -notrace void +void ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr) { if (likely(!tracer_enabled)) @@ -219,7 +219,7 @@ ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr) wakeup_check_start(wakeup_trace, wakee, curr); } -static notrace void start_wakeup_tracer(struct trace_array *tr) +static void start_wakeup_tracer(struct trace_array *tr) { wakeup_reset(tr); @@ -237,12 +237,12 @@ static notrace void start_wakeup_tracer(struct trace_array *tr) return; } -static notrace void stop_wakeup_tracer(struct trace_array *tr) +static void stop_wakeup_tracer(struct trace_array *tr) { tracer_enabled = 0; } -static notrace void wakeup_tracer_init(struct trace_array *tr) +static void wakeup_tracer_init(struct trace_array *tr) { wakeup_trace = tr; @@ -250,7 +250,7 @@ static notrace void wakeup_tracer_init(struct trace_array *tr) start_wakeup_tracer(tr); } -static notrace void wakeup_tracer_reset(struct trace_array *tr) +static void wakeup_tracer_reset(struct trace_array *tr) { if (tr->ctrl) { stop_wakeup_tracer(tr); @@ -267,14 +267,14 @@ static void wakeup_tracer_ctrl_update(struct trace_array *tr) stop_wakeup_tracer(tr); } -static void notrace wakeup_tracer_open(struct trace_iterator *iter) +static void wakeup_tracer_open(struct trace_iterator *iter) { /* stop the trace while dumping */ if (iter->tr->ctrl) stop_wakeup_tracer(iter->tr); } -static void notrace wakeup_tracer_close(struct trace_iterator *iter) +static void wakeup_tracer_close(struct trace_iterator *iter) { /* forget about any processes we were recording */ if (iter->tr->ctrl) diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 546307de6e3..85715b86a34 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -3,7 +3,7 @@ #include #include -static notrace inline int trace_valid_entry(struct trace_entry *entry) +static inline int trace_valid_entry(struct trace_entry *entry) { switch (entry->type) { case TRACE_FN: -- cgit v1.2.3-70-g09d2 From 57422797dc009fc83766bcf230d29dbe6e08e21e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:51 +0200 
Subject: ftrace: add wakeup events to sched tracer Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 42 +++++++++++++++++++++++++++++++++++---- kernel/trace/trace.h | 12 +++++++++++ kernel/trace/trace_sched_switch.c | 36 +++++++++++++++++++++++++++++++++ kernel/trace/trace_sched_wakeup.c | 2 +- kernel/trace/trace_selftest.c | 1 + 5 files changed, 88 insertions(+), 5 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f5898051fdd..192c1354a7e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -57,7 +57,7 @@ static struct trace_array max_tr; static DEFINE_PER_CPU(struct trace_array_cpu, max_data); static int tracer_enabled = 1; -static unsigned long trace_nr_entries = 16384UL; +static unsigned long trace_nr_entries = 65536UL; static struct tracer *trace_types __read_mostly; static struct tracer *current_trace __read_mostly; @@ -87,6 +87,7 @@ enum trace_type { TRACE_FN, TRACE_CTX, + TRACE_WAKE, TRACE_SPECIAL, __TRACE_LAST_TYPE @@ -711,6 +712,30 @@ tracing_sched_switch_trace(struct trace_array *tr, wake_up (&trace_wait); } +void +tracing_sched_wakeup_trace(struct trace_array *tr, + struct trace_array_cpu *data, + struct task_struct *wakee, struct task_struct *curr, + unsigned long flags) +{ + struct trace_entry *entry; + unsigned long irq_flags; + + spin_lock_irqsave(&data->lock, irq_flags); + entry = tracing_get_trace_entry(tr, data); + tracing_generic_entry_update(entry, flags); + entry->type = TRACE_WAKE; + entry->ctx.prev_pid = curr->pid; + entry->ctx.prev_prio = curr->prio; + entry->ctx.prev_state = curr->state; + entry->ctx.next_pid = wakee->pid; + entry->ctx.next_prio = wakee->prio; + spin_unlock_irqrestore(&data->lock, irq_flags); + + if (!(trace_flags & TRACE_ITER_BLOCK)) + wake_up(&trace_wait); +} + #ifdef CONFIG_FTRACE static void function_trace_call(unsigned long ip, unsigned long parent_ip) @@ -1183,13 +1208,14 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) trace_seq_puts(s, ")\n"); break; case TRACE_CTX: + case TRACE_WAKE: S = entry->ctx.prev_state < sizeof(state_to_char) ? state_to_char[entry->ctx.prev_state] : 'X'; comm = trace_find_cmdline(entry->ctx.next_pid); - trace_seq_printf(s, " %d:%d:%c --> %d:%d %s\n", + trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d %s\n", entry->ctx.prev_pid, entry->ctx.prev_prio, - S, + S, entry->type == TRACE_CTX ? "==>" : " +", entry->ctx.next_pid, entry->ctx.next_prio, comm); @@ -1256,12 +1282,14 @@ static int print_trace_fmt(struct trace_iterator *iter) return 0; break; case TRACE_CTX: + case TRACE_WAKE: S = entry->ctx.prev_state < sizeof(state_to_char) ? state_to_char[entry->ctx.prev_state] : 'X'; - ret = trace_seq_printf(s, " %d:%d:%c ==> %d:%d\n", + ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d\n", entry->ctx.prev_pid, entry->ctx.prev_prio, S, + entry->type == TRACE_CTX ? "==>" : " +", entry->ctx.next_pid, entry->ctx.next_prio); if (!ret) @@ -1301,8 +1329,11 @@ static int print_raw_fmt(struct trace_iterator *iter) return 0; break; case TRACE_CTX: + case TRACE_WAKE: S = entry->ctx.prev_state < sizeof(state_to_char) ? state_to_char[entry->ctx.prev_state] : 'X'; + if (entry->type == TRACE_WAKE) + S = '+'; ret = trace_seq_printf(s, "%d %d %c %d %d\n", entry->ctx.prev_pid, entry->ctx.prev_prio, @@ -1355,8 +1386,11 @@ static int print_hex_fmt(struct trace_iterator *iter) SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip); break; case TRACE_CTX: + case TRACE_WAKE: S = entry->ctx.prev_state < sizeof(state_to_char) ? 
state_to_char[entry->ctx.prev_state] : 'X'; + if (entry->type == TRACE_WAKE) + S = '+'; SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_pid); SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_prio); SEQ_PUT_HEX_FIELD_RET(s, S); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 2b7352bf1ce..90e0ba0f6eb 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -164,6 +164,12 @@ void tracing_sched_switch_trace(struct trace_array *tr, struct task_struct *next, unsigned long flags); void tracing_record_cmdline(struct task_struct *tsk); + +void tracing_sched_wakeup_trace(struct trace_array *tr, + struct trace_array_cpu *data, + struct task_struct *wakee, + struct task_struct *cur, + unsigned long flags); void trace_special(struct trace_array *tr, struct trace_array_cpu *data, unsigned long arg1, @@ -194,11 +200,17 @@ extern cycle_t ftrace_now(int cpu); #ifdef CONFIG_SCHED_TRACER extern void wakeup_sched_switch(struct task_struct *prev, struct task_struct *next); +extern void +wakeup_sched_wakeup(struct task_struct *wakee, struct task_struct *curr); #else static inline void wakeup_sched_switch(struct task_struct *prev, struct task_struct *next) { } +static inline void +wakeup_sched_wakeup(struct task_struct *wakee, struct task_struct *curr) +{ +} #endif #ifdef CONFIG_CONTEXT_SWITCH_TRACER diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index b738eaca1db..8b1cf1a3aee 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -41,6 +41,29 @@ ctx_switch_func(struct task_struct *prev, struct task_struct *next) local_irq_restore(flags); } +static void wakeup_func(struct task_struct *wakee, struct task_struct *curr) +{ + struct trace_array *tr = ctx_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + + if (!tracer_enabled) + return; + + local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) + tracing_sched_wakeup_trace(tr, data, wakee, curr, flags); + + atomic_dec(&data->disabled); + local_irq_restore(flags); +} + void ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) { tracing_record_cmdline(prev); @@ -57,6 +80,19 @@ void ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) wakeup_sched_switch(prev, next); } +void +ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) +{ + tracing_record_cmdline(curr); + + wakeup_func(wakee, curr); + + /* + * Chain to the wakeup tracer (this is a NOP if disabled): + */ + wakeup_sched_wakeup(wakee, curr); +} + static void sched_switch_reset(struct trace_array *tr) { int cpu; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 662679c78b6..87fa7b253b5 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -202,7 +202,7 @@ out: } void -ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) +wakeup_sched_wakeup(struct task_struct *wakee, struct task_struct *curr) { if (likely(!tracer_enabled)) return; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 85715b86a34..39dd452647d 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -8,6 +8,7 @@ static inline int trace_valid_entry(struct trace_entry *entry) switch (entry->type) { case TRACE_FN: case TRACE_CTX: + case TRACE_WAKE: return 1; } return 0; -- cgit v1.2.3-70-g09d2 From 
86387f7ee5d3273ff4859e2c64ce656639b6ca65 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:51 +0200 Subject: ftrace: add stack tracing Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 2 + kernel/trace/Kconfig | 1 + kernel/trace/trace.c | 103 ++++++++++++++++++++++++++++++++++++++++--------- kernel/trace/trace.h | 11 ++++++ 4 files changed, 99 insertions(+), 18 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 08fbef1744c..0d3714e7110 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -93,6 +93,7 @@ static inline void tracer_disable(void) # define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3)) # define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4)) # define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5)) +# define CALLER_ADDR6 ((unsigned long)__builtin_return_address(6)) #else # define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) # define CALLER_ADDR1 0UL @@ -100,6 +101,7 @@ static inline void tracer_disable(void) # define CALLER_ADDR3 0UL # define CALLER_ADDR4 0UL # define CALLER_ADDR5 0UL +# define CALLER_ADDR6 0UL #endif #ifdef CONFIG_IRQSOFF_TRACER diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 3f73a171024..eb1988ed84b 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -10,6 +10,7 @@ config TRACER_MAX_TRACE config TRACING bool select DEBUG_FS + select STACKTRACE config FTRACE bool "Kernel Function Tracer" diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 192c1354a7e..b4b1b4fe99f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -28,6 +28,8 @@ #include #include +#include + #include "trace.h" unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; @@ -88,6 +90,7 @@ enum trace_type { TRACE_FN, TRACE_CTX, TRACE_WAKE, + TRACE_STACK, TRACE_SPECIAL, __TRACE_LAST_TYPE @@ -109,6 +112,7 @@ enum trace_iterator_flags { TRACE_ITER_HEX = 0x20, TRACE_ITER_BIN = 0x40, TRACE_ITER_BLOCK = 0x80, + TRACE_ITER_STACKTRACE = 0x100, }; #define TRACE_ITER_SYM_MASK \ @@ -124,10 +128,11 @@ static const char *trace_options[] = { "hex", "bin", "block", + "stacktrace", NULL }; -static unsigned trace_flags; +static unsigned trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_STACKTRACE; static DEFINE_SPINLOCK(ftrace_max_lock); @@ -657,7 +662,7 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, spin_unlock_irqrestore(&data->lock, irq_flags); if (!(trace_flags & TRACE_ITER_BLOCK)) - wake_up (&trace_wait); + wake_up(&trace_wait); } void @@ -685,13 +690,39 @@ trace_special(struct trace_array *tr, struct trace_array_cpu *data, spin_unlock_irqrestore(&data->lock, irq_flags); if (!(trace_flags & TRACE_ITER_BLOCK)) - wake_up (&trace_wait); + wake_up(&trace_wait); +} + +void __trace_stack(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long flags, + int skip) +{ + struct trace_entry *entry; + struct stack_trace trace; + + if (!(trace_flags & TRACE_ITER_STACKTRACE)) + return; + + entry = tracing_get_trace_entry(tr, data); + tracing_generic_entry_update(entry, flags); + entry->type = TRACE_STACK; + + memset(&entry->stack, 0, sizeof(entry->stack)); + + trace.nr_entries = 0; + trace.max_entries = FTRACE_STACK_ENTRIES; + trace.skip = skip; + trace.entries = entry->stack.caller; + + save_stack_trace(&trace); } void tracing_sched_switch_trace(struct trace_array *tr, struct trace_array_cpu *data, - struct task_struct *prev, struct task_struct 
*next, + struct task_struct *prev, + struct task_struct *next, unsigned long flags) { struct trace_entry *entry; @@ -706,16 +737,18 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->ctx.prev_state = prev->state; entry->ctx.next_pid = next->pid; entry->ctx.next_prio = next->prio; + __trace_stack(tr, data, flags, 4); spin_unlock_irqrestore(&data->lock, irq_flags); if (!(trace_flags & TRACE_ITER_BLOCK)) - wake_up (&trace_wait); + wake_up(&trace_wait); } void tracing_sched_wakeup_trace(struct trace_array *tr, struct trace_array_cpu *data, - struct task_struct *wakee, struct task_struct *curr, + struct task_struct *wakee, + struct task_struct *curr, unsigned long flags) { struct trace_entry *entry; @@ -730,6 +763,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->ctx.prev_state = curr->state; entry->ctx.next_pid = wakee->pid; entry->ctx.next_prio = wakee->prio; + __trace_stack(tr, data, flags, 5); spin_unlock_irqrestore(&data->lock, irq_flags); if (!(trace_flags & TRACE_ITER_BLOCK)) @@ -1179,6 +1213,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) unsigned long rel_usecs; char *comm; int S; + int i; if (!next_entry) next_entry = entry; @@ -1197,8 +1232,10 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) abs_usecs % 1000, rel_usecs/1000, rel_usecs % 1000); } else { - lat_print_generic(s, entry, cpu); - lat_print_timestamp(s, abs_usecs, rel_usecs); + if (entry->type != TRACE_STACK) { + lat_print_generic(s, entry, cpu); + lat_print_timestamp(s, abs_usecs, rel_usecs); + } } switch (entry->type) { case TRACE_FN: @@ -1226,6 +1263,14 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) entry->special.arg2, entry->special.arg3); break; + case TRACE_STACK: + for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { + if (i) + trace_seq_puts(s, " <= "); + seq_print_ip_sym(s, entry->stack.caller[i], sym_flags); + } + trace_seq_puts(s, "\n"); + break; default: trace_seq_printf(s, "Unknown type %d\n", entry->type); } @@ -1241,8 +1286,9 @@ static int print_trace_fmt(struct trace_iterator *iter) unsigned long long t; unsigned long secs; char *comm; - int S; int ret; + int S; + int i; entry = iter->ent; @@ -1252,15 +1298,17 @@ static int print_trace_fmt(struct trace_iterator *iter) usec_rem = do_div(t, 1000000ULL); secs = (unsigned long)t; - ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); - if (!ret) - return 0; - ret = trace_seq_printf(s, "[%02d] ", iter->cpu); - if (!ret) - return 0; - ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem); - if (!ret) - return 0; + if (entry->type != TRACE_STACK) { + ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); + if (!ret) + return 0; + ret = trace_seq_printf(s, "[%02d] ", iter->cpu); + if (!ret) + return 0; + ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem); + if (!ret) + return 0; + } switch (entry->type) { case TRACE_FN: @@ -1303,6 +1351,22 @@ static int print_trace_fmt(struct trace_iterator *iter) if (!ret) return 0; break; + case TRACE_STACK: + for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { + if (i) { + ret = trace_seq_puts(s, " <= "); + if (!ret) + return 0; + } + ret = seq_print_ip_sym(s, entry->stack.caller[i], + sym_flags); + if (!ret) + return 0; + } + ret = trace_seq_puts(s, "\n"); + if (!ret) + return 0; + break; } return 1; } @@ -1344,6 +1408,7 @@ static int print_raw_fmt(struct trace_iterator *iter) return 0; break; case TRACE_SPECIAL: + case TRACE_STACK: ret = trace_seq_printf(s, " %lx %lx %lx\n", entry->special.arg1, 
entry->special.arg2, @@ -1399,6 +1464,7 @@ static int print_hex_fmt(struct trace_iterator *iter) SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip); break; case TRACE_SPECIAL: + case TRACE_STACK: SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg1); SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg2); SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg3); @@ -1433,6 +1499,7 @@ static int print_bin_fmt(struct trace_iterator *iter) SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio); break; case TRACE_SPECIAL: + case TRACE_STACK: SEQ_PUT_FIELD_RET(s, entry->special.arg1); SEQ_PUT_FIELD_RET(s, entry->special.arg2); SEQ_PUT_FIELD_RET(s, entry->special.arg3); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 90e0ba0f6eb..387bdcf45e2 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -34,6 +34,16 @@ struct special_entry { unsigned long arg3; }; +/* + * Stack-trace entry: + */ + +#define FTRACE_STACK_ENTRIES 5 + +struct stack_entry { + unsigned long caller[FTRACE_STACK_ENTRIES]; +}; + /* * The trace entry - the most basic unit of tracing. This is what * is printed in the end as a single line in the trace output, such as: @@ -51,6 +61,7 @@ struct trace_entry { struct ftrace_entry fn; struct ctx_switch_entry ctx; struct special_entry special; + struct stack_entry stack; }; }; -- cgit v1.2.3-70-g09d2 From 4c1f4d4f0175129934d5dbc19a39296430937a05 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:51 +0200 Subject: ftrace: make nostacktrace the default Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b4b1b4fe99f..0e4b7119e26 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -132,7 +132,7 @@ static const char *trace_options[] = { NULL }; -static unsigned trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_STACKTRACE; +static unsigned trace_flags = TRACE_ITER_PRINT_PARENT; static DEFINE_SPINLOCK(ftrace_max_lock); -- cgit v1.2.3-70-g09d2 From 4e65551905fb0300ae7e667cbaa41ee2e3f29a13 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: sched tracer, trace full rbtree Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 32 ++++++++++++++++------- kernel/sched.c | 35 ++++++++++++++++++++++--- kernel/trace/trace.c | 55 ++++++++++++++++----------------------- kernel/trace/trace.h | 14 ++++++++++ kernel/trace/trace_sched_switch.c | 24 +++++++++++------ 5 files changed, 108 insertions(+), 52 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index 05744f9cb09..652d380ae56 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2119,20 +2119,34 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) #ifdef CONFIG_CONTEXT_SWITCH_TRACER extern void -ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next); +ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next); +extern void +ftrace_wake_up_task(void *rq, struct task_struct *wakee, + struct task_struct *curr); +extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data); +extern void +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3); #else static inline void -ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) +ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct 
*next) +{ +} +static inline void +sched_trace_special(unsigned long p1, unsigned long p2, unsigned long p3) +{ +} +static inline void +ftrace_wake_up_task(void *rq, struct task_struct *wakee, + struct task_struct *curr) +{ +} +static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) { } -#endif - -#ifdef CONFIG_SCHED_TRACER -extern void -ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr); -#else static inline void -ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3) { } #endif diff --git a/kernel/sched.c b/kernel/sched.c index 53ab1174664..b9208a0e33a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2394,6 +2394,35 @@ static int sched_balance_self(int cpu, int flag) #endif /* CONFIG_SMP */ +#ifdef CONFIG_CONTEXT_SWITCH_TRACER + +void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) +{ + struct sched_entity *se; + struct task_struct *p; + struct rb_node *curr; + struct rq *rq = __rq; + + curr = first_fair(&rq->cfs); + if (!curr) + return; + + while (curr) { + se = rb_entry(curr, struct sched_entity, run_node); + if (!entity_is_task(se)) + continue; + + p = task_of(se); + + __trace_special(__tr, __data, + p->pid, p->se.vruntime, p->se.sum_exec_runtime); + + curr = rb_next(curr); + } +} + +#endif + /*** * try_to_wake_up - wake up a thread * @p: the to-be-woken-up thread @@ -2468,7 +2497,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) out_activate: #endif /* CONFIG_SMP */ - ftrace_wake_up_task(p, rq->curr); + ftrace_wake_up_task(rq, p, rq->curr); schedstat_inc(p, se.nr_wakeups); if (sync) schedstat_inc(p, se.nr_wakeups_sync); @@ -2613,7 +2642,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) p->sched_class->task_new(rq, p); inc_nr_running(rq); } - ftrace_wake_up_task(p, rq->curr); + ftrace_wake_up_task(rq, p, rq->curr); check_preempt_curr(rq, p); #ifdef CONFIG_SMP if (p->sched_class->task_wake_up) @@ -2786,7 +2815,7 @@ context_switch(struct rq *rq, struct task_struct *prev, struct mm_struct *mm, *oldmm; prepare_task_switch(rq, prev, next); - ftrace_ctx_switch(prev, next); + ftrace_ctx_switch(rq, prev, next); mm = next->mm; oldmm = prev->active_mm; /* diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0e4b7119e26..65173b14b91 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -66,7 +66,18 @@ static struct tracer *current_trace __read_mostly; static int max_tracer_type_len; static DEFINE_MUTEX(trace_types_lock); -static DECLARE_WAIT_QUEUE_HEAD (trace_wait); +static DECLARE_WAIT_QUEUE_HEAD(trace_wait); + +unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; + +/* + * FIXME: where should this be called? 
+ */ +void trace_wake_up(void) +{ + if (!(trace_flags & TRACE_ITER_BLOCK)) + wake_up(&trace_wait); +} #define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry)) @@ -103,18 +114,6 @@ enum trace_flag_type { TRACE_FLAG_SOFTIRQ = 0x08, }; -enum trace_iterator_flags { - TRACE_ITER_PRINT_PARENT = 0x01, - TRACE_ITER_SYM_OFFSET = 0x02, - TRACE_ITER_SYM_ADDR = 0x04, - TRACE_ITER_VERBOSE = 0x08, - TRACE_ITER_RAW = 0x10, - TRACE_ITER_HEX = 0x20, - TRACE_ITER_BIN = 0x40, - TRACE_ITER_BLOCK = 0x80, - TRACE_ITER_STACKTRACE = 0x100, -}; - #define TRACE_ITER_SYM_MASK \ (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR) @@ -132,8 +131,6 @@ static const char *trace_options[] = { NULL }; -static unsigned trace_flags = TRACE_ITER_PRINT_PARENT; - static DEFINE_SPINLOCK(ftrace_max_lock); /* @@ -660,9 +657,6 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, entry->fn.ip = ip; entry->fn.parent_ip = parent_ip; spin_unlock_irqrestore(&data->lock, irq_flags); - - if (!(trace_flags & TRACE_ITER_BLOCK)) - wake_up(&trace_wait); } void @@ -673,10 +667,14 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, trace_function(tr, data, ip, parent_ip, flags); } +#ifdef CONFIG_CONTEXT_SWITCH_TRACER + void -trace_special(struct trace_array *tr, struct trace_array_cpu *data, - unsigned long arg1, unsigned long arg2, unsigned long arg3) +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3) { + struct trace_array_cpu *data = __data; + struct trace_array *tr = __tr; struct trace_entry *entry; unsigned long irq_flags; @@ -688,11 +686,10 @@ trace_special(struct trace_array *tr, struct trace_array_cpu *data, entry->special.arg2 = arg2; entry->special.arg3 = arg3; spin_unlock_irqrestore(&data->lock, irq_flags); - - if (!(trace_flags & TRACE_ITER_BLOCK)) - wake_up(&trace_wait); } +#endif + void __trace_stack(struct trace_array *tr, struct trace_array_cpu *data, unsigned long flags, @@ -739,9 +736,6 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->ctx.next_prio = next->prio; __trace_stack(tr, data, flags, 4); spin_unlock_irqrestore(&data->lock, irq_flags); - - if (!(trace_flags & TRACE_ITER_BLOCK)) - wake_up(&trace_wait); } void @@ -765,9 +759,6 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->ctx.next_prio = wakee->prio; __trace_stack(tr, data, flags, 5); spin_unlock_irqrestore(&data->lock, irq_flags); - - if (!(trace_flags & TRACE_ITER_BLOCK)) - wake_up(&trace_wait); } #ifdef CONFIG_FTRACE @@ -1258,7 +1249,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) comm); break; case TRACE_SPECIAL: - trace_seq_printf(s, " %lx %lx %lx\n", + trace_seq_printf(s, " %ld %ld %ld\n", entry->special.arg1, entry->special.arg2, entry->special.arg3); @@ -1344,7 +1335,7 @@ static int print_trace_fmt(struct trace_iterator *iter) return 0; break; case TRACE_SPECIAL: - ret = trace_seq_printf(s, " %lx %lx %lx\n", + ret = trace_seq_printf(s, " %ld %ld %ld\n", entry->special.arg1, entry->special.arg2, entry->special.arg3); @@ -1409,7 +1400,7 @@ static int print_raw_fmt(struct trace_iterator *iter) break; case TRACE_SPECIAL: case TRACE_STACK: - ret = trace_seq_printf(s, " %lx %lx %lx\n", + ret = trace_seq_printf(s, " %ld %ld %ld\n", entry->special.arg1, entry->special.arg2, entry->special.arg3); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 387bdcf45e2..75e23747567 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -274,4 +274,18 @@ extern int 
trace_selftest_startup_sched_switch(struct tracer *trace, extern void *head_page(struct trace_array_cpu *data); +extern unsigned long trace_flags; + +enum trace_iterator_flags { + TRACE_ITER_PRINT_PARENT = 0x01, + TRACE_ITER_SYM_OFFSET = 0x02, + TRACE_ITER_SYM_ADDR = 0x04, + TRACE_ITER_VERBOSE = 0x08, + TRACE_ITER_RAW = 0x10, + TRACE_ITER_HEX = 0x20, + TRACE_ITER_BIN = 0x40, + TRACE_ITER_BLOCK = 0x80, + TRACE_ITER_STACKTRACE = 0x100, +}; + #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 8b1cf1a3aee..12658b3f2b2 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -18,7 +18,7 @@ static struct trace_array *ctx_trace; static int __read_mostly tracer_enabled; static void -ctx_switch_func(struct task_struct *prev, struct task_struct *next) +ctx_switch_func(void *__rq, struct task_struct *prev, struct task_struct *next) { struct trace_array *tr = ctx_trace; struct trace_array_cpu *data; @@ -34,14 +34,17 @@ ctx_switch_func(struct task_struct *prev, struct task_struct *next) data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); - if (likely(disabled == 1)) + if (likely(disabled == 1)) { tracing_sched_switch_trace(tr, data, prev, next, flags); + ftrace_all_fair_tasks(__rq, tr, data); + } atomic_dec(&data->disabled); local_irq_restore(flags); } -static void wakeup_func(struct task_struct *wakee, struct task_struct *curr) +static void +wakeup_func(void *__rq, struct task_struct *wakee, struct task_struct *curr) { struct trace_array *tr = ctx_trace; struct trace_array_cpu *data; @@ -57,14 +60,18 @@ static void wakeup_func(struct task_struct *wakee, struct task_struct *curr) data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); - if (likely(disabled == 1)) + if (likely(disabled == 1)) { tracing_sched_wakeup_trace(tr, data, wakee, curr, flags); + ftrace_all_fair_tasks(__rq, tr, data); + } atomic_dec(&data->disabled); local_irq_restore(flags); } -void ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) +void +ftrace_ctx_switch(void *__rq, struct task_struct *prev, + struct task_struct *next) { tracing_record_cmdline(prev); @@ -72,7 +79,7 @@ void ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) * If tracer_switch_func only points to the local * switch func, it still needs the ptr passed to it. 
*/ - ctx_switch_func(prev, next); + ctx_switch_func(__rq, prev, next); /* * Chain to the wakeup tracer (this is a NOP if disabled): @@ -81,11 +88,12 @@ void ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) } void -ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) +ftrace_wake_up_task(void *__rq, struct task_struct *wakee, + struct task_struct *curr) { tracing_record_cmdline(curr); - wakeup_func(wakee, curr); + wakeup_func(__rq, wakee, curr); /* * Chain to the wakeup tracer (this is a NOP if disabled): -- cgit v1.2.3-70-g09d2 From 017730c11241e26577673eb9d957cfc66172ea91 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: fix wakeups Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 2 ++ kernel/sched.c | 18 ++++++++++++++++++ kernel/trace/trace.c | 15 +++++++++++---- 3 files changed, 31 insertions(+), 4 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index 652d380ae56..a3970b56375 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -246,6 +246,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern void init_idle_bootup_task(struct task_struct *idle); +extern int runqueue_is_locked(void); + extern cpumask_t nohz_cpu_mask; #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) extern int select_nohz_load_balancer(int cpu); diff --git a/kernel/sched.c b/kernel/sched.c index 673b588b713..9ca4a2e6a23 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -642,6 +642,24 @@ static inline void update_rq_clock(struct rq *rq) # define const_debug static const #endif +/** + * runqueue_is_locked + * + * Returns true if the current cpu runqueue is locked. + * This interface allows printk to be called with the runqueue lock + * held and know whether or not it is OK to wake up the klogd. + */ +int runqueue_is_locked(void) +{ + int cpu = get_cpu(); + struct rq *rq = cpu_rq(cpu); + int ret; + + ret = spin_is_locked(&rq->lock); + put_cpu(); + return ret; +} + /* * Debugging: various feature bits */ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 65173b14b91..2ca9d66aa74 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -70,12 +70,13 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; -/* - * FIXME: where should this be called? 
- */ void trace_wake_up(void) { - if (!(trace_flags & TRACE_ITER_BLOCK)) + /* + * The runqueue_is_locked() can fail, but this is the best we + * have for now: + */ + if (!(trace_flags & TRACE_ITER_BLOCK) && !runqueue_is_locked()) wake_up(&trace_wait); } @@ -657,6 +658,8 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, entry->fn.ip = ip; entry->fn.parent_ip = parent_ip; spin_unlock_irqrestore(&data->lock, irq_flags); + + trace_wake_up(); } void @@ -686,6 +689,8 @@ __trace_special(void *__tr, void *__data, entry->special.arg2 = arg2; entry->special.arg3 = arg3; spin_unlock_irqrestore(&data->lock, irq_flags); + + trace_wake_up(); } #endif @@ -759,6 +764,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->ctx.next_prio = wakee->prio; __trace_stack(tr, data, flags, 5); spin_unlock_irqrestore(&data->lock, irq_flags); + + trace_wake_up(); } #ifdef CONFIG_FTRACE -- cgit v1.2.3-70-g09d2 From 1a3c3034336320554a3342572dae98d69e054fc7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: fix __trace_special() Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 20 ++++++++++++-------- kernel/trace/trace.c | 4 ---- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index a3970b56375..5b186bed54b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2119,6 +2119,18 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) } #endif +#ifdef CONFIG_TRACING +extern void +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3); +#else +static inline void +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ +} +#endif + #ifdef CONFIG_CONTEXT_SWITCH_TRACER extern void ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next); @@ -2126,9 +2138,6 @@ extern void ftrace_wake_up_task(void *rq, struct task_struct *wakee, struct task_struct *curr); extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data); -extern void -__trace_special(void *__tr, void *__data, - unsigned long arg1, unsigned long arg2, unsigned long arg3); #else static inline void ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next) @@ -2146,11 +2155,6 @@ ftrace_wake_up_task(void *rq, struct task_struct *wakee, static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) { } -static inline void -__trace_special(void *__tr, void *__data, - unsigned long arg1, unsigned long arg2, unsigned long arg3) -{ -} #endif extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2ca9d66aa74..65d2c0a61ed 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -670,8 +670,6 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, trace_function(tr, data, ip, parent_ip, flags); } -#ifdef CONFIG_CONTEXT_SWITCH_TRACER - void __trace_special(void *__tr, void *__data, unsigned long arg1, unsigned long arg2, unsigned long arg3) @@ -693,8 +691,6 @@ __trace_special(void *__tr, void *__data, trace_wake_up(); } -#endif - void __trace_stack(struct trace_array *tr, struct trace_array_cpu *data, unsigned long flags, -- cgit v1.2.3-70-g09d2 From 4ac3ba41d372e3a9e420b36bc43589662b188a14 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: trace 
scheduler rbtree Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 1 + kernel/trace/trace.h | 1 + kernel/trace/trace_sched_switch.c | 6 ++++-- 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 65d2c0a61ed..06380dc1ebe 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -129,6 +129,7 @@ static const char *trace_options[] = { "bin", "block", "stacktrace", + "sched-tree", NULL }; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 75e23747567..a52015702a2 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -286,6 +286,7 @@ enum trace_iterator_flags { TRACE_ITER_BIN = 0x40, TRACE_ITER_BLOCK = 0x80, TRACE_ITER_STACKTRACE = 0x100, + TRACE_ITER_SCHED_TREE = 0x200, }; #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 12658b3f2b2..5555b906a66 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -36,7 +36,8 @@ ctx_switch_func(void *__rq, struct task_struct *prev, struct task_struct *next) if (likely(disabled == 1)) { tracing_sched_switch_trace(tr, data, prev, next, flags); - ftrace_all_fair_tasks(__rq, tr, data); + if (trace_flags & TRACE_ITER_SCHED_TREE) + ftrace_all_fair_tasks(__rq, tr, data); } atomic_dec(&data->disabled); @@ -62,7 +63,8 @@ wakeup_func(void *__rq, struct task_struct *wakee, struct task_struct *curr) if (likely(disabled == 1)) { tracing_sched_wakeup_trace(tr, data, wakee, curr, flags); - ftrace_all_fair_tasks(__rq, tr, data); + if (trace_flags & TRACE_ITER_SCHED_TREE) + ftrace_all_fair_tasks(__rq, tr, data); } atomic_dec(&data->disabled); -- cgit v1.2.3-70-g09d2 From c7078de1aaf562a31b20984409c38cc1b40fa8a3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: add tracing_cpumask Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 70 insertions(+), 8 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 06380dc1ebe..18f0ab88ca8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -70,6 +70,23 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; +/* + * Only trace on a CPU if the bitmask is set: + */ +static cpumask_t tracing_cpumask __read_mostly = CPU_MASK_ALL; + +/* + * The tracer itself will not take this lock, but still we want + * to provide a consistent cpumask to user-space: + */ +static DEFINE_MUTEX(tracing_cpumask_update_lock); + +/* + * Temporary storage for the character representation of the + * CPU bitmask: + */ +static char mask_str[NR_CPUS]; + void trace_wake_up(void) { /* @@ -1754,9 +1771,49 @@ static struct file_operations tracing_lt_fops = { }; static struct file_operations show_traces_fops = { - .open = show_traces_open, - .read = seq_read, - .release = seq_release, + .open = show_traces_open, + .read = seq_read, + .release = seq_release, +}; + +static ssize_t +tracing_cpumask_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *ppos) +{ + int err; + + count = min(count, (size_t)NR_CPUS); + + mutex_lock(&tracing_cpumask_update_lock); + cpumask_scnprintf(mask_str, NR_CPUS, tracing_cpumask); + err = copy_to_user(ubuf, mask_str, count); + if (err) + count = -EFAULT; + mutex_unlock(&tracing_cpumask_update_lock); + + 
return count; +} + +static ssize_t +tracing_cpumask_write(struct file *filp, const char __user *ubuf, + size_t count, loff_t *ppos) +{ + int err; + + mutex_lock(&tracing_cpumask_update_lock); + err = cpumask_parse_user(ubuf, count, tracing_cpumask); + mutex_unlock(&tracing_cpumask_update_lock); + + if (err) + return err; + + return count; +} + +static struct file_operations tracing_cpumask_fops = { + .open = tracing_open_generic, + .read = tracing_cpumask_read, + .write = tracing_cpumask_write, }; static ssize_t @@ -1837,9 +1894,9 @@ tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf, } static struct file_operations tracing_iter_fops = { - .open = tracing_open_generic, - .read = tracing_iter_ctrl_read, - .write = tracing_iter_ctrl_write, + .open = tracing_open_generic, + .read = tracing_iter_ctrl_read, + .write = tracing_iter_ctrl_write, }; static const char readme_msg[] = @@ -1870,8 +1927,8 @@ tracing_readme_read(struct file *filp, char __user *ubuf, } static struct file_operations tracing_readme_fops = { - .open = tracing_open_generic, - .read = tracing_readme_read, + .open = tracing_open_generic, + .read = tracing_readme_read, }; static ssize_t @@ -2334,6 +2391,11 @@ static __init void tracer_init_debugfs(void) if (!entry) pr_warning("Could not create debugfs 'iter_ctrl' entry\n"); + entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer, + NULL, &tracing_cpumask_fops); + if (!entry) + pr_warning("Could not create debugfs 'tracing_cpumask' entry\n"); + entry = debugfs_create_file("latency_trace", 0444, d_tracer, &global_trace, &tracing_lt_fops); if (!entry) -- cgit v1.2.3-70-g09d2 From 36dfe9252bd4c9b55e8454363fb7e444c92c5030 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: make use of tracing_cpumask Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 95 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 64 insertions(+), 31 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 18f0ab88ca8..3ae17e254fc 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -70,23 +70,6 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; -/* - * Only trace on a CPU if the bitmask is set: - */ -static cpumask_t tracing_cpumask __read_mostly = CPU_MASK_ALL; - -/* - * The tracer itself will not take this lock, but still we want - * to provide a consistent cpumask to user-space: - */ -static DEFINE_MUTEX(tracing_cpumask_update_lock); - -/* - * Temporary storage for the character representation of the - * CPU bitmask: - */ -static char mask_str[NR_CPUS]; - void trace_wake_up(void) { /* @@ -1776,19 +1759,46 @@ static struct file_operations show_traces_fops = { .release = seq_release, }; +/* + * Only trace on a CPU if the bitmask is set: + */ +static cpumask_t tracing_cpumask = CPU_MASK_ALL; + +/* + * When tracing/tracing_cpu_mask is modified then this holds + * the new bitmask we are about to install: + */ +static cpumask_t tracing_cpumask_new; + +/* + * The tracer itself will not take this lock, but still we want + * to provide a consistent cpumask to user-space: + */ +static DEFINE_MUTEX(tracing_cpumask_update_lock); + +/* + * Temporary storage for the character representation of the + * CPU bitmask (and one more byte for the newline): + */ +static char mask_str[NR_CPUS + 1]; + static ssize_t tracing_cpumask_read(struct file *filp, char __user *ubuf, size_t count, loff_t 
*ppos) { - int err; - - count = min(count, (size_t)NR_CPUS); + int len; mutex_lock(&tracing_cpumask_update_lock); - cpumask_scnprintf(mask_str, NR_CPUS, tracing_cpumask); - err = copy_to_user(ubuf, mask_str, count); - if (err) - count = -EFAULT; + + len = cpumask_scnprintf(mask_str, count, tracing_cpumask); + if (count - len < 2) { + count = -EINVAL; + goto out_err; + } + len += sprintf(mask_str + len, "\n"); + count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1); + +out_err: mutex_unlock(&tracing_cpumask_update_lock); return count; @@ -1798,16 +1808,40 @@ static ssize_t tracing_cpumask_write(struct file *filp, const char __user *ubuf, size_t count, loff_t *ppos) { - int err; + int err, cpu; mutex_lock(&tracing_cpumask_update_lock); - err = cpumask_parse_user(ubuf, count, tracing_cpumask); - mutex_unlock(&tracing_cpumask_update_lock); - + err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); if (err) - return err; + goto err_unlock; + + spin_lock_irq(&ftrace_max_lock); + for_each_possible_cpu(cpu) { + /* + * Increase/decrease the disabled counter if we are + * about to flip a bit in the cpumask: + */ + if (cpu_isset(cpu, tracing_cpumask) && + !cpu_isset(cpu, tracing_cpumask_new)) { + atomic_inc(&global_trace.data[cpu]->disabled); + } + if (!cpu_isset(cpu, tracing_cpumask) && + cpu_isset(cpu, tracing_cpumask_new)) { + atomic_dec(&global_trace.data[cpu]->disabled); + } + } + spin_unlock_irq(&ftrace_max_lock); + + tracing_cpumask = tracing_cpumask_new; + + mutex_unlock(&tracing_cpumask_update_lock); return count; + +err_unlock: + mutex_unlock(&tracing_cpumask_update_lock); + + return err; } static struct file_operations tracing_cpumask_fops = { @@ -1846,8 +1880,7 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf, r += sprintf(buf + r, "\n"); WARN_ON(r >= len + 2); - r = simple_read_from_buffer(ubuf, cnt, ppos, - buf, r); + r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); kfree(buf); -- cgit v1.2.3-70-g09d2 From 442e544ce52d4415a024920b84fb95c5f9aa0855 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:53 +0200 Subject: ftrace: iter ctrl fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3ae17e254fc..688b4cf72d9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1920,6 +1920,11 @@ tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf, break; } } + /* + * If no option could be set, return an error: + */ + if (!trace_options[i]) + return -EINVAL; filp->f_pos += cnt; -- cgit v1.2.3-70-g09d2 From f29c73fe3404f8799ed57aaf48859e0b55fc071f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:53 +0200 Subject: ftrace: include cpu in stacktrace Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 688b4cf72d9..3a4032492fc 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1227,10 +1227,8 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) abs_usecs % 1000, rel_usecs/1000, rel_usecs % 1000); } else { - if (entry->type != TRACE_STACK) { - lat_print_generic(s, entry, cpu); - lat_print_timestamp(s, abs_usecs, rel_usecs); - } + lat_print_generic(s, entry, cpu); + 
lat_print_timestamp(s, abs_usecs, rel_usecs); } switch (entry->type) { case TRACE_FN: @@ -1293,17 +1291,15 @@ static int print_trace_fmt(struct trace_iterator *iter) usec_rem = do_div(t, 1000000ULL); secs = (unsigned long)t; - if (entry->type != TRACE_STACK) { - ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); - if (!ret) - return 0; - ret = trace_seq_printf(s, "[%02d] ", iter->cpu); - if (!ret) - return 0; - ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem); - if (!ret) - return 0; - } + ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); + if (!ret) + return 0; + ret = trace_seq_printf(s, "[%02d] ", iter->cpu); + if (!ret) + return 0; + ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem); + if (!ret) + return 0; switch (entry->type) { case TRACE_FN: -- cgit v1.2.3-70-g09d2 From 88a4216c3ec4281fc7e6725cc3a3ccd01fb1aa14 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:53 +0200 Subject: ftrace: sched special Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 6 ++++++ kernel/sched_fair.c | 3 +++ kernel/trace/trace.c | 6 +++--- kernel/trace/trace_sched_switch.c | 24 ++++++++++++++++++++++++ 4 files changed, 36 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5b186bed54b..360ca99033d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2138,6 +2138,8 @@ extern void ftrace_wake_up_task(void *rq, struct task_struct *wakee, struct task_struct *curr); extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data); +extern void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); #else static inline void ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next) @@ -2155,6 +2157,10 @@ ftrace_wake_up_task(void *rq, struct task_struct *wakee, static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) { } +static inline void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ +} #endif extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index e24ecd39c4b..dc1856f1079 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1061,6 +1061,8 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq, if (!(this_sd->flags & SD_WAKE_AFFINE)) return 0; + ftrace_special(__LINE__, curr->se.avg_overlap, sync); + ftrace_special(__LINE__, p->se.avg_overlap, -1); /* * If the currently running task will sleep within * a reasonable amount of time then attract this newly @@ -1238,6 +1240,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p) if (unlikely(se == pse)) return; + ftrace_special(__LINE__, p->pid, se->last_wakeup); cfs_rq_of(pse)->next = pse; /* diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3a4032492fc..b87a2641489 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1251,7 +1251,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) comm); break; case TRACE_SPECIAL: - trace_seq_printf(s, " %ld %ld %ld\n", + trace_seq_printf(s, "# %ld %ld %ld\n", entry->special.arg1, entry->special.arg2, entry->special.arg3); @@ -1335,7 +1335,7 @@ static int print_trace_fmt(struct trace_iterator *iter) return 0; break; case TRACE_SPECIAL: - ret = trace_seq_printf(s, " %ld %ld %ld\n", + ret = trace_seq_printf(s, "# %ld %ld %ld\n", entry->special.arg1, entry->special.arg2, 
entry->special.arg3); @@ -1400,7 +1400,7 @@ static int print_raw_fmt(struct trace_iterator *iter) break; case TRACE_SPECIAL: case TRACE_STACK: - ret = trace_seq_printf(s, " %ld %ld %ld\n", + ret = trace_seq_printf(s, "# %ld %ld %ld\n", entry->special.arg1, entry->special.arg2, entry->special.arg3); diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 5a217e86358..bddf676914e 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -103,6 +103,30 @@ ftrace_wake_up_task(void *__rq, struct task_struct *wakee, wakeup_sched_wakeup(wakee, curr); } +void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ + struct trace_array *tr = ctx_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + + if (!tracer_enabled) + return; + + local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) + __trace_special(tr, data, arg1, arg2, arg3); + + atomic_dec(&data->disabled); + local_irq_restore(flags); +} + static void sched_switch_reset(struct trace_array *tr) { int cpu; -- cgit v1.2.3-70-g09d2 From bac524d3f3dfeffa3a9d44f2c64035b88bcaacb4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 12 May 2008 21:20:53 +0200 Subject: ftrace: trace next state Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 35 +++++++++++++++++++++++++---------- kernel/trace/trace.h | 1 + 2 files changed, 26 insertions(+), 10 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b87a2641489..b63fe909f87 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -736,6 +736,7 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->ctx.prev_state = prev->state; entry->ctx.next_pid = next->pid; entry->ctx.next_prio = next->prio; + entry->ctx.next_state = next->state; __trace_stack(tr, data, flags, 4); spin_unlock_irqrestore(&data->lock, irq_flags); } @@ -759,6 +760,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->ctx.prev_state = curr->state; entry->ctx.next_pid = wakee->pid; entry->ctx.next_prio = wakee->prio; + entry->ctx.next_state = wakee->state; __trace_stack(tr, data, flags, 5); spin_unlock_irqrestore(&data->lock, irq_flags); @@ -1207,7 +1209,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) unsigned long abs_usecs; unsigned long rel_usecs; char *comm; - int S; + int S, T; int i; if (!next_entry) @@ -1241,14 +1243,17 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) case TRACE_WAKE: S = entry->ctx.prev_state < sizeof(state_to_char) ? state_to_char[entry->ctx.prev_state] : 'X'; + T = entry->ctx.next_state < sizeof(state_to_char) ? + state_to_char[entry->ctx.next_state] : 'X'; + comm = trace_find_cmdline(entry->ctx.next_pid); - trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d %s\n", + trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c %s\n", entry->ctx.prev_pid, entry->ctx.prev_prio, S, entry->type == TRACE_CTX ? 
"==>" : " +", entry->ctx.next_pid, entry->ctx.next_prio, - comm); + T, comm); break; case TRACE_SPECIAL: trace_seq_printf(s, "# %ld %ld %ld\n", @@ -1280,7 +1285,7 @@ static int print_trace_fmt(struct trace_iterator *iter) unsigned long secs; char *comm; int ret; - int S; + int S, T; int i; entry = iter->ent; @@ -1324,13 +1329,16 @@ static int print_trace_fmt(struct trace_iterator *iter) case TRACE_WAKE: S = entry->ctx.prev_state < sizeof(state_to_char) ? state_to_char[entry->ctx.prev_state] : 'X'; - ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d\n", + T = entry->ctx.next_state < sizeof(state_to_char) ? + state_to_char[entry->ctx.next_state] : 'X'; + ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c\n", entry->ctx.prev_pid, entry->ctx.prev_prio, S, entry->type == TRACE_CTX ? "==>" : " +", entry->ctx.next_pid, - entry->ctx.next_prio); + entry->ctx.next_prio, + T); if (!ret) return 0; break; @@ -1367,7 +1375,7 @@ static int print_raw_fmt(struct trace_iterator *iter) struct trace_seq *s = &iter->seq; struct trace_entry *entry; int ret; - int S; + int S, T; entry = iter->ent; @@ -1387,14 +1395,17 @@ static int print_raw_fmt(struct trace_iterator *iter) case TRACE_WAKE: S = entry->ctx.prev_state < sizeof(state_to_char) ? state_to_char[entry->ctx.prev_state] : 'X'; + T = entry->ctx.next_state < sizeof(state_to_char) ? + state_to_char[entry->ctx.next_state] : 'X'; if (entry->type == TRACE_WAKE) S = '+'; - ret = trace_seq_printf(s, "%d %d %c %d %d\n", + ret = trace_seq_printf(s, "%d %d %c %d %d %c\n", entry->ctx.prev_pid, entry->ctx.prev_prio, S, entry->ctx.next_pid, - entry->ctx.next_prio); + entry->ctx.next_prio, + T); if (!ret) return 0; break; @@ -1428,7 +1439,7 @@ static int print_hex_fmt(struct trace_iterator *iter) struct trace_seq *s = &iter->seq; unsigned char newline = '\n'; struct trace_entry *entry; - int S; + int S, T; entry = iter->ent; @@ -1445,6 +1456,8 @@ static int print_hex_fmt(struct trace_iterator *iter) case TRACE_WAKE: S = entry->ctx.prev_state < sizeof(state_to_char) ? state_to_char[entry->ctx.prev_state] : 'X'; + T = entry->ctx.next_state < sizeof(state_to_char) ? + state_to_char[entry->ctx.next_state] : 'X'; if (entry->type == TRACE_WAKE) S = '+'; SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_pid); @@ -1453,6 +1466,7 @@ static int print_hex_fmt(struct trace_iterator *iter) SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_pid); SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_prio); SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip); + SEQ_PUT_HEX_FIELD_RET(s, T); break; case TRACE_SPECIAL: case TRACE_STACK: @@ -1488,6 +1502,7 @@ static int print_bin_fmt(struct trace_iterator *iter) SEQ_PUT_FIELD_RET(s, entry->ctx.prev_state); SEQ_PUT_FIELD_RET(s, entry->ctx.next_pid); SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio); + SEQ_PUT_FIELD_RET(s, entry->ctx.next_state); break; case TRACE_SPECIAL: case TRACE_STACK: diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index a52015702a2..96951a8d09a 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -23,6 +23,7 @@ struct ctx_switch_entry { unsigned char prev_state; unsigned int next_pid; unsigned char next_prio; + unsigned char next_state; }; /* -- cgit v1.2.3-70-g09d2 From 8f96da02c14d722ad9a3713cd7273ce28c9036ad Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:54 +0200 Subject: ftrace: remove wakeup from function trace trace_function is called by mcount and calling wake_up from that can have unpredictable results. This patch removes the wakeup from trace_function. 
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b63fe909f87..736dcfb3ed0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -659,8 +659,6 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, entry->fn.ip = ip; entry->fn.parent_ip = parent_ip; spin_unlock_irqrestore(&data->lock, irq_flags); - - trace_wake_up(); } void -- cgit v1.2.3-70-g09d2 From 92205c2343527a863d660360599a4bf8cede77b0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:55 +0200 Subject: ftrace: user raw_spin_lock in tracing Lock debugging enabled cause huge performance problems for tracing. Having the lock verification happening for every function that is called because mcount calls spin_lock can cripple the system. This patch converts the spin_locks used by ftrace into raw_spin_locks. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 51 ++++++++++++++++++++++++++++++--------------------- kernel/trace/trace.h | 2 +- 2 files changed, 31 insertions(+), 22 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 736dcfb3ed0..3009aafa4dd 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -133,7 +133,8 @@ static const char *trace_options[] = { NULL }; -static DEFINE_SPINLOCK(ftrace_max_lock); +static raw_spinlock_t ftrace_max_lock = + (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; /* * Copy the new maximum trace into the separate maximum-trace @@ -335,7 +336,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) int i; WARN_ON_ONCE(!irqs_disabled()); - spin_lock(&ftrace_max_lock); + __raw_spin_lock(&ftrace_max_lock); /* clear out all the previous traces */ for_each_possible_cpu(i) { data = tr->data[i]; @@ -344,7 +345,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) } __update_max_tr(tr, tsk, cpu); - spin_unlock(&ftrace_max_lock); + __raw_spin_unlock(&ftrace_max_lock); } /** @@ -360,7 +361,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) int i; WARN_ON_ONCE(!irqs_disabled()); - spin_lock(&ftrace_max_lock); + __raw_spin_lock(&ftrace_max_lock); for_each_possible_cpu(i) tracing_reset(max_tr.data[i]); @@ -368,7 +369,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) tracing_reset(data); __update_max_tr(tr, tsk, cpu); - spin_unlock(&ftrace_max_lock); + __raw_spin_unlock(&ftrace_max_lock); } int register_tracer(struct tracer *type) @@ -652,13 +653,15 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, struct trace_entry *entry; unsigned long irq_flags; - spin_lock_irqsave(&data->lock, irq_flags); + raw_local_irq_save(irq_flags); + __raw_spin_lock(&data->lock); entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, flags); entry->type = TRACE_FN; entry->fn.ip = ip; entry->fn.parent_ip = parent_ip; - spin_unlock_irqrestore(&data->lock, irq_flags); + __raw_spin_unlock(&data->lock); + raw_local_irq_restore(irq_flags); } void @@ -678,14 +681,16 @@ __trace_special(void *__tr, void *__data, struct trace_entry *entry; unsigned long irq_flags; - spin_lock_irqsave(&data->lock, irq_flags); + raw_local_irq_save(irq_flags); + __raw_spin_lock(&data->lock); entry = tracing_get_trace_entry(tr, data); 
tracing_generic_entry_update(entry, 0); entry->type = TRACE_SPECIAL; entry->special.arg1 = arg1; entry->special.arg2 = arg2; entry->special.arg3 = arg3; - spin_unlock_irqrestore(&data->lock, irq_flags); + __raw_spin_unlock(&data->lock); + raw_local_irq_restore(irq_flags); trace_wake_up(); } @@ -725,7 +730,8 @@ tracing_sched_switch_trace(struct trace_array *tr, struct trace_entry *entry; unsigned long irq_flags; - spin_lock_irqsave(&data->lock, irq_flags); + raw_local_irq_save(irq_flags); + __raw_spin_lock(&data->lock); entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, flags); entry->type = TRACE_CTX; @@ -736,7 +742,8 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->ctx.next_prio = next->prio; entry->ctx.next_state = next->state; __trace_stack(tr, data, flags, 4); - spin_unlock_irqrestore(&data->lock, irq_flags); + __raw_spin_unlock(&data->lock); + raw_local_irq_restore(irq_flags); } void @@ -749,7 +756,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr, struct trace_entry *entry; unsigned long irq_flags; - spin_lock_irqsave(&data->lock, irq_flags); + raw_local_irq_save(irq_flags); + __raw_spin_lock(&data->lock); entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, flags); entry->type = TRACE_WAKE; @@ -760,7 +768,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->ctx.next_prio = wakee->prio; entry->ctx.next_state = wakee->state; __trace_stack(tr, data, flags, 5); - spin_unlock_irqrestore(&data->lock, irq_flags); + __raw_spin_unlock(&data->lock); + raw_local_irq_restore(irq_flags); trace_wake_up(); } @@ -1824,7 +1833,8 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, if (err) goto err_unlock; - spin_lock_irq(&ftrace_max_lock); + raw_local_irq_disable(); + __raw_spin_lock(&ftrace_max_lock); for_each_possible_cpu(cpu) { /* * Increase/decrease the disabled counter if we are @@ -1839,7 +1849,8 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, atomic_dec(&global_trace.data[cpu]->disabled); } } - spin_unlock_irq(&ftrace_max_lock); + __raw_spin_unlock(&ftrace_max_lock); + raw_local_irq_enable(); tracing_cpumask = tracing_cpumask_new; @@ -2299,7 +2310,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, for_each_cpu_mask(cpu, mask) { data = iter->tr->data[cpu]; - spin_lock(&data->lock); + __raw_spin_lock(&data->lock); } while (find_next_entry_inc(iter) != NULL) { @@ -2320,7 +2331,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, for_each_cpu_mask(cpu, mask) { data = iter->tr->data[cpu]; - spin_unlock(&data->lock); + __raw_spin_unlock(&data->lock); } for_each_cpu_mask(cpu, mask) { @@ -2538,8 +2549,7 @@ static int trace_alloc_page(void) /* Now that we successfully allocate a page per CPU, add them */ for_each_possible_cpu(i) { data = global_trace.data[i]; - spin_lock_init(&data->lock); - lockdep_set_class(&data->lock, &data->lock_key); + data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; page = list_entry(pages.next, struct page, lru); list_del_init(&page->lru); list_add_tail(&page->lru, &data->trace_pages); @@ -2547,8 +2557,7 @@ static int trace_alloc_page(void) #ifdef CONFIG_TRACER_MAX_TRACE data = max_tr.data[i]; - spin_lock_init(&data->lock); - lockdep_set_class(&data->lock, &data->lock_key); + data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; page = list_entry(pages.next, struct page, lru); list_del_init(&page->lru); list_add_tail(&page->lru, &data->trace_pages); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 98cbfd05d75..25cba28eb9b 
100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -76,7 +76,7 @@ struct trace_entry { struct trace_array_cpu { struct list_head trace_pages; atomic_t disabled; - spinlock_t lock; + raw_spinlock_t lock; struct lock_class_key lock_key; /* these fields get copied into max-trace: */ -- cgit v1.2.3-70-g09d2 From d15f57f23eaba975309a153b23699cd0c0236974 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:56 +0200 Subject: ftrace: fix mutex unlock in trace output If the trace output changes on reading the trace files, there is a chance that the start function will return NULL. If the start function of a sequence returns NULL the stop equivalent is not called. In this case, all locks that are taken must be released even if they are released in the stop function. This patch fixes a case that a mutex was not released on return of NULL in the start sequence function. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3009aafa4dd..ea11f4ebfae 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -964,8 +964,10 @@ static void *s_start(struct seq_file *m, loff_t *pos) mutex_lock(&trace_types_lock); - if (!current_trace || current_trace != iter->trace) + if (!current_trace || current_trace != iter->trace) { + mutex_unlock(&trace_types_lock); return NULL; + } atomic_inc(&trace_record_cmdline_disabled); -- cgit v1.2.3-70-g09d2 From 72829bc3d63cdc592d8f7dd86ad3b3fe8900fb74 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 23 May 2008 21:37:28 +0200 Subject: ftrace: move enums to ftrace.h and make helper function global picked from the mmiotracer patches to distangle the patch queues. Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 24 ++++++++---------------- kernel/trace/trace.h | 15 +++++++++++++++ 2 files changed, 23 insertions(+), 16 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ea11f4ebfae..0eef0503feb 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -37,7 +37,7 @@ unsigned long __read_mostly tracing_thresh; static int tracing_disabled = 1; -static long +long ns2usecs(cycle_t nsec) { nsec += 500; @@ -96,18 +96,6 @@ unsigned long nsecs_to_usecs(unsigned long nsecs) return nsecs / 1000; } -enum trace_type { - __TRACE_FIRST_TYPE = 0, - - TRACE_FN, - TRACE_CTX, - TRACE_WAKE, - TRACE_STACK, - TRACE_SPECIAL, - - __TRACE_LAST_TYPE -}; - enum trace_flag_type { TRACE_FLAG_IRQS_OFF = 0x01, TRACE_FLAG_NEED_RESCHED = 0x02, @@ -190,7 +178,7 @@ void *head_page(struct trace_array_cpu *data) return page_address(page); } -static int +int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) { int len = (PAGE_SIZE - 1) - s->len; @@ -205,7 +193,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) va_end(ap); /* If we can't write it all, don't bother writing anything */ - if (ret > len) + if (ret >= len) return 0; s->len += ret; @@ -638,7 +626,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags) pc = preempt_count(); entry->preempt_count = pc & 0xff; - entry->pid = tsk->pid; + entry->pid = (tsk) ? tsk->pid : 0; entry->t = ftrace_now(raw_smp_processor_id()); entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | ((pc & HARDIRQ_MASK) ? 
TRACE_FLAG_HARDIRQ : 0) | @@ -1541,6 +1529,9 @@ static int trace_empty(struct trace_iterator *iter) static int print_trace_line(struct trace_iterator *iter) { + if (iter->trace && iter->trace->print_line) + return iter->trace->print_line(iter); + if (trace_flags & TRACE_ITER_BIN) return print_bin_fmt(iter); @@ -2162,6 +2153,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) return -ENOMEM; iter->tr = &global_trace; + iter->trace = current_trace; filp->private_data = iter; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 25cba28eb9b..b0ca7473671 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -6,6 +6,18 @@ #include #include +enum trace_type { + __TRACE_FIRST_TYPE = 0, + + TRACE_FN, + TRACE_CTX, + TRACE_WAKE, + TRACE_STACK, + TRACE_SPECIAL, + + __TRACE_LAST_TYPE +}; + /* * Function trace entry - function address and parent function addres: */ @@ -130,6 +142,7 @@ struct tracer { int (*selftest)(struct tracer *trace, struct trace_array *tr); #endif + int (*print_line)(struct trace_iterator *iter); struct tracer *next; int print_max; }; @@ -276,6 +289,8 @@ extern int trace_selftest_startup_sched_switch(struct tracer *trace, #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); +extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...); +extern long ns2usecs(cycle_t nsec); extern unsigned long trace_flags; -- cgit v1.2.3-70-g09d2 From d17d969160c18b631a19c2b34d260691402650f8 Mon Sep 17 00:00:00 2001 From: Ankita Garg Date: Mon, 12 May 2008 21:20:58 +0200 Subject: ftrace: fix conversion of task state to char in latency tracer The conversion of task states to a character in the sched_switch tracer (part of latency tracer infrastructure), seems to be incorrect. We currently do it by indexing into the state_to_char array using the state value. The state values do not map directly into the array index and are thus incorrect. The following patch addresses this issue. This is also what is being done even in the show_task() routine in kernel/sched.c The patch has been compile and run tested. Signed-off-by: Ankita Garg Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0eef0503feb..9197782d15c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1208,6 +1208,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) char *comm; int S, T; int i; + unsigned state; if (!next_entry) next_entry = entry; @@ -1238,11 +1239,11 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) break; case TRACE_CTX: case TRACE_WAKE: - S = entry->ctx.prev_state < sizeof(state_to_char) ? - state_to_char[entry->ctx.prev_state] : 'X'; T = entry->ctx.next_state < sizeof(state_to_char) ? state_to_char[entry->ctx.next_state] : 'X'; + state = entry->ctx.prev_state ? __ffs(entry->ctx.prev_state) + 1 : 0; + S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X'; comm = trace_find_cmdline(entry->ctx.next_pid); trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c %s\n", entry->ctx.prev_pid, -- cgit v1.2.3-70-g09d2 From 8487c23765b6e0444ec6b5f1530766d63fe68e35 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:58 +0200 Subject: ftrace: allow trace_pipe to block on all reads We expect things like "cat" to block on reads to trace_pipe. 
That's what trace_pipe is for. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9197782d15c..fd4ecc29200 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2231,8 +2231,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, start = 0; while (trace_empty(iter)) { - if (!(trace_flags & TRACE_ITER_BLOCK)) - return -EWOULDBLOCK; /* * This is a make-shift waitqueue. The reason we don't use * an actual wait queue is because: -- cgit v1.2.3-70-g09d2 From b5685aede3b7b65e72ddc73b951aa1f70798a614 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:58 +0200 Subject: ftrace: restore iterator trace in pipe read The trace iterator is reset in the read. We still need to restore the tracer that the trace_pipe was opened with. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index fd4ecc29200..d141fc98f3a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2202,6 +2202,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, { struct trace_iterator *iter = filp->private_data; struct trace_array_cpu *data; + struct trace_array *tr = iter->tr; + struct tracer *tracer = iter->trace; static cpumask_t mask; static int start; unsigned long flags; @@ -2274,7 +2276,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, cnt = PAGE_SIZE - 1; memset(iter, 0, sizeof(*iter)); - iter->tr = &global_trace; + iter->tr = tr; + iter->trace = tracer; iter->pos = -1; /* -- cgit v1.2.3-70-g09d2 From 845279972f1736c3463c9cebd1bad92a0a347176 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:58 +0200 Subject: ftrace: return EOF in trace_pipe on change of tracer Break out of while loop with EOF when the current_trace changes. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d141fc98f3a..2af940433e9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2253,6 +2253,9 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, if (signal_pending(current)) return -EINTR; + if (iter->trace != current_trace) + return 0; + /* * We block until we read something and tracing is disabled. * We still block if tracing is disabled, but we have never -- cgit v1.2.3-70-g09d2 From 2dc8f09571a61d9cb3dc47bba6608689dfe15d16 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:58 +0200 Subject: ftrace: trace_pipe implement NONBLOCK This patch implements "NONBLOCK" for trace_pipe. If the trace_pipe is opened with O_NONBLOCK, then the trace_pipe read will not block when buffer is empty. 
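A hedged user-space sketch of the behaviour described above, to make the semantics concrete: with O_NONBLOCK set, reading an empty trace_pipe returns -1 with errno set to EAGAIN instead of blocking. The /debug/tracing mount point is an assumption taken from the surrounding patches; adjust it to wherever debugfs is mounted.

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd = open("/debug/tracing/trace_pipe", O_RDONLY | O_NONBLOCK);

	if (fd < 0) {
		perror("open trace_pipe");
		return 1;
	}

	n = read(fd, buf, sizeof(buf));
	if (n < 0 && errno == EAGAIN)
		printf("buffer empty, read would block\n");
	else if (n >= 0)
		fwrite(buf, 1, (size_t)n, stdout);

	close(fd);
	return 0;
}
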
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2af940433e9..3b4eaf36ed5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2233,6 +2233,10 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, start = 0; while (trace_empty(iter)) { + + if ((filp->f_flags & O_NONBLOCK)) + return -EAGAIN; + /* * This is a make-shift waitqueue. The reason we don't use * an actual wait queue is because: -- cgit v1.2.3-70-g09d2 From a98a3c3fde3ae7614f19758a043691b6f59dac53 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:59 +0200 Subject: ftrace: trace_entries to dynamically change trace buffer size This patch adds /debug/tracing/trace_entries that allows users to see as well as modify the number of trace entries the buffers hold. The number of entries only increments in ENTRIES_PER_PAGE which is calculated by the size of an entry with the number of entries that can fit in a page. The user does not need to use an exact size, but the entries will be rounded to one of the increments. Trying to set the entries to 0 will return with -EINVAL. To avoid race conditions, the modification of the buffer size can only be done when tracing is completely disabled (current_tracer == none). A info message will be printed if a user tries to modify the buffer size when not set to none. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 145 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 137 insertions(+), 8 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3b4eaf36ed5..4723e012151 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -35,6 +35,15 @@ unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; unsigned long __read_mostly tracing_thresh; +/* dummy trace to disable tracing */ +static struct tracer no_tracer __read_mostly = +{ + .name = "none", +}; + +static int trace_alloc_page(void); +static int trace_free_page(void); + static int tracing_disabled = 1; long @@ -2364,6 +2373,70 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, return read; } +static ssize_t +tracing_entries_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct trace_array *tr = filp->private_data; + char buf[64]; + int r; + + r = sprintf(buf, "%lu\n", tr->entries); + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + +static ssize_t +tracing_entries_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + unsigned long val; + char buf[64]; + + if (cnt > 63) + cnt = 63; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + val = simple_strtoul(buf, NULL, 10); + + /* must have at least 1 entry */ + if (!val) + return -EINVAL; + + mutex_lock(&trace_types_lock); + + if (current_trace != &no_tracer) { + cnt = -EBUSY; + pr_info("ftrace: set current_tracer to none" + " before modifying buffer size\n"); + goto out; + } + + if (val > global_trace.entries) { + while (global_trace.entries < val) { + if (trace_alloc_page()) { + cnt = -ENOMEM; + goto out; + } + } + } else { + /* include the number of entries in val (inc of page entries) */ + while (global_trace.entries > val + (ENTRIES_PER_PAGE - 1)) + trace_free_page(); + } + + filp->f_pos += cnt; + + 
out: + max_tr.entries = global_trace.entries; + mutex_unlock(&trace_types_lock); + + return cnt; +} + static struct file_operations tracing_max_lat_fops = { .open = tracing_open_generic, .read = tracing_max_lat_read, @@ -2389,6 +2462,12 @@ static struct file_operations tracing_pipe_fops = { .release = tracing_release_pipe, }; +static struct file_operations tracing_entries_fops = { + .open = tracing_open_generic, + .read = tracing_entries_read, + .write = tracing_entries_write, +}; + #ifdef CONFIG_DYNAMIC_FTRACE static ssize_t @@ -2500,6 +2579,12 @@ static __init void tracer_init_debugfs(void) pr_warning("Could not create debugfs " "'tracing_threash' entry\n"); + entry = debugfs_create_file("trace_entries", 0644, d_tracer, + &global_trace, &tracing_entries_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'tracing_threash' entry\n"); + #ifdef CONFIG_DYNAMIC_FTRACE entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, &ftrace_update_tot_cnt, @@ -2510,12 +2595,6 @@ static __init void tracer_init_debugfs(void) #endif } -/* dummy trace to disable tracing */ -static struct tracer no_tracer __read_mostly = -{ - .name = "none", -}; - static int trace_alloc_page(void) { struct trace_array_cpu *data; @@ -2552,7 +2631,6 @@ static int trace_alloc_page(void) /* Now that we successfully allocate a page per CPU, add them */ for_each_possible_cpu(i) { data = global_trace.data[i]; - data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; page = list_entry(pages.next, struct page, lru); list_del_init(&page->lru); list_add_tail(&page->lru, &data->trace_pages); @@ -2560,7 +2638,6 @@ static int trace_alloc_page(void) #ifdef CONFIG_TRACER_MAX_TRACE data = max_tr.data[i]; - data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; page = list_entry(pages.next, struct page, lru); list_del_init(&page->lru); list_add_tail(&page->lru, &data->trace_pages); @@ -2579,6 +2656,55 @@ static int trace_alloc_page(void) return -ENOMEM; } +static int trace_free_page(void) +{ + struct trace_array_cpu *data; + struct page *page; + struct list_head *p; + int i; + int ret = 0; + + /* free one page from each buffer */ + for_each_possible_cpu(i) { + data = global_trace.data[i]; + p = data->trace_pages.next; + if (p == &data->trace_pages) { + /* should never happen */ + WARN_ON(1); + tracing_disabled = 1; + ret = -1; + break; + } + page = list_entry(p, struct page, lru); + ClearPageLRU(page); + list_del(&page->lru); + __free_page(page); + + tracing_reset(data); + +#ifdef CONFIG_TRACER_MAX_TRACE + data = max_tr.data[i]; + p = data->trace_pages.next; + if (p == &data->trace_pages) { + /* should never happen */ + WARN_ON(1); + tracing_disabled = 1; + ret = -1; + break; + } + page = list_entry(p, struct page, lru); + ClearPageLRU(page); + list_del(&page->lru); + __free_page(page); + + tracing_reset(data); +#endif + } + global_trace.entries -= ENTRIES_PER_PAGE; + + return ret; +} + __init static int tracer_alloc_buffers(void) { struct trace_array_cpu *data; @@ -2609,6 +2735,9 @@ __init static int tracer_alloc_buffers(void) /* use the LRU flag to differentiate the two buffers */ ClearPageLRU(page); + data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + max_tr.data[i]->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + /* Only allocate if we are actually using the max trace */ #ifdef CONFIG_TRACER_MAX_TRACE array = (void *)__get_free_page(GFP_KERNEL); -- cgit v1.2.3-70-g09d2 From 93dcc6ea096c51cc30ad0f6647ed05fead2439bf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 12 May 2008 21:21:00 +0200 
Subject: ftrace: simplify hexprint simplify hex to ascii conversion. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4723e012151..627e39936ea 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -248,19 +248,18 @@ trace_seq_putmem(struct trace_seq *s, void *mem, size_t len) } #define HEX_CHARS 17 +static const char hex2asc[] = "0123456789abcdef"; static int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) { unsigned char hex[HEX_CHARS]; - unsigned char *data; + unsigned char *data = mem; unsigned char byte; int i, j; BUG_ON(len >= HEX_CHARS); - data = mem; - #ifdef __BIG_ENDIAN for (i = 0, j = 0; i < len; i++) { #else @@ -268,22 +267,10 @@ trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) #endif byte = data[i]; - hex[j] = byte & 0x0f; - if (hex[j] >= 10) - hex[j] += 'a' - 10; - else - hex[j] += '0'; - j++; - - hex[j] = byte >> 4; - if (hex[j] >= 10) - hex[j] += 'a' - 10; - else - hex[j] += '0'; - j++; + hex[j++] = hex2asc[byte & 0x0f]; + hex[j++] = hex2asc[byte >> 4]; } - hex[j] = ' '; - j++; + hex[j++] = ' '; return trace_seq_putmem(s, hex, j); } -- cgit v1.2.3-70-g09d2 From afc2abc0ae4265730a0fc48618012193a09a1d10 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:21:00 +0200 Subject: ftrace: cleanups no code changed. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 627e39936ea..d6b60576f99 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1155,12 +1155,12 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) hardirq = entry->flags & TRACE_FLAG_HARDIRQ; softirq = entry->flags & TRACE_FLAG_SOFTIRQ; - if (hardirq && softirq) + if (hardirq && softirq) { trace_seq_putc(s, 'H'); - else { - if (hardirq) + } else { + if (hardirq) { trace_seq_putc(s, 'h'); - else { + } else { if (softirq) trace_seq_putc(s, 's'); else @@ -2177,8 +2177,7 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table) * Always select as readable when in blocking mode */ return POLLIN | POLLRDNORM; - } - else { + } else { if (!trace_empty(iter)) return POLLIN | POLLRDNORM; poll_wait(filp, &trace_wait, poll_table); -- cgit v1.2.3-70-g09d2 From cffae437cdfbc8a5370d38cefbff1dfe34dad6ca Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:21:00 +0200 Subject: ftrace: simple clean ups Andrew Morton mentioned some clean ups that should be done to ftrace. This patch does some of the simple clean ups. 
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d6b60576f99..0a367362ba2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -36,8 +36,7 @@ unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; unsigned long __read_mostly tracing_thresh; /* dummy trace to disable tracing */ -static struct tracer no_tracer __read_mostly = -{ +static struct tracer no_tracer __read_mostly = { .name = "none", }; @@ -1906,8 +1905,8 @@ tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf, int neg = 0; int i; - if (cnt > 63) - cnt = 63; + if (cnt >= sizeof(buf)) + return -EINVAL; if (copy_from_user(&buf, ubuf, cnt)) return -EFAULT; @@ -1999,8 +1998,8 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf, long val; char buf[64]; - if (cnt > 63) - cnt = 63; + if (cnt >= sizeof(buf)) + return -EINVAL; if (copy_from_user(&buf, ubuf, cnt)) return -EFAULT; @@ -2099,10 +2098,10 @@ tracing_max_lat_read(struct file *filp, char __user *ubuf, char buf[64]; int r; - r = snprintf(buf, 64, "%ld\n", + r = snprintf(buf, sizeof(buf), "%ld\n", *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr)); - if (r > 64) - r = 64; + if (r > sizeof(buf)) + r = sizeof(buf); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } @@ -2114,8 +2113,8 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf, long val; char buf[64]; - if (cnt > 63) - cnt = 63; + if (cnt >= sizeof(buf)) + return -EINVAL; if (copy_from_user(&buf, ubuf, cnt)) return -EFAULT; @@ -2378,8 +2377,8 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, unsigned long val; char buf[64]; - if (cnt > 63) - cnt = 63; + if (cnt >= sizeof(buf)) + return -EINVAL; if (copy_from_user(&buf, ubuf, cnt)) return -EFAULT; -- cgit v1.2.3-70-g09d2 From c6caeeb142cd3a03c46107aac45c8effc02bbadb Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:21:00 +0200 Subject: ftrace: replace simple_strtoul with strict_strtoul Andrew Morton suggested using strict_strtoul over simple_strtoul. This patch replaces them in ftrace. 
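As a hedged illustration (the handler name is made up, and this snippet is not taken from the patch), the write-handler pattern this converges on is roughly:

  static ssize_t example_write(struct file *filp, const char __user *ubuf,
                               size_t cnt, loff_t *ppos)
  {
          char buf[64];
          unsigned long val;
          int ret;

          if (cnt >= sizeof(buf))
                  return -EINVAL;
          if (copy_from_user(&buf, ubuf, cnt))
                  return -EFAULT;
          buf[cnt] = 0;

          /*
           * Unlike simple_strtoul(), which silently stops at the first
           * non-digit (so "100x" parses as 100), strict_strtoul() returns
           * a negative error for such input and the write is rejected.
           */
          ret = strict_strtoul(buf, 10, &val);
          if (ret < 0)
                  return ret;

          *ppos += cnt;
          return cnt;
  }

The point of the swap is that malformed input now gets an error back instead of being half-parsed.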
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0a367362ba2..290e9da7aa9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -92,9 +92,16 @@ void trace_wake_up(void) static int __init set_nr_entries(char *str) { + unsigned long nr_entries; + int ret; + if (!str) return 0; - trace_nr_entries = simple_strtoul(str, &str, 0); + ret = strict_strtoul(str, 0, &nr_entries); + /* nr_entries can not be zero */ + if (ret < 0 || nr_entries == 0) + return 0; + trace_nr_entries = nr_entries; return 1; } __setup("trace_entries=", set_nr_entries); @@ -1995,8 +2002,9 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_array *tr = filp->private_data; - long val; char buf[64]; + long val; + int ret; if (cnt >= sizeof(buf)) return -EINVAL; @@ -2006,7 +2014,9 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf, buf[cnt] = 0; - val = simple_strtoul(buf, NULL, 10); + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) + return ret; val = !!val; @@ -2110,8 +2120,9 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { long *ptr = filp->private_data; - long val; char buf[64]; + long val; + int ret; if (cnt >= sizeof(buf)) return -EINVAL; @@ -2121,7 +2132,9 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf, buf[cnt] = 0; - val = simple_strtoul(buf, NULL, 10); + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) + return ret; *ptr = val * 1000; @@ -2376,6 +2389,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, { unsigned long val; char buf[64]; + int ret; if (cnt >= sizeof(buf)) return -EINVAL; @@ -2385,7 +2399,9 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, buf[cnt] = 0; - val = simple_strtoul(buf, NULL, 10); + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) + return ret; /* must have at least 1 entry */ if (!val) -- cgit v1.2.3-70-g09d2 From ab46428c6969d50ecf6f6e97b7a84abba6274368 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:21:00 +0200 Subject: ftrace: modulize the number of CPU buffers Currently ftrace allocates a trace buffer for every possible CPU. Work is being done to change it to only online CPUs and add hooks to hotplug CPUS. This patch lays out the infrastructure for such a change. 
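Conceptually (a sketch only, condensed from the patch below rather than adding anything new), the per-CPU loops stop hard-coding "all possible CPUs" and instead go through one mask, so later hotplug support only has to change how that mask is maintained:

  static cpumask_t __read_mostly tracing_buffer_mask;

  #define for_each_tracing_cpu(cpu)       \
          for_each_cpu_mask(cpu, tracing_buffer_mask)

  /* for now the mask still covers every possible CPU */
  tracing_buffer_mask = cpu_possible_map;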
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 290e9da7aa9..5da391c5fb0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -35,6 +35,12 @@ unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; unsigned long __read_mostly tracing_thresh; +static unsigned long __read_mostly tracing_nr_buffers; +static cpumask_t __read_mostly tracing_buffer_mask; + +#define for_each_tracing_cpu(cpu) \ + for_each_cpu_mask(cpu, tracing_buffer_mask) + /* dummy trace to disable tracing */ static struct tracer no_tracer __read_mostly = { .name = "none", @@ -328,7 +334,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) WARN_ON_ONCE(!irqs_disabled()); __raw_spin_lock(&ftrace_max_lock); /* clear out all the previous traces */ - for_each_possible_cpu(i) { + for_each_tracing_cpu(i) { data = tr->data[i]; flip_trace(max_tr.data[i], data); tracing_reset(data); @@ -352,7 +358,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) WARN_ON_ONCE(!irqs_disabled()); __raw_spin_lock(&ftrace_max_lock); - for_each_possible_cpu(i) + for_each_tracing_cpu(i) tracing_reset(max_tr.data[i]); flip_trace(max_tr.data[cpu], data); @@ -398,7 +404,7 @@ int register_tracer(struct tracer *type) * internal tracing to verify that everything is in order. * If we fail, we do not register this tracer. */ - for_each_possible_cpu(i) { + for_each_tracing_cpu(i) { data = tr->data[i]; if (!head_page(data)) continue; @@ -417,7 +423,7 @@ int register_tracer(struct tracer *type) goto out; } /* Only reset on passing, to avoid touching corrupted buffers */ - for_each_possible_cpu(i) { + for_each_tracing_cpu(i) { data = tr->data[i]; if (!head_page(data)) continue; @@ -847,7 +853,7 @@ find_next_entry(struct trace_iterator *iter, int *ent_cpu) int next_cpu = -1; int cpu; - for_each_possible_cpu(cpu) { + for_each_tracing_cpu(cpu) { if (!head_page(tr->data[cpu])) continue; ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu); @@ -972,7 +978,7 @@ static void *s_start(struct seq_file *m, loff_t *pos) iter->prev_ent = NULL; iter->prev_cpu = -1; - for_each_possible_cpu(i) { + for_each_tracing_cpu(i) { iter->next_idx[i] = 0; iter->next_page[i] = NULL; } @@ -1089,7 +1095,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) if (type) name = type->name; - for_each_possible_cpu(cpu) { + for_each_tracing_cpu(cpu) { if (head_page(tr->data[cpu])) { total += tr->data[cpu]->trace_idx; if (tr->data[cpu]->trace_idx > tr->entries) @@ -1519,7 +1525,7 @@ static int trace_empty(struct trace_iterator *iter) struct trace_array_cpu *data; int cpu; - for_each_possible_cpu(cpu) { + for_each_tracing_cpu(cpu) { data = iter->tr->data[cpu]; if (head_page(data) && data->trace_idx && @@ -1831,7 +1837,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, raw_local_irq_disable(); __raw_spin_lock(&ftrace_max_lock); - for_each_possible_cpu(cpu) { + for_each_tracing_cpu(cpu) { /* * Increase/decrease the disabled counter if we are * about to flip a bit in the cpumask: @@ -2308,7 +2314,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, ftrace_enabled = 0; #endif smp_wmb(); - for_each_possible_cpu(cpu) { + for_each_tracing_cpu(cpu) { data = iter->tr->data[cpu]; if (!head_page(data) || !data->trace_idx) @@ -2605,7 +2611,7 
@@ static int trace_alloc_page(void) int i; /* first allocate a page for each CPU */ - for_each_possible_cpu(i) { + for_each_tracing_cpu(i) { array = (void *)__get_free_page(GFP_KERNEL); if (array == NULL) { printk(KERN_ERR "tracer: failed to allocate page" @@ -2630,7 +2636,7 @@ static int trace_alloc_page(void) } /* Now that we successfully allocate a page per CPU, add them */ - for_each_possible_cpu(i) { + for_each_tracing_cpu(i) { data = global_trace.data[i]; page = list_entry(pages.next, struct page, lru); list_del_init(&page->lru); @@ -2666,7 +2672,7 @@ static int trace_free_page(void) int ret = 0; /* free one page from each buffer */ - for_each_possible_cpu(i) { + for_each_tracing_cpu(i) { data = global_trace.data[i]; p = data->trace_pages.next; if (p == &data->trace_pages) { @@ -2717,8 +2723,12 @@ __init static int tracer_alloc_buffers(void) global_trace.ctrl = tracer_enabled; + /* TODO: make the number of buffers hot pluggable with CPUS */ + tracing_nr_buffers = num_possible_cpus(); + tracing_buffer_mask = cpu_possible_map; + /* Allocate the first page for all buffers */ - for_each_possible_cpu(i) { + for_each_tracing_cpu(i) { data = global_trace.data[i] = &per_cpu(global_trace_cpu, i); max_tr.data[i] = &per_cpu(max_data, i); -- cgit v1.2.3-70-g09d2 From 4fcdae83cebda24b519a89d3dd976081fff1ca80 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:21:00 +0200 Subject: ftrace: comment code This is first installment of adding documentation to the ftrace. Expect many more patches of this kind in the near future. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 135 ++++++++++++++++++++++++++++++++++++++++++++++++++- kernel/trace/trace.h | 7 +++ 2 files changed, 141 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5da391c5fb0..a102b11eacf 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -64,26 +64,79 @@ cycle_t ftrace_now(int cpu) return cpu_clock(cpu); } +/* + * The global_trace is the descriptor that holds the tracing + * buffers for the live tracing. For each CPU, it contains + * a link list of pages that will store trace entries. The + * page descriptor of the pages in the memory is used to hold + * the link list by linking the lru item in the page descriptor + * to each of the pages in the buffer per CPU. + * + * For each active CPU there is a data field that holds the + * pages for the buffer for that CPU. Each CPU has the same number + * of pages allocated for its buffer. + */ static struct trace_array global_trace; static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu); +/* + * The max_tr is used to snapshot the global_trace when a maximum + * latency is reached. Some tracers will use this to store a maximum + * trace while it continues examining live traces. + * + * The buffers for the max_tr are set up the same as the global_trace. + * When a snapshot is taken, the link list of the max_tr is swapped + * with the link list of the global_trace and the buffers are reset for + * the global_trace so the tracing can continue. + */ static struct trace_array max_tr; static DEFINE_PER_CPU(struct trace_array_cpu, max_data); +/* tracer_enabled is used to toggle activation of a tracer */ static int tracer_enabled = 1; + +/* + * trace_nr_entries is the number of entries that is allocated + * for a buffer. Note, the number of entries is always rounded + * to ENTRIES_PER_PAGE. 
+ */ static unsigned long trace_nr_entries = 65536UL; +/* trace_types holds a link list of available tracers. */ static struct tracer *trace_types __read_mostly; + +/* current_trace points to the tracer that is currently active */ static struct tracer *current_trace __read_mostly; + +/* + * max_tracer_type_len is used to simplify the allocating of + * buffers to read userspace tracer names. We keep track of + * the longest tracer name registered. + */ static int max_tracer_type_len; +/* + * trace_types_lock is used to protect the trace_types list. + * This lock is also used to keep user access serialized. + * Accesses from userspace will grab this lock while userspace + * activities happen inside the kernel. + */ static DEFINE_MUTEX(trace_types_lock); + +/* trace_wait is a waitqueue for tasks blocked on trace_poll */ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); +/* trace_flags holds iter_ctrl options */ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; +/** + * trace_wake_up - wake up tasks waiting for trace input + * + * Simply wakes up any task that is blocked on the trace_wait + * queue. These is used with trace_poll for tasks polling the trace. + */ void trace_wake_up(void) { /* @@ -117,6 +170,14 @@ unsigned long nsecs_to_usecs(unsigned long nsecs) return nsecs / 1000; } +/* + * trace_flag_type is an enumeration that holds different + * states when a trace occurs. These are: + * IRQS_OFF - interrupts were disabled + * NEED_RESCED - reschedule is requested + * HARDIRQ - inside an interrupt handler + * SOFTIRQ - inside a softirq handler + */ enum trace_flag_type { TRACE_FLAG_IRQS_OFF = 0x01, TRACE_FLAG_NEED_RESCHED = 0x02, @@ -124,10 +185,14 @@ enum trace_flag_type { TRACE_FLAG_SOFTIRQ = 0x08, }; +/* + * TRACE_ITER_SYM_MASK masks the options in trace_flags that + * control the output of kernel symbols. + */ #define TRACE_ITER_SYM_MASK \ (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR) -/* These must match the bit postions above */ +/* These must match the bit postions in trace_iterator_flags */ static const char *trace_options[] = { "print-parent", "sym-offset", @@ -142,6 +207,15 @@ static const char *trace_options[] = { NULL }; +/* + * ftrace_max_lock is used to protect the swapping of buffers + * when taking a max snapshot. The buffers themselves are + * protected by per_cpu spinlocks. But the action of the swap + * needs its own lock. + * + * This is defined as a raw_spinlock_t in order to help + * with performance when lockdep debugging is enabled. + */ static raw_spinlock_t ftrace_max_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; @@ -172,6 +246,13 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) tracing_record_cmdline(current); } +/** + * check_pages - integrity check of trace buffers + * + * As a safty measure we check to make sure the data pages have not + * been corrupted. TODO: configure to disable this because it adds + * a bit of overhead. + */ void check_pages(struct trace_array_cpu *data) { struct page *page, *tmp; @@ -185,6 +266,13 @@ void check_pages(struct trace_array_cpu *data) } } +/** + * head_page - page address of the first page in per_cpu buffer. + * + * head_page returns the page address of the first page in + * a per_cpu buffer. This also preforms various consistency + * checks to make sure the buffer has not been corrupted. 
+ */ void *head_page(struct trace_array_cpu *data) { struct page *page; @@ -199,6 +287,17 @@ void *head_page(struct trace_array_cpu *data) return page_address(page); } +/** + * trace_seq_printf - sequence printing of trace information + * @s: trace sequence descriptor + * @fmt: printf format string + * + * The tracer may use either sequence operations or its own + * copy to user routines. To simplify formating of a trace + * trace_seq_printf is used to store strings into a special + * buffer (@s). Then the output may be either used by + * the sequencer or pulled into another buffer. + */ int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) { @@ -222,6 +321,16 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) return len; } +/** + * trace_seq_puts - trace sequence printing of simple string + * @s: trace sequence descriptor + * @str: simple string to record + * + * The tracer may use either the sequence operations or its own + * copy to user routines. This function records a simple string + * into a special buffer (@s) for later retrieval by a sequencer + * or other mechanism. + */ static int trace_seq_puts(struct trace_seq *s, const char *str) { @@ -304,6 +413,13 @@ trace_print_seq(struct seq_file *m, struct trace_seq *s) trace_seq_reset(s); } +/* + * flip the trace buffers between two trace descriptors. + * This usually is the buffers between the global_trace and + * the max_tr to record a snapshot of a current trace. + * + * The ftrace_max_lock must be held. + */ static void flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2) { @@ -325,6 +441,15 @@ flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2) check_pages(tr2); } +/** + * update_max_tr - snapshot all trace buffers from global_trace to max_tr + * @tr: tracer + * @tsk: the task with the latency + * @cpu: The cpu that initiated the trace. + * + * Flip the buffers between the @tr and the max_tr and record information + * about which task was the cause of this latency. + */ void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) { @@ -349,6 +474,8 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) * @tr - tracer * @tsk - task with the latency * @cpu - the cpu of the buffer to copy. + * + * Flip the trace of a single CPU buffer between the @tr and the max_tr. */ void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) @@ -368,6 +495,12 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) __raw_spin_unlock(&ftrace_max_lock); } +/** + * register_tracer - register a tracer with the ftrace system. + * @type - the plugin for the tracer + * + * Register a new plugin tracer. + */ int register_tracer(struct tracer *type) { struct tracer *t; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b0ca7473671..21c29ee13e5 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -294,6 +294,13 @@ extern long ns2usecs(cycle_t nsec); extern unsigned long trace_flags; +/* + * trace_iterator_flags is an enumeration that defines bit + * positions into trace_flags that controls the output. + * + * NOTE: These bits must match the trace_options array in + * trace.c. 
+ */ enum trace_iterator_flags { TRACE_ITER_PRINT_PARENT = 0x01, TRACE_ITER_SYM_OFFSET = 0x02, -- cgit v1.2.3-70-g09d2 From 25b0b44a1c732ccfc58095cdd8438955a0a19fff Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:21:00 +0200 Subject: ftrace: fix comm on function trace output In cleaning up of the sched_switch code, the function trace recording of task comms was removed. This patch adds back the recording of comms for function trace. The output of ftrace now has the task comm instead of <...>. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 7 ++++++- kernel/trace/trace.h | 2 ++ kernel/trace/trace_functions.c | 2 ++ kernel/trace/trace_sched_switch.c | 7 +++++-- 4 files changed, 15 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a102b11eacf..1281969103b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -620,7 +620,12 @@ static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; static int cmdline_idx; static DEFINE_SPINLOCK(trace_cmdline_lock); -atomic_t trace_record_cmdline_disabled; + +/* trace in all context switches */ +atomic_t trace_record_cmdline_enabled __read_mostly; + +/* temporary disable recording */ +atomic_t trace_record_cmdline_disabled __read_mostly; static void trace_init_cmdlines(void) { diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 21c29ee13e5..8991c5efcc7 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -216,6 +216,8 @@ extern unsigned long nsecs_to_usecs(unsigned long nsecs); extern unsigned long tracing_max_latency; extern unsigned long tracing_thresh; +extern atomic_t trace_record_cmdline_enabled; + void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu); void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu); diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 4165d34bd28..0a084656d7c 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -29,12 +29,14 @@ static void function_reset(struct trace_array *tr) static void start_function_trace(struct trace_array *tr) { function_reset(tr); + atomic_inc(&trace_record_cmdline_enabled); tracing_start_function_trace(); } static void stop_function_trace(struct trace_array *tr) { tracing_stop_function_trace(); + atomic_dec(&trace_record_cmdline_enabled); } static void function_trace_init(struct trace_array *tr) diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 5671db0e182..a3376478fc2 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -29,8 +29,6 @@ ctx_switch_func(void *__rq, struct task_struct *prev, struct task_struct *next) if (!tracer_enabled) return; - tracing_record_cmdline(prev); - local_irq_save(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; @@ -73,6 +71,9 @@ void ftrace_ctx_switch(void *__rq, struct task_struct *prev, struct task_struct *next) { + if (unlikely(atomic_read(&trace_record_cmdline_enabled))) + tracing_record_cmdline(prev); + /* * If tracer_switch_func only points to the local * switch func, it still needs the ptr passed to it. 
@@ -134,11 +135,13 @@ static void sched_switch_reset(struct trace_array *tr) static void start_sched_trace(struct trace_array *tr) { sched_switch_reset(tr); + atomic_inc(&trace_record_cmdline_enabled); tracer_enabled = 1; } static void stop_sched_trace(struct trace_array *tr) { + atomic_dec(&trace_record_cmdline_enabled); tracer_enabled = 0; } -- cgit v1.2.3-70-g09d2 From 53d0aa773053ab18287781e25d52c5faf9e0e09e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:21:01 +0200 Subject: ftrace: add logic to record overruns This patch sets up the infrastructure to record overruns of the tracing buffer. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 16 +++++++++++----- kernel/trace/trace.h | 6 +++++- 2 files changed, 16 insertions(+), 6 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1281969103b..b9126ef46a9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -609,6 +609,7 @@ void unregister_tracer(struct tracer *type) void tracing_reset(struct trace_array_cpu *data) { data->trace_idx = 0; + data->overrun = 0; data->trace_head = data->trace_tail = head_page(data); data->trace_head_idx = 0; data->trace_tail_idx = 0; @@ -750,6 +751,7 @@ tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data) if (data->trace_head == data->trace_tail && idx_next == data->trace_tail_idx) { /* overrun */ + data->overrun++; data->trace_tail_idx++; if (data->trace_tail_idx >= ENTRIES_PER_PAGE) { data->trace_tail = @@ -2353,8 +2355,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, { struct trace_iterator *iter = filp->private_data; struct trace_array_cpu *data; - struct trace_array *tr = iter->tr; - struct tracer *tracer = iter->trace; static cpumask_t mask; static int start; unsigned long flags; @@ -2433,10 +2433,11 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, if (cnt >= PAGE_SIZE) cnt = PAGE_SIZE - 1; - memset(iter, 0, sizeof(*iter)); - iter->tr = tr; - iter->trace = tracer; + /* reset all but tr, trace, and overruns */ iter->pos = -1; + memset(&iter->seq, 0, + sizeof(struct trace_iterator) - + offsetof(struct trace_iterator, seq)); /* * We need to stop all tracing on all CPUS to read the @@ -2465,6 +2466,11 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, for_each_cpu_mask(cpu, mask) { data = iter->tr->data[cpu]; __raw_spin_lock(&data->lock); + + if (data->overrun > iter->last_overrun[cpu]) + iter->overrun[cpu] += + data->overrun - iter->last_overrun[cpu]; + iter->last_overrun[cpu] = data->overrun; } while (find_next_entry_inc(iter) != NULL) { diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 8991c5efcc7..c1ec134ac35 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -97,6 +97,7 @@ struct trace_array_cpu { void *trace_head; /* producer */ void *trace_tail; /* consumer */ unsigned long trace_idx; + unsigned long overrun; unsigned long saved_latency; unsigned long critical_start; unsigned long critical_end; @@ -157,10 +158,13 @@ struct trace_seq { * results to users and which routines might sleep, etc: */ struct trace_iterator { - struct trace_seq seq; struct trace_array *tr; struct tracer *trace; + long last_overrun[NR_CPUS]; + long overrun[NR_CPUS]; + /* The below is zeroed out in pipe_read */ + struct trace_seq seq; struct trace_entry *ent; int cpu; -- cgit v1.2.3-70-g09d2 From 107bad8bef5ab2c3a3bff7648c18c9dc3abdc13b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 
12 May 2008 21:21:01 +0200 Subject: ftrace: add trace pipe header pluggin This patch adds a method for open_pipe and open_read to the pluggins so that they can add a header to the trace pipe call. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 38 +++++++++++++++++++++++++++++++------- kernel/trace/trace.h | 5 +++++ 2 files changed, 36 insertions(+), 7 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b9126ef46a9..32f9106d612 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2307,11 +2307,15 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) if (!iter) return -ENOMEM; + mutex_lock(&trace_types_lock); iter->tr = &global_trace; iter->trace = current_trace; - filp->private_data = iter; + if (iter->trace->pipe_open) + iter->trace->pipe_open(iter); + mutex_unlock(&trace_types_lock); + return 0; } @@ -2380,13 +2384,24 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, return cnt; } + mutex_lock(&trace_types_lock); + if (iter->trace->read) { + ret = iter->trace->read(iter, filp, ubuf, cnt, ppos); + if (ret) { + read = ret; + goto out; + } + } + trace_seq_reset(&iter->seq); start = 0; while (trace_empty(iter)) { - if ((filp->f_flags & O_NONBLOCK)) - return -EAGAIN; + if ((filp->f_flags & O_NONBLOCK)) { + read = -EAGAIN; + goto out; + } /* * This is a make-shift waitqueue. The reason we don't use @@ -2400,16 +2415,22 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, set_current_state(TASK_INTERRUPTIBLE); iter->tr->waiter = current; + mutex_unlock(&trace_types_lock); + /* sleep for one second, and try again. */ schedule_timeout(HZ); + mutex_lock(&trace_types_lock); + iter->tr->waiter = NULL; - if (signal_pending(current)) - return -EINTR; + if (signal_pending(current)) { + read = -EINTR; + goto out; + } if (iter->trace != current_trace) - return 0; + goto out; /* * We block until we read something and tracing is disabled. 
@@ -2428,7 +2449,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, /* stop when tracing is finished */ if (trace_empty(iter)) - return 0; + goto out; if (cnt >= PAGE_SIZE) cnt = PAGE_SIZE - 1; @@ -2518,6 +2539,9 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, if (ret) read = -EFAULT; +out: + mutex_unlock(&trace_types_lock); + return read; } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c1ec134ac35..ee53d706066 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -135,9 +135,13 @@ struct tracer { void (*init)(struct trace_array *tr); void (*reset)(struct trace_array *tr); void (*open)(struct trace_iterator *iter); + void (*pipe_open)(struct trace_iterator *iter); void (*close)(struct trace_iterator *iter); void (*start)(struct trace_iterator *iter); void (*stop)(struct trace_iterator *iter); + ssize_t (*read)(struct trace_iterator *iter, + struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos); void (*ctrl_update)(struct trace_array *tr); #ifdef CONFIG_FTRACE_STARTUP_TEST int (*selftest)(struct tracer *trace, @@ -160,6 +164,7 @@ struct trace_seq { struct trace_iterator { struct trace_array *tr; struct tracer *trace; + void *private; long last_overrun[NR_CPUS]; long overrun[NR_CPUS]; -- cgit v1.2.3-70-g09d2 From 4823ed7eadf35e4b57ce581327e21d39585f1f32 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:21:01 +0200 Subject: ftrace: fix setting of pos in read_pipe In resetting the iterator in read_pipe, the reset of pos was postitioned in the wrong location with respect to the memset operation. The current code sets pos, incorrectly, to zero. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 32f9106d612..49e16630628 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2455,10 +2455,10 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, cnt = PAGE_SIZE - 1; /* reset all but tr, trace, and overruns */ - iter->pos = -1; memset(&iter->seq, 0, sizeof(struct trace_iterator) - offsetof(struct trace_iterator, seq)); + iter->pos = -1; /* * We need to stop all tracing on all CPUS to read the -- cgit v1.2.3-70-g09d2 From 9fe068e92f6290e89e19adc521441661a1229f00 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:21:02 +0200 Subject: ftrace: trace faster Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 49e16630628..ca0d6ff74c1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2417,8 +2417,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, mutex_unlock(&trace_types_lock); - /* sleep for one second, and try again. */ - schedule_timeout(HZ); + /* sleep for 100 msecs, and try again. 
*/ + schedule_timeout(HZ/10); mutex_lock(&trace_types_lock); -- cgit v1.2.3-70-g09d2 From a4feb8348b62fe76a63cdb5569f5c920f5283c06 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:21:02 +0200 Subject: ftrace: special stacktrace Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 47 ++++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 23 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ca0d6ff74c1..c232d8248a0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -808,29 +808,6 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, trace_function(tr, data, ip, parent_ip, flags); } -void -__trace_special(void *__tr, void *__data, - unsigned long arg1, unsigned long arg2, unsigned long arg3) -{ - struct trace_array_cpu *data = __data; - struct trace_array *tr = __tr; - struct trace_entry *entry; - unsigned long irq_flags; - - raw_local_irq_save(irq_flags); - __raw_spin_lock(&data->lock); - entry = tracing_get_trace_entry(tr, data); - tracing_generic_entry_update(entry, 0); - entry->type = TRACE_SPECIAL; - entry->special.arg1 = arg1; - entry->special.arg2 = arg2; - entry->special.arg3 = arg3; - __raw_spin_unlock(&data->lock); - raw_local_irq_restore(irq_flags); - - trace_wake_up(); -} - void __trace_stack(struct trace_array *tr, struct trace_array_cpu *data, unsigned long flags, @@ -856,6 +833,30 @@ void __trace_stack(struct trace_array *tr, save_stack_trace(&trace); } +void +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ + struct trace_array_cpu *data = __data; + struct trace_array *tr = __tr; + struct trace_entry *entry; + unsigned long irq_flags; + + raw_local_irq_save(irq_flags); + __raw_spin_lock(&data->lock); + entry = tracing_get_trace_entry(tr, data); + tracing_generic_entry_update(entry, 0); + entry->type = TRACE_SPECIAL; + entry->special.arg1 = arg1; + entry->special.arg2 = arg2; + entry->special.arg3 = arg3; + __trace_stack(tr, data, irq_flags, 4); + __raw_spin_unlock(&data->lock); + raw_local_irq_restore(irq_flags); + + trace_wake_up(); +} + void tracing_sched_switch_trace(struct trace_array *tr, struct trace_array_cpu *data, -- cgit v1.2.3-70-g09d2 From 6c6c27969a4c6024e6c8838829546c02aaddca18 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:02 +0200 Subject: ftrace: add readpos to struct trace_seq; add trace_seq_to_user() Refactor code from tracing_read_pipe() and create trace_seq_to_user(). Moved trace_seq_reset() call before iter->trace->read() call so that when all leftover data is returned, trace_seq is reset automatically. 
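Roughly (a simplified sketch, not a literal excerpt), the consumer side can now look like this: leftover text from a previous read() is handed out first, and only when trace_seq_to_user() reports that nothing is pending does the reader reset the sequence and format new entries:

  ssize_t sret;

  /* drain whatever the last read() left behind */
  sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
  if (sret != -EBUSY)
          return sret;            /* copied some bytes, or hit an error */

  /* nothing pending: start a fresh sequence for new entries */
  trace_seq_reset(&iter->seq);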
Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 73 ++++++++++++++++++++++++++-------------------------- kernel/trace/trace.h | 3 +++ 2 files changed, 39 insertions(+), 37 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c232d8248a0..82ced406aac 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -400,6 +400,26 @@ static void trace_seq_reset(struct trace_seq *s) { s->len = 0; + s->readpos = 0; +} + +ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) +{ + int len; + int ret; + + if (s->len <= s->readpos) + return -EBUSY; + + len = s->len - s->readpos; + if (cnt > len) + cnt = len; + ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt); + if (ret) + return -EFAULT; + + s->readpos += len; + return cnt; } static void @@ -2361,46 +2381,32 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, struct trace_iterator *iter = filp->private_data; struct trace_array_cpu *data; static cpumask_t mask; - static int start; unsigned long flags; #ifdef CONFIG_FTRACE int ftrace_save; #endif - int read = 0; int cpu; - int len; - int ret; + ssize_t sret; /* return any leftover data */ - if (iter->seq.len > start) { - len = iter->seq.len - start; - if (cnt > len) - cnt = len; - ret = copy_to_user(ubuf, iter->seq.buffer + start, cnt); - if (ret) - cnt = -EFAULT; - - start += len; + sret = trace_seq_to_user(&iter->seq, ubuf, cnt); + if (sret != -EBUSY) + return sret; + sret = 0; - return cnt; - } + trace_seq_reset(&iter->seq); mutex_lock(&trace_types_lock); if (iter->trace->read) { - ret = iter->trace->read(iter, filp, ubuf, cnt, ppos); - if (ret) { - read = ret; + sret = iter->trace->read(iter, filp, ubuf, cnt, ppos); + if (sret) goto out; - } } - trace_seq_reset(&iter->seq); - start = 0; - while (trace_empty(iter)) { if ((filp->f_flags & O_NONBLOCK)) { - read = -EAGAIN; + sret = -EAGAIN; goto out; } @@ -2426,7 +2432,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, iter->tr->waiter = NULL; if (signal_pending(current)) { - read = -EINTR; + sret = -EINTR; goto out; } @@ -2496,6 +2502,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, } while (find_next_entry_inc(iter) != NULL) { + int ret; int len = iter->seq.len; ret = print_trace_line(iter); @@ -2526,24 +2533,16 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, local_irq_restore(flags); /* Now copy what we have to the user */ - read = iter->seq.len; - if (read > cnt) - read = cnt; - - ret = copy_to_user(ubuf, iter->seq.buffer, read); - - if (read < iter->seq.len) - start = read; - else + sret = trace_seq_to_user(&iter->seq, ubuf, cnt); + if (iter->seq.readpos >= iter->seq.len) trace_seq_reset(&iter->seq); - - if (ret) - read = -EFAULT; + if (sret == -EBUSY) + sret = 0; out: mutex_unlock(&trace_types_lock); - return read; + return sret; } static ssize_t diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index ee53d706066..8845033ab49 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -155,6 +155,7 @@ struct tracer { struct trace_seq { unsigned char buffer[PAGE_SIZE]; unsigned int len; + unsigned int readpos; }; /* @@ -301,6 +302,8 @@ extern int trace_selftest_startup_sched_switch(struct tracer *trace, extern void *head_page(struct trace_array_cpu *data); extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...); +extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, + size_t cnt); extern long ns2usecs(cycle_t 
nsec); extern unsigned long trace_flags; -- cgit v1.2.3-70-g09d2 From 3eefae994d9224fb7771a3ddb683868363c23510 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:21:04 +0200 Subject: ftrace: limit trace entries Currently there is no protection from the root user to use up all of memory for trace buffers. If the root user allocates too many entries, the OOM killer might start kill off all tasks. This patch adds an algorith to check the following condition: pages_requested > (freeable_memory + current_trace_buffer_pages) / 4 If the above is met then the allocation fails. The above prevents more than 1/4th of freeable memory from being used by trace buffers. To determine the freeable_memory, I made determine_dirtyable_memory in mm/page-writeback.c global. Special thanks goes to Peter Zijlstra for suggesting the above calculation. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/writeback.h | 2 ++ kernel/trace/trace.c | 38 ++++++++++++++++++++++++++++++++++++++ mm/page-writeback.c | 10 +++++++--- 3 files changed, 47 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index f462439cc28..bd91987c065 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -105,6 +105,8 @@ extern int vm_highmem_is_dirtyable; extern int block_dump; extern int laptop_mode; +extern unsigned long determine_dirtyable_memory(void); + extern int dirty_ratio_handler(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 82ced406aac..2824cf48cdc 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -51,6 +52,8 @@ static int trace_free_page(void); static int tracing_disabled = 1; +static unsigned long tracing_pages_allocated; + long ns2usecs(cycle_t nsec) { @@ -2591,12 +2594,41 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, } if (val > global_trace.entries) { + long pages_requested; + unsigned long freeable_pages; + + /* make sure we have enough memory before mapping */ + pages_requested = + (val + (ENTRIES_PER_PAGE-1)) / ENTRIES_PER_PAGE; + + /* account for each buffer (and max_tr) */ + pages_requested *= tracing_nr_buffers * 2; + + /* Check for overflow */ + if (pages_requested < 0) { + cnt = -ENOMEM; + goto out; + } + + freeable_pages = determine_dirtyable_memory(); + + /* we only allow to request 1/4 of useable memory */ + if (pages_requested > + ((freeable_pages + tracing_pages_allocated) / 4)) { + cnt = -ENOMEM; + goto out; + } + while (global_trace.entries < val) { if (trace_alloc_page()) { cnt = -ENOMEM; goto out; } + /* double check that we don't go over the known pages */ + if (tracing_pages_allocated > pages_requested) + break; } + } else { /* include the number of entries in val (inc of page entries) */ while (global_trace.entries > val + (ENTRIES_PER_PAGE - 1)) @@ -2776,6 +2808,7 @@ static int trace_alloc_page(void) struct page *page, *tmp; LIST_HEAD(pages); void *array; + unsigned pages_allocated = 0; int i; /* first allocate a page for each CPU */ @@ -2787,6 +2820,7 @@ static int trace_alloc_page(void) goto free_pages; } + pages_allocated++; page = virt_to_page(array); list_add(&page->lru, &pages); @@ -2798,6 +2832,7 @@ static int trace_alloc_page(void) "for trace buffer!\n"); goto free_pages; } + pages_allocated++; page = 
virt_to_page(array); list_add(&page->lru, &pages); #endif @@ -2819,6 +2854,7 @@ static int trace_alloc_page(void) SetPageLRU(page); #endif } + tracing_pages_allocated += pages_allocated; global_trace.entries += ENTRIES_PER_PAGE; return 0; @@ -2853,6 +2889,8 @@ static int trace_free_page(void) page = list_entry(p, struct page, lru); ClearPageLRU(page); list_del(&page->lru); + tracing_pages_allocated--; + tracing_pages_allocated--; __free_page(page); tracing_reset(data); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 789b6adbef3..b38f700825f 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -126,8 +126,6 @@ static void background_writeout(unsigned long _min_pages); static struct prop_descriptor vm_completions; static struct prop_descriptor vm_dirties; -static unsigned long determine_dirtyable_memory(void); - /* * couple the period to the dirty_ratio: * @@ -347,7 +345,13 @@ static unsigned long highmem_dirtyable_memory(unsigned long total) #endif } -static unsigned long determine_dirtyable_memory(void) +/** + * determine_dirtyable_memory - amount of memory that may be used + * + * Returns the numebr of pages that can currently be freed and used + * by the kernel for direct mappings. + */ +unsigned long determine_dirtyable_memory(void) { unsigned long x; -- cgit v1.2.3-70-g09d2 From 74f4e369fc5b52433ad824cef32d3bf1304549be Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:21:15 +0200 Subject: ftrace: stacktrace fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 8 ++++++++ kernel/semaphore.c | 2 ++ kernel/trace/trace.c | 4 ++-- kernel/trace/trace.h | 2 +- 4 files changed, 13 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0d3714e7110..017ab44d572 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -120,4 +120,12 @@ static inline void tracer_disable(void) # define trace_preempt_off(a0, a1) do { } while (0) #endif +#ifdef CONFIG_CONTEXT_SWITCH_TRACER +extern void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); +#else +static inline void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } +#endif + #endif /* _LINUX_FTRACE_H */ diff --git a/kernel/semaphore.c b/kernel/semaphore.c index 5c2942e768c..1a064adab65 100644 --- a/kernel/semaphore.c +++ b/kernel/semaphore.c @@ -31,6 +31,7 @@ #include #include #include +#include static noinline void __down(struct semaphore *sem); static noinline int __down_interruptible(struct semaphore *sem); @@ -53,6 +54,7 @@ void down(struct semaphore *sem) { unsigned long flags; + ftrace_special(sem->count, 0, __LINE__); spin_lock_irqsave(&sem->lock, flags); if (likely(sem->count > 0)) sem->count--; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2824cf48cdc..3271916ff03 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -901,7 +901,7 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->ctx.next_pid = next->pid; entry->ctx.next_prio = next->prio; entry->ctx.next_state = next->state; - __trace_stack(tr, data, flags, 4); + __trace_stack(tr, data, flags, 5); __raw_spin_unlock(&data->lock); raw_local_irq_restore(irq_flags); } @@ -927,7 +927,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->ctx.next_pid = wakee->pid; entry->ctx.next_prio = wakee->prio; entry->ctx.next_state = wakee->state; - __trace_stack(tr, data, flags, 5); + __trace_stack(tr, data, flags, 6); 
__raw_spin_unlock(&data->lock); raw_local_irq_restore(irq_flags); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f5de0601b40..c460e85e94e 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -51,7 +51,7 @@ struct special_entry { * Stack-trace entry: */ -#define FTRACE_STACK_ENTRIES 5 +#define FTRACE_STACK_ENTRIES 8 struct stack_entry { unsigned long caller[FTRACE_STACK_ENTRIES]; -- cgit v1.2.3-70-g09d2 From d618b3e6e50970a6248ac857653fdd49bcd3c045 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:49 +0200 Subject: ftrace: sysprof updates make the sample period configurable. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 3 ++ kernel/trace/trace.h | 2 ++ kernel/trace/trace_sysprof.c | 70 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 73 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3271916ff03..95b7c48a9a1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2800,6 +2800,9 @@ static __init void tracer_init_debugfs(void) pr_warning("Could not create debugfs " "'dyn_ftrace_total_info' entry\n"); #endif +#ifdef CONFIG_SYSPROF_TRACER + init_tracer_sysprof_debugfs(d_tracer); +#endif } static int trace_alloc_page(void) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b2198bc830a..b7f85d9c80d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -188,6 +188,8 @@ struct trace_iterator { void tracing_reset(struct trace_array_cpu *data); int tracing_open_generic(struct inode *inode, struct file *filp); struct dentry *tracing_init_dentry(void); +void init_tracer_sysprof_debugfs(struct dentry *d_tracer); + void ftrace(struct trace_array *tr, struct trace_array_cpu *data, unsigned long ip, diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index f9a09fe705b..19406236b67 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -20,11 +20,12 @@ static struct trace_array *sysprof_trace; static int __read_mostly tracer_enabled; /* - * 10 msecs for now: + * 1 msec sample interval by default: */ -static const unsigned long sample_period = 1000000; +static unsigned long sample_period = 1000000; static const unsigned int sample_max_depth = 512; +static DEFINE_MUTEX(sample_timer_lock); /* * Per CPU hrtimers that do the profiling: */ @@ -166,15 +167,19 @@ static notrace void stack_reset(struct trace_array *tr) static notrace void start_stack_trace(struct trace_array *tr) { + mutex_lock(&sample_timer_lock); stack_reset(tr); start_stack_timers(); tracer_enabled = 1; + mutex_unlock(&sample_timer_lock); } static notrace void stop_stack_trace(struct trace_array *tr) { + mutex_lock(&sample_timer_lock); stop_stack_timers(); tracer_enabled = 0; + mutex_unlock(&sample_timer_lock); } static notrace void stack_trace_init(struct trace_array *tr) @@ -216,3 +221,64 @@ __init static int init_stack_trace(void) return register_tracer(&stack_trace); } device_initcall(init_stack_trace); + +#define MAX_LONG_DIGITS 22 + +static ssize_t +sysprof_sample_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[MAX_LONG_DIGITS]; + int r; + + r = sprintf(buf, "%ld\n", nsecs_to_usecs(sample_period)); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + +static ssize_t +sysprof_sample_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[MAX_LONG_DIGITS]; + unsigned long val; + + if (cnt > 
MAX_LONG_DIGITS-1) + cnt = MAX_LONG_DIGITS-1; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + val = simple_strtoul(buf, NULL, 10); + /* + * Enforce a minimum sample period of 100 usecs: + */ + if (val < 100) + val = 100; + + mutex_lock(&sample_timer_lock); + stop_stack_timers(); + sample_period = val * 1000; + start_stack_timers(); + mutex_unlock(&sample_timer_lock); + + return cnt; +} + +static struct file_operations sysprof_sample_fops = { + .read = sysprof_sample_read, + .write = sysprof_sample_write, +}; + +void init_tracer_sysprof_debugfs(struct dentry *d_tracer) +{ + struct dentry *entry; + + entry = debugfs_create_file("sysprof_sample_period", 0644, + d_tracer, NULL, &sysprof_sample_fops); + if (entry) + return; + pr_warning("Could not create debugfs 'dyn_ftrace_total_info' entry\n"); +} -- cgit v1.2.3-70-g09d2 From bd8ac686c73c7e925fcfe0b02dc4e7b947127864 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: ftrace: mmiotrace, updates here is a patch that makes mmiotrace work almost well within the tracing framework. The patch applies on top of my previous patch. I have my own output formatting in place now. Summary of changes: - fix the NULL dereference that was due to not calling tracing_reset() - add print_line() callback into struct tracer - implement print_line() for mmiotrace, producing up-to-spec text - add my output header, but that is not really called in the right place - rewrote the main structs in mmiotrace - added two new trace entry types: TRACE_MMIO_RW and TRACE_MMIO_MAP - made some functions in trace.c non-static - check current==NULL in tracing_generic_entry_update() - fix(?) comparison in trace_seq_printf() Things seem to work fine except a few issues. Markers (text lines injected into mmiotrace log) are missing, I did not feel hacking them in before we have variable length entries. My output header is printed only for 'trace' file, but not 'trace_pipe'. For some reason, despite my quick fix, iter->trace is NULL in print_trace_line() when called from 'trace_pipe' file, which means I don't get proper output formatting. I only tried by loading nouveau.ko, which just detects the card, and that is traced fine. I didn't try further. Map, two reads and unmap. Works perfectly. I am missing the information about overflows, I'd prefer to have a counter for lost events. I didn't try, but I guess currently there is no way of knowning when it overflows? So, not too far from being fully operational, it seems :-) And looking at the diffstat, there also is some 700-900 lines of user space code that just became obsolete. 
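For example, a module that maps one region, does two reads and then unmaps would show up in the 'trace' file roughly like this (map id, addresses, values and timestamps are made-up, just following the R/MAP/UNMAP format strings introduced below in trace_mmiotrace.c):

    VERSION broken 20070824
    MAP 1.021093 1 0xfd000000 0xf8080000 0x80000 0x0 0
    R 4 1.021243 1 0xfd000000 0x00000001 0x0 0
    R 4 1.021250 1 0xfd000004 0xcafef00d 0x0 0
    UNMAP 1.022001 1 0x0 0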
Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig.debug | 2 +- arch/x86/kernel/mmiotrace/mmio-mod.c | 140 ++++++++++---------------------- include/linux/mmiotrace.h | 85 ++++++-------------- kernel/trace/trace.c | 34 ++++++++ kernel/trace/trace.h | 14 ++++ kernel/trace/trace_mmiotrace.c | 151 ++++++++++++++++++++++++++++------- 6 files changed, 238 insertions(+), 188 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 7e4b8494078..1d6de0d67f9 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -173,7 +173,7 @@ config MMIOTRACE_HOOKS config MMIOTRACE bool "Memory mapped IO tracing" - depends on DEBUG_KERNEL && RELAY + depends on DEBUG_KERNEL select TRACING select MMIOTRACE_HOOKS default y diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c index c7a67d7e482..62abc281a51 100644 --- a/arch/x86/kernel/mmiotrace/mmio-mod.c +++ b/arch/x86/kernel/mmiotrace/mmio-mod.c @@ -37,11 +37,6 @@ #define NAME "mmiotrace: " -/* This app's relay channel files will appear in /debug/mmio-trace */ -static const char APP_DIR[] = "mmio-trace"; -/* the marker injection file in /debug/APP_DIR */ -static const char MARKER_FILE[] = "mmio-marker"; - struct trap_reason { unsigned long addr; unsigned long ip; @@ -56,18 +51,15 @@ struct remap_trace { unsigned long id; }; -static const size_t subbuf_size = 256*1024; - /* Accessed per-cpu. */ static DEFINE_PER_CPU(struct trap_reason, pf_reason); -static DEFINE_PER_CPU(struct mm_io_header_rw, cpu_trace); +static DEFINE_PER_CPU(struct mmiotrace_rw, cpu_trace); #if 0 /* XXX: no way gather this info anymore */ /* Access to this is not per-cpu. */ static DEFINE_PER_CPU(atomic_t, dropped); #endif -static struct dentry *dir; static struct dentry *marker_file; static DEFINE_MUTEX(mmiotrace_mutex); @@ -82,24 +74,21 @@ static LIST_HEAD(trace_list); /* struct remap_trace */ * and trace_lock. * - Routines depending on is_enabled() must take trace_lock. * - trace_list users must hold trace_lock. - * - is_enabled() guarantees that chan is valid. + * - is_enabled() guarantees that mmio_trace_record is allowed. * - pre/post callbacks assume the effect of is_enabled() being true. 
*/ /* module parameters */ -static unsigned int n_subbufs = 32*4; static unsigned long filter_offset; static int nommiotrace; static int ISA_trace; static int trace_pc; -module_param(n_subbufs, uint, 0); module_param(filter_offset, ulong, 0); module_param(nommiotrace, bool, 0); module_param(ISA_trace, bool, 0); module_param(trace_pc, bool, 0); -MODULE_PARM_DESC(n_subbufs, "Number of 256kB buffers, default 128."); MODULE_PARM_DESC(filter_offset, "Start address of traced mappings."); MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing."); MODULE_PARM_DESC(ISA_trace, "Do not exclude the low ISA range."); @@ -110,6 +99,7 @@ static bool is_enabled(void) return atomic_read(&mmiotrace_enabled); } +#if 0 /* XXX: needs rewrite */ /* * Write callback for the debugfs entry: * Read a marker and write it to the mmio trace log @@ -145,6 +135,7 @@ static ssize_t write_marker(struct file *file, const char __user *buffer, kfree(event); return len; } +#endif static void print_pte(unsigned long address) { @@ -198,9 +189,10 @@ static void pre(struct kmmio_probe *p, struct pt_regs *regs, unsigned long addr) { struct trap_reason *my_reason = &get_cpu_var(pf_reason); - struct mm_io_header_rw *my_trace = &get_cpu_var(cpu_trace); + struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace); const unsigned long instptr = instruction_pointer(regs); const enum reason_type type = get_ins_type(instptr); + struct remap_trace *trace = p->user_data; /* it doesn't make sense to have more than one active trace per cpu */ if (my_reason->active_traces) @@ -212,23 +204,17 @@ static void pre(struct kmmio_probe *p, struct pt_regs *regs, my_reason->addr = addr; my_reason->ip = instptr; - my_trace->header.type = MMIO_MAGIC; - my_trace->header.pid = 0; - my_trace->header.data_len = sizeof(struct mm_io_rw); - my_trace->rw.address = addr; - /* - * struct remap_trace *trace = p->user_data; - * phys = addr - trace->probe.addr + trace->phys; - */ + my_trace->phys = addr - trace->probe.addr + trace->phys; + my_trace->map_id = trace->id; /* * Only record the program counter when requested. * It may taint clean-room reverse engineering. 
*/ if (trace_pc) - my_trace->rw.pc = instptr; + my_trace->pc = instptr; else - my_trace->rw.pc = 0; + my_trace->pc = 0; /* * XXX: the timestamp recorded will be *after* the tracing has been @@ -238,28 +224,25 @@ static void pre(struct kmmio_probe *p, struct pt_regs *regs, switch (type) { case REG_READ: - my_trace->header.type |= - (MMIO_READ << MMIO_OPCODE_SHIFT) | - (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); + my_trace->opcode = MMIO_READ; + my_trace->width = get_ins_mem_width(instptr); break; case REG_WRITE: - my_trace->header.type |= - (MMIO_WRITE << MMIO_OPCODE_SHIFT) | - (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); - my_trace->rw.value = get_ins_reg_val(instptr, regs); + my_trace->opcode = MMIO_WRITE; + my_trace->width = get_ins_mem_width(instptr); + my_trace->value = get_ins_reg_val(instptr, regs); break; case IMM_WRITE: - my_trace->header.type |= - (MMIO_WRITE << MMIO_OPCODE_SHIFT) | - (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); - my_trace->rw.value = get_ins_imm_val(instptr); + my_trace->opcode = MMIO_WRITE; + my_trace->width = get_ins_mem_width(instptr); + my_trace->value = get_ins_imm_val(instptr); break; default: { unsigned char *ip = (unsigned char *)instptr; - my_trace->header.type |= - (MMIO_UNKNOWN_OP << MMIO_OPCODE_SHIFT); - my_trace->rw.value = (*ip) << 16 | *(ip + 1) << 8 | + my_trace->opcode = MMIO_UNKNOWN_OP; + my_trace->width = 0; + my_trace->value = (*ip) << 16 | *(ip + 1) << 8 | *(ip + 2); } } @@ -271,7 +254,7 @@ static void post(struct kmmio_probe *p, unsigned long condition, struct pt_regs *regs) { struct trap_reason *my_reason = &get_cpu_var(pf_reason); - struct mm_io_header_rw *my_trace = &get_cpu_var(cpu_trace); + struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace); /* this should always return the active_trace count to 0 */ my_reason->active_traces--; @@ -282,20 +265,13 @@ static void post(struct kmmio_probe *p, unsigned long condition, switch (my_reason->type) { case REG_READ: - my_trace->rw.value = get_ins_reg_val(my_reason->ip, regs); + my_trace->value = get_ins_reg_val(my_reason->ip, regs); break; default: break; } - /* - * XXX: Several required values are ignored: - * - mapping id - * - program counter - * Also the address should be physical, not virtual. - */ - mmio_trace_record(my_trace->header.type, my_trace->rw.address, - my_trace->rw.value); + mmio_trace_rw(my_trace); put_cpu_var(cpu_trace); put_cpu_var(pf_reason); } @@ -305,21 +281,11 @@ static void ioremap_trace_core(unsigned long offset, unsigned long size, { static atomic_t next_id; struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL); - struct mm_io_header_map event = { - .header = { - .type = MMIO_MAGIC | - (MMIO_PROBE << MMIO_OPCODE_SHIFT), - .sec = 0, - .nsec = 0, - .pid = 0, - .data_len = sizeof(struct mm_io_map) - }, - .map = { - .phys = offset, - .addr = (unsigned long)addr, - .len = size, - .pc = 0 - } + struct mmiotrace_map map = { + .phys = offset, + .virt = (unsigned long)addr, + .len = size, + .opcode = MMIO_PROBE }; if (!trace) { @@ -338,15 +304,13 @@ static void ioremap_trace_core(unsigned long offset, unsigned long size, .phys = offset, .id = atomic_inc_return(&next_id) }; + map.map_id = trace->id; spin_lock_irq(&trace_lock); if (!is_enabled()) goto not_enabled; - /* - * XXX: Insufficient data recorded! 
- */ - mmio_trace_record(event.header.type, event.map.addr, event.map.len); + mmio_trace_mapping(&map); list_add_tail(&trace->list, &trace_list); if (!nommiotrace) register_kmmio_probe(&trace->probe); @@ -369,21 +333,11 @@ mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr) static void iounmap_trace_core(volatile void __iomem *addr) { - struct mm_io_header_map event = { - .header = { - .type = MMIO_MAGIC | - (MMIO_UNPROBE << MMIO_OPCODE_SHIFT), - .sec = 0, - .nsec = 0, - .pid = 0, - .data_len = sizeof(struct mm_io_map) - }, - .map = { - .phys = 0, - .addr = (unsigned long)addr, - .len = 0, - .pc = 0 - } + struct mmiotrace_map map = { + .phys = 0, + .virt = (unsigned long)addr, + .len = 0, + .opcode = MMIO_UNPROBE }; struct remap_trace *trace; struct remap_trace *tmp; @@ -404,8 +358,8 @@ static void iounmap_trace_core(volatile void __iomem *addr) break; } } - mmio_trace_record(event.header.type, event.map.addr, - found_trace ? found_trace->id : -1); + map.map_id = (found_trace) ? found_trace->id : -1; + mmio_trace_mapping(&map); not_enabled: spin_unlock_irq(&trace_lock); @@ -448,10 +402,12 @@ static void clear_trace_list(void) } } +#if 0 /* XXX: out of order */ static struct file_operations fops_marker = { .owner = THIS_MODULE, .write = write_marker }; +#endif void enable_mmiotrace(void) { @@ -464,9 +420,9 @@ void enable_mmiotrace(void) #if 0 /* XXX: tracing does not support text entries */ marker_file = debugfs_create_file("marker", 0660, dir, NULL, &fops_marker); -#endif if (!marker_file) pr_err(NAME "marker file creation failed.\n"); +#endif if (nommiotrace) pr_info(NAME "MMIO tracing disabled.\n"); @@ -502,17 +458,3 @@ void disable_mmiotrace(void) out: mutex_unlock(&mmiotrace_mutex); } - -int __init init_mmiotrace(void) -{ - pr_debug(NAME "load...\n"); - if (n_subbufs < 2) - return -EINVAL; - - dir = debugfs_create_dir(APP_DIR, NULL); - if (!dir) { - pr_err(NAME "Couldn't create relay app directory.\n"); - return -ENOMEM; - } - return 0; -} diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 579b3b06c90..c88a9c197d2 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -54,73 +54,38 @@ static inline void mmiotrace_iounmap(volatile void __iomem *addr) } #endif /* CONFIG_MMIOTRACE_HOOKS */ -/* in kernel/trace/trace_mmiotrace.c */ -extern int __init init_mmiotrace(void); -extern void enable_mmiotrace(void); -extern void disable_mmiotrace(void); -extern void mmio_trace_record(u32 type, unsigned long addr, unsigned long arg); - -#endif /* __KERNEL__ */ - - -/* - * If you change anything here, you must bump MMIO_VERSION. - * This is the relay data format for user space. 
- */ -#define MMIO_VERSION 0x04 - -/* mm_io_header.type */ -#define MMIO_OPCODE_MASK 0xff -#define MMIO_OPCODE_SHIFT 0 -#define MMIO_WIDTH_MASK 0xff00 -#define MMIO_WIDTH_SHIFT 8 -#define MMIO_MAGIC (0x6f000000 | (MMIO_VERSION<<16)) -#define MMIO_MAGIC_MASK 0xffff0000 - -enum mm_io_opcode { /* payload type: */ - MMIO_READ = 0x1, /* struct mm_io_rw */ - MMIO_WRITE = 0x2, /* struct mm_io_rw */ - MMIO_PROBE = 0x3, /* struct mm_io_map */ - MMIO_UNPROBE = 0x4, /* struct mm_io_map */ +enum mm_io_opcode { + MMIO_READ = 0x1, /* struct mmiotrace_rw */ + MMIO_WRITE = 0x2, /* struct mmiotrace_rw */ + MMIO_PROBE = 0x3, /* struct mmiotrace_map */ + MMIO_UNPROBE = 0x4, /* struct mmiotrace_map */ MMIO_MARKER = 0x5, /* raw char data */ - MMIO_UNKNOWN_OP = 0x6, /* struct mm_io_rw */ + MMIO_UNKNOWN_OP = 0x6, /* struct mmiotrace_rw */ }; -struct mm_io_header { - __u32 type; /* see MMIO_* macros above */ - __u32 sec; /* timestamp */ - __u32 nsec; - __u32 pid; /* PID of the process, or 0 for kernel core */ - __u16 data_len; /* length of the following payload */ +struct mmiotrace_rw { + unsigned long phys; /* PCI address of register */ + unsigned long value; + unsigned long pc; /* optional program counter */ + int map_id; + unsigned char opcode; /* one of MMIO_{READ,WRITE,UNKNOWN_OP} */ + unsigned char width; /* size of register access in bytes */ }; -struct mm_io_rw { - __u64 address; /* virtual address of register */ - __u64 value; - __u64 pc; /* optional program counter */ +struct mmiotrace_map { + unsigned long phys; /* base address in PCI space */ + unsigned long virt; /* base virtual address */ + unsigned long len; /* mapping size */ + int map_id; + unsigned char opcode; /* MMIO_PROBE or MMIO_UNPROBE */ }; -struct mm_io_map { - __u64 phys; /* base address in PCI space */ - __u64 addr; /* base virtual address */ - __u64 len; /* mapping size */ - __u64 pc; /* optional program counter */ -}; - - -/* - * These structures are used to allow a single relay_write() - * call to write a full packet. 
- */ - -struct mm_io_header_rw { - struct mm_io_header header; - struct mm_io_rw rw; -} __attribute__((packed)); +/* in kernel/trace/trace_mmiotrace.c */ +extern void enable_mmiotrace(void); +extern void disable_mmiotrace(void); +extern void mmio_trace_rw(struct mmiotrace_rw *rw); +extern void mmio_trace_mapping(struct mmiotrace_map *map); -struct mm_io_header_map { - struct mm_io_header header; - struct mm_io_map map; -} __attribute__((packed)); +#endif /* __KERNEL__ */ #endif /* MMIOTRACE_H */ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3271916ff03..d14fe49e963 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -831,6 +831,40 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, trace_function(tr, data, ip, parent_ip, flags); } +#ifdef CONFIG_MMIOTRACE +void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data, + struct mmiotrace_rw *rw) +{ + struct trace_entry *entry; + unsigned long irq_flags; + + spin_lock_irqsave(&data->lock, irq_flags); + entry = tracing_get_trace_entry(tr, data); + tracing_generic_entry_update(entry, 0); + entry->type = TRACE_MMIO_RW; + entry->mmiorw = *rw; + spin_unlock_irqrestore(&data->lock, irq_flags); + + trace_wake_up(); +} + +void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data, + struct mmiotrace_map *map) +{ + struct trace_entry *entry; + unsigned long irq_flags; + + spin_lock_irqsave(&data->lock, irq_flags); + entry = tracing_get_trace_entry(tr, data); + tracing_generic_entry_update(entry, 0); + entry->type = TRACE_MMIO_MAP; + entry->mmiomap = *map; + spin_unlock_irqrestore(&data->lock, irq_flags); + + trace_wake_up(); +} +#endif + void __trace_stack(struct trace_array *tr, struct trace_array_cpu *data, unsigned long flags, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c460e85e94e..0ef9ef74c80 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -5,6 +5,7 @@ #include #include #include +#include enum trace_type { __TRACE_FIRST_TYPE = 0, @@ -14,6 +15,8 @@ enum trace_type { TRACE_WAKE, TRACE_STACK, TRACE_SPECIAL, + TRACE_MMIO_RW, + TRACE_MMIO_MAP, __TRACE_LAST_TYPE }; @@ -75,6 +78,8 @@ struct trace_entry { struct ctx_switch_entry ctx; struct special_entry special; struct stack_entry stack; + struct mmiotrace_rw mmiorw; + struct mmiotrace_map mmiomap; }; }; @@ -255,6 +260,15 @@ extern unsigned long ftrace_update_tot_cnt; extern int DYN_FTRACE_TEST_NAME(void); #endif +#ifdef CONFIG_MMIOTRACE +extern void __trace_mmiotrace_rw(struct trace_array *tr, + struct trace_array_cpu *data, + struct mmiotrace_rw *rw); +extern void __trace_mmiotrace_map(struct trace_array *tr, + struct trace_array_cpu *data, + struct mmiotrace_map *map); +#endif + #ifdef CONFIG_FTRACE_STARTUP_TEST #ifdef CONFIG_FTRACE extern int trace_selftest_startup_function(struct tracer *trace, diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index e4dd03cc5aa..3a12b1ad0c6 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -11,19 +11,26 @@ #include "trace.h" -extern void -__trace_special(void *__tr, void *__data, - unsigned long arg1, unsigned long arg2, unsigned long arg3); - static struct trace_array *mmio_trace_array; +static void mmio_reset_data(struct trace_array *tr) +{ + int cpu; + + tr->time_start = ftrace_now(tr->cpu); + + for_each_online_cpu(cpu) + tracing_reset(tr->data[cpu]); +} static void mmio_trace_init(struct trace_array *tr) { pr_debug("in %s\n", __func__); mmio_trace_array = tr; - if (tr->ctrl) + if 
(tr->ctrl) { + mmio_reset_data(tr); enable_mmiotrace(); + } } static void mmio_trace_reset(struct trace_array *tr) @@ -31,15 +38,110 @@ static void mmio_trace_reset(struct trace_array *tr) pr_debug("in %s\n", __func__); if (tr->ctrl) disable_mmiotrace(); + mmio_reset_data(tr); + mmio_trace_array = NULL; } static void mmio_trace_ctrl_update(struct trace_array *tr) { pr_debug("in %s\n", __func__); - if (tr->ctrl) + if (tr->ctrl) { + mmio_reset_data(tr); enable_mmiotrace(); - else + } else { disable_mmiotrace(); + } +} + +/* XXX: This is not called for trace_pipe file! */ +void mmio_print_header(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + trace_seq_printf(s, "VERSION broken 20070824\n"); + /* TODO: print /proc/bus/pci/devices contents as PCIDEV lines */ +} + +static int mmio_print_rw(struct trace_iterator *iter) +{ + struct trace_entry *entry = iter->ent; + struct mmiotrace_rw *rw = &entry->mmiorw; + struct trace_seq *s = &iter->seq; + unsigned long long t = ns2usecs(entry->t); + unsigned long usec_rem = do_div(t, 1000000ULL); + unsigned secs = (unsigned long)t; + int ret = 1; + + switch (entry->mmiorw.opcode) { + case MMIO_READ: + ret = trace_seq_printf(s, + "R %d %lu.%06lu %d 0x%lx 0x%lx 0x%lx %d\n", + rw->width, secs, usec_rem, rw->map_id, rw->phys, + rw->value, rw->pc, entry->pid); + break; + case MMIO_WRITE: + ret = trace_seq_printf(s, + "W %d %lu.%06lu %d 0x%lx 0x%lx 0x%lx %d\n", + rw->width, secs, usec_rem, rw->map_id, rw->phys, + rw->value, rw->pc, entry->pid); + break; + case MMIO_UNKNOWN_OP: + ret = trace_seq_printf(s, + "UNKNOWN %lu.%06lu %d 0x%lx %02x,%02x,%02x 0x%lx %d\n", + secs, usec_rem, rw->map_id, rw->phys, + (rw->value >> 16) & 0xff, (rw->value >> 8) & 0xff, + (rw->value >> 0) & 0xff, rw->pc, entry->pid); + break; + default: + ret = trace_seq_printf(s, "rw what?\n"); + break; + } + if (ret) + return 1; + return 0; +} + +static int mmio_print_map(struct trace_iterator *iter) +{ + struct trace_entry *entry = iter->ent; + struct mmiotrace_map *m = &entry->mmiomap; + struct trace_seq *s = &iter->seq; + unsigned long long t = ns2usecs(entry->t); + unsigned long usec_rem = do_div(t, 1000000ULL); + unsigned secs = (unsigned long)t; + int ret = 1; + + switch (entry->mmiorw.opcode) { + case MMIO_PROBE: + ret = trace_seq_printf(s, + "MAP %lu.%06lu %d 0x%lx 0x%lx 0x%lx 0x%lx %d\n", + secs, usec_rem, m->map_id, m->phys, m->virt, m->len, + 0UL, entry->pid); + break; + case MMIO_UNPROBE: + ret = trace_seq_printf(s, + "UNMAP %lu.%06lu %d 0x%lx %d\n", + secs, usec_rem, m->map_id, 0UL, entry->pid); + break; + default: + ret = trace_seq_printf(s, "map what?\n"); + break; + } + if (ret) + return 1; + return 0; +} + +/* return 0 to abort printing without consuming current entry in pipe mode */ +static int mmio_print_line(struct trace_iterator *iter) +{ + switch (iter->ent->type) { + case TRACE_MMIO_RW: + return mmio_print_rw(iter); + case TRACE_MMIO_MAP: + return mmio_print_map(iter); + default: + return 1; /* ignore unknown entries */ + } } static struct tracer mmio_tracer __read_mostly = @@ -47,38 +149,31 @@ static struct tracer mmio_tracer __read_mostly = .name = "mmiotrace", .init = mmio_trace_init, .reset = mmio_trace_reset, + .open = mmio_print_header, .ctrl_update = mmio_trace_ctrl_update, + .print_line = mmio_print_line, }; __init static int init_mmio_trace(void) { - int ret = init_mmiotrace(); - if (ret) - return ret; return register_tracer(&mmio_tracer); } device_initcall(init_mmio_trace); -void mmio_trace_record(u32 type, unsigned long addr, unsigned long 
arg) +void mmio_trace_rw(struct mmiotrace_rw *rw) { struct trace_array *tr = mmio_trace_array; struct trace_array_cpu *data = tr->data[smp_processor_id()]; + __trace_mmiotrace_rw(tr, data, rw); +} - if (!current || current->pid == 0) { - /* - * XXX: This is a problem. We need to able to record, no - * matter what. tracing_generic_entry_update() would crash. - */ - static unsigned limit; - if (limit++ < 12) - pr_err("Error in %s: no current.\n", __func__); - return; - } - if (!tr || !data) { - static unsigned limit; - if (limit++ < 12) - pr_err("%s: no tr or data\n", __func__); - return; - } - __trace_special(tr, data, type, addr, arg); +void mmio_trace_mapping(struct mmiotrace_map *map) +{ + struct trace_array *tr = mmio_trace_array; + struct trace_array_cpu *data; + + preempt_disable(); + data = tr->data[smp_processor_id()]; + __trace_mmiotrace_map(tr, data, map); + preempt_enable(); } -- cgit v1.2.3-70-g09d2 From 801a175bf601f9a9d5e86e92dee9adeeb6625da8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:58 +0200 Subject: mmiotrace: ftrace fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d14fe49e963..4dcc4e85c5d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -838,12 +838,16 @@ void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data, struct trace_entry *entry; unsigned long irq_flags; - spin_lock_irqsave(&data->lock, irq_flags); + raw_local_irq_save(irq_flags); + __raw_spin_lock(&data->lock); + entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, 0); entry->type = TRACE_MMIO_RW; entry->mmiorw = *rw; - spin_unlock_irqrestore(&data->lock, irq_flags); + + __raw_spin_unlock(&data->lock); + raw_local_irq_restore(irq_flags); trace_wake_up(); } @@ -854,12 +858,16 @@ void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data, struct trace_entry *entry; unsigned long irq_flags; - spin_lock_irqsave(&data->lock, irq_flags); + raw_local_irq_save(irq_flags); + __raw_spin_lock(&data->lock); + entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, 0); entry->type = TRACE_MMIO_MAP; entry->mmiomap = *map; - spin_unlock_irqrestore(&data->lock, irq_flags); + + __raw_spin_unlock(&data->lock); + raw_local_irq_restore(irq_flags); trace_wake_up(); } -- cgit v1.2.3-70-g09d2 From 19384c0314342222b18d4c7f09cdce1ca74dfd2a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 22 May 2008 00:22:16 -0400 Subject: ftrace: limit use of check pages The check_pages function is called often enough that it can cause problems with trace outputs or even bringing the system to a halt. This patch limits the check_pages to the places that are most likely to have problems. The check is made at the flip between the global array and the max save array, as well as when the size of the buffers changes and the self tests. This patch also removes the BUG_ON from check_pages and replaces it with a WARN_ON and disabling of the tracer. 
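In other words, a corrupted page list no longer brings the box down; the check now follows this pattern (a condensed sketch of the change, not the full code):

    if (unlikely(data->trace_pages.next->prev != &data->trace_pages)) {
            tracing_disabled = 1;   /* turn tracing off ... */
            WARN_ON(1);             /* ... and warn instead of BUG()ing */
            return -1;
    }

and the buffer-resize path runs check_pages() over every CPU buffer afterwards, returning -ENOMEM to user space if the check tripped.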
Signed-off-by: Steven Rostedt Cc: pq@iki.fi Cc: proski@gnu.org Cc: sandmann@redhat.com Cc: a.p.zijlstra@chello.nl Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 32 +++++++++++++++++++++++--------- kernel/trace/trace_selftest.c | 1 + 2 files changed, 24 insertions(+), 9 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3271916ff03..0567f51bbea 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -249,24 +249,32 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) tracing_record_cmdline(current); } +#define CHECK_COND(cond) \ + if (unlikely(cond)) { \ + tracing_disabled = 1; \ + WARN_ON(1); \ + return -1; \ + } + /** * check_pages - integrity check of trace buffers * * As a safty measure we check to make sure the data pages have not - * been corrupted. TODO: configure to disable this because it adds - * a bit of overhead. + * been corrupted. */ -void check_pages(struct trace_array_cpu *data) +int check_pages(struct trace_array_cpu *data) { struct page *page, *tmp; - BUG_ON(data->trace_pages.next->prev != &data->trace_pages); - BUG_ON(data->trace_pages.prev->next != &data->trace_pages); + CHECK_COND(data->trace_pages.next->prev != &data->trace_pages); + CHECK_COND(data->trace_pages.prev->next != &data->trace_pages); list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) { - BUG_ON(page->lru.next->prev != &page->lru); - BUG_ON(page->lru.prev->next != &page->lru); + CHECK_COND(page->lru.next->prev != &page->lru); + CHECK_COND(page->lru.prev->next != &page->lru); } + + return 0; } /** @@ -280,7 +288,6 @@ void *head_page(struct trace_array_cpu *data) { struct page *page; - check_pages(data); if (list_empty(&data->trace_pages)) return NULL; @@ -2566,7 +2573,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, { unsigned long val; char buf[64]; - int ret; + int i, ret; if (cnt >= sizeof(buf)) return -EINVAL; @@ -2635,8 +2642,15 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, trace_free_page(); } + /* check integrity */ + for_each_tracing_cpu(i) + check_pages(global_trace.data[i]); + filp->f_pos += cnt; + /* If check pages failed, return ENOMEM */ + if (tracing_disabled) + cnt = -ENOMEM; out: max_tr.entries = global_trace.entries; mutex_unlock(&trace_types_lock); diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 3877dd9102f..18c5423bc97 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -28,6 +28,7 @@ trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data) page = list_entry(data->trace_pages.next, struct page, lru); entries = page_address(page); + check_pages(data); if (head_page(data) != entries) goto failed; -- cgit v1.2.3-70-g09d2 From 4902f8849da6d2805bd291551a6dfd48f1b4f604 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 22 May 2008 00:22:18 -0400 Subject: ftrace: move ftrace_special to trace.c Move the ftrace_special out of sched_switch to trace.c. 
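As a reminder of how it is used, ftrace_special() takes three free-form arguments and records a special trace entry whenever tracing is active, so it can be dropped into arbitrary code as a quick marker, e.g. as the earlier kernel/semaphore.c hunk does:

    ftrace_special(sem->count, 0, __LINE__);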
Signed-off-by: Steven Rostedt Cc: pq@iki.fi Cc: proski@gnu.org Cc: sandmann@redhat.com Cc: a.p.zijlstra@chello.nl Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 27 +++++++++++++++++++++++++-- kernel/trace/trace_sched_switch.c | 24 ------------------------ 2 files changed, 25 insertions(+), 26 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0567f51bbea..583fe24903d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -941,6 +941,30 @@ tracing_sched_wakeup_trace(struct trace_array *tr, trace_wake_up(); } +void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + + if (tracing_disabled || current_trace == &no_tracer || !tr->ctrl) + return; + + local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) + __trace_special(tr, data, arg1, arg2, arg3); + + atomic_dec(&data->disabled); + local_irq_restore(flags); +} + #ifdef CONFIG_FTRACE static void function_trace_call(unsigned long ip, unsigned long parent_ip) @@ -2941,8 +2965,6 @@ __init static int tracer_alloc_buffers(void) int ret = -ENOMEM; int i; - global_trace.ctrl = tracer_enabled; - /* TODO: make the number of buffers hot pluggable with CPUS */ tracing_nr_buffers = num_possible_cpus(); tracing_buffer_mask = cpu_possible_map; @@ -3012,6 +3034,7 @@ __init static int tracer_alloc_buffers(void) current_trace = &no_tracer; /* All seems OK, enable tracing */ + global_trace.ctrl = tracer_enabled; tracing_disabled = 0; return 0; diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index d25ffa5eaf2..798ec0dc863 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -125,30 +125,6 @@ wake_up_callback(void *probe_data, void *call_data, wakeup_func(probe_data, __rq, task, curr); } -void -ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) -{ - struct trace_array *tr = ctx_trace; - struct trace_array_cpu *data; - unsigned long flags; - long disabled; - int cpu; - - if (!tracer_enabled) - return; - - local_irq_save(flags); - cpu = raw_smp_processor_id(); - data = tr->data[cpu]; - disabled = atomic_inc_return(&data->disabled); - - if (likely(disabled == 1)) - __trace_special(tr, data, arg1, arg2, arg3); - - atomic_dec(&data->disabled); - local_irq_restore(flags); -} - static void sched_switch_reset(struct trace_array *tr) { int cpu; -- cgit v1.2.3-70-g09d2 From 41bc8144d02028133bcd1d545023c6f49e8b2411 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 22 May 2008 11:49:22 -0400 Subject: ftrace: fix up cmdline recording The new work with converting the trace hooks over to markers broke the command line recording of ftrace. This patch fixes it again. 
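A tracer that wants comms resolved no longer touches trace_record_cmdline_enabled directly; it just brackets its start/stop with the new helpers (sketch, mirroring the trace_functions.c change below):

    tracing_start_cmdline_record();  /* register the sched_switch hooks */
    ...
    tracing_stop_cmdline_record();   /* unregister them again */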
Signed-off-by: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Signed-off-by: Thomas Gleixner --- kernel/trace/trace.c | 3 --- kernel/trace/trace.h | 4 ++-- kernel/trace/trace_functions.c | 4 ++-- kernel/trace/trace_sched_switch.c | 21 +++++++++++++++------ 4 files changed, 19 insertions(+), 13 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 583fe24903d..0feae23d989 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -652,9 +652,6 @@ static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; static int cmdline_idx; static DEFINE_SPINLOCK(trace_cmdline_lock); -/* trace in all context switches */ -atomic_t trace_record_cmdline_enabled __read_mostly; - /* temporary disable recording */ atomic_t trace_record_cmdline_disabled __read_mostly; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c460e85e94e..6b8bd8800d0 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -218,6 +218,8 @@ void trace_function(struct trace_array *tr, void tracing_start_function_trace(void); void tracing_stop_function_trace(void); +void tracing_start_cmdline_record(void); +void tracing_stop_cmdline_record(void); int register_tracer(struct tracer *type); void unregister_tracer(struct tracer *type); @@ -226,8 +228,6 @@ extern unsigned long nsecs_to_usecs(unsigned long nsecs); extern unsigned long tracing_max_latency; extern unsigned long tracing_thresh; -extern atomic_t trace_record_cmdline_enabled; - void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu); void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu); diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 0a084656d7c..7ee7dcd76b7 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -29,14 +29,14 @@ static void function_reset(struct trace_array *tr) static void start_function_trace(struct trace_array *tr) { function_reset(tr); - atomic_inc(&trace_record_cmdline_enabled); + tracing_start_cmdline_record(); tracing_start_function_trace(); } static void stop_function_trace(struct trace_array *tr) { tracing_stop_function_trace(); - atomic_dec(&trace_record_cmdline_enabled); + tracing_stop_cmdline_record(); } static void function_trace_init(struct trace_array *tr) diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 798ec0dc863..c16935d3bc5 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -29,6 +29,9 @@ sched_switch_func(void *private, void *__rq, struct task_struct *prev, long disabled; int cpu; + tracing_record_cmdline(prev); + tracing_record_cmdline(next); + if (!tracer_enabled) return; @@ -63,8 +66,6 @@ sched_switch_callback(void *probe_data, void *call_data, prev = va_arg(*args, typeof(prev)); next = va_arg(*args, typeof(next)); - tracing_record_cmdline(prev); - /* * If tracer_switch_func only points to the local * switch func, it still needs the ptr passed to it. 
@@ -213,18 +214,26 @@ void tracing_stop_sched_switch(void) tracing_sched_unregister(); } +void tracing_start_cmdline_record(void) +{ + tracing_start_sched_switch(); +} + +void tracing_stop_cmdline_record(void) +{ + tracing_stop_sched_switch(); +} + static void start_sched_trace(struct trace_array *tr) { sched_switch_reset(tr); - atomic_inc(&trace_record_cmdline_enabled); tracer_enabled = 1; - tracing_start_sched_switch(); + tracing_start_cmdline_record(); } static void stop_sched_trace(struct trace_array *tr) { - tracing_stop_sched_switch(); - atomic_dec(&trace_record_cmdline_enabled); + tracing_stop_cmdline_record(); tracer_enabled = 0; } -- cgit v1.2.3-70-g09d2 From 76094a2cf46e4ab776055d4086615b884408568c Mon Sep 17 00:00:00 2001 From: Abhishek Sagar Date: Wed, 28 May 2008 00:03:18 +0530 Subject: ftrace: distinguish kretprobe'd functions in trace logs Tracing functions via ftrace which have a kretprobe installed on them, can produce misleading output in their trace logs. E.g, consider the correct trace of the following sequence: do_IRQ() { ~ irq_enter(); ~ } Trace log (sample): -0 [00] 4154504455.781616: irq_enter <- do_IRQ But if irq_enter() has a kretprobe installed on it, the return value stored on the stack at each invocation is modified to divert the return to a kprobe trampoline function called kretprobe_trampoline(). So with this the trace would (currently) look like: -0 [00] 4154504455.781616: irq_enter <- kretprobe_trampoline Now this is quite misleading to the end user, as it suggests something that didn't actually happen. So just to avoid such misinterpretations, the inlined patch aims to output such a log as: -0 [00] 4154504455.781616: irq_enter <- [unknown/kretprobe'd] Signed-off-by: Abhishek Sagar Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0feae23d989..12f5e817380 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -1199,6 +1200,20 @@ static void s_stop(struct seq_file *m, void *p) mutex_unlock(&trace_types_lock); } +#define KRETPROBE_MSG "[unknown/kretprobe'd]" + +#ifdef CONFIG_KRETPROBES +static inline int kretprobed(unsigned long addr) +{ + return addr == (unsigned long)kretprobe_trampoline; +} +#else +static inline int kretprobed(unsigned long addr) +{ + return 0; +} +#endif /* CONFIG_KRETPROBES */ + static int seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address) { @@ -1434,7 +1449,10 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) case TRACE_FN: seq_print_ip_sym(s, entry->fn.ip, sym_flags); trace_seq_puts(s, " ("); - seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags); + if (kretprobed(entry->fn.parent_ip)) + trace_seq_puts(s, KRETPROBE_MSG); + else + seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags); trace_seq_puts(s, ")\n"); break; case TRACE_CTX: @@ -1514,8 +1532,11 @@ static int print_trace_fmt(struct trace_iterator *iter) ret = trace_seq_printf(s, " <-"); if (!ret) return 0; - ret = seq_print_ip_sym(s, entry->fn.parent_ip, - sym_flags); + if (kretprobed(entry->fn.parent_ip)) + ret = trace_seq_puts(s, KRETPROBE_MSG); + else + ret = seq_print_ip_sym(s, entry->fn.parent_ip, + sym_flags); if (!ret) return 0; } -- cgit v1.2.3-70-g09d2 From 2b1bce1787700768cbc87c8509851c6f49d252dc Mon Sep 17 00:00:00 2001 From: 
Ankita Garg Date: Mon, 9 Jun 2008 14:10:25 +0530 Subject: ftrace: disable tracing when current_tracer is set to "none" Found that inspite of setting the current_tracer to "none", trace from the previous trace type continued to be collected. The patch below fixes this and causes the trace to be disabled when the "none" type is selected. Compile and boot tested the patch for functionality. Signed-off-by: Ankita Garg Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 12f5e817380..dde6f0ace6d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -43,11 +43,6 @@ static cpumask_t __read_mostly tracing_buffer_mask; #define for_each_tracing_cpu(cpu) \ for_each_cpu_mask(cpu, tracing_buffer_mask) -/* dummy trace to disable tracing */ -static struct tracer no_tracer __read_mostly = { - .name = "none", -}; - static int trace_alloc_page(void); static int trace_free_page(void); @@ -135,6 +130,23 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); /* trace_flags holds iter_ctrl options */ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; +static notrace void no_trace_init(struct trace_array *tr) +{ + int cpu; + + if(tr->ctrl) + for_each_online_cpu(cpu) + tracing_reset(tr->data[cpu]); + tracer_enabled = 0; +} + +/* dummy trace to disable tracing */ +static struct tracer no_tracer __read_mostly = { + .name = "none", + .init = no_trace_init +}; + + /** * trace_wake_up - wake up tasks waiting for trace input * -- cgit v1.2.3-70-g09d2 From 20764ff1efb440640353053ec83263e69e1259e0 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 12 Jun 2008 11:27:03 +0200 Subject: ftrace: fix printout Do not print loglevel before "entries of %ld bytes". Move it to the previous pr_info. Signed-off-by: Jiri Slaby Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index dde6f0ace6d..6e9dae7eb41 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3051,9 +3051,8 @@ __init static int tracer_alloc_buffers(void) } max_tr.entries = global_trace.entries; - pr_info("tracer: %d pages allocated for %ld", - pages, trace_nr_entries); - pr_info(" entries of %ld bytes\n", (long)TRACE_ENTRY_SIZE); + pr_info("tracer: %d pages allocated for %ld entries of %ld bytes\n", + pages, trace_nr_entries, (long)TRACE_ENTRY_SIZE); pr_info(" actual entries %ld\n", global_trace.entries); tracer_init_debugfs(); -- cgit v1.2.3-70-g09d2 From ecea656d1d5e912d2f3d332657ea4a6d8380f891 Mon Sep 17 00:00:00 2001 From: Abhishek Sagar Date: Sat, 21 Jun 2008 23:47:53 +0530 Subject: ftrace: freeze kprobe'd records Let records identified as being kprobe'd be marked as "frozen". The trouble with records which have a kprobe installed on their mcount call-site is that they don't get updated. So if such a function which is currently being traced gets its tracing disabled due to a new filter rule (or because it was added to the notrace list) then it won't be updated and continue being traced. This patch allows scanning of all frozen records during tracing to check if they should be traced. 
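The check happens at trace time: the function tracer's callback bails out early for such call-sites (sketch of the trace.c hunk below):

    /* in function_trace_call(): */
    if (unlikely(!tracer_enabled))
            return;
    if (skip_trace(ip))     /* frozen, i.e. kprobe'd, record that should not be traced */
            return;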
Signed-off-by: Abhishek Sagar Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 6 ++++- kernel/trace/ftrace.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++- kernel/trace/trace.c | 3 +++ 3 files changed, 79 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 366098d591d..3121b95443d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -49,6 +49,7 @@ enum { FTRACE_FL_ENABLED = (1 << 3), FTRACE_FL_NOTRACE = (1 << 4), FTRACE_FL_CONVERTED = (1 << 5), + FTRACE_FL_FROZEN = (1 << 6), }; struct dyn_ftrace { @@ -73,15 +74,18 @@ extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); +extern int skip_trace(unsigned long ip); + void ftrace_disable_daemon(void); void ftrace_enable_daemon(void); #else +# define skip_trace(ip) ({ 0; }) # define ftrace_force_update() ({ 0; }) # define ftrace_set_filter(buf, len, reset) do { } while (0) # define ftrace_disable_daemon() do { } while (0) # define ftrace_enable_daemon() do { } while (0) -#endif +#endif /* CONFIG_DYNAMIC_FTRACE */ /* totally disable ftrace - can not re-enable after this */ void ftrace_kill(void); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f1e9e5c74e6..d1238163155 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -163,6 +163,8 @@ enum { }; static int ftrace_filtered; +static int tracing_on; +static int frozen_record_count; static struct hlist_head ftrace_hash[FTRACE_HASHSIZE]; @@ -195,6 +197,71 @@ static int ftrace_record_suspend; static struct dyn_ftrace *ftrace_free_records; + +#ifdef CONFIG_KPROBES +static inline void freeze_record(struct dyn_ftrace *rec) +{ + if (!(rec->flags & FTRACE_FL_FROZEN)) { + rec->flags |= FTRACE_FL_FROZEN; + frozen_record_count++; + } +} + +static inline void unfreeze_record(struct dyn_ftrace *rec) +{ + if (rec->flags & FTRACE_FL_FROZEN) { + rec->flags &= ~FTRACE_FL_FROZEN; + frozen_record_count--; + } +} + +static inline int record_frozen(struct dyn_ftrace *rec) +{ + return rec->flags & FTRACE_FL_FROZEN; +} +#else +# define freeze_record(rec) ({ 0; }) +# define unfreeze_record(rec) ({ 0; }) +# define record_frozen(rec) ({ 0; }) +#endif /* CONFIG_KPROBES */ + +int skip_trace(unsigned long ip) +{ + unsigned long fl; + struct dyn_ftrace *rec; + struct hlist_node *t; + struct hlist_head *head; + + if (frozen_record_count == 0) + return 0; + + head = &ftrace_hash[hash_long(ip, FTRACE_HASHBITS)]; + hlist_for_each_entry_rcu(rec, t, head, node) { + if (rec->ip == ip) { + if (record_frozen(rec)) { + if (rec->flags & FTRACE_FL_FAILED) + return 1; + + if (!(rec->flags & FTRACE_FL_CONVERTED)) + return 1; + + if (!tracing_on || !ftrace_enabled) + return 1; + + if (ftrace_filtered) { + fl = rec->flags & (FTRACE_FL_FILTER | + FTRACE_FL_NOTRACE); + if (!fl || (fl & FTRACE_FL_NOTRACE)) + return 1; + } + } + break; + } + } + + return 0; +} + static inline int ftrace_ip_in_hash(unsigned long ip, unsigned long key) { @@ -489,8 +556,11 @@ static int __ftrace_modify_code(void *data) */ __ftrace_update_code(NULL); ftrace_replace_code(1); - } else if (*command & FTRACE_DISABLE_CALLS) + tracing_on = 1; + } else if (*command & FTRACE_DISABLE_CALLS) { ftrace_replace_code(0); + tracing_on = 0; + } if (*command & FTRACE_UPDATE_TRACE_FUNC) ftrace_update_ftrace_func(ftrace_trace_function); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6e9dae7eb41..9ade79369bf 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ 
-988,6 +988,9 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) if (unlikely(!tracer_enabled)) return; + if (skip_trace(ip)) + return; + local_irq_save(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; -- cgit v1.2.3-70-g09d2 From b5c21b4514b38f450848feb432f7120376d01ffe Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 10 Jul 2008 20:58:12 -0400 Subject: ftrace: check proper config for preempt type There is no CONFIG_PREEMPT_DESKTOP. Use the proper entry CONFIG_PREEMPT. Signed-off-by: Steven Rostedt Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Andrew Morton Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9ade79369bf..f8fdb9cedc2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1341,7 +1341,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) "server", #elif defined(CONFIG_PREEMPT_VOLUNTARY) "desktop", -#elif defined(CONFIG_PREEMPT_DESKTOP) +#elif defined(CONFIG_PREEMPT) "preempt", #else "unknown", -- cgit v1.2.3-70-g09d2 From 60bc080090e3bf6afa29c62cb25f913706551010 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 10 Jul 2008 20:58:16 -0400 Subject: ftrace: separate out the function enabled variable Currently the function tracer uses the global tracer_enabled variable that is used to keep track if the tracer is enabled or not. The function tracing startup needs to be separated out, otherwise the internal happenings of the tracer startup is also recorded. This patch creates a ftrace_function_enabled variable to all the starting of the function traces to happen after everything has been started. Signed-off-by: Steven Rostedt Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Andrew Morton Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f8fdb9cedc2..2e37857f7df 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -96,6 +96,9 @@ static DEFINE_PER_CPU(struct trace_array_cpu, max_data); /* tracer_enabled is used to toggle activation of a tracer */ static int tracer_enabled = 1; +/* function tracing enabled */ +int ftrace_function_enabled; + /* * trace_nr_entries is the number of entries that is allocated * for a buffer. 
Note, the number of entries is always rounded @@ -134,6 +137,7 @@ static notrace void no_trace_init(struct trace_array *tr) { int cpu; + ftrace_function_enabled = 0; if(tr->ctrl) for_each_online_cpu(cpu) tracing_reset(tr->data[cpu]); @@ -985,7 +989,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) long disabled; int cpu; - if (unlikely(!tracer_enabled)) + if (unlikely(!ftrace_function_enabled)) return; if (skip_trace(ip)) @@ -1010,11 +1014,15 @@ static struct ftrace_ops trace_ops __read_mostly = void tracing_start_function_trace(void) { + ftrace_function_enabled = 0; register_ftrace_function(&trace_ops); + if (tracer_enabled) + ftrace_function_enabled = 1; } void tracing_stop_function_trace(void) { + ftrace_function_enabled = 0; unregister_ftrace_function(&trace_ops); } #endif @@ -1850,8 +1858,10 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) m->private = iter; /* stop the trace while dumping */ - if (iter->tr->ctrl) + if (iter->tr->ctrl) { tracer_enabled = 0; + ftrace_function_enabled = 0; + } if (iter->trace && iter->trace->open) iter->trace->open(iter); @@ -1884,8 +1894,14 @@ int tracing_release(struct inode *inode, struct file *file) iter->trace->close(iter); /* reenable tracing if it was previously enabled */ - if (iter->tr->ctrl) + if (iter->tr->ctrl) { tracer_enabled = 1; + /* + * It is safe to enable function tracing even if it + * isn't used + */ + ftrace_function_enabled = 1; + } mutex_unlock(&trace_types_lock); seq_release(inode, file); -- cgit v1.2.3-70-g09d2