From 29c0177e6a4ac094302bed54a1d4bbb6b740a9ef Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 13 Dec 2008 21:20:25 +1030 Subject: cpumask: change cpumask_scnprintf, cpumask_parse_user, cpulist_parse, and cpulist_scnprintf to take pointers. Impact: change calling convention of existing cpumask APIs Most cpumask functions started with cpus_: these have been replaced by cpumask_ ones which take struct cpumask pointers as expected. These four functions don't have good replacement names; fortunately they're rarely used, so we just change them over. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis Acked-by: Ingo Molnar Cc: paulus@samba.org Cc: mingo@redhat.com Cc: tony.luck@intel.com Cc: ralf@linux-mips.org Cc: Greg Kroah-Hartman Cc: cl@linux-foundation.org Cc: srostedt@redhat.com --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d86e3252f30..d2e75479dc5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2126,7 +2126,7 @@ tracing_cpumask_read(struct file *filp, char __user *ubuf, mutex_lock(&tracing_cpumask_update_lock); - len = cpumask_scnprintf(mask_str, count, tracing_cpumask); + len = cpumask_scnprintf(mask_str, count, &tracing_cpumask); if (count - len < 2) { count = -EINVAL; goto out_err; @@ -2147,7 +2147,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, int err, cpu; mutex_lock(&tracing_cpumask_update_lock); - err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); + err = cpumask_parse_user(ubuf, count, &tracing_cpumask_new); if (err) goto err_unlock; -- cgit v1.2.3-70-g09d2 From 9e01c1b74c9531e301c900edaa92a99fcb7738f2 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 1 Jan 2009 10:12:22 +1030 Subject: cpumask: convert kernel trace functions Impact: Reduce future memory usage, use new cpumask API. (Eventually, cpumask_var_t will be allocated based on nr_cpu_ids, not NR_CPUS). Convert kernel trace functions to use struct cpumask API: 1) Use cpumask_copy/cpumask_test_cpu/for_each_cpu. 2) Use cpumask_var_t and alloc_cpumask_var/free_cpumask_var everywhere. 3) Use on_each_cpu instead of playing with current->cpus_allowed. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis Acked-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 42 ++++++++++++++++++------------- kernel/trace/trace.c | 60 ++++++++++++++++++++++++++------------------ kernel/trace/trace_sysprof.c | 13 +++------- 3 files changed, 64 insertions(+), 51 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 1d601a7c458..a9d9760dc7b 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -195,7 +195,7 @@ void *ring_buffer_event_data(struct ring_buffer_event *event) EXPORT_SYMBOL_GPL(ring_buffer_event_data); #define for_each_buffer_cpu(buffer, cpu) \ - for_each_cpu_mask(cpu, buffer->cpumask) + for_each_cpu(cpu, buffer->cpumask) #define TS_SHIFT 27 #define TS_MASK ((1ULL << TS_SHIFT) - 1) @@ -267,7 +267,7 @@ struct ring_buffer { unsigned pages; unsigned flags; int cpus; - cpumask_t cpumask; + cpumask_var_t cpumask; atomic_t record_disabled; struct mutex mutex; @@ -458,6 +458,9 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags) if (!buffer) return NULL; + if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL)) + goto fail_free_buffer; + buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); buffer->flags = flags; @@ -465,14 +468,14 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags) if (buffer->pages == 1) buffer->pages++; - buffer->cpumask = cpu_possible_map; + cpumask_copy(buffer->cpumask, cpu_possible_mask); buffer->cpus = nr_cpu_ids; bsize = sizeof(void *) * nr_cpu_ids; buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()), GFP_KERNEL); if (!buffer->buffers) - goto fail_free_buffer; + goto fail_free_cpumask; for_each_buffer_cpu(buffer, cpu) { buffer->buffers[cpu] = @@ -492,6 +495,9 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags) } kfree(buffer->buffers); + fail_free_cpumask: + free_cpumask_var(buffer->cpumask); + fail_free_buffer: kfree(buffer); return NULL; @@ -510,6 +516,8 @@ ring_buffer_free(struct ring_buffer *buffer) for_each_buffer_cpu(buffer, cpu) rb_free_cpu_buffer(buffer->buffers[cpu]); + free_cpumask_var(buffer->cpumask); + kfree(buffer); } EXPORT_SYMBOL_GPL(ring_buffer_free); @@ -1283,7 +1291,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, cpu = raw_smp_processor_id(); - if (!cpu_isset(cpu, buffer->cpumask)) + if (!cpumask_test_cpu(cpu, buffer->cpumask)) goto out; cpu_buffer = buffer->buffers[cpu]; @@ -1396,7 +1404,7 @@ int ring_buffer_write(struct ring_buffer *buffer, cpu = raw_smp_processor_id(); - if (!cpu_isset(cpu, buffer->cpumask)) + if (!cpumask_test_cpu(cpu, buffer->cpumask)) goto out; cpu_buffer = buffer->buffers[cpu]; @@ -1478,7 +1486,7 @@ void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; - if (!cpu_isset(cpu, buffer->cpumask)) + if (!cpumask_test_cpu(cpu, buffer->cpumask)) return; cpu_buffer = buffer->buffers[cpu]; @@ -1498,7 +1506,7 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; - if (!cpu_isset(cpu, buffer->cpumask)) + if (!cpumask_test_cpu(cpu, buffer->cpumask)) return; cpu_buffer = buffer->buffers[cpu]; @@ -1515,7 +1523,7 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; - if (!cpu_isset(cpu, buffer->cpumask)) + if (!cpumask_test_cpu(cpu, buffer->cpumask)) return 0; cpu_buffer = buffer->buffers[cpu]; @@ -1532,7 +1540,7 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; - if (!cpu_isset(cpu, buffer->cpumask)) + if (!cpumask_test_cpu(cpu, buffer->cpumask)) return 0; cpu_buffer = buffer->buffers[cpu]; @@ -1850,7 +1858,7 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) struct buffer_page *reader; int nr_loops = 0; - if (!cpu_isset(cpu, buffer->cpumask)) + if (!cpumask_test_cpu(cpu, buffer->cpumask)) return NULL; cpu_buffer = buffer->buffers[cpu]; @@ -2025,7 +2033,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) struct ring_buffer_event *event; unsigned long flags; - if (!cpu_isset(cpu, buffer->cpumask)) + if (!cpumask_test_cpu(cpu, buffer->cpumask)) return NULL; spin_lock_irqsave(&cpu_buffer->reader_lock, flags); @@ -2062,7 +2070,7 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu) struct ring_buffer_iter *iter; unsigned long flags; - if (!cpu_isset(cpu, buffer->cpumask)) + if (!cpumask_test_cpu(cpu, buffer->cpumask)) return NULL; iter = kmalloc(sizeof(*iter), GFP_KERNEL); @@ -2172,7 +2180,7 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; unsigned long flags; - if (!cpu_isset(cpu, buffer->cpumask)) + if (!cpumask_test_cpu(cpu, buffer->cpumask)) return; spin_lock_irqsave(&cpu_buffer->reader_lock, flags); @@ -2228,7 +2236,7 @@ int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; - if (!cpu_isset(cpu, buffer->cpumask)) + if (!cpumask_test_cpu(cpu, buffer->cpumask)) return 1; cpu_buffer = buffer->buffers[cpu]; @@ -2252,8 +2260,8 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, struct ring_buffer_per_cpu *cpu_buffer_a; struct ring_buffer_per_cpu *cpu_buffer_b; - if (!cpu_isset(cpu, buffer_a->cpumask) || - !cpu_isset(cpu, buffer_b->cpumask)) + if (!cpumask_test_cpu(cpu, buffer_a->cpumask) || + !cpumask_test_cpu(cpu, buffer_b->cpumask)) return -EINVAL; /* At least make sure the two buffers are somewhat the same */ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0e91f43b6ba..5d04e27f3b4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -89,10 +89,10 @@ static inline void ftrace_enable_cpu(void) preempt_enable(); } -static cpumask_t __read_mostly tracing_buffer_mask; +static cpumask_var_t __read_mostly tracing_buffer_mask; #define for_each_tracing_cpu(cpu) \ - for_each_cpu_mask(cpu, tracing_buffer_mask) + for_each_cpu(cpu, tracing_buffer_mask) /* * ftrace_dump_on_oops - variable to dump ftrace buffer on oops @@ -2646,13 +2646,7 @@ static struct file_operations show_traces_fops = { /* * Only trace on a CPU if the bitmask is set: */ -static cpumask_t tracing_cpumask = CPU_MASK_ALL; - -/* - * When tracing/tracing_cpu_mask is modified then this holds - * the new bitmask we are about to install: - */ -static cpumask_t tracing_cpumask_new; +static cpumask_var_t tracing_cpumask; /* * The tracer itself will not take this lock, but still we want @@ -2674,7 +2668,7 @@ tracing_cpumask_read(struct file *filp, char __user *ubuf, mutex_lock(&tracing_cpumask_update_lock); - len = cpumask_scnprintf(mask_str, count, &tracing_cpumask); + len = cpumask_scnprintf(mask_str, count, tracing_cpumask); if (count - len < 2) { count = -EINVAL; goto out_err; @@ -2693,9 +2687,13 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, size_t count, loff_t *ppos) { int err, cpu; + cpumask_var_t tracing_cpumask_new; + + if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) + return -ENOMEM; mutex_lock(&tracing_cpumask_update_lock); - err = cpumask_parse_user(ubuf, count, &tracing_cpumask_new); + err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); if (err) goto err_unlock; @@ -2706,26 +2704,28 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, * Increase/decrease the disabled counter if we are * about to flip a bit in the cpumask: */ - if (cpu_isset(cpu, tracing_cpumask) && - !cpu_isset(cpu, tracing_cpumask_new)) { + if (cpumask_test_cpu(cpu, tracing_cpumask) && + !cpumask_test_cpu(cpu, tracing_cpumask_new)) { atomic_inc(&global_trace.data[cpu]->disabled); } - if (!cpu_isset(cpu, tracing_cpumask) && - cpu_isset(cpu, tracing_cpumask_new)) { + if (!cpumask_test_cpu(cpu, tracing_cpumask) && + cpumask_test_cpu(cpu, tracing_cpumask_new)) { atomic_dec(&global_trace.data[cpu]->disabled); } } __raw_spin_unlock(&ftrace_max_lock); local_irq_enable(); - tracing_cpumask = tracing_cpumask_new; + cpumask_copy(tracing_cpumask, tracing_cpumask_new); mutex_unlock(&tracing_cpumask_update_lock); + free_cpumask_var(tracing_cpumask_new); return count; err_unlock: mutex_unlock(&tracing_cpumask_update_lock); + free_cpumask_var(tracing_cpumask); return err; } @@ -3752,7 +3752,6 @@ void ftrace_dump(void) static DEFINE_SPINLOCK(ftrace_dump_lock); /* use static because iter can be a bit big for the stack */ static struct trace_iterator iter; - static cpumask_t mask; static int dump_ran; unsigned long flags; int cnt = 0, cpu; @@ -3786,8 +3785,6 @@ void ftrace_dump(void) * and then release the locks again. */ - cpus_clear(mask); - while (!trace_empty(&iter)) { if (!cnt) @@ -3823,19 +3820,28 @@ __init static int tracer_alloc_buffers(void) { struct trace_array_cpu *data; int i; + int ret = -ENOMEM; - /* TODO: make the number of buffers hot pluggable with CPUS */ - tracing_buffer_mask = cpu_possible_map; + if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL)) + goto out; + + if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) + goto out_free_buffer_mask; + cpumask_copy(tracing_buffer_mask, cpu_possible_mask); + cpumask_copy(tracing_cpumask, cpu_all_mask); + + /* TODO: make the number of buffers hot pluggable with CPUS */ global_trace.buffer = ring_buffer_alloc(trace_buf_size, TRACE_BUFFER_FLAGS); if (!global_trace.buffer) { printk(KERN_ERR "tracer: failed to allocate ring buffer!\n"); WARN_ON(1); - return 0; + goto out_free_cpumask; } global_trace.entries = ring_buffer_size(global_trace.buffer); + #ifdef CONFIG_TRACER_MAX_TRACE max_tr.buffer = ring_buffer_alloc(trace_buf_size, TRACE_BUFFER_FLAGS); @@ -3843,7 +3849,7 @@ __init static int tracer_alloc_buffers(void) printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n"); WARN_ON(1); ring_buffer_free(global_trace.buffer); - return 0; + goto out_free_cpumask; } max_tr.entries = ring_buffer_size(max_tr.buffer); WARN_ON(max_tr.entries != global_trace.entries); @@ -3873,8 +3879,14 @@ __init static int tracer_alloc_buffers(void) &trace_panic_notifier); register_die_notifier(&trace_die_notifier); + ret = 0; - return 0; +out_free_cpumask: + free_cpumask_var(tracing_cpumask); +out_free_buffer_mask: + free_cpumask_var(tracing_buffer_mask); +out: + return ret; } early_initcall(tracer_alloc_buffers); fs_initcall(tracer_init_debugfs); diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index a5779bd975d..eaca5ad803f 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -196,9 +196,9 @@ static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer) return HRTIMER_RESTART; } -static void start_stack_timer(int cpu) +static void start_stack_timer(void *unused) { - struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu); + struct hrtimer *hrtimer = &__get_cpu_var(stack_trace_hrtimer); hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hrtimer->function = stack_trace_timer_fn; @@ -208,14 +208,7 @@ static void start_stack_timer(int cpu) static void start_stack_timers(void) { - cpumask_t saved_mask = current->cpus_allowed; - int cpu; - - for_each_online_cpu(cpu) { - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - start_stack_timer(cpu); - } - set_cpus_allowed_ptr(current, &saved_mask); + on_each_cpu(start_stack_timer, NULL, 1); } static void stop_stack_timer(int cpu) -- cgit v1.2.3-70-g09d2 From 4462344ee9ea9224d026801b877887f2f39774a3 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 1 Jan 2009 10:12:23 +1030 Subject: cpumask: convert kernel trace functions further Impact: Reduce future memory usage, use new cpumask API. Since the last patch was created and acked, more old cpumask users slipped into kernel/trace. Mostly trivial conversions, except struct trace_iterator's "started" member becomes a cpumask_var_t. Signed-off-by: Rusty Russell --- kernel/trace/trace.c | 12 +++++++++--- kernel/trace/trace.h | 2 +- kernel/trace/trace_boot.c | 2 +- kernel/trace/trace_functions_graph.c | 2 +- kernel/trace/trace_hw_branches.c | 6 +++--- kernel/trace/trace_power.c | 2 +- 6 files changed, 16 insertions(+), 10 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5d04e27f3b4..c580233add9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1811,10 +1811,10 @@ static void test_cpu_buff_start(struct trace_iterator *iter) if (!(iter->iter_flags & TRACE_FILE_ANNOTATE)) return; - if (cpu_isset(iter->cpu, iter->started)) + if (cpumask_test_cpu(iter->cpu, iter->started)) return; - cpu_set(iter->cpu, iter->started); + cpumask_set_cpu(iter->cpu, iter->started); trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu); } @@ -3114,10 +3114,15 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) if (!iter) return -ENOMEM; + if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { + kfree(iter); + return -ENOMEM; + } + mutex_lock(&trace_types_lock); /* trace pipe does not show start of buffer */ - cpus_setall(iter->started); + cpumask_setall(iter->started); iter->tr = &global_trace; iter->trace = current_trace; @@ -3134,6 +3139,7 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) { struct trace_iterator *iter = file->private_data; + free_cpumask_var(iter->started); kfree(iter); atomic_dec(&tracing_reader); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cc7a4f86403..4d3d381bfd9 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -368,7 +368,7 @@ struct trace_iterator { loff_t pos; long idx; - cpumask_t started; + cpumask_var_t started; }; int tracing_is_enabled(void); diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 3ccebde2848..366c8c333e1 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -42,7 +42,7 @@ static int boot_trace_init(struct trace_array *tr) int cpu; boot_trace = tr; - for_each_cpu_mask(cpu, cpu_possible_map) + for_each_cpu(cpu, cpu_possible_mask) tracing_reset(tr, cpu); tracing_sched_switch_assign_trace(tr); diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 4bf39fcae97..930c08e5b38 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -79,7 +79,7 @@ print_graph_cpu(struct trace_seq *s, int cpu) int i; int ret; int log10_this = log10_cpu(cpu); - int log10_all = log10_cpu(cpus_weight_nr(cpu_online_map)); + int log10_all = log10_cpu(cpumask_weight(cpu_online_mask)); /* diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index b6a3e20a49a..649df22d435 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -46,7 +46,7 @@ static void bts_trace_start(struct trace_array *tr) tracing_reset_online_cpus(tr); - for_each_cpu_mask(cpu, cpu_possible_map) + for_each_cpu(cpu, cpu_possible_mask) smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1); } @@ -62,7 +62,7 @@ static void bts_trace_stop(struct trace_array *tr) { int cpu; - for_each_cpu_mask(cpu, cpu_possible_map) + for_each_cpu(cpu, cpu_possible_mask) smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1); } @@ -172,7 +172,7 @@ static void trace_bts_prepare(struct trace_iterator *iter) { int cpu; - for_each_cpu_mask(cpu, cpu_possible_map) + for_each_cpu(cpu, cpu_possible_mask) smp_call_function_single(cpu, trace_bts_cpu, iter->tr, 1); } diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c index a7172a352f6..7bda248daf5 100644 --- a/kernel/trace/trace_power.c +++ b/kernel/trace/trace_power.c @@ -39,7 +39,7 @@ static int power_trace_init(struct trace_array *tr) trace_power_enabled = 1; - for_each_cpu_mask(cpu, cpu_possible_map) + for_each_cpu(cpu, cpu_possible_mask) tracing_reset(tr, cpu); return 0; } -- cgit v1.2.3-70-g09d2 From 465634adc1d09b490c8ee31885575be39d375d53 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 7 Jan 2009 15:32:11 +0100 Subject: ring_buffer: fix ring_buffer_event_length() Function ring_buffer_event_length() provides an interface to detect the length of data stored in an entry. However, the length contains offsets depending on the internal usage. This makes it unusable. This patch fixes this and now ring_buffer_event_length() returns the alligned length that has been used in ring_buffer_lock_reserve(). Cc: Steven Rostedt Signed-off-by: Robert Richter --- kernel/trace/ring_buffer.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 30d57dd01a8..d42b882dfe4 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -117,7 +117,13 @@ rb_event_length(struct ring_buffer_event *event) */ unsigned ring_buffer_event_length(struct ring_buffer_event *event) { - return rb_event_length(event); + unsigned length = rb_event_length(event); + if (event->type != RINGBUF_TYPE_DATA) + return length; + length -= RB_EVNT_HDR_SIZE; + if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0])) + length -= sizeof(event->array[0]); + return length; } EXPORT_SYMBOL_GPL(ring_buffer_event_length); -- cgit v1.2.3-70-g09d2 From 082605de5f82eb692cc90f7fda071cc01bb5ac34 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 19 Jan 2009 14:32:51 -0500 Subject: ring-buffer: fix alignment problem Impact: fix to allow some archs to use the ring buffer Commits in the ring buffer are checked by pointer arithmetic. If the calculation is incorrect, then the commits will never take place and the buffer will simply fill up and report an error. Each page in the ring buffer has a small header: struct buffer_data_page { u64 time_stamp; local_t commit; unsigned char data[]; }; Unfortuntely, some of the calculations used sizeof(struct buffer_data_page) to know the size of the header. But this is incorrect on some archs, where sizeof(struct buffer_data_page) does not equal offsetof(struct buffer_data_page, data), and on those archs, the commits are never processed. This patch replaces the sizeof with offsetof. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 8b0daf0662e..1d6526361d0 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -246,7 +246,7 @@ static inline int test_time_stamp(u64 delta) return 0; } -#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page)) +#define BUF_PAGE_SIZE (PAGE_SIZE - offsetof(struct buffer_data_page, data)) /* * head_page == tail_page && head == tail then buffer is empty. -- cgit v1.2.3-70-g09d2 From 00f57f545afa422db3003b0d0b30a30f8de7ecb2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 14 Jan 2009 13:33:27 -0800 Subject: tracing/function-graph-tracer: fix a regression while suspend to disk Impact: fix a crash while kernel image restore When the function graph tracer is running and while suspend to disk, some racy and dangerous things happen against this tracer. The current task will save its registers including the stack pointer which contains the return address hooked by the tracer. But the current task will continue to enter other functions after that to save the memory, and then it will store other return addresses, and finally loose the old depth which matches the return address saved in the old stack (during the registers saving). So on image restore, the code will return to wrong addresses. And there are other things: on restore, the task will have it's "current" pointer overwritten during registers restoring....switching from one task to another... That would be insane to try to trace function graphs at these stages. This patch makes the function graph tracer listening on power events, making it's tracing disabled for the current task (the one that performs the hibernation work) while suspend/resume to disk, making the tracing safe during hibernation. Signed-off-by: Frederic Weisbecker Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 2f32969c09d..7dcf6e9f2b0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -1965,6 +1966,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, #ifdef CONFIG_FUNCTION_GRAPH_TRACER static atomic_t ftrace_graph_active; +static struct notifier_block ftrace_suspend_notifier; int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) { @@ -2043,6 +2045,27 @@ static int start_graph_tracing(void) return ret; } +/* + * Hibernation protection. + * The state of the current task is too much unstable during + * suspend/restore to disk. We want to protect against that. + */ +static int +ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state, + void *unused) +{ + switch (state) { + case PM_HIBERNATION_PREPARE: + pause_graph_tracing(); + break; + + case PM_POST_HIBERNATION: + unpause_graph_tracing(); + break; + } + return NOTIFY_DONE; +} + int register_ftrace_graph(trace_func_graph_ret_t retfunc, trace_func_graph_ent_t entryfunc) { @@ -2050,6 +2073,9 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, mutex_lock(&ftrace_sysctl_lock); + ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call; + register_pm_notifier(&ftrace_suspend_notifier); + atomic_inc(&ftrace_graph_active); ret = start_graph_tracing(); if (ret) { @@ -2075,6 +2101,7 @@ void unregister_ftrace_graph(void) ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; ftrace_graph_entry = ftrace_graph_entry_stub; ftrace_shutdown(FTRACE_STOP_FUNC_RET); + unregister_pm_notifier(&ftrace_suspend_notifier); mutex_unlock(&ftrace_sysctl_lock); } -- cgit v1.2.3-70-g09d2 From 551b4048b3d4acf15aff9fe4aed89b892c135b02 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 12 Jan 2009 11:06:18 +0800 Subject: ring_buffer: reset write when reserve buffer fail Impact: reset struct buffer_page.write when interrupt storm if struct buffer_page.write is not reset, any succedent committing will corrupted ring_buffer: static inline void rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) { ...... cpu_buffer->commit_page->commit = cpu_buffer->commit_page->write; ...... } when "if (RB_WARN_ON(cpu_buffer, next_page == reader_page))", ring_buffer is disabled, but some reserved buffers may haven't been committed. we need reset struct buffer_page.write. when "if (unlikely(next_page == cpu_buffer->commit_page))", ring_buffer is still available, we should not corrupt it. Signed-off-by: Lai Jiangshan Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ring_buffer.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 1d6526361d0..9c1e73da4e3 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1025,12 +1025,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, } if (next_page == head_page) { - if (!(buffer->flags & RB_FL_OVERWRITE)) { - /* reset write */ - if (tail <= BUF_PAGE_SIZE) - local_set(&tail_page->write, tail); + if (!(buffer->flags & RB_FL_OVERWRITE)) goto out_unlock; - } /* tail_page has not moved yet? */ if (tail_page == cpu_buffer->tail_page) { @@ -1105,6 +1101,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, return event; out_unlock: + /* reset write */ + if (tail <= BUF_PAGE_SIZE) + local_set(&tail_page->write, tail); + __raw_spin_unlock(&cpu_buffer->lock); local_irq_restore(flags); return NULL; -- cgit v1.2.3-70-g09d2 From faf6861ebd776871e77b761c43ec045cd20b5716 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 Jan 2009 12:24:42 -0500 Subject: trace: print ftrace_dump at KERN_EMERG log level Impact: fix to print out ftrace_dump when expected I was debugging a hard race condition to only find out that after I hit the race, my log level was not at level to show KERN_INFO. The time it took to trigger the race was wasted because I did not capture the trace. Since ftrace_dump is only called from kernel oops (and only when it is set in the kernel command line to do so), or when a developer adds it to their own local tree, the log level of the print should be at KERN_EMERG to make sure the print appears. ftrace_dump is not called by a normal user setup, and will not add extra unwanted print out to the console. There is no reason it should be at KERN_INFO. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c580233add9..1a1c5a6ab24 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3736,7 +3736,7 @@ static struct notifier_block trace_die_notifier = { * it if we decide to change what log level the ftrace dump * should be at. */ -#define KERN_TRACE KERN_INFO +#define KERN_TRACE KERN_EMERG static void trace_printk_seq(struct trace_seq *s) -- cgit v1.2.3-70-g09d2 From a442e5e0a2011af5b2d1f118fee0a8f9079f1d88 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 Jan 2009 14:50:19 -0500 Subject: trace: stop all recording to ring buffer on ftrace_dump Impact: limit ftrace dump output Currently ftrace_dump only calls ftrace_kill that is a fast way to prevent the function tracer functions from being called (just sets a flag and clears the function to call, nothing else). It is better to also turn off any recording to the ring buffers as well. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1a1c5a6ab24..4d89e84f0f4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3770,6 +3770,7 @@ void ftrace_dump(void) dump_ran = 1; /* No turning back! */ + tracing_off(); ftrace_kill(); for_each_tracing_cpu(cpu) { -- cgit v1.2.3-70-g09d2 From 1092307d582a7566d23779c304cf86f3075ac5f0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 15 Jan 2009 23:40:11 -0500 Subject: trace: set max latency variable to zero on default Impact: trace max latencies on start of latency tracing This patch sets the max latency to zero whenever one of the irq variant tracers or the wakeup tracer is set to current tracer. Most developers expect to see output when starting up a latency tracer. But since the max_latency is already set to max, and it takes a latency greater than max_latency to be recorded, there is no trace. This is not the expected behavior and has even confused myself. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 +- kernel/trace/trace_irqsoff.c | 1 + kernel/trace/trace_sched_wakeup.c | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4d89e84f0f4..17bb88d86ac 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -40,7 +40,7 @@ #define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE) -unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; +unsigned long __read_mostly tracing_max_latency; unsigned long __read_mostly tracing_thresh; /* diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 7c2e326bbc8..62a78d94353 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -380,6 +380,7 @@ static void stop_irqsoff_tracer(struct trace_array *tr) static void __irqsoff_tracer_init(struct trace_array *tr) { + tracing_max_latency = 0; irqsoff_trace = tr; /* make sure that the tracer is visible */ smp_wmb(); diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 43586b689e3..42ae1e77b6b 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -333,6 +333,7 @@ static void stop_wakeup_tracer(struct trace_array *tr) static int wakeup_tracer_init(struct trace_array *tr) { + tracing_max_latency = 0; wakeup_trace = tr; start_wakeup_tracer(tr); return 0; -- cgit v1.2.3-70-g09d2 From 91a8d07d82cac3aae3ef2ea1aaba5c9c4a934e91 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 21 Jan 2009 18:45:57 -0500 Subject: ring-buffer: reset timestamps when ring buffer is reset Impact: fix bad times of recent resets The ring buffer needs to reset its timestamps when reseting of the buffer, otherwise the timestamps are stale and might be used to calculate times in the buffer causing funny timestamps to appear. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ring_buffer.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 9c1e73da4e3..bd38c5cfd8a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2174,6 +2174,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->overrun = 0; cpu_buffer->entries = 0; + + cpu_buffer->write_stamp = 0; + cpu_buffer->read_stamp = 0; } /** -- cgit v1.2.3-70-g09d2 From 229c4ef8ae56d69f8dec64533bf1c7f8070c1a4a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 3 Feb 2009 20:39:04 +0100 Subject: ftrace: do_each_pid_task() needs rcu lock "ftrace: use struct pid" commit 978f3a45d9499c7a447ca7615455cefb63d44165 converted ftrace_pid_trace to "struct pid*". But we can't use do_each_pid_task() without rcu_read_lock() even if we know the pid itself can't go away (it was pinned in ftrace_pid_write). The exiting task can detach itself from this pid at any moment. Signed-off-by: Oleg Nesterov Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 7dcf6e9f2b0..9a236ffe2aa 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1737,9 +1737,12 @@ static void clear_ftrace_pid(struct pid *pid) { struct task_struct *p; + rcu_read_lock(); do_each_pid_task(pid, PIDTYPE_PID, p) { clear_tsk_trace_trace(p); } while_each_pid_task(pid, PIDTYPE_PID, p); + rcu_read_unlock(); + put_pid(pid); } @@ -1747,9 +1750,11 @@ static void set_ftrace_pid(struct pid *pid) { struct task_struct *p; + rcu_read_lock(); do_each_pid_task(pid, PIDTYPE_PID, p) { set_tsk_trace_trace(p); } while_each_pid_task(pid, PIDTYPE_PID, p); + rcu_read_unlock(); } static void clear_ftrace_pid_task(struct pid **pid) -- cgit v1.2.3-70-g09d2