From 29c0177e6a4ac094302bed54a1d4bbb6b740a9ef Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sat, 13 Dec 2008 21:20:25 +1030
Subject: cpumask: change cpumask_scnprintf, cpumask_parse_user, cpulist_parse,
 and cpulist_scnprintf to take pointers.

Impact: change calling convention of existing cpumask APIs

Most cpumask functions started with cpus_: these have been replaced by
cpumask_ ones which take struct cpumask pointers as expected.

These four functions don't have good replacement names; fortunately
they're rarely used, so we just change them over.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: mingo@redhat.com
Cc: tony.luck@intel.com
Cc: ralf@linux-mips.org
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Cc: cl@linux-foundation.org
Cc: srostedt@redhat.com
---
 kernel/trace/trace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d86e3252f30..d2e75479dc5 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2126,7 +2126,7 @@ tracing_cpumask_read(struct file *filp, char __user *ubuf,
 
 	mutex_lock(&tracing_cpumask_update_lock);
 
-	len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
+	len = cpumask_scnprintf(mask_str, count, &tracing_cpumask);
 	if (count - len < 2) {
 		count = -EINVAL;
 		goto out_err;
@@ -2147,7 +2147,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
 	int err, cpu;
 
 	mutex_lock(&tracing_cpumask_update_lock);
-	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
+	err = cpumask_parse_user(ubuf, count, &tracing_cpumask_new);
 	if (err)
 		goto err_unlock;
 
-- 
cgit v1.2.3-70-g09d2


From 9e01c1b74c9531e301c900edaa92a99fcb7738f2 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:22 +1030
Subject: cpumask: convert kernel trace functions

Impact: Reduce future memory usage, use new cpumask API.

(Eventually, cpumask_var_t will be allocated based on nr_cpu_ids, not NR_CPUS).

Convert kernel trace functions to use struct cpumask API:
1) Use cpumask_copy/cpumask_test_cpu/for_each_cpu.
2) Use cpumask_var_t and alloc_cpumask_var/free_cpumask_var everywhere.
3) Use on_each_cpu instead of playing with current->cpus_allowed.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/ring_buffer.c   | 42 ++++++++++++++++++-------------
 kernel/trace/trace.c         | 60 ++++++++++++++++++++++++++------------------
 kernel/trace/trace_sysprof.c | 13 +++-------
 3 files changed, 64 insertions(+), 51 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 1d601a7c458..a9d9760dc7b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -195,7 +195,7 @@ void *ring_buffer_event_data(struct ring_buffer_event *event)
 EXPORT_SYMBOL_GPL(ring_buffer_event_data);
 
 #define for_each_buffer_cpu(buffer, cpu)		\
-	for_each_cpu_mask(cpu, buffer->cpumask)
+	for_each_cpu(cpu, buffer->cpumask)
 
 #define TS_SHIFT	27
 #define TS_MASK		((1ULL << TS_SHIFT) - 1)
@@ -267,7 +267,7 @@ struct ring_buffer {
 	unsigned			pages;
 	unsigned			flags;
 	int				cpus;
-	cpumask_t			cpumask;
+	cpumask_var_t			cpumask;
 	atomic_t			record_disabled;
 
 	struct mutex			mutex;
@@ -458,6 +458,9 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
 	if (!buffer)
 		return NULL;
 
+	if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
+		goto fail_free_buffer;
+
 	buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
 	buffer->flags = flags;
 
@@ -465,14 +468,14 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
 	if (buffer->pages == 1)
 		buffer->pages++;
 
-	buffer->cpumask = cpu_possible_map;
+	cpumask_copy(buffer->cpumask, cpu_possible_mask);
 	buffer->cpus = nr_cpu_ids;
 
 	bsize = sizeof(void *) * nr_cpu_ids;
 	buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
 				  GFP_KERNEL);
 	if (!buffer->buffers)
-		goto fail_free_buffer;
+		goto fail_free_cpumask;
 
 	for_each_buffer_cpu(buffer, cpu) {
 		buffer->buffers[cpu] =
@@ -492,6 +495,9 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
 	}
 	kfree(buffer->buffers);
 
+ fail_free_cpumask:
+	free_cpumask_var(buffer->cpumask);
+
  fail_free_buffer:
 	kfree(buffer);
 	return NULL;
@@ -510,6 +516,8 @@ ring_buffer_free(struct ring_buffer *buffer)
 	for_each_buffer_cpu(buffer, cpu)
 		rb_free_cpu_buffer(buffer->buffers[cpu]);
 
+	free_cpumask_var(buffer->cpumask);
+
 	kfree(buffer);
 }
 EXPORT_SYMBOL_GPL(ring_buffer_free);
@@ -1283,7 +1291,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
 
 	cpu = raw_smp_processor_id();
 
-	if (!cpu_isset(cpu, buffer->cpumask))
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		goto out;
 
 	cpu_buffer = buffer->buffers[cpu];
@@ -1396,7 +1404,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
 
 	cpu = raw_smp_processor_id();
 
-	if (!cpu_isset(cpu, buffer->cpumask))
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		goto out;
 
 	cpu_buffer = buffer->buffers[cpu];
@@ -1478,7 +1486,7 @@ void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 
-	if (!cpu_isset(cpu, buffer->cpumask))
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return;
 
 	cpu_buffer = buffer->buffers[cpu];
@@ -1498,7 +1506,7 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 
-	if (!cpu_isset(cpu, buffer->cpumask))
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return;
 
 	cpu_buffer = buffer->buffers[cpu];
@@ -1515,7 +1523,7 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 
-	if (!cpu_isset(cpu, buffer->cpumask))
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return 0;
 
 	cpu_buffer = buffer->buffers[cpu];
@@ -1532,7 +1540,7 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 
-	if (!cpu_isset(cpu, buffer->cpumask))
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return 0;
 
 	cpu_buffer = buffer->buffers[cpu];
@@ -1850,7 +1858,7 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 	struct buffer_page *reader;
 	int nr_loops = 0;
 
-	if (!cpu_isset(cpu, buffer->cpumask))
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return NULL;
 
 	cpu_buffer = buffer->buffers[cpu];
@@ -2025,7 +2033,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 	struct ring_buffer_event *event;
 	unsigned long flags;
 
-	if (!cpu_isset(cpu, buffer->cpumask))
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return NULL;
 
 	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
@@ -2062,7 +2070,7 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
 	struct ring_buffer_iter *iter;
 	unsigned long flags;
 
-	if (!cpu_isset(cpu, buffer->cpumask))
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return NULL;
 
 	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
@@ -2172,7 +2180,7 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
 	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
 	unsigned long flags;
 
-	if (!cpu_isset(cpu, buffer->cpumask))
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return;
 
 	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
@@ -2228,7 +2236,7 @@ int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 
-	if (!cpu_isset(cpu, buffer->cpumask))
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return 1;
 
 	cpu_buffer = buffer->buffers[cpu];
@@ -2252,8 +2260,8 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
 	struct ring_buffer_per_cpu *cpu_buffer_a;
 	struct ring_buffer_per_cpu *cpu_buffer_b;
 
-	if (!cpu_isset(cpu, buffer_a->cpumask) ||
-	    !cpu_isset(cpu, buffer_b->cpumask))
+	if (!cpumask_test_cpu(cpu, buffer_a->cpumask) ||
+	    !cpumask_test_cpu(cpu, buffer_b->cpumask))
 		return -EINVAL;
 
 	/* At least make sure the two buffers are somewhat the same */
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0e91f43b6ba..5d04e27f3b4 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -89,10 +89,10 @@ static inline void ftrace_enable_cpu(void)
 	preempt_enable();
 }
 
-static cpumask_t __read_mostly		tracing_buffer_mask;
+static cpumask_var_t __read_mostly	tracing_buffer_mask;
 
 #define for_each_tracing_cpu(cpu)	\
-	for_each_cpu_mask(cpu, tracing_buffer_mask)
+	for_each_cpu(cpu, tracing_buffer_mask)
 
 /*
  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
@@ -2646,13 +2646,7 @@ static struct file_operations show_traces_fops = {
 /*
  * Only trace on a CPU if the bitmask is set:
  */
-static cpumask_t tracing_cpumask = CPU_MASK_ALL;
-
-/*
- * When tracing/tracing_cpu_mask is modified then this holds
- * the new bitmask we are about to install:
- */
-static cpumask_t tracing_cpumask_new;
+static cpumask_var_t tracing_cpumask;
 
 /*
  * The tracer itself will not take this lock, but still we want
@@ -2674,7 +2668,7 @@ tracing_cpumask_read(struct file *filp, char __user *ubuf,
 
 	mutex_lock(&tracing_cpumask_update_lock);
 
-	len = cpumask_scnprintf(mask_str, count, &tracing_cpumask);
+	len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
 	if (count - len < 2) {
 		count = -EINVAL;
 		goto out_err;
@@ -2693,9 +2687,13 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
 		      size_t count, loff_t *ppos)
 {
 	int err, cpu;
+	cpumask_var_t tracing_cpumask_new;
+
+	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
+		return -ENOMEM;
 
 	mutex_lock(&tracing_cpumask_update_lock);
-	err = cpumask_parse_user(ubuf, count, &tracing_cpumask_new);
+	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
 	if (err)
 		goto err_unlock;
 
@@ -2706,26 +2704,28 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
 		 * Increase/decrease the disabled counter if we are
 		 * about to flip a bit in the cpumask:
 		 */
-		if (cpu_isset(cpu, tracing_cpumask) &&
-				!cpu_isset(cpu, tracing_cpumask_new)) {
+		if (cpumask_test_cpu(cpu, tracing_cpumask) &&
+				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
 			atomic_inc(&global_trace.data[cpu]->disabled);
 		}
-		if (!cpu_isset(cpu, tracing_cpumask) &&
-				cpu_isset(cpu, tracing_cpumask_new)) {
+		if (!cpumask_test_cpu(cpu, tracing_cpumask) &&
+				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
 			atomic_dec(&global_trace.data[cpu]->disabled);
 		}
 	}
 	__raw_spin_unlock(&ftrace_max_lock);
 	local_irq_enable();
 
-	tracing_cpumask = tracing_cpumask_new;
+	cpumask_copy(tracing_cpumask, tracing_cpumask_new);
 
 	mutex_unlock(&tracing_cpumask_update_lock);
+	free_cpumask_var(tracing_cpumask_new);
 
 	return count;
 
 err_unlock:
 	mutex_unlock(&tracing_cpumask_update_lock);
+	free_cpumask_var(tracing_cpumask);
 
 	return err;
 }
@@ -3752,7 +3752,6 @@ void ftrace_dump(void)
 	static DEFINE_SPINLOCK(ftrace_dump_lock);
 	/* use static because iter can be a bit big for the stack */
 	static struct trace_iterator iter;
-	static cpumask_t mask;
 	static int dump_ran;
 	unsigned long flags;
 	int cnt = 0, cpu;
@@ -3786,8 +3785,6 @@ void ftrace_dump(void)
 	 * and then release the locks again.
 	 */
 
-	cpus_clear(mask);
-
 	while (!trace_empty(&iter)) {
 
 		if (!cnt)
@@ -3823,19 +3820,28 @@ __init static int tracer_alloc_buffers(void)
 {
 	struct trace_array_cpu *data;
 	int i;
+	int ret = -ENOMEM;
 
-	/* TODO: make the number of buffers hot pluggable with CPUS */
-	tracing_buffer_mask = cpu_possible_map;
+	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
+		goto out;
+
+	if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
+		goto out_free_buffer_mask;
 
+	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
+	cpumask_copy(tracing_cpumask, cpu_all_mask);
+
+	/* TODO: make the number of buffers hot pluggable with CPUS */
 	global_trace.buffer = ring_buffer_alloc(trace_buf_size,
 						   TRACE_BUFFER_FLAGS);
 	if (!global_trace.buffer) {
 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
 		WARN_ON(1);
-		return 0;
+		goto out_free_cpumask;
 	}
 	global_trace.entries = ring_buffer_size(global_trace.buffer);
 
+
 #ifdef CONFIG_TRACER_MAX_TRACE
 	max_tr.buffer = ring_buffer_alloc(trace_buf_size,
 					     TRACE_BUFFER_FLAGS);
@@ -3843,7 +3849,7 @@ __init static int tracer_alloc_buffers(void)
 		printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
 		WARN_ON(1);
 		ring_buffer_free(global_trace.buffer);
-		return 0;
+		goto out_free_cpumask;
 	}
 	max_tr.entries = ring_buffer_size(max_tr.buffer);
 	WARN_ON(max_tr.entries != global_trace.entries);
@@ -3873,8 +3879,14 @@ __init static int tracer_alloc_buffers(void)
 				       &trace_panic_notifier);
 
 	register_die_notifier(&trace_die_notifier);
+	ret = 0;
 
-	return 0;
+out_free_cpumask:
+	free_cpumask_var(tracing_cpumask);
+out_free_buffer_mask:
+	free_cpumask_var(tracing_buffer_mask);
+out:
+	return ret;
 }
 early_initcall(tracer_alloc_buffers);
 fs_initcall(tracer_init_debugfs);
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index a5779bd975d..eaca5ad803f 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -196,9 +196,9 @@ static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer)
 	return HRTIMER_RESTART;
 }
 
-static void start_stack_timer(int cpu)
+static void start_stack_timer(void *unused)
 {
-	struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu);
+	struct hrtimer *hrtimer = &__get_cpu_var(stack_trace_hrtimer);
 
 	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hrtimer->function = stack_trace_timer_fn;
@@ -208,14 +208,7 @@ static void start_stack_timer(int cpu)
 
 static void start_stack_timers(void)
 {
-	cpumask_t saved_mask = current->cpus_allowed;
-	int cpu;
-
-	for_each_online_cpu(cpu) {
-		set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-		start_stack_timer(cpu);
-	}
-	set_cpus_allowed_ptr(current, &saved_mask);
+	on_each_cpu(start_stack_timer, NULL, 1);
 }
 
 static void stop_stack_timer(int cpu)
-- 
cgit v1.2.3-70-g09d2


From 4462344ee9ea9224d026801b877887f2f39774a3 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:23 +1030
Subject: cpumask: convert kernel trace functions further

Impact: Reduce future memory usage, use new cpumask API.

Since the last patch was created and acked, more old cpumask users
slipped into kernel/trace.

Mostly trivial conversions, except struct trace_iterator's "started"
member becomes a cpumask_var_t.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 kernel/trace/trace.c                 | 12 +++++++++---
 kernel/trace/trace.h                 |  2 +-
 kernel/trace/trace_boot.c            |  2 +-
 kernel/trace/trace_functions_graph.c |  2 +-
 kernel/trace/trace_hw_branches.c     |  6 +++---
 kernel/trace/trace_power.c           |  2 +-
 6 files changed, 16 insertions(+), 10 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 5d04e27f3b4..c580233add9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1811,10 +1811,10 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
 		return;
 
-	if (cpu_isset(iter->cpu, iter->started))
+	if (cpumask_test_cpu(iter->cpu, iter->started))
 		return;
 
-	cpu_set(iter->cpu, iter->started);
+	cpumask_set_cpu(iter->cpu, iter->started);
 	trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
 }
 
@@ -3114,10 +3114,15 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 	if (!iter)
 		return -ENOMEM;
 
+	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
+		kfree(iter);
+		return -ENOMEM;
+	}
+
 	mutex_lock(&trace_types_lock);
 
 	/* trace pipe does not show start of buffer */
-	cpus_setall(iter->started);
+	cpumask_setall(iter->started);
 
 	iter->tr = &global_trace;
 	iter->trace = current_trace;
@@ -3134,6 +3139,7 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
 {
 	struct trace_iterator *iter = file->private_data;
 
+	free_cpumask_var(iter->started);
 	kfree(iter);
 	atomic_dec(&tracing_reader);
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index cc7a4f86403..4d3d381bfd9 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -368,7 +368,7 @@ struct trace_iterator {
 	loff_t			pos;
 	long			idx;
 
-	cpumask_t		started;
+	cpumask_var_t		started;
 };
 
 int tracing_is_enabled(void);
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 3ccebde2848..366c8c333e1 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -42,7 +42,7 @@ static int boot_trace_init(struct trace_array *tr)
 	int cpu;
 	boot_trace = tr;
 
-	for_each_cpu_mask(cpu, cpu_possible_map)
+	for_each_cpu(cpu, cpu_possible_mask)
 		tracing_reset(tr, cpu);
 
 	tracing_sched_switch_assign_trace(tr);
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 4bf39fcae97..930c08e5b38 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -79,7 +79,7 @@ print_graph_cpu(struct trace_seq *s, int cpu)
 	int i;
 	int ret;
 	int log10_this = log10_cpu(cpu);
-	int log10_all = log10_cpu(cpus_weight_nr(cpu_online_map));
+	int log10_all = log10_cpu(cpumask_weight(cpu_online_mask));
 
 
 	/*
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index b6a3e20a49a..649df22d435 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -46,7 +46,7 @@ static void bts_trace_start(struct trace_array *tr)
 
 	tracing_reset_online_cpus(tr);
 
-	for_each_cpu_mask(cpu, cpu_possible_map)
+	for_each_cpu(cpu, cpu_possible_mask)
 		smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1);
 }
 
@@ -62,7 +62,7 @@ static void bts_trace_stop(struct trace_array *tr)
 {
 	int cpu;
 
-	for_each_cpu_mask(cpu, cpu_possible_map)
+	for_each_cpu(cpu, cpu_possible_mask)
 		smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1);
 }
 
@@ -172,7 +172,7 @@ static void trace_bts_prepare(struct trace_iterator *iter)
 {
 	int cpu;
 
-	for_each_cpu_mask(cpu, cpu_possible_map)
+	for_each_cpu(cpu, cpu_possible_mask)
 		smp_call_function_single(cpu, trace_bts_cpu, iter->tr, 1);
 }
 
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
index a7172a352f6..7bda248daf5 100644
--- a/kernel/trace/trace_power.c
+++ b/kernel/trace/trace_power.c
@@ -39,7 +39,7 @@ static int power_trace_init(struct trace_array *tr)
 
 	trace_power_enabled = 1;
 
-	for_each_cpu_mask(cpu, cpu_possible_map)
+	for_each_cpu(cpu, cpu_possible_mask)
 		tracing_reset(tr, cpu);
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From 465634adc1d09b490c8ee31885575be39d375d53 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 7 Jan 2009 15:32:11 +0100
Subject: ring_buffer: fix ring_buffer_event_length()

Function ring_buffer_event_length() provides an interface to detect
the length of data stored in an entry. However, the length contains
offsets depending on the internal usage. This makes it unusable. This
patch fixes this and now ring_buffer_event_length() returns the
alligned length that has been used in ring_buffer_lock_reserve().

Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 kernel/trace/ring_buffer.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 30d57dd01a8..d42b882dfe4 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -117,7 +117,13 @@ rb_event_length(struct ring_buffer_event *event)
  */
 unsigned ring_buffer_event_length(struct ring_buffer_event *event)
 {
-	return rb_event_length(event);
+	unsigned length = rb_event_length(event);
+	if (event->type != RINGBUF_TYPE_DATA)
+		return length;
+	length -= RB_EVNT_HDR_SIZE;
+	if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
+                length -= sizeof(event->array[0]);
+	return length;
 }
 EXPORT_SYMBOL_GPL(ring_buffer_event_length);
 
-- 
cgit v1.2.3-70-g09d2


From 082605de5f82eb692cc90f7fda071cc01bb5ac34 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 19 Jan 2009 14:32:51 -0500
Subject: ring-buffer: fix alignment problem

Impact: fix to allow some archs to use the ring buffer

Commits in the ring buffer are checked by pointer arithmetic.
If the calculation is incorrect, then the commits will never take
place and the buffer will simply fill up and report an error.

Each page in the ring buffer has a small header:

struct buffer_data_page {
	u64		time_stamp;
	local_t		commit;
	unsigned char	data[];
};

Unfortuntely, some of the calculations used sizeof(struct buffer_data_page)
to know the size of the header. But this is incorrect on some archs,
where sizeof(struct buffer_data_page) does not equal
offsetof(struct buffer_data_page, data), and on those archs, the commits
are never processed.

This patch replaces the sizeof with offsetof.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ring_buffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 8b0daf0662e..1d6526361d0 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -246,7 +246,7 @@ static inline int test_time_stamp(u64 delta)
 	return 0;
 }
 
-#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page))
+#define BUF_PAGE_SIZE (PAGE_SIZE - offsetof(struct buffer_data_page, data))
 
 /*
  * head_page == tail_page && head == tail then buffer is empty.
-- 
cgit v1.2.3-70-g09d2


From 00f57f545afa422db3003b0d0b30a30f8de7ecb2 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 14 Jan 2009 13:33:27 -0800
Subject: tracing/function-graph-tracer: fix a regression while suspend to disk

Impact: fix a crash while kernel image restore

When the function graph tracer is running and while suspend to disk, some racy
and dangerous things happen against this tracer.

The current task will save its registers including the stack pointer which
contains the return address hooked by the tracer. But the current task will
continue to enter other functions after that to save the memory, and then
it will store other return addresses, and finally loose the old depth which
matches the return address saved in the old stack (during the registers saving).

So on image restore, the code will return to wrong addresses.
And there are other things: on restore, the task will have it's "current"
pointer overwritten during registers restoring....switching from one task to
another... That would be insane to try to trace function graphs at these
stages.

This patch makes the function graph tracer listening on power events, making
it's tracing disabled for the current task (the one that performs the
hibernation work) while suspend/resume to disk, making the tracing safe
during hibernation.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ftrace.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'kernel/trace')

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 2f32969c09d..7dcf6e9f2b0 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -17,6 +17,7 @@
 #include <linux/clocksource.h>
 #include <linux/kallsyms.h>
 #include <linux/seq_file.h>
+#include <linux/suspend.h>
 #include <linux/debugfs.h>
 #include <linux/hardirq.h>
 #include <linux/kthread.h>
@@ -1965,6 +1966,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
 static atomic_t ftrace_graph_active;
+static struct notifier_block ftrace_suspend_notifier;
 
 int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
 {
@@ -2043,6 +2045,27 @@ static int start_graph_tracing(void)
 	return ret;
 }
 
+/*
+ * Hibernation protection.
+ * The state of the current task is too much unstable during
+ * suspend/restore to disk. We want to protect against that.
+ */
+static int
+ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state,
+							void *unused)
+{
+	switch (state) {
+	case PM_HIBERNATION_PREPARE:
+		pause_graph_tracing();
+		break;
+
+	case PM_POST_HIBERNATION:
+		unpause_graph_tracing();
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
 int register_ftrace_graph(trace_func_graph_ret_t retfunc,
 			trace_func_graph_ent_t entryfunc)
 {
@@ -2050,6 +2073,9 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
 
 	mutex_lock(&ftrace_sysctl_lock);
 
+	ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;
+	register_pm_notifier(&ftrace_suspend_notifier);
+
 	atomic_inc(&ftrace_graph_active);
 	ret = start_graph_tracing();
 	if (ret) {
@@ -2075,6 +2101,7 @@ void unregister_ftrace_graph(void)
 	ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
 	ftrace_graph_entry = ftrace_graph_entry_stub;
 	ftrace_shutdown(FTRACE_STOP_FUNC_RET);
+	unregister_pm_notifier(&ftrace_suspend_notifier);
 
 	mutex_unlock(&ftrace_sysctl_lock);
 }
-- 
cgit v1.2.3-70-g09d2


From 551b4048b3d4acf15aff9fe4aed89b892c135b02 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Mon, 12 Jan 2009 11:06:18 +0800
Subject: ring_buffer: reset write when reserve buffer fail

Impact: reset struct buffer_page.write when interrupt storm

if struct buffer_page.write is not reset, any succedent committing
will corrupted ring_buffer:

static inline void
rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
{
	......
		cpu_buffer->commit_page->commit =
			cpu_buffer->commit_page->write;
	......
}

when "if (RB_WARN_ON(cpu_buffer, next_page == reader_page))", ring_buffer
is disabled, but some reserved buffers may haven't been committed.
we need reset struct buffer_page.write.

when "if (unlikely(next_page == cpu_buffer->commit_page))", ring_buffer
is still available, we should not corrupt it.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ring_buffer.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 1d6526361d0..9c1e73da4e3 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1025,12 +1025,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		}
 
 		if (next_page == head_page) {
-			if (!(buffer->flags & RB_FL_OVERWRITE)) {
-				/* reset write */
-				if (tail <= BUF_PAGE_SIZE)
-					local_set(&tail_page->write, tail);
+			if (!(buffer->flags & RB_FL_OVERWRITE))
 				goto out_unlock;
-			}
 
 			/* tail_page has not moved yet? */
 			if (tail_page == cpu_buffer->tail_page) {
@@ -1105,6 +1101,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 	return event;
 
  out_unlock:
+	/* reset write */
+	if (tail <= BUF_PAGE_SIZE)
+		local_set(&tail_page->write, tail);
+
 	__raw_spin_unlock(&cpu_buffer->lock);
 	local_irq_restore(flags);
 	return NULL;
-- 
cgit v1.2.3-70-g09d2


From faf6861ebd776871e77b761c43ec045cd20b5716 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 14 Jan 2009 12:24:42 -0500
Subject: trace: print ftrace_dump at KERN_EMERG log level

Impact: fix to print out ftrace_dump when expected

I was debugging a hard race condition to only find out that
after I hit the race, my log level was not at level to show
KERN_INFO. The time it took to trigger the race was wasted because
I did not capture the trace.

Since ftrace_dump is only called from kernel oops (and only when
it is set in the kernel command line to do so), or when a
developer adds it to their own local tree, the log level of
the print should be at KERN_EMERG to make sure the print appears.

ftrace_dump is not called by a normal user setup, and will not
add extra unwanted print out to the console. There is no reason
it should be at KERN_INFO.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c580233add9..1a1c5a6ab24 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3736,7 +3736,7 @@ static struct notifier_block trace_die_notifier = {
  * it if we decide to change what log level the ftrace dump
  * should be at.
  */
-#define KERN_TRACE		KERN_INFO
+#define KERN_TRACE		KERN_EMERG
 
 static void
 trace_printk_seq(struct trace_seq *s)
-- 
cgit v1.2.3-70-g09d2


From a442e5e0a2011af5b2d1f118fee0a8f9079f1d88 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 14 Jan 2009 14:50:19 -0500
Subject: trace: stop all recording to ring buffer on ftrace_dump

Impact: limit ftrace dump output

Currently ftrace_dump only calls ftrace_kill that is a fast way
to prevent the function tracer functions from being called (just sets
a flag and clears the function to call, nothing else). It is better
to also turn off any recording to the ring buffers as well.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1a1c5a6ab24..4d89e84f0f4 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3770,6 +3770,7 @@ void ftrace_dump(void)
 	dump_ran = 1;
 
 	/* No turning back! */
+	tracing_off();
 	ftrace_kill();
 
 	for_each_tracing_cpu(cpu) {
-- 
cgit v1.2.3-70-g09d2


From 1092307d582a7566d23779c304cf86f3075ac5f0 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 15 Jan 2009 23:40:11 -0500
Subject: trace: set max latency variable to zero on default

Impact: trace max latencies on start of latency tracing

This patch sets the max latency to zero whenever one of the
irq variant tracers or the wakeup tracer is set to current tracer.

Most developers expect to see output when starting up a latency
tracer. But since the max_latency is already set to max, and
it takes a latency greater than max_latency to be recorded, there
is no trace. This is not the expected behavior and has even confused
myself.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.c              | 2 +-
 kernel/trace/trace_irqsoff.c      | 1 +
 kernel/trace/trace_sched_wakeup.c | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4d89e84f0f4..17bb88d86ac 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -40,7 +40,7 @@
 
 #define TRACE_BUFFER_FLAGS	(RB_FL_OVERWRITE)
 
-unsigned long __read_mostly	tracing_max_latency = (cycle_t)ULONG_MAX;
+unsigned long __read_mostly	tracing_max_latency;
 unsigned long __read_mostly	tracing_thresh;
 
 /*
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 7c2e326bbc8..62a78d94353 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -380,6 +380,7 @@ static void stop_irqsoff_tracer(struct trace_array *tr)
 
 static void __irqsoff_tracer_init(struct trace_array *tr)
 {
+	tracing_max_latency = 0;
 	irqsoff_trace = tr;
 	/* make sure that the tracer is visible */
 	smp_wmb();
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 43586b689e3..42ae1e77b6b 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -333,6 +333,7 @@ static void stop_wakeup_tracer(struct trace_array *tr)
 
 static int wakeup_tracer_init(struct trace_array *tr)
 {
+	tracing_max_latency = 0;
 	wakeup_trace = tr;
 	start_wakeup_tracer(tr);
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From 91a8d07d82cac3aae3ef2ea1aaba5c9c4a934e91 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 21 Jan 2009 18:45:57 -0500
Subject: ring-buffer: reset timestamps when ring buffer is reset

Impact: fix bad times of recent resets

The ring buffer needs to reset its timestamps when reseting of the
buffer, otherwise the timestamps are stale and might be used to
calculate times in the buffer causing funny timestamps to appear.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ring_buffer.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'kernel/trace')

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 9c1e73da4e3..bd38c5cfd8a 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2174,6 +2174,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 
 	cpu_buffer->overrun = 0;
 	cpu_buffer->entries = 0;
+
+	cpu_buffer->write_stamp = 0;
+	cpu_buffer->read_stamp = 0;
 }
 
 /**
-- 
cgit v1.2.3-70-g09d2


From 229c4ef8ae56d69f8dec64533bf1c7f8070c1a4a Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 3 Feb 2009 20:39:04 +0100
Subject: ftrace: do_each_pid_task() needs rcu lock

"ftrace: use struct pid" commit 978f3a45d9499c7a447ca7615455cefb63d44165
converted ftrace_pid_trace to "struct pid*".

But we can't use do_each_pid_task() without rcu_read_lock() even if
we know the pid itself can't go away (it was pinned in ftrace_pid_write).
The exiting task can detach itself from this pid at any moment.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ftrace.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'kernel/trace')

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 7dcf6e9f2b0..9a236ffe2aa 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1737,9 +1737,12 @@ static void clear_ftrace_pid(struct pid *pid)
 {
 	struct task_struct *p;
 
+	rcu_read_lock();
 	do_each_pid_task(pid, PIDTYPE_PID, p) {
 		clear_tsk_trace_trace(p);
 	} while_each_pid_task(pid, PIDTYPE_PID, p);
+	rcu_read_unlock();
+
 	put_pid(pid);
 }
 
@@ -1747,9 +1750,11 @@ static void set_ftrace_pid(struct pid *pid)
 {
 	struct task_struct *p;
 
+	rcu_read_lock();
 	do_each_pid_task(pid, PIDTYPE_PID, p) {
 		set_tsk_trace_trace(p);
 	} while_each_pid_task(pid, PIDTYPE_PID, p);
+	rcu_read_unlock();
 }
 
 static void clear_ftrace_pid_task(struct pid **pid)
-- 
cgit v1.2.3-70-g09d2