From a6f0eb6adc42e5eed3f35af99c61c0e411b16f8e Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 11 Nov 2009 17:14:07 -0500
Subject: ring-buffer: Add multiple iterations between benchmark timestamps

The ring_buffer_benchmark does a gettimeofday after every write to the
ring buffer in its measurements. This adds the overhead of the call
to gettimeofday to the measurements and does not give an accurate picture
of the length of time it takes to record a trace.

This was first noticed with perf top:

------------------------------------------------------------------------------
   PerfTop:     679 irqs/sec  kernel:99.9% [1000Hz cpu-clock-msecs],  (all, 4 CPUs)
------------------------------------------------------------------------------

             samples    pcnt   kernel function
             _______   _____   _______________

             1673.00 - 27.8% : trace_clock_local
              806.00 - 13.4% : do_gettimeofday
              590.00 -  9.8% : rb_reserve_next_event
              554.00 -  9.2% : native_read_tsc
              431.00 -  7.2% : ring_buffer_lock_reserve
              365.00 -  6.1% : __rb_reserve_next
              355.00 -  5.9% : rb_end_commit
              322.00 -  5.4% : getnstimeofday
              268.00 -  4.5% : ring_buffer_unlock_commit
              262.00 -  4.4% : ring_buffer_producer_thread 	[ring_buffer_benchmark]
              113.00 -  1.9% : read_tsc
               91.00 -  1.5% : debug_smp_processor_id
               69.00 -  1.1% : trace_recursive_unlock
               66.00 -  1.1% : ring_buffer_event_data
               25.00 -  0.4% : _spin_unlock_irq

And the length of each write to the ring buffer measured at 310ns.

This patch adds a new module parameter called "write_interval" which is
defaulted to 50. This is the number of writes performed between
timestamps. After this patch perf top shows:

------------------------------------------------------------------------------
   PerfTop:     244 irqs/sec  kernel:100.0% [1000Hz cpu-clock-msecs],  (all, 4 CPUs)
------------------------------------------------------------------------------

             samples    pcnt   kernel function
             _______   _____   _______________

             2842.00 - 40.4% : trace_clock_local
             1043.00 - 14.8% : rb_reserve_next_event
              784.00 - 11.1% : ring_buffer_lock_reserve
              600.00 -  8.5% : __rb_reserve_next
              579.00 -  8.2% : rb_end_commit
              440.00 -  6.3% : ring_buffer_unlock_commit
              290.00 -  4.1% : ring_buffer_producer_thread 	[ring_buffer_benchmark]
              155.00 -  2.2% : debug_smp_processor_id
              117.00 -  1.7% : trace_recursive_unlock
              103.00 -  1.5% : ring_buffer_event_data
               28.00 -  0.4% : do_gettimeofday
               22.00 -  0.3% : _spin_unlock_irq
               14.00 -  0.2% : native_read_tsc
               11.00 -  0.2% : getnstimeofday

do_gettimeofday dropped from 13% usage to a mere 0.4%! (using the default
50 interval)  The measurement for each timestamp went from 310ns to 210ns.
That's 100ns (1/3rd) overhead that the gettimeofday call was introducing.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/ring_buffer_benchmark.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

(limited to 'kernel/trace/ring_buffer_benchmark.c')

diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 573d3cc762c..70df73e4ff2 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -35,6 +35,10 @@ static int disable_reader;
 module_param(disable_reader, uint, 0644);
 MODULE_PARM_DESC(disable_reader, "only run producer");
 
+static int write_iteration = 50;
+module_param(write_iteration, uint, 0644);
+MODULE_PARM_DESC(write_iteration, "# of writes between timestamp readings");
+
 static int read_events;
 
 static int kill_test;
@@ -208,15 +212,18 @@ static void ring_buffer_producer(void)
 	do {
 		struct ring_buffer_event *event;
 		int *entry;
-
-		event = ring_buffer_lock_reserve(buffer, 10);
-		if (!event) {
-			missed++;
-		} else {
-			hit++;
-			entry = ring_buffer_event_data(event);
-			*entry = smp_processor_id();
-			ring_buffer_unlock_commit(buffer, event);
+		int i;
+
+		for (i = 0; i < write_iteration; i++) {
+			event = ring_buffer_lock_reserve(buffer, 10);
+			if (!event) {
+				missed++;
+			} else {
+				hit++;
+				entry = ring_buffer_event_data(event);
+				*entry = smp_processor_id();
+				ring_buffer_unlock_commit(buffer, event);
+			}
 		}
 		do_gettimeofday(&end_tv);
 
-- 
cgit v1.2.3-70-g09d2


From 98e4833ba3c314c99dc364012fba6ac894230ad0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 23 Nov 2009 08:03:09 +0100
Subject: ring-buffer benchmark: Run producer/consumer threads at nice +19

The ring-buffer benchmark threads run on nice 0 by default, using
up a lot of CPU time and slowing down the system:

   PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
  1024 root      20   0     0    0    0 D 95.3  0.0   4:01.67 rb_producer
  1023 root      20   0     0    0    0 R 93.5  0.0   2:54.33 rb_consumer
 21569 mingo     40   0 14852 1048  772 R  3.6  0.1   0:00.05 top
     1 root      40   0  4080  928  668 S  0.0  0.0   0:23.98 init

Renice them to +19 to make them less intrusive.

Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/ring_buffer_benchmark.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'kernel/trace/ring_buffer_benchmark.c')

diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 70df73e4ff2..3875d49da99 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -399,6 +399,12 @@ static int __init ring_buffer_benchmark_init(void)
 	if (IS_ERR(producer))
 		goto out_kill;
 
+	/*
+	 * Run them as low-prio background tasks by default:
+	 */
+	set_user_nice(consumer, 19);
+	set_user_nice(producer, 19);
+
 	return 0;
 
  out_kill:
-- 
cgit v1.2.3-70-g09d2


From 7ac074340480018681a0d72b324d4487543bdc0e Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 25 Nov 2009 13:22:21 -0500
Subject: ring-buffer-benchmark: Add parameters to set produce/consumer
 priorities

Running the ring-buffer-benchmark's threads at the lowest priority may
work well for keeping it in the background, but it is not appropriate
for the benchmarks.

This patch adds 4 parameters to the module:

  consumer_fifo
  consumer_nice
  producer_fifo
  producer_nice

By default the consumer and producer still run at nice +19.

If the *_fifo options are set, they will override the *_nice values.

 modprobe ring_buffer_benchmark consumer_nice=0 producer_fifo=10

The above will set the consumer thread to a nice value of 0, and
the producer thread to a RT SCHED_FIFO priority of 10.

Note, this patch also fixes a bug where calling set_user_nice on the
consumer thread would oops the kernel when the parameter "disable_reader"
is set.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/ring_buffer_benchmark.c | 58 ++++++++++++++++++++++++++++++++++--
 1 file changed, 56 insertions(+), 2 deletions(-)

(limited to 'kernel/trace/ring_buffer_benchmark.c')

diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 3875d49da99..b2477caf09c 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -39,6 +39,24 @@ static int write_iteration = 50;
 module_param(write_iteration, uint, 0644);
 MODULE_PARM_DESC(write_iteration, "# of writes between timestamp readings");
 
+static int producer_nice = 19;
+static int consumer_nice = 19;
+
+static int producer_fifo = -1;
+static int consumer_fifo = -1;
+
+module_param(producer_nice, uint, 0644);
+MODULE_PARM_DESC(producer_nice, "nice prio for producer");
+
+module_param(consumer_nice, uint, 0644);
+MODULE_PARM_DESC(consumer_nice, "nice prio for consumer");
+
+module_param(producer_fifo, uint, 0644);
+MODULE_PARM_DESC(producer_fifo, "fifo prio for producer");
+
+module_param(consumer_fifo, uint, 0644);
+MODULE_PARM_DESC(consumer_fifo, "fifo prio for consumer");
+
 static int read_events;
 
 static int kill_test;
@@ -270,6 +288,27 @@ static void ring_buffer_producer(void)
 
 	if (kill_test)
 		trace_printk("ERROR!\n");
+
+	if (!disable_reader) {
+		if (consumer_fifo < 0)
+			trace_printk("Running Consumer at nice: %d\n",
+				     consumer_nice);
+		else
+			trace_printk("Running Consumer at SCHED_FIFO %d\n",
+				     consumer_fifo);
+	}
+	if (producer_fifo < 0)
+		trace_printk("Running Producer at nice: %d\n",
+			     producer_nice);
+	else
+		trace_printk("Running Producer at SCHED_FIFO %d\n",
+			     producer_fifo);
+
+	/* Let the user know that the test is running at low priority */
+	if (producer_fifo < 0 && consumer_fifo < 0 &&
+	    producer_nice == 19 && consumer_nice == 19)
+		trace_printk("WARNING!!! This test is running at lowest priority.\n");
+
 	trace_printk("Time:     %lld (usecs)\n", time);
 	trace_printk("Overruns: %lld\n", overruns);
 	if (disable_reader)
@@ -402,8 +441,23 @@ static int __init ring_buffer_benchmark_init(void)
 	/*
 	 * Run them as low-prio background tasks by default:
 	 */
-	set_user_nice(consumer, 19);
-	set_user_nice(producer, 19);
+	if (!disable_reader) {
+		if (consumer_fifo >= 0) {
+			struct sched_param param = {
+				.sched_priority = consumer_fifo
+			};
+			sched_setscheduler(consumer, SCHED_FIFO, &param);
+		} else
+			set_user_nice(consumer, consumer_nice);
+	}
+
+	if (producer_fifo >= 0) {
+		struct sched_param param = {
+			.sched_priority = consumer_fifo
+		};
+		sched_setscheduler(producer, SCHED_FIFO, &param);
+	} else
+		set_user_nice(producer, producer_nice);
 
 	return 0;
 
-- 
cgit v1.2.3-70-g09d2