From 43627582799db317e966ecb0002c2c3c9805ec0f Mon Sep 17 00:00:00 2001
From: Srinivasa Ds <srinivasa@in.ibm.com>
Date: Sat, 23 Feb 2008 15:24:04 -0800
Subject: kprobes: refuse kprobe insertion on add/sub_preempt_count()

Kprobes makes use of preempt_disable() and preempt_enable_noresched(), and
these functions in turn call add/sub_preempt_count().  So we need to prevent
users from inserting probes into these functions.

This patch disallows users from probing add/sub_preempt_count().

Signed-off-by: Srinivasa DS <srinivasa@in.ibm.com>
Acked-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sched.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/sched.c')

diff --git a/kernel/sched.c b/kernel/sched.c
index f28f19e65b5..c4bc8c21095 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3766,7 +3766,7 @@ void scheduler_tick(void)
 
 #if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT)
 
-void add_preempt_count(int val)
+void __kprobes add_preempt_count(int val)
 {
 	/*
 	 * Underflow?
@@ -3782,7 +3782,7 @@ void add_preempt_count(int val)
 }
 EXPORT_SYMBOL(add_preempt_count);
 
-void sub_preempt_count(int val)
+void __kprobes sub_preempt_count(int val)
 {
 	/*
 	 * Underflow?
-- 
cgit v1.2.3-70-g09d2


From 04e2f1741d235ba599037734878d72e57cb302b5 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@woody.linux-foundation.org>
Date: Sat, 23 Feb 2008 18:05:03 -0800
Subject: Add memory barrier semantics to wake_up() & co

Oleg Nesterov and others have pointed out that on some architectures,
the traditional sequence of

	set_current_state(TASK_INTERRUPTIBLE);
	if (CONDITION)
		return;
	schedule();

is racy wrt another CPU doing

	CONDITION = 1;
	wake_up_process(p);

because while set_current_state() has a memory barrier separating
setting of the TASK_INTERRUPTIBLE state from reading of the CONDITION
variable, there is no such memory barrier on the wakeup side.

Now, wake_up_process() does actually take a spinlock before it reads and
sets the task state on the waking side, and on x86 (and many other
architectures) that spinlock is in fact equivalent to a memory barrier,
but that is not generally guaranteed.  The write that sets CONDITION
could move into the critical region protected by the runqueue spinlock.

However, adding an smp_wmb() before the spinlock should now order the
writing of CONDITION wrt the lock itself, which in turn is ordered wrt
the accesses within the spinlock (which includes the reading of the old
state).

This should thus close the race (which probably has never been seen in
practice, but since smp_wmb() is a no-op on x86, it's not like this will
make anything worse either on the most common architecture where the
spinlock already gave the required protection).

Acked-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Dmitry Adamushko <dmitry.adamushko@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sched.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel/sched.c')

diff --git a/kernel/sched.c b/kernel/sched.c
index c4bc8c21095..b387a8de26a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1831,6 +1831,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
 	long old_state;
 	struct rq *rq;
 
+	smp_wmb();
 	rq = task_rq_lock(p, &flags);
 	old_state = p->state;
 	if (!(old_state & state))
-- 
cgit v1.2.3-70-g09d2


From 6892b75e60557a48c01d57ba320419a9e2ce9846 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 13 Feb 2008 14:02:36 +0100
Subject: sched: make early bootup sched_clock() use safer

Do not call sched_clock() too early.  Not only might rq->idle
not be set up, but pure per-cpu data might not be accessible
either.

this solves an ia64 early bootup hang with CONFIG_PRINTK_TIME=y.

Tested-by: Tony Luck <tony.luck@gmail.com>
Acked-by: Tony Luck <tony.luck@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'kernel/sched.c')

diff --git a/kernel/sched.c b/kernel/sched.c
index b387a8de26a..7286ccb0108 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -668,6 +668,8 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
  */
 unsigned int sysctl_sched_rt_period = 1000000;
 
+static __read_mostly int scheduler_running;
+
 /*
  * part of the period that we allow rt tasks to run in us.
  * default: 0.95s
@@ -689,14 +691,16 @@ unsigned long long cpu_clock(int cpu)
 	unsigned long flags;
 	struct rq *rq;
 
-	local_irq_save(flags);
-	rq = cpu_rq(cpu);
 	/*
 	 * Only call sched_clock() if the scheduler has already been
 	 * initialized (some code might call cpu_clock() very early):
 	 */
-	if (rq->idle)
-		update_rq_clock(rq);
+	if (unlikely(!scheduler_running))
+		return 0;
+
+	local_irq_save(flags);
+	rq = cpu_rq(cpu);
+	update_rq_clock(rq);
 	now = rq->clock;
 	local_irq_restore(flags);
 
@@ -7284,6 +7288,8 @@ void __init sched_init(void)
 	 * During early bootup we pretend to be a normal task:
 	 */
 	current->sched_class = &fair_sched_class;
+
+	scheduler_running = 1;
 }
 
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
-- 
cgit v1.2.3-70-g09d2


From 67ca7bde2e9d3516b5ae0188330ad1059ac03f38 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Fri, 15 Feb 2008 09:56:36 -0800
Subject: sched: fix signedness warnings in sched.c

Only pointers to unsigned long are ever assigned to switch_count,
so declare it as unsigned long * instead of long *.

kernel/sched.c:3897:15: warning: incorrect type in assignment (different signedness)
kernel/sched.c:3897:15:    expected long *switch_count
kernel/sched.c:3897:15:    got unsigned long *<noident>
kernel/sched.c:3921:16: warning: incorrect type in assignment (different signedness)
kernel/sched.c:3921:16:    expected long *switch_count
kernel/sched.c:3921:16:    got unsigned long *<noident>

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/sched.c')

diff --git a/kernel/sched.c b/kernel/sched.c
index 7286ccb0108..f06950c8a6c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3889,7 +3889,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev)
 asmlinkage void __sched schedule(void)
 {
 	struct task_struct *prev, *next;
-	long *switch_count;
+	unsigned long *switch_count;
 	struct rq *rq;
 	int cpu;
 
-- 
cgit v1.2.3-70-g09d2