From 43627582799db317e966ecb0002c2c3c9805ec0f Mon Sep 17 00:00:00 2001 From: Srinivasa Ds Date: Sat, 23 Feb 2008 15:24:04 -0800 Subject: kprobes: refuse kprobe insertion on add/sub_preempt_counter() Kprobes makes use of preempt_disable(),preempt_enable_noresched() and these functions inturn call add/sub_preempt_count(). So we need to refuse user from inserting probe in to these functions. This patch disallows user from probing add/sub_preempt_count(). Signed-off-by: Srinivasa DS Acked-by: Ananth N Mavinakayanahalli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index f28f19e65b5..c4bc8c21095 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3766,7 +3766,7 @@ void scheduler_tick(void) #if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT) -void add_preempt_count(int val) +void __kprobes add_preempt_count(int val) { /* * Underflow? @@ -3782,7 +3782,7 @@ void add_preempt_count(int val) } EXPORT_SYMBOL(add_preempt_count); -void sub_preempt_count(int val) +void __kprobes sub_preempt_count(int val) { /* * Underflow? -- cgit v1.2.3-70-g09d2 From 04e2f1741d235ba599037734878d72e57cb302b5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 23 Feb 2008 18:05:03 -0800 Subject: Add memory barrier semantics to wake_up() & co Oleg Nesterov and others have pointed out that on some architectures, the traditional sequence of set_current_state(TASK_INTERRUPTIBLE); if (CONDITION) return; schedule(); is racy wrt another CPU doing CONDITION = 1; wake_up_process(p); because while set_current_state() has a memory barrier separating setting of the TASK_INTERRUPTIBLE state from reading of the CONDITION variable, there is no such memory barrier on the wakeup side. Now, wake_up_process() does actually take a spinlock before it reads and sets the task state on the waking side, and on x86 (and many other architectures) that spinlock is in fact equivalent to a memory barrier, but that is not generally guaranteed. The write that sets CONDITION could move into the critical region protected by the runqueue spinlock. However, adding a smp_wmb() to before the spinlock should now order the writing of CONDITION wrt the lock itself, which in turn is ordered wrt the accesses within the spinlock (which includes the reading of the old state). This should thus close the race (which probably has never been seen in practice, but since smp_wmb() is a no-op on x86, it's not like this will make anything worse either on the most common architecture where the spinlock already gave the required protection). Acked-by: Oleg Nesterov Acked-by: Dmitry Adamushko Cc: Andrew Morton Cc: Nick Piggin Signed-off-by: Linus Torvalds --- kernel/sched.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index c4bc8c21095..b387a8de26a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1831,6 +1831,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) long old_state; struct rq *rq; + smp_wmb(); rq = task_rq_lock(p, &flags); old_state = p->state; if (!(old_state & state)) -- cgit v1.2.3-70-g09d2 From 6892b75e60557a48c01d57ba320419a9e2ce9846 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 13 Feb 2008 14:02:36 +0100 Subject: sched: make early bootup sched_clock() use safer do not call sched_clock() too early. Not only might rq->idle not be set up - but pure per-cpu data might not be accessible either. this solves an ia64 early bootup hang with CONFIG_PRINTK_TIME=y. Tested-by: Tony Luck Acked-by: Tony Luck Acked-by: David S. Miller Signed-off-by: Ingo Molnar --- kernel/sched.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index b387a8de26a..7286ccb0108 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -668,6 +668,8 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32; */ unsigned int sysctl_sched_rt_period = 1000000; +static __read_mostly int scheduler_running; + /* * part of the period that we allow rt tasks to run in us. * default: 0.95s @@ -689,14 +691,16 @@ unsigned long long cpu_clock(int cpu) unsigned long flags; struct rq *rq; - local_irq_save(flags); - rq = cpu_rq(cpu); /* * Only call sched_clock() if the scheduler has already been * initialized (some code might call cpu_clock() very early): */ - if (rq->idle) - update_rq_clock(rq); + if (unlikely(!scheduler_running)) + return 0; + + local_irq_save(flags); + rq = cpu_rq(cpu); + update_rq_clock(rq); now = rq->clock; local_irq_restore(flags); @@ -7284,6 +7288,8 @@ void __init sched_init(void) * During early bootup we pretend to be a normal task: */ current->sched_class = &fair_sched_class; + + scheduler_running = 1; } #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP -- cgit v1.2.3-70-g09d2 From 67ca7bde2e9d3516b5ae0188330ad1059ac03f38 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Fri, 15 Feb 2008 09:56:36 -0800 Subject: sched: fix signedness warnings in sched.c Unsigned long values are always assigned to switch_count, make it unsigned long. kernel/sched.c:3897:15: warning: incorrect type in assignment (different signedness) kernel/sched.c:3897:15: expected long *switch_count kernel/sched.c:3897:15: got unsigned long * kernel/sched.c:3921:16: warning: incorrect type in assignment (different signedness) kernel/sched.c:3921:16: expected long *switch_count kernel/sched.c:3921:16: got unsigned long * Signed-off-by: Harvey Harrison Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 7286ccb0108..f06950c8a6c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3889,7 +3889,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev) asmlinkage void __sched schedule(void) { struct task_struct *prev, *next; - long *switch_count; + unsigned long *switch_count; struct rq *rq; int cpu; -- cgit v1.2.3-70-g09d2