From 67d955383ab2ef8866c494c14156a4f3d29e441c Mon Sep 17 00:00:00 2001
From: Hillf Danton
Date: Thu, 16 Jun 2011 21:55:18 -0400
Subject: sched: Remove noop in next_prio()

When computing the next priority for a given run-queue, the check for RT
priority of the task determined by the pick_next_highest_task_rt()
function could be removed, since only RT tasks are returned by the
function.

Reviewed-by: Yong Zhang
Signed-off-by: Hillf Danton
Signed-off-by: Steven Rostedt
Signed-off-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/BANLkTimxmWiof9s5AvS3v_0X+sMiE=0x5g@mail.gmail.com
Signed-off-by: Ingo Molnar
---
 kernel/sched_rt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/sched_rt.c')

diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 97540f0c9e4..e2698c0fc69 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -704,7 +704,7 @@ static inline int next_prio(struct rq *rq)
 {
 	struct task_struct *next = pick_next_highest_task_rt(rq, rq->cpu);
 
-	if (next && rt_prio(next->prio))
+	if (next)
 		return next->prio;
 	else
 		return MAX_RT_PRIO;
--
cgit v1.2.3-70-g09d2

From 311e800e16f63d909136a64ed17ca353a160be59 Mon Sep 17 00:00:00 2001
From: Hillf Danton
Date: Thu, 16 Jun 2011 21:55:20 -0400
Subject: sched, rt: Fix rq->rt.pushable_tasks bug in push_rt_task()

Do not call dequeue_pushable_task() when failing to push an eligible
task, as it remains pushable, merely not at this particular moment.

Signed-off-by: Hillf Danton
Signed-off-by: Mike Galbraith
Signed-off-by: Steven Rostedt
Signed-off-by: Peter Zijlstra
Cc: Yong Zhang
Link: http://lkml.kernel.org/r/1306895385.4791.26.camel@marge.simson.net
Signed-off-by: Ingo Molnar
---
 kernel/sched_rt.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'kernel/sched_rt.c')

diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index e2698c0fc69..8e189455ed1 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1394,6 +1394,7 @@ static int push_rt_task(struct rq *rq)
 {
 	struct task_struct *next_task;
 	struct rq *lowest_rq;
+	int ret = 0;
 
 	if (!rq->rt.overloaded)
 		return 0;
@@ -1426,7 +1427,7 @@ retry:
 	if (!lowest_rq) {
 		struct task_struct *task;
 		/*
-		 * find lock_lowest_rq releases rq->lock
+		 * find_lock_lowest_rq releases rq->lock
 		 * so it is possible that next_task has migrated.
 		 *
 		 * We need to make sure that the task is still on the same
@@ -1436,12 +1437,11 @@ retry:
 		task = pick_next_pushable_task(rq);
 		if (task_cpu(next_task) == rq->cpu && task == next_task) {
 			/*
-			 * If we get here, the task hasn't moved at all, but
-			 * it has failed to push. We will not try again,
-			 * since the other cpus will pull from us when they
-			 * are ready.
+			 * The task hasn't migrated, and is still the next
+			 * eligible task, but we failed to find a run-queue
+			 * to push it to. Do not retry in this case, since
+			 * other cpus will pull from us when ready.
 			 */
-			dequeue_pushable_task(rq, next_task);
 			goto out;
 		}
 
@@ -1460,6 +1460,7 @@ retry:
 	deactivate_task(rq, next_task, 0);
 	set_task_cpu(next_task, lowest_rq->cpu);
 	activate_task(lowest_rq, next_task, 0);
+	ret = 1;
 
 	resched_task(lowest_rq->curr);
 
@@ -1468,7 +1469,7 @@ retry:
 out:
 	put_task_struct(next_task);
 
-	return 1;
+	return ret;
 }
 
 static void push_rt_tasks(struct rq *rq)
--
cgit v1.2.3-70-g09d2

From 1812a643ccbfeb61a00a7f0d7219606e63d8815b Mon Sep 17 00:00:00 2001
From: Hillf Danton
Date: Thu, 16 Jun 2011 21:55:21 -0400
Subject: sched: Remove resetting exec_start in put_prev_task_rt()

There's no reason to clear exec_start in put_prev_task_rt(), as it is
reset when the task gets back onto the run queue. This saves us a store
in the fast path.

Signed-off-by: Hillf Danton
Signed-off-by: Steven Rostedt
Signed-off-by: Peter Zijlstra
Cc: Mike Galbraith
Cc: Yong Zhang
Link: http://lkml.kernel.org/r/BANLkTimqWD=q6YnSDi-v9y=LMWecgEzEWg@mail.gmail.com
Signed-off-by: Ingo Molnar
---
 kernel/sched_rt.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'kernel/sched_rt.c')

diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 8e189455ed1..70107a39b1b 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1178,7 +1178,6 @@ static struct task_struct *pick_next_task_rt(struct rq *rq)
 static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 {
 	update_curr_rt(rq);
-	p->se.exec_start = 0;
 
 	/*
 	 * The previous task needs to be made eligible for pushing
--
cgit v1.2.3-70-g09d2

From c37495fd0f64fc139b5a07d242bcb485174d1206 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Thu, 16 Jun 2011 21:55:22 -0400
Subject: sched: Balance RT tasks when forked as well

When a new task is forked and woken up, the code to balance the RT task
is currently skipped in the select_task_rq() call. But it will be pushed
if the rq is currently overloaded with RT tasks anyway. The issue is that
we have already queued the task, and if it does get pushed, it will have
to be dequeued and requeued on the new run queue. The advantage of
pushing it first is that we avoid this requeuing, because we push the
task off before it is ever queued.

See commit 318e0893ce3f524 ("sched: pre-route RT tasks on wakeup")
for more details.

The return value of select_task_rq() when it is not a wake up has also
been changed to task_cpu() instead of smp_processor_id(). This is more of
a sanity check, because currently the only other user of select_task_rq()
besides wake ups is exec, where task_cpu() should be the same as
smp_processor_id() anyway. But if it is ever used for other purposes,
let's keep the task on the same CPU. Why would we want to migrate it to
the current CPU?
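As an aside for readers following the series (this sketch is not part of
the patch), the decision the change introduces in select_task_rq_rt() can
be modelled in a few lines of standalone C. The flag values, the struct
and find_lowest_rq_stub() are invented stand-ins, not the kernel's
definitions:

  /*
   * Standalone sketch, not kernel code: models the sd_flag gate that the
   * diff below adds to select_task_rq_rt(). All names and values here
   * are assumptions made for illustration only.
   */
  #include <stdio.h>

  #define SD_BALANCE_FORK 0x01    /* illustrative values, not the kernel's */
  #define SD_BALANCE_WAKE 0x02
  #define SD_BALANCE_EXEC 0x04

  struct task { int cpu; };       /* stand-in for task_struct */

  /* pretend result of the cpupri search for a lower-priority run queue */
  static int find_lowest_rq_stub(void) { return 1; }

  static int select_task_rq_rt_model(struct task *p, int sd_flag)
  {
      int cpu = p->cpu;           /* task_cpu(p) */

      /* For anything but wake ups (and now forks), keep the task's CPU */
      if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
          return cpu;

      /* Wake/fork path: may pre-route the task to a lower-priority CPU */
      return find_lowest_rq_stub();
  }

  int main(void)
  {
      struct task p = { .cpu = 3 };

      printf("exec -> CPU %d\n", select_task_rq_rt_model(&p, SD_BALANCE_EXEC));
      printf("fork -> CPU %d\n", select_task_rq_rt_model(&p, SD_BALANCE_FORK));
      printf("wake -> CPU %d\n", select_task_rq_rt_model(&p, SD_BALANCE_WAKE));
      return 0;
  }

The point of the sketch is the early return: only wake-ups and the first
wake-up of a forked task fall through to the pre-routing search;
everything else now stays on task_cpu().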
Signed-off-by: Steven Rostedt
Signed-off-by: Peter Zijlstra
Cc: Hillf Danton
Link: http://lkml.kernel.org/r/20110617015919.832743148@goodmis.org
Signed-off-by: Ingo Molnar
---
 kernel/sched_rt.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'kernel/sched_rt.c')

diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 70107a39b1b..2153a87e615 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1017,10 +1017,12 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
 	struct rq *rq;
 	int cpu;
 
-	if (sd_flag != SD_BALANCE_WAKE)
-		return smp_processor_id();
-
 	cpu = task_cpu(p);
+
+	/* For anything but wake ups, just return the task_cpu */
+	if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
+		goto out;
+
 	rq = cpu_rq(cpu);
 
 	rcu_read_lock();
@@ -1059,6 +1061,7 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
 	}
 	rcu_read_unlock();
 
+out:
 	return cpu;
 }
 
--
cgit v1.2.3-70-g09d2

From 5181f4a46afd99e5e85c639b189e43e0a42b53df Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Thu, 16 Jun 2011 21:55:23 -0400
Subject: sched: Use pushable_tasks to determine next highest prio

Hillf Danton proposed a patch (see link) that cleaned up the sched_rt
code that calculates the priority of the next highest priority task to
be used in finding run queues to pull from.

His patch removed the calculation of the next prio and simply used the
current prio when determining if we should examine a run queue to pull
from. The problem with his patch was that it caused more false checks,
because we check a run queue for pushable tasks if the current priority
of that run queue is higher in priority than the task about to run on
our run queue. But after grabbing the locks and doing the real check, we
may find that there is no higher-prio task to pull. Thus the locks were
taken with nothing to do.

I added some trace_printks() to record when and how many times the run
queue locks were taken to check for pullable tasks, compared to how many
times we pulled a task.

With the current method, it was:
  3806 locks taken vs 2812 pulled tasks

With Hillf's patch:
  6728 locks taken vs 2804 pulled tasks

The number of times locks were taken to pull a task nearly doubled with
no improvement in the success rate.

But his patch did get me thinking. When we look at the priority of the
highest task to decide whether to take the locks and do a pull, a
failure to pull can be caused by one of the following (in order of
likelihood):

 o RT task was pushed off already between the check and taking the lock
 o Waiting RT task can not be migrated
 o RT task's CPU affinity does not include the target run queue's CPU
 o RT task's priority changed between the check and taking the lock

And with Hillf's patch, what caused most of the failures is that the RT
task to pull was not at the right priority to pull (not greater than the
current RT task priority on the target run queue).

Most of the above cases we can't help. But the current method does not
check whether the next highest prio RT task can be migrated or not, and
if it cannot, we still grab the locks to do the test (we don't find out
about this fact until after we have the locks). I thought about this
case, and realized that the pushable task plist that is maintained only
holds RT tasks that can migrate. If we move the calculation of the next
highest prio task from the inc/dec_rt_task() functions into the queuing
of the pushable tasks, then we only measure the priorities of those
tasks that we push, and we get this basically for free.
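To make that bookkeeping concrete, here is a small standalone C sketch of
the idea (an illustration only, not the patch): the next-highest priority
is maintained from the pushable queue alone, so it automatically covers
only migratable tasks. The array and helpers are invented; the kernel
uses a plist, whose sorted head makes the dequeue-side update O(1)
rather than a scan:

  /*
   * Standalone sketch, not kernel code: keep highest_prio_next in sync
   * with a queue of pushable (migratable) task priorities.  Lower value
   * means higher priority, as in the kernel; everything else is invented.
   */
  #include <stdio.h>

  #define MAX_RT_PRIO 100
  #define MAX_PUSHABLE 16

  static int pushable[MAX_PUSHABLE];   /* prios of pushable tasks */
  static int nr_pushable;
  static int highest_prio_next = MAX_RT_PRIO;

  static void enqueue_pushable(int prio)
  {
      pushable[nr_pushable++] = prio;
      /* Update the highest prio pushable task */
      if (prio < highest_prio_next)
          highest_prio_next = prio;
  }

  static void dequeue_pushable(int idx)
  {
      int i, best = MAX_RT_PRIO;

      pushable[idx] = pushable[--nr_pushable];
      /* Recompute from what is still queued; the sorted plist makes this O(1) */
      for (i = 0; i < nr_pushable; i++)
          if (pushable[i] < best)
              best = pushable[i];
      highest_prio_next = best;
  }

  int main(void)
  {
      enqueue_pushable(50);
      enqueue_pushable(10);
      printf("next prio after enqueues: %d\n", highest_prio_next);  /* 10 */
      dequeue_pushable(1);   /* remove the prio-10 task */
      printf("next prio after dequeue:  %d\n", highest_prio_next);  /* 50 */
      return 0;
  }

The enqueue side only needs a comparison, and the dequeue side only needs
to look at whatever now heads the queue, which is exactly what the
enqueue_pushable_task()/dequeue_pushable_task() hunks below do.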
Not only does this patch make the code a little more efficient, it
cleans it up and makes it a little simpler.

Thanks to Hillf Danton for inspiring me on this patch.

Signed-off-by: Steven Rostedt
Signed-off-by: Peter Zijlstra
Cc: Hillf Danton
Cc: Gregory Haskins
Link: http://lkml.kernel.org/r/BANLkTimQ67180HxCx5vgMqumqw1EkFh3qg@mail.gmail.com
Signed-off-by: Ingo Molnar
---
 kernel/sched_rt.c | 61 ++++++++++++++++---------------------------------------
 1 file changed, 18 insertions(+), 43 deletions(-)

(limited to 'kernel/sched_rt.c')

diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 2153a87e615..a8c207ff349 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -124,21 +124,33 @@ static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 	update_rt_migration(rt_rq);
 }
 
+static inline int has_pushable_tasks(struct rq *rq)
+{
+	return !plist_head_empty(&rq->rt.pushable_tasks);
+}
+
 static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
 {
 	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
 	plist_node_init(&p->pushable_tasks, p->prio);
 	plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
+
+	/* Update the highest prio pushable task */
+	if (p->prio < rq->rt.highest_prio.next)
+		rq->rt.highest_prio.next = p->prio;
 }
 
 static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
 {
 	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
-}
 
-static inline int has_pushable_tasks(struct rq *rq)
-{
-	return !plist_head_empty(&rq->rt.pushable_tasks);
+	/* Update the new highest prio pushable task */
+	if (has_pushable_tasks(rq)) {
+		p = plist_first_entry(&rq->rt.pushable_tasks,
+				      struct task_struct, pushable_tasks);
+		rq->rt.highest_prio.next = p->prio;
+	} else
+		rq->rt.highest_prio.next = MAX_RT_PRIO;
 }
 
 #else
@@ -698,47 +710,13 @@ static void update_curr_rt(struct rq *rq)
 
 #if defined CONFIG_SMP
 
-static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu);
-
-static inline int next_prio(struct rq *rq)
-{
-	struct task_struct *next = pick_next_highest_task_rt(rq, rq->cpu);
-
-	if (next)
-		return next->prio;
-	else
-		return MAX_RT_PRIO;
-}
-
 static void
 inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
 {
 	struct rq *rq = rq_of_rt_rq(rt_rq);
 
-	if (prio < prev_prio) {
-
-		/*
-		 * If the new task is higher in priority than anything on the
-		 * run-queue, we know that the previous high becomes our
-		 * next-highest.
-		 */
-		rt_rq->highest_prio.next = prev_prio;
-
-		if (rq->online)
-			cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
-
-	} else if (prio == rt_rq->highest_prio.curr)
-		/*
-		 * If the next task is equal in priority to the highest on
-		 * the run-queue, then we implicitly know that the next highest
-		 * task cannot be any lower than current
-		 */
-		rt_rq->highest_prio.next = prio;
-	else if (prio < rt_rq->highest_prio.next)
-		/*
-		 * Otherwise, we need to recompute next-highest
-		 */
-		rt_rq->highest_prio.next = next_prio(rq);
+	if (rq->online && prio < prev_prio)
+		cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
 }
 
 static void
@@ -746,9 +724,6 @@ dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
 {
 	struct rq *rq = rq_of_rt_rq(rt_rq);
 
-	if (rt_rq->rt_nr_running && (prio <= rt_rq->highest_prio.next))
-		rt_rq->highest_prio.next = next_prio(rq);
-
 	if (rq->online && rt_rq->highest_prio.curr != prev_prio)
 		cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
 }
--
cgit v1.2.3-70-g09d2

From 953bfcd10e6f3697233e8e5128c611d275da39c1 Mon Sep 17 00:00:00 2001
From: Paul Turner
Date: Thu, 21 Jul 2011 09:43:27 -0700
Subject: sched: Implement hierarchical task accounting for SCHED_OTHER

Introduce hierarchical task accounting for the group scheduling case in
CFS, as well as promoting the responsibility for maintaining
rq->nr_running to the scheduling classes.

The primary motivation for this is that with scheduling classes
supporting bandwidth throttling it is possible for entities
participating in throttled sub-trees to not have root visible changes
in rq->nr_running across activate and de-activate operations. This in
turn leads to incorrect idle and weight-per-task load balance
decisions.

This also allows us to make a small fixlet to the fastpath in
pick_next_task() under group scheduling.

Note: this issue also exists with the existing sched_rt throttling
mechanism. This patch does not address that.
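A standalone C sketch of the accounting scheme (an illustration under
assumed names, not the patch itself): every cfs_rq on the path from the
enqueued entity to the root counts the task in h_nr_running, while
rq->nr_running is bumped exactly once by the owning class. The two-level
hierarchy and field layout below are invented for the sketch:

  /*
   * Standalone sketch, not kernel code: hierarchical h_nr_running
   * accounting.  Structures and the parent chain are assumptions made
   * for illustration only.
   */
  #include <stdio.h>

  struct cfs_rq {
      struct cfs_rq *parent;            /* NULL at the root */
      unsigned long h_nr_running;
  };

  struct rq {
      unsigned long nr_running;
      struct cfs_rq root;
  };

  static void enqueue_task_fair_model(struct rq *rq, struct cfs_rq *cfs_rq)
  {
      /* Walk up the hierarchy, like for_each_sched_entity() */
      for (; cfs_rq; cfs_rq = cfs_rq->parent)
          cfs_rq->h_nr_running++;

      rq->nr_running++;                 /* the class now owns this counter */
  }

  int main(void)
  {
      struct rq rq = { .nr_running = 0, .root = { .parent = NULL } };
      struct cfs_rq group = { .parent = &rq.root };

      enqueue_task_fair_model(&rq, &group);
      printf("rq=%lu root=%lu group=%lu\n",
             rq.nr_running, rq.root.h_nr_running, group.h_nr_running);
      return 0;
  }

In the real diff below, dequeue mirrors this walk with decrements, and
the stop class simply bumps the rq counter directly since it has no
hierarchy of its own.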
Signed-off-by: Paul Turner
Reviewed-by: Hidetoshi Seto
Signed-off-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/20110721184756.878333391@google.com
Signed-off-by: Ingo Molnar
---
 kernel/sched.c          | 6 ++----
 kernel/sched_fair.c     | 6 ++++++
 kernel/sched_rt.c       | 5 ++++-
 kernel/sched_stoptask.c | 2 ++
 4 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'kernel/sched_rt.c')

diff --git a/kernel/sched.c b/kernel/sched.c
index cf427bb2b65..cd1a531ca8f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -311,7 +311,7 @@ struct task_group root_task_group;
 /* CFS-related fields in a runqueue */
 struct cfs_rq {
 	struct load_weight load;
-	unsigned long nr_running;
+	unsigned long nr_running, h_nr_running;
 
 	u64 exec_clock;
 	u64 min_vruntime;
@@ -1802,7 +1802,6 @@ static void activate_task(struct rq *rq, struct task_struct *p, int flags)
 		rq->nr_uninterruptible--;
 
 	enqueue_task(rq, p, flags);
-	inc_nr_running(rq);
 }
 
 /*
@@ -1814,7 +1813,6 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
 		rq->nr_uninterruptible++;
 
 	dequeue_task(rq, p, flags);
-	dec_nr_running(rq);
 }
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -4258,7 +4256,7 @@ pick_next_task(struct rq *rq)
 	 * Optimization: we know that if all tasks are in
 	 * the fair class we can call that function directly:
 	 */
-	if (likely(rq->nr_running == rq->cfs.nr_running)) {
+	if (likely(rq->nr_running == rq->cfs.h_nr_running)) {
 		p = fair_sched_class.pick_next_task(rq);
 		if (likely(p))
 			return p;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f4b732a3552..f86b0cb5eb2 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1310,16 +1310,19 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 			break;
 		cfs_rq = cfs_rq_of(se);
 		enqueue_entity(cfs_rq, se, flags);
+		cfs_rq->h_nr_running++;
 		flags = ENQUEUE_WAKEUP;
 	}
 
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
+		cfs_rq->h_nr_running++;
 
 		update_cfs_load(cfs_rq, 0);
 		update_cfs_shares(cfs_rq);
 	}
 
+	inc_nr_running(rq);
 	hrtick_update(rq);
 }
 
@@ -1339,6 +1342,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
 		dequeue_entity(cfs_rq, se, flags);
+		cfs_rq->h_nr_running--;
 
 		/* Don't dequeue parent if it has other entities besides us */
 		if (cfs_rq->load.weight) {
@@ -1358,11 +1362,13 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
+		cfs_rq->h_nr_running--;
 
 		update_cfs_load(cfs_rq, 0);
 		update_cfs_shares(cfs_rq);
 	}
 
+	dec_nr_running(rq);
 	hrtick_update(rq);
 }
 
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index a8c207ff349..a9d3c6bc684 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -936,6 +936,8 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 
 	if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
 		enqueue_pushable_task(rq, p);
+
+	inc_nr_running(rq);
 }
 
 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
@@ -946,6 +948,8 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 	dequeue_rt_entity(rt_se);
 
 	dequeue_pushable_task(rq, p);
+
+	dec_nr_running(rq);
 }
 
 /*
@@ -1841,4 +1845,3 @@ static void print_rt_stats(struct seq_file *m, int cpu)
 	rcu_read_unlock();
 }
 #endif /* CONFIG_SCHED_DEBUG */
-
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c
index 6f437632afa..8b44e7fa7fb 100644
--- a/kernel/sched_stoptask.c
+++ b/kernel/sched_stoptask.c
@@ -34,11 +34,13 @@ static struct task_struct *pick_next_task_stop(struct rq *rq)
 static void
 enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags)
 {
+	inc_nr_running(rq);
 }
 
 static void
 dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags)
 {
+	dec_nr_running(rq);
 }
 
 static void yield_task_stop(struct rq *rq)
--
cgit v1.2.3-70-g09d2

From fa17b507f142d37aeac322a95f6f7c6375f25601 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 16 Jun 2011 12:23:22 +0200
Subject: sched: Wrap scheduler p->cpus_allowed access

This patch is preparatory for the migrate_disable() implementation, but
stands on its own and provides a cleanup. It currently only converts
those sites required for task placement.

Kosaki-san once mentioned replacing cpus_allowed with a proper cpumask_t
instead of the NR_CPUS-sized array it currently is; that would also
require something like this.

Signed-off-by: Peter Zijlstra
Acked-by: Thomas Gleixner
Cc: KOSAKI Motohiro
Link: http://lkml.kernel.org/n/tip-e42skvaddos99psip0vce41o@git.kernel.org
Signed-off-by: Ingo Molnar
---
 kernel/sched.c         |  8 ++++----
 kernel/sched_fair.c    | 12 ++++++------
 kernel/sched_rt.c      |  4 ++--
 lib/smp_processor_id.c |  2 +-
 4 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'kernel/sched_rt.c')

diff --git a/kernel/sched.c b/kernel/sched.c
index 7bc9b0e84eb..45174ca5c8e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2544,11 +2544,11 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 
 	/* Look for allowed, online CPU in same node. */
 	for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
-		if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+		if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
 			return dest_cpu;
 
 	/* Any allowed, online CPU? */
-	dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
+	dest_cpu = cpumask_any_and(tsk_cpus_allowed(p), cpu_active_mask);
 	if (dest_cpu < nr_cpu_ids)
 		return dest_cpu;
 
@@ -2585,7 +2585,7 @@ int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
 	 * [ this allows ->select_task() to simply return task_cpu(p) and
 	 *   not worry about this generic constraint ]
 	 */
-	if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) ||
+	if (unlikely(!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)) ||
 		     !cpu_online(cpu)))
 		cpu = select_fallback_rq(task_cpu(p), p);
 
@@ -6262,7 +6262,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 	if (task_cpu(p) != src_cpu)
 		goto done;
 	/* Affinity changed (again). */
-	if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+	if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
 		goto fail;
 
 	/*
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 506db0966eb..5c9e67923b7 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -2183,7 +2183,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 
 		/* Skip over this group if it has no CPUs allowed */
 		if (!cpumask_intersects(sched_group_cpus(group),
-					&p->cpus_allowed))
+					tsk_cpus_allowed(p)))
 			continue;
 
 		local_group = cpumask_test_cpu(this_cpu,
@@ -2229,7 +2229,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
 	int i;
 
 	/* Traverse only the allowed CPUs */
-	for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) {
+	for_each_cpu_and(i, sched_group_cpus(group), tsk_cpus_allowed(p)) {
 		load = weighted_cpuload(i);
 
 		if (load < min_load || (load == min_load && i == this_cpu)) {
@@ -2273,7 +2273,7 @@ static int select_idle_sibling(struct task_struct *p, int target)
 		if (!(sd->flags & SD_SHARE_PKG_RESOURCES))
 			break;
 
-		for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) {
+		for_each_cpu_and(i, sched_domain_span(sd), tsk_cpus_allowed(p)) {
 			if (idle_cpu(i)) {
 				target = i;
 				break;
@@ -2316,7 +2316,7 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 	int sync = wake_flags & WF_SYNC;
 
 	if (sd_flag & SD_BALANCE_WAKE) {
-		if (cpumask_test_cpu(cpu, &p->cpus_allowed))
+		if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
 			want_affine = 1;
 		new_cpu = prev_cpu;
 	}
@@ -2697,7 +2697,7 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
 	 * 2) cannot be migrated to this CPU due to cpus_allowed, or
 	 * 3) are cache-hot on their current CPU.
 	 */
-	if (!cpumask_test_cpu(this_cpu, &p->cpus_allowed)) {
+	if (!cpumask_test_cpu(this_cpu, tsk_cpus_allowed(p))) {
 		schedstat_inc(p, se.statistics.nr_failed_migrations_affine);
 		return 0;
 	}
@@ -4087,7 +4087,7 @@ redo:
 			 * moved to this_cpu
 			 */
 			if (!cpumask_test_cpu(this_cpu,
-					      &busiest->curr->cpus_allowed)) {
+					      tsk_cpus_allowed(busiest->curr))) {
 				raw_spin_unlock_irqrestore(&busiest->lock,
 							    flags);
 				all_pinned = 1;
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 0cc188cf766..57a10842afa 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1179,7 +1179,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 {
 	if (!task_running(rq, p) &&
-	    (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) &&
+	    (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) &&
 	    (p->rt.nr_cpus_allowed > 1))
 		return 1;
 	return 0;
@@ -1324,7 +1324,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 		 */
 		if (unlikely(task_rq(task) != rq ||
 			     !cpumask_test_cpu(lowest_rq->cpu,
-					       &task->cpus_allowed) ||
+					       tsk_cpus_allowed(task)) ||
 			     task_running(rq, task) ||
 			     !task->on_rq)) {
 
diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
index 4689cb073da..503f087382a 100644
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -22,7 +22,7 @@ notrace unsigned int debug_smp_processor_id(void)
 	 * Kernel threads bound to a single CPU can safely use
 	 * smp_processor_id():
 	 */
-	if (cpumask_equal(&current->cpus_allowed, cpumask_of(this_cpu)))
+	if (cpumask_equal(tsk_cpus_allowed(current), cpumask_of(this_cpu)))
 		goto out;
 
 	/*
--
cgit v1.2.3-70-g09d2

From 4939602a2441306008c6dca38216b741d4e09a42 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Sat, 25 Jun 2011 15:45:46 +0200
Subject: sched: Unify the ->cpus_allowed mask copy

Currently every sched_class::set_cpus_allowed() implementation has to
copy the cpumask into task_struct::cpus_allowed; this is pointless, so
put this copy in the generic code.

Signed-off-by: Peter Zijlstra
Acked-by: Thomas Gleixner
Link: http://lkml.kernel.org/n/tip-jhl5s9fckd9ptw1fzbqqlrd3@git.kernel.org
Signed-off-by: Ingo Molnar
---
 kernel/sched.c    | 7 +++----
 kernel/sched_rt.c | 3 ---
 2 files changed, 3 insertions(+), 7 deletions(-)

(limited to 'kernel/sched_rt.c')

diff --git a/kernel/sched.c b/kernel/sched.c
index 45174ca5c8e..ce9a9e7db11 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6161,10 +6161,9 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 {
 	if (p->sched_class && p->sched_class->set_cpus_allowed)
 		p->sched_class->set_cpus_allowed(p, new_mask);
-	else {
-		cpumask_copy(&p->cpus_allowed, new_mask);
-		p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
-	}
+
+	cpumask_copy(&p->cpus_allowed, new_mask);
+	p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
 }
 
 /*
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 57a10842afa..3023fd1ef36 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1608,9 +1608,6 @@ static void set_cpus_allowed_rt(struct task_struct *p,
 
 		update_rt_migration(&rq->rt);
 	}
-
-	cpumask_copy(&p->cpus_allowed, new_mask);
-	p->rt.nr_cpus_allowed = weight;
 }
 
 /* Assumes rq->lock is held */
--
cgit v1.2.3-70-g09d2

From 1c83437e80186832a9a48dbb6b8868d28e40e562 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 5 Oct 2011 13:32:34 +0200
Subject: sched: Warn on rt throttling

The default rt-throttling is a source of never-ending questions. Warn
once when we go into throttling so folks have that info in dmesg.

Signed-off-by: Thomas Gleixner
Signed-off-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/alpine.LFD.2.02.1110051331480.18778@ionos
Signed-off-by: Ingo Molnar
---
 kernel/sched_rt.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel/sched_rt.c')

diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 3023fd1ef36..056cbd2e2a2 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -655,6 +655,7 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 
 	if (rt_rq->rt_time > runtime) {
 		rt_rq->rt_throttled = 1;
+		printk_once(KERN_WARNING "sched: RT throttling activated\n");
 		if (rt_rq_throttled(rt_rq)) {
 			sched_rt_rq_dequeue(rt_rq);
 			return 1;
--
cgit v1.2.3-70-g09d2
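To close the series, here is a standalone C sketch of the one-shot
warning behaviour that printk_once() gives the throttling path above.
The numbers and helper names are invented; only the shape of the check
loosely mirrors sched_rt_runtime_exceeded():

  /*
   * Standalone sketch, not kernel code: a throttle check whose warning
   * fires once, no matter how often the throttle trips.  All values are
   * assumptions for illustration only.
   */
  #include <stdio.h>

  #define warn_once(...)                              \
      do {                                            \
          static int warned;                          \
          if (!warned) {                              \
              warned = 1;                             \
              fprintf(stderr, __VA_ARGS__);           \
          }                                           \
      } while (0)

  static int rt_time, rt_runtime = 950;   /* e.g. 950us budget per period */

  static int rt_runtime_exceeded_model(int exec_delta)
  {
      rt_time += exec_delta;
      if (rt_time > rt_runtime) {
          /* Printed once, regardless of how often we throttle */
          warn_once("sched: RT throttling activated\n");
          return 1;
      }
      return 0;
  }

  int main(void)
  {
      int i, throttled = 0;

      for (i = 0; i < 5; i++)
          throttled += rt_runtime_exceeded_model(300);
      printf("throttled %d times, warned once\n", throttled);
      return 0;
  }

Running it shows the throttle tripping repeatedly while the warning is
emitted only on the first occurrence, which is exactly the amount of
dmesg noise the patch intends.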