summaryrefslogtreecommitdiffstats
path: root/kernel/sched.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--kernel/sched.c115
1 files changed, 82 insertions, 33 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 1cb53fb1fe3..8dcdec6fe0f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -301,7 +301,7 @@ struct cfs_rq {
/* 'curr' points to currently running entity on this cfs_rq.
* It is set to NULL otherwise (i.e when none are currently running).
*/
- struct sched_entity *curr;
+ struct sched_entity *curr, *next;
unsigned long nr_spread_over;
@@ -594,18 +594,14 @@ enum {
SCHED_FEAT_NEW_FAIR_SLEEPERS = 1,
SCHED_FEAT_WAKEUP_PREEMPT = 2,
SCHED_FEAT_START_DEBIT = 4,
- SCHED_FEAT_TREE_AVG = 8,
- SCHED_FEAT_APPROX_AVG = 16,
- SCHED_FEAT_HRTICK = 32,
- SCHED_FEAT_DOUBLE_TICK = 64,
+ SCHED_FEAT_HRTICK = 8,
+ SCHED_FEAT_DOUBLE_TICK = 16,
};
const_debug unsigned int sysctl_sched_features =
SCHED_FEAT_NEW_FAIR_SLEEPERS * 1 |
SCHED_FEAT_WAKEUP_PREEMPT * 1 |
SCHED_FEAT_START_DEBIT * 1 |
- SCHED_FEAT_TREE_AVG * 0 |
- SCHED_FEAT_APPROX_AVG * 0 |
SCHED_FEAT_HRTICK * 1 |
SCHED_FEAT_DOUBLE_TICK * 0;
@@ -1056,6 +1052,49 @@ static void resched_cpu(int cpu)
resched_task(cpu_curr(cpu));
spin_unlock_irqrestore(&rq->lock, flags);
}
+
+#ifdef CONFIG_NO_HZ
+/*
+ * When add_timer_on() enqueues a timer into the timer wheel of an
+ * idle CPU then this timer might expire before the next timer event
+ * which is scheduled to wake up that CPU. In case of a completely
+ * idle system the next event might even be infinite time into the
+ * future. wake_up_idle_cpu() ensures that the CPU is woken up and
+ * leaves the inner idle loop so the newly added timer is taken into
+ * account when the CPU goes back to idle and evaluates the timer
+ * wheel for the next timer event.
+ */
+void wake_up_idle_cpu(int cpu)
+{
+ struct rq *rq = cpu_rq(cpu);
+
+ if (cpu == smp_processor_id())
+ return;
+
+ /*
+ * This is safe, as this function is called with the timer
+ * wheel base lock of (cpu) held. When the CPU is on the way
+ * to idle and has not yet set rq->curr to idle then it will
+ * be serialized on the timer wheel base lock and take the new
+ * timer into account automatically.
+ */
+ if (rq->curr != rq->idle)
+ return;
+
+ /*
+ * We can set TIF_RESCHED on the idle task of the other CPU
+ * lockless. The worst case is that the other CPU runs the
+ * idle task through an additional NOOP schedule()
+ */
+ set_tsk_thread_flag(rq->idle, TIF_NEED_RESCHED);
+
+ /* NEED_RESCHED must be visible before we test polling */
+ smp_mb();
+ if (!tsk_is_polling(rq->idle))
+ smp_send_reschedule(cpu);
+}
+#endif
+
#else
static void __resched_task(struct task_struct *p, int tif_bit)
{
@@ -1084,7 +1123,7 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
u64 tmp;
if (unlikely(!lw->inv_weight))
- lw->inv_weight = (WMULT_CONST - lw->weight/2) / lw->weight + 1;
+ lw->inv_weight = (WMULT_CONST-lw->weight/2) / (lw->weight+1);
tmp = (u64)delta_exec * weight;
/*
@@ -1108,11 +1147,13 @@ calc_delta_fair(unsigned long delta_exec, struct load_weight *lw)
static inline void update_load_add(struct load_weight *lw, unsigned long inc)
{
lw->weight += inc;
+ lw->inv_weight = 0;
}
static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
{
lw->weight -= dec;
+ lw->inv_weight = 0;
}
/*
@@ -1394,6 +1435,12 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
{
s64 delta;
+ /*
+ * Buddy candidates are cache hot:
+ */
+ if (&p->se == cfs_rq_of(&p->se)->next)
+ return 1;
+
if (p->sched_class != &fair_sched_class)
return 0;
@@ -1853,10 +1900,11 @@ out_activate:
schedstat_inc(p, se.nr_wakeups_remote);
update_rq_clock(rq);
activate_task(rq, p, 1);
- check_preempt_curr(rq, p);
success = 1;
out_running:
+ check_preempt_curr(rq, p);
+
p->state = TASK_RUNNING;
#ifdef CONFIG_SMP
if (p->sched_class->task_wake_up)
@@ -1890,6 +1938,8 @@ static void __sched_fork(struct task_struct *p)
p->se.exec_start = 0;
p->se.sum_exec_runtime = 0;
p->se.prev_sum_exec_runtime = 0;
+ p->se.last_wakeup = 0;
+ p->se.avg_overlap = 0;
#ifdef CONFIG_SCHEDSTATS
p->se.wait_start = 0;
@@ -3875,7 +3925,7 @@ need_resched_nonpreemptible:
if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
- unlikely(signal_pending(prev)))) {
+ signal_pending(prev))) {
prev->state = TASK_RUNNING;
} else {
deactivate_task(rq, prev, 1);
@@ -4268,11 +4318,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
oldprio = p->prio;
on_rq = p->se.on_rq;
running = task_current(rq, p);
- if (on_rq) {
+ if (on_rq)
dequeue_task(rq, p, 0);
- if (running)
- p->sched_class->put_prev_task(rq, p);
- }
+ if (running)
+ p->sched_class->put_prev_task(rq, p);
if (rt_prio(prio))
p->sched_class = &rt_sched_class;
@@ -4281,10 +4330,9 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
p->prio = prio;
+ if (running)
+ p->sched_class->set_curr_task(rq);
if (on_rq) {
- if (running)
- p->sched_class->set_curr_task(rq);
-
enqueue_task(rq, p, 0);
check_class_changed(rq, p, prev_class, oldprio, running);
@@ -4581,19 +4629,17 @@ recheck:
update_rq_clock(rq);
on_rq = p->se.on_rq;
running = task_current(rq, p);
- if (on_rq) {
+ if (on_rq)
deactivate_task(rq, p, 0);
- if (running)
- p->sched_class->put_prev_task(rq, p);
- }
+ if (running)
+ p->sched_class->put_prev_task(rq, p);
oldprio = p->prio;
__setscheduler(rq, p, policy, param->sched_priority);
+ if (running)
+ p->sched_class->set_curr_task(rq);
if (on_rq) {
- if (running)
- p->sched_class->set_curr_task(rq);
-
activate_task(rq, p, 0);
check_class_changed(rq, p, prev_class, oldprio, running);
@@ -6804,6 +6850,10 @@ static int ndoms_cur; /* number of sched domains in 'doms_cur' */
*/
static cpumask_t fallback_doms;
+void __attribute__((weak)) arch_update_cpu_topology(void)
+{
+}
+
/*
* Set up scheduler domains and groups. Callers must hold the hotplug lock.
* For now this just excludes isolated cpus, but could be used to
@@ -6813,6 +6863,7 @@ static int arch_init_sched_domains(const cpumask_t *cpu_map)
{
int err;
+ arch_update_cpu_topology();
ndoms_cur = 1;
doms_cur = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
if (!doms_cur)
@@ -6917,7 +6968,7 @@ match2:
}
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-static int arch_reinit_sched_domains(void)
+int arch_reinit_sched_domains(void)
{
int err;
@@ -7618,11 +7669,10 @@ void sched_move_task(struct task_struct *tsk)
running = task_current(rq, tsk);
on_rq = tsk->se.on_rq;
- if (on_rq) {
+ if (on_rq)
dequeue_task(rq, tsk, 0);
- if (unlikely(running))
- tsk->sched_class->put_prev_task(rq, tsk);
- }
+ if (unlikely(running))
+ tsk->sched_class->put_prev_task(rq, tsk);
set_task_rq(tsk, task_cpu(tsk));
@@ -7631,11 +7681,10 @@ void sched_move_task(struct task_struct *tsk)
tsk->sched_class->moved_group(tsk);
#endif
- if (on_rq) {
- if (unlikely(running))
- tsk->sched_class->set_curr_task(rq);
+ if (unlikely(running))
+ tsk->sched_class->set_curr_task(rq);
+ if (on_rq)
enqueue_task(rq, tsk, 0);
- }
task_rq_unlock(rq, &flags);
}