Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--  kernel/sched_fair.c  144
1 file changed, 92 insertions(+), 52 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 10d218ab69f..37087a7fac2 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -384,10 +384,10 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
#ifdef CONFIG_SCHED_DEBUG
int sched_nr_latency_handler(struct ctl_table *table, int write,
- struct file *filp, void __user *buffer, size_t *lenp,
+ void __user *buffer, size_t *lenp,
loff_t *ppos)
{
- int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+ int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (ret || !write)
return ret;
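The handler drops its struct file * argument because the proc_handler prototype, and proc_dointvec_minmax() with it, no longer take one. The tail of the function falls outside this hunk; the sketch below reconstructs a plausible full shape from context, so the recomputation at the end is an assumption rather than a quote from the tree:

int sched_nr_latency_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp,
		loff_t *ppos)
{
	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	if (ret || !write)
		return ret;

	/* Assumed tail: re-derive how many minimum-granularity slices
	 * fit in one latency period whenever the sysctl is written. */
	sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency,
					sysctl_sched_min_granularity);

	return 0;
}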
@@ -513,6 +513,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
if (entity_is_task(curr)) {
struct task_struct *curtask = task_of(curr);
+ trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
cpuacct_charge(curtask, delta_exec);
account_group_exec_runtime(curtask, delta_exec);
}
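The new trace_sched_stat_runtime() call fires a tracepoint every time update_curr() charges runtime to a task. A hedged sketch of what the matching TRACE_EVENT definition plausibly looks like; the field layout here is illustrative, not copied from include/trace/events/sched.h:

TRACE_EVENT(sched_stat_runtime,

	TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),

	TP_ARGS(tsk, runtime, vruntime),

	TP_STRUCT__entry(
		__array(char,  comm, TASK_COMM_LEN)
		__field(pid_t, pid)
		__field(u64,   runtime)
		__field(u64,   vruntime)
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid      = tsk->pid;
		__entry->runtime  = runtime;
		__entry->vruntime = vruntime;
	),

	TP_printk("comm=%s pid=%d runtime=%llu vruntime=%llu",
		  __entry->comm, __entry->pid,
		  (unsigned long long)__entry->runtime,
		  (unsigned long long)__entry->vruntime)
);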
@@ -709,31 +710,28 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
if (initial && sched_feat(START_DEBIT))
vruntime += sched_vslice(cfs_rq, se);
- if (!initial) {
- /* sleeps upto a single latency don't count. */
- if (sched_feat(FAIR_SLEEPERS)) {
- unsigned long thresh = sysctl_sched_latency;
+ /* sleeps up to a single latency don't count. */
+ if (!initial && sched_feat(FAIR_SLEEPERS)) {
+ unsigned long thresh = sysctl_sched_latency;
- /*
- * Convert the sleeper threshold into virtual time.
- * SCHED_IDLE is a special sub-class. We care about
- * fairness only relative to other SCHED_IDLE tasks,
- * all of which have the same weight.
- */
- if (sched_feat(NORMALIZED_SLEEPER) &&
- (!entity_is_task(se) ||
- task_of(se)->policy != SCHED_IDLE))
- thresh = calc_delta_fair(thresh, se);
+ /*
+ * Convert the sleeper threshold into virtual time.
+ * SCHED_IDLE is a special sub-class. We care about
+ * fairness only relative to other SCHED_IDLE tasks,
+ * all of which have the same weight.
+ */
+ if (sched_feat(NORMALIZED_SLEEPER) && (!entity_is_task(se) ||
+ task_of(se)->policy != SCHED_IDLE))
+ thresh = calc_delta_fair(thresh, se);
- /*
- * Halve their sleep time's effect, to allow
- * for a gentler effect of sleepers:
- */
- if (sched_feat(GENTLE_FAIR_SLEEPERS))
- thresh >>= 1;
+ /*
+ * Halve their sleep time's effect, to allow
+ * for a gentler effect of sleepers:
+ */
+ if (sched_feat(GENTLE_FAIR_SLEEPERS))
+ thresh >>= 1;
- vruntime -= thresh;
- }
+ vruntime -= thresh;
}
/* ensure we never gain time by being placed backwards. */
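The restructured branch folds !initial and FAIR_SLEEPERS into a single condition without changing behavior: a woken sleeper is placed up to half a latency period (a full one without GENTLE_FAIR_SLEEPERS) to the left of min_vruntime. A standalone userspace model of that arithmetic, with illustrative names and constants:

/* model_place.c - userspace model of the sleeper credit above.
 * Constants and names are illustrative, not the kernel's. */
#include <stdio.h>
#include <stdint.h>

#define GENTLE_FAIR_SLEEPERS 1

static uint64_t sysctl_sched_latency = 20000000ULL;	/* 20 ms in ns */

static uint64_t place_sleeper(uint64_t min_vruntime, uint64_t se_vruntime)
{
	uint64_t vruntime = min_vruntime;
	uint64_t thresh = sysctl_sched_latency;

	/* Halve the sleep credit for a gentler effect of sleepers. */
	if (GENTLE_FAIR_SLEEPERS)
		thresh >>= 1;

	vruntime -= thresh;

	/* "ensure we never gain time by being placed backwards": keep the
	 * task's own vruntime if it is already further right. (The kernel
	 * uses a wrap-safe max_vruntime(); a plain compare suffices here.) */
	return se_vruntime > vruntime ? se_vruntime : vruntime;
}

int main(void)
{
	printf("placed at %llu ns\n",
	       (unsigned long long)place_sleeper(100000000ULL, 0ULL));
	return 0;
}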
@@ -824,6 +822,26 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
* re-elected due to buddy favours.
*/
clear_buddies(cfs_rq, curr);
+ return;
+ }
+
+ /*
+ * Ensure that a task that missed wakeup preemption by a
+ * narrow margin doesn't have to wait for a full slice.
+ * This also mitigates buddy-induced latencies under load.
+ */
+ if (!sched_feat(WAKEUP_PREEMPT))
+ return;
+
+ if (delta_exec < sysctl_sched_min_granularity)
+ return;
+
+ if (cfs_rq->nr_running > 1) {
+ struct sched_entity *se = __pick_next_entity(cfs_rq);
+ s64 delta = curr->vruntime - se->vruntime;
+
+ if (delta > ideal_runtime)
+ resched_task(rq_of(cfs_rq)->curr);
}
}
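The added block gives a task that lost a wakeup-preemption race by a narrow margin another exit: once it has run at least a minimum granularity, it is rescheduled as soon as it trails the leftmost entity by more than its ideal slice, instead of holding the CPU for the rest of that slice. A userspace model of the decision, with illustrative values:

/* model_tick.c - userspace model of the early-preemption check above. */
#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

static uint64_t sysctl_sched_min_granularity = 4000000ULL;	/* 4 ms in ns */

static bool should_resched_early(uint64_t delta_exec, uint64_t ideal_runtime,
				 int64_t curr_vruntime, int64_t left_vruntime,
				 unsigned int nr_running)
{
	/* Never preempt before the task has run a minimum granularity. */
	if (delta_exec < sysctl_sched_min_granularity)
		return false;

	if (nr_running > 1) {
		int64_t delta = curr_vruntime - left_vruntime;

		/* curr missed wakeup preemption by a wide margin:
		 * resched now rather than after a full slice. */
		if (delta > (int64_t)ideal_runtime)
			return true;
	}
	return false;
}

int main(void)
{
	printf("%d\n", should_resched_early(5000000ULL, 3000000ULL,
					    10000000, 2000000, 3));
	return 0;
}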
@@ -863,12 +881,18 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
{
struct sched_entity *se = __pick_next_entity(cfs_rq);
+ struct sched_entity *left = se;
- if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, se) < 1)
- return cfs_rq->next;
+ if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1)
+ se = cfs_rq->next;
+
+ /*
+ * Prefer last buddy, try to return the CPU to a preempted task.
+ */
+ if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1)
+ se = cfs_rq->last;
- if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, se) < 1)
- return cfs_rq->last;
+ clear_buddies(cfs_rq, se);
return se;
}
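wakeup_preempt_entity(buddy, left) < 1 means the buddy's vruntime is within one wakeup granularity of the leftmost entity, so picking it costs little fairness; checking the last buddy second lets it override the next buddy and return the CPU to a preempted task. A userspace model of the selection order, with an illustrative granularity:

/* model_pick.c - userspace model of the buddy selection above. */
#include <stdio.h>
#include <stdint.h>

static int64_t gran = 1000000;	/* wakeup granularity in ns, illustrative */

/* 1: cand trails left by more than gran (too unfair to pick),
 * 0: trails within gran, -1: cand is at or ahead of left. */
static int preempt_check(int64_t cand_vruntime, int64_t left_vruntime)
{
	int64_t vdiff = cand_vruntime - left_vruntime;

	if (vdiff <= 0)
		return -1;
	if (vdiff > gran)
		return 1;
	return 0;
}

static int64_t pick_next(int64_t left, const int64_t *next,
			 const int64_t *last)
{
	int64_t se = left;

	if (next && preempt_check(*next, left) < 1)
		se = *next;
	/* Last buddy is checked second so it wins over the next buddy. */
	if (last && preempt_check(*last, left) < 1)
		se = *last;
	return se;
}

int main(void)
{
	int64_t left = 5000000, next = 5500000, last = 5200000;

	printf("picked vruntime %lld\n",
	       (long long)pick_next(left, &next, &last));
	return 0;
}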
@@ -1342,7 +1366,8 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
int sync = wake_flags & WF_SYNC;
if (sd_flag & SD_BALANCE_WAKE) {
- if (sched_feat(AFFINE_WAKEUPS))
+ if (sched_feat(AFFINE_WAKEUPS) &&
+ cpumask_test_cpu(cpu, &p->cpus_allowed))
want_affine = 1;
new_cpu = prev_cpu;
}
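The extra cpumask_test_cpu() guard keeps the affine-wakeup path from steering the task toward a CPU its affinity mask forbids. A userspace analogue of the same check, using glibc's cpu_set_t:

/* model_affine.c - userspace analogue of the added affinity guard. */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t allowed;
	int waking_cpu = 2, prev_cpu = 0, want_affine = 0;

	CPU_ZERO(&allowed);
	CPU_SET(0, &allowed);	/* task may only run on CPU 0 */

	/* Only prefer the waking CPU when the mask actually allows it. */
	if (CPU_ISSET(waking_cpu, &allowed))
		want_affine = 1;

	printf("want_affine=%d, fallback cpu=%d\n", want_affine, prev_cpu);
	return 0;
}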
@@ -1569,6 +1594,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
struct sched_entity *se = &curr->se, *pse = &p->se;
struct cfs_rq *cfs_rq = task_cfs_rq(curr);
int sync = wake_flags & WF_SYNC;
+ int scale = cfs_rq->nr_running >= sched_nr_latency;
update_curr(cfs_rq);
@@ -1583,18 +1609,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
if (unlikely(se == pse))
return;
- /*
- * Only set the backward buddy when the current task is still on the
- * rq. This can happen when a wakeup gets interleaved with schedule on
- * the ->pre_schedule() or idle_balance() point, either of which can
- * drop the rq lock.
- *
- * Also, during early boot the idle thread is in the fair class, for
- * obvious reasons its a bad idea to schedule back to the idle thread.
- */
- if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle))
- set_last_buddy(se);
- if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK))
+ if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK))
set_next_buddy(pse);
/*
@@ -1640,8 +1655,22 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
BUG_ON(!pse);
- if (wakeup_preempt_entity(se, pse) == 1)
+ if (wakeup_preempt_entity(se, pse) == 1) {
resched_task(curr);
+ /*
+ * Only set the backward buddy when the current task is still
+ * on the rq. This can happen when a wakeup gets interleaved
+ * with schedule on the ->pre_schedule() or idle_balance()
+ * point, either of which can drop the rq lock.
+ *
+ * Also, during early boot the idle thread is in the fair class,
+ * for obvious reasons it's a bad idea to schedule back to it.
+ */
+ if (unlikely(!se->on_rq || curr == rq->idle))
+ return;
+ if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se))
+ set_last_buddy(se);
+ }
}
static struct task_struct *pick_next_task_fair(struct rq *rq)
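Net effect of the check_preempt_wakeup() hunks: both buddies are tracked only once the runqueue is busy enough for them to matter (scale, i.e. nr_running >= sched_nr_latency), and the last buddy is recorded only after preemption has actually been decided. A standalone userspace model of the reordered flow, with stubbed helpers standing in for the kernel's:

/* model_wakeup.c - userspace model of the reordered buddy logic. */
#include <stdio.h>
#include <stdbool.h>

struct ent { long long vruntime; bool on_rq; };

/* Stub: treat any vruntime lead by the woken task as grounds to preempt. */
static bool preempts(const struct ent *curr, const struct ent *woken)
{
	return woken->vruntime < curr->vruntime;
}

static void wakeup(struct ent *curr, struct ent *woken,
		   unsigned int nr_running, unsigned int nr_latency,
		   bool is_fork, bool curr_is_idle,
		   const struct ent **next_buddy, const struct ent **last_buddy)
{
	bool scale = nr_running >= nr_latency;

	/* Buddies only pay off under load. */
	if (scale && !is_fork)
		*next_buddy = woken;

	if (preempts(curr, woken)) {
		/* resched_task(curr) would go here. */
		if (!curr->on_rq || curr_is_idle)
			return;	/* mirrors the on_rq / idle guard above */
		if (scale)
			*last_buddy = curr;
	}
}

int main(void)
{
	struct ent curr = { .vruntime = 900, .on_rq = true };
	struct ent woken = { .vruntime = 100 };
	const struct ent *next = NULL, *last = NULL;

	wakeup(&curr, &woken, 8, 5, false, false, &next, &last);
	printf("next=%p last=%p\n", (const void *)next, (const void *)last);
	return 0;
}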
@@ -1655,16 +1684,6 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
do {
se = pick_next_entity(cfs_rq);
- /*
- * If se was a buddy, clear it so that it will have to earn
- * the favour again.
- *
- * If se was not a buddy, clear the buddies because neither
- * was elegible to run, let them earn it again.
- *
- * IOW. unconditionally clear buddies.
- */
- __clear_buddies(cfs_rq, NULL);
set_next_entity(cfs_rq, se);
cfs_rq = group_cfs_rq(se);
} while (cfs_rq);
@@ -1940,6 +1959,25 @@ static void moved_group_fair(struct task_struct *p)
}
#endif
+unsigned int get_rr_interval_fair(struct task_struct *task)
+{
+ struct sched_entity *se = &task->se;
+ unsigned long flags;
+ struct rq *rq;
+ unsigned int rr_interval = 0;
+
+ /*
+ * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise
+ * idle runqueue:
+ */
+ rq = task_rq_lock(task, &flags);
+ if (rq->cfs.load.weight)
+ rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se));
+ task_rq_unlock(rq, &flags);
+
+ return rr_interval;
+}
+
/*
* All the scheduling class methods:
*/
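NS_TO_JIFFIES() in get_rr_interval_fair() turns the nanosecond slice from sched_slice() into scheduler ticks. A userspace model of the conversion, assuming a fixed, illustrative HZ:

/* model_jiffies.c - userspace model of the ns -> jiffies conversion. */
#include <stdio.h>

#define HZ 250
#define NSEC_PER_SEC 1000000000ULL

static unsigned int ns_to_jiffies(unsigned long long ns)
{
	/* One jiffy lasts NSEC_PER_SEC / HZ nanoseconds. */
	return (unsigned int)(ns / (NSEC_PER_SEC / HZ));
}

int main(void)
{
	/* A 12 ms slice is 3 jiffies at HZ=250. */
	printf("%u\n", ns_to_jiffies(12000000ULL));
	return 0;
}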
@@ -1968,6 +2006,8 @@ static const struct sched_class fair_sched_class = {
.prio_changed = prio_changed_fair,
.switched_to = switched_to_fair,
+ .get_rr_interval = get_rr_interval_fair,
+
#ifdef CONFIG_FAIR_GROUP_SCHED
.moved_group = moved_group_fair,
#endif
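With the hook wired into the class table, the sched_rr_get_interval() path can report a meaningful slice for CFS tasks instead of 0. A hedged, kernel-style sketch of what a caller might look like; the actual syscall plumbing is not part of this diff and will differ:

/* Hypothetical caller shape; not the tree's actual code. */
static unsigned int task_timeslice_jiffies(struct task_struct *p)
{
	if (p->sched_class->get_rr_interval)
		return p->sched_class->get_rr_interval(p);
	return 0;
}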