diff options
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r-- | kernel/sched_fair.c | 73 |
1 files changed, 53 insertions, 20 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 933f3d1b62e..00ebd768667 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1654,12 +1654,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ struct cfs_rq *cfs_rq = task_cfs_rq(curr); int scale = cfs_rq->nr_running >= sched_nr_latency; - if (unlikely(rt_prio(p->prio))) - goto preempt; - - if (unlikely(p->sched_class != &fair_sched_class)) - return; - if (unlikely(se == pse)) return; @@ -1764,10 +1758,6 @@ static void pull_task(struct rq *src_rq, struct task_struct *p, set_task_cpu(p, this_cpu); activate_task(this_rq, p, 0); check_preempt_curr(this_rq, p, 0); - - /* re-arm NEWIDLE balancing when moving tasks */ - src_rq->avg_idle = this_rq->avg_idle = 2*sysctl_sched_migration_cost; - this_rq->idle_stamp = 0; } /* @@ -2035,13 +2025,16 @@ struct sd_lb_stats { unsigned long this_load_per_task; unsigned long this_nr_running; unsigned long this_has_capacity; + unsigned int this_idle_cpus; /* Statistics of the busiest group */ + unsigned int busiest_idle_cpus; unsigned long max_load; unsigned long busiest_load_per_task; unsigned long busiest_nr_running; unsigned long busiest_group_capacity; unsigned long busiest_has_capacity; + unsigned int busiest_group_weight; int group_imb; /* Is there imbalance in this sd */ #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) @@ -2063,6 +2056,8 @@ struct sg_lb_stats { unsigned long sum_nr_running; /* Nr tasks running in the group */ unsigned long sum_weighted_load; /* Weighted load of group's tasks */ unsigned long group_capacity; + unsigned long idle_cpus; + unsigned long group_weight; int group_imb; /* Is there an imbalance in the group ? */ int group_has_capacity; /* Is there extra capacity in the group? */ }; @@ -2431,7 +2426,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, sgs->group_load += load; sgs->sum_nr_running += rq->nr_running; sgs->sum_weighted_load += weighted_cpuload(i); - + if (idle_cpu(i)) + sgs->idle_cpus++; } /* @@ -2469,6 +2465,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); if (!sgs->group_capacity) sgs->group_capacity = fix_small_capacity(sd, group); + sgs->group_weight = group->group_weight; if (sgs->group_capacity > sgs->sum_nr_running) sgs->group_has_capacity = 1; @@ -2576,13 +2573,16 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, sds->this_nr_running = sgs.sum_nr_running; sds->this_load_per_task = sgs.sum_weighted_load; sds->this_has_capacity = sgs.group_has_capacity; + sds->this_idle_cpus = sgs.idle_cpus; } else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) { sds->max_load = sgs.avg_load; sds->busiest = sg; sds->busiest_nr_running = sgs.sum_nr_running; + sds->busiest_idle_cpus = sgs.idle_cpus; sds->busiest_group_capacity = sgs.group_capacity; sds->busiest_load_per_task = sgs.sum_weighted_load; sds->busiest_has_capacity = sgs.group_has_capacity; + sds->busiest_group_weight = sgs.group_weight; sds->group_imb = sgs.group_imb; } @@ -2860,8 +2860,26 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, if (sds.this_load >= sds.avg_load) goto out_balanced; - if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) - goto out_balanced; + /* + * In the CPU_NEWLY_IDLE, use imbalance_pct to be conservative. + * And to check for busy balance use !idle_cpu instead of + * CPU_NOT_IDLE. This is because HT siblings will use CPU_NOT_IDLE + * even when they are idle. + */ + if (idle == CPU_NEWLY_IDLE || !idle_cpu(this_cpu)) { + if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) + goto out_balanced; + } else { + /* + * This cpu is idle. If the busiest group load doesn't + * have more tasks than the number of available cpu's and + * there is no imbalance between this and busiest group + * wrt to idle cpu's, it is balanced. + */ + if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) && + sds.busiest_nr_running <= sds.busiest_group_weight) + goto out_balanced; + } force_balance: /* Looks like there is an imbalance. Compute it */ @@ -3197,8 +3215,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq) interval = msecs_to_jiffies(sd->balance_interval); if (time_after(next_balance, sd->last_balance + interval)) next_balance = sd->last_balance + interval; - if (pulled_task) + if (pulled_task) { + this_rq->idle_stamp = 0; break; + } } raw_spin_lock(&this_rq->lock); @@ -3869,13 +3889,26 @@ static void set_curr_task_fair(struct rq *rq) } #ifdef CONFIG_FAIR_GROUP_SCHED -static void moved_group_fair(struct task_struct *p, int on_rq) +static void task_move_group_fair(struct task_struct *p, int on_rq) { - struct cfs_rq *cfs_rq = task_cfs_rq(p); - - update_curr(cfs_rq); + /* + * If the task was not on the rq at the time of this cgroup movement + * it must have been asleep, sleeping tasks keep their ->vruntime + * absolute on their old rq until wakeup (needed for the fair sleeper + * bonus in place_entity()). + * + * If it was on the rq, we've just 'preempted' it, which does convert + * ->vruntime to a relative base. + * + * Make sure both cases convert their relative position when migrating + * to another cgroup's rq. This does somewhat interfere with the + * fair sleeper stuff for the first placement, but who cares. + */ + if (!on_rq) + p->se.vruntime -= cfs_rq_of(&p->se)->min_vruntime; + set_task_rq(p, task_cpu(p)); if (!on_rq) - place_entity(cfs_rq, &p->se, 1); + p->se.vruntime += cfs_rq_of(&p->se)->min_vruntime; } #endif @@ -3927,7 +3960,7 @@ static const struct sched_class fair_sched_class = { .get_rr_interval = get_rr_interval_fair, #ifdef CONFIG_FAIR_GROUP_SCHED - .moved_group = moved_group_fair, + .task_move_group = task_move_group_fair, #endif }; |