author     Linus Torvalds <torvalds@woody.linux-foundation.org>  2007-10-17 09:11:18 -0700
committer  Linus Torvalds <torvalds@woody.linux-foundation.org>  2007-10-17 09:11:18 -0700
commit     e6d5a11dad44b8ae18ca8fc4ecb72ccccfa0a2d2 (patch)
tree       7e3837c8f28e2e969a7b7d040b00676c90bf72c7 /kernel/sched.c
parent     b6257a9036f06878a0f02354d5a07f155e1cfee0 (diff)
parent     b9dca1e0fcb696716840a3bc8f20a6941b484dbf (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched
* git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched:
  sched: fix new task startup crash
  sched: fix !SYSFS build breakage
  sched: fix improper load balance across sched domain
  sched: more robust sd-sysctl entry freeing
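
Of the four fixes, the load-balance one is the largest change in this file: find_busiest_group() now records the minimum and maximum per-CPU load inside each sched group, marks the group as internally imbalanced when the spread exceeds SCHED_LOAD_SCALE, allows such a group to be chosen as busiest even if it is not over capacity, and clamps the busiest group's per-task load to the average load. Below is a minimal standalone sketch of just that imbalance check, assuming per-CPU loads are handed in as a plain array; the helper and main() are illustrative only, not the kernel's rq/sched_domain code.

#include <stdio.h>

#define SCHED_LOAD_SCALE 1024UL	/* one default-weight task, in fixed-point load units */

/* Illustrative stand-in for one sched group: nr_cpus load samples. */
static int group_is_imbalanced(const unsigned long *cpu_load, int nr_cpus)
{
	unsigned long max_cpu_load = 0;
	unsigned long min_cpu_load = ~0UL;
	int i;

	for (i = 0; i < nr_cpus; i++) {
		if (cpu_load[i] > max_cpu_load)
			max_cpu_load = cpu_load[i];
		if (min_cpu_load > cpu_load[i])
			min_cpu_load = cpu_load[i];
	}

	/* Same condition the patch adds: spread larger than one full task. */
	return (max_cpu_load - min_cpu_load) > SCHED_LOAD_SCALE;
}

int main(void)
{
	/* Two loaded CPUs next to two idle ones: flagged as imbalanced. */
	unsigned long load[] = { 2048, 2048, 0, 0 };

	printf("group imbalanced: %d\n", group_is_imbalanced(load, 4));
	return 0;
}
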
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c  38
1 files changed, 31 insertions, 7 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index c4889abc00b..92721d1534b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1712,7 +1712,7 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
p->prio = effective_prio(p);
- if (!p->sched_class->task_new || !current->se.on_rq || !rq->cfs.curr) {
+ if (!p->sched_class->task_new || !current->se.on_rq) {
activate_task(rq, p, 0);
} else {
/*
@@ -2336,7 +2336,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
unsigned long max_pull;
unsigned long busiest_load_per_task, busiest_nr_running;
unsigned long this_load_per_task, this_nr_running;
- int load_idx;
+ int load_idx, group_imb = 0;
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
int power_savings_balance = 1;
unsigned long leader_nr_running = 0, min_load_per_task = 0;
@@ -2355,9 +2355,10 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
load_idx = sd->idle_idx;
do {
- unsigned long load, group_capacity;
+ unsigned long load, group_capacity, max_cpu_load, min_cpu_load;
int local_group;
int i;
+ int __group_imb = 0;
unsigned int balance_cpu = -1, first_idle_cpu = 0;
unsigned long sum_nr_running, sum_weighted_load;
@@ -2368,6 +2369,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
/* Tally up the load of all CPUs in the group */
sum_weighted_load = sum_nr_running = avg_load = 0;
+ max_cpu_load = 0;
+ min_cpu_load = ~0UL;
for_each_cpu_mask(i, group->cpumask) {
struct rq *rq;
@@ -2388,8 +2391,13 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
}
load = target_load(i, load_idx);
- } else
+ } else {
load = source_load(i, load_idx);
+ if (load > max_cpu_load)
+ max_cpu_load = load;
+ if (min_cpu_load > load)
+ min_cpu_load = load;
+ }
avg_load += load;
sum_nr_running += rq->nr_running;
@@ -2415,6 +2423,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
avg_load = sg_div_cpu_power(group,
avg_load * SCHED_LOAD_SCALE);
+ if ((max_cpu_load - min_cpu_load) > SCHED_LOAD_SCALE)
+ __group_imb = 1;
+
group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;
if (local_group) {
@@ -2423,11 +2434,12 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
this_nr_running = sum_nr_running;
this_load_per_task = sum_weighted_load;
} else if (avg_load > max_load &&
- sum_nr_running > group_capacity) {
+ (sum_nr_running > group_capacity || __group_imb)) {
max_load = avg_load;
busiest = group;
busiest_nr_running = sum_nr_running;
busiest_load_per_task = sum_weighted_load;
+ group_imb = __group_imb;
}
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
@@ -2499,6 +2511,9 @@ group_next:
goto out_balanced;
busiest_load_per_task /= busiest_nr_running;
+ if (group_imb)
+ busiest_load_per_task = min(busiest_load_per_task, avg_load);
+
/*
* We're trying to get all the cpus to the average_load, so we don't
* want to push ourselves above the average load, nor do we wish to
@@ -5282,11 +5297,20 @@ static struct ctl_table *sd_alloc_ctl_entry(int n)
static void sd_free_ctl_entry(struct ctl_table **tablep)
{
- struct ctl_table *entry = *tablep;
+ struct ctl_table *entry;
- for (entry = *tablep; entry->procname; entry++)
+ /*
+ * In the intermediate directories, both the child directory and
+ * procname are dynamically allocated and could fail but the mode
+ * will always be set. In the lowest directory the names are
+ * static strings and all have proc handlers.
+ */
+ for (entry = *tablep; entry->mode; entry++) {
if (entry->child)
sd_free_ctl_entry(&entry->child);
+ if (entry->proc_handler == NULL)
+ kfree(entry->procname);
+ }
kfree(*tablep);
*tablep = NULL;
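
The last hunk changes how the per-domain sysctl tables are torn down: the walk in sd_free_ctl_entry() now terminates on entry->mode instead of entry->procname (intermediate directory names are dynamically allocated and may be NULL after a failed allocation, while mode is always set), and it frees procname for every entry that has no proc_handler, i.e. the directory levels whose names were allocated at run time. A reduced userspace sketch of the same walk follows; the struct is an abridged stand-in for the real ctl_table in <linux/sysctl.h>, and the table built in main() is purely illustrative.

#include <stdlib.h>
#include <string.h>

/* Abridged stand-in for struct ctl_table; not the real kernel definition. */
struct ctl_table {
	char *procname;			/* dynamically allocated for directory levels, may be NULL */
	unsigned short mode;		/* always set, so it is a safe loop terminator */
	struct ctl_table *child;	/* sub-directory, NULL for leaf entries */
	void *proc_handler;		/* set only on leaf entries, whose names are static */
};

static void sd_free_ctl_entry(struct ctl_table **tablep)
{
	struct ctl_table *entry;

	/* Stop on mode == 0 rather than on a missing procname. */
	for (entry = *tablep; entry->mode; entry++) {
		if (entry->child)
			sd_free_ctl_entry(&entry->child);
		/* Entries without a proc handler own their name string. */
		if (entry->proc_handler == NULL)
			free(entry->procname);
	}
	free(*tablep);
	*tablep = NULL;
}

int main(void)
{
	/* One run-time-named directory entry plus the zeroed terminator. */
	struct ctl_table *dir = calloc(2, sizeof(*dir));

	dir[0].procname = strdup("domain0");	/* illustrative name */
	dir[0].mode = 0555;
	dir[0].proc_handler = NULL;		/* no handler: procname gets freed */

	sd_free_ctl_entry(&dir);		/* frees the table and resets dir to NULL */
	return 0;
}
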