From c88d5910890ad35af283344417891344604f0438 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 10 Sep 2009 13:50:02 +0200 Subject: sched: Merge select_task_rq_fair() and sched_balance_self() The problem with wake_idle() is that it doesn't respect things like cpu_power, which means it doesn't deal well with SMT nor the recent RT interaction. To cure this, it needs to do what sched_balance_self() does, which leads to the possibility of merging select_task_rq_fair() and sched_balance_self(). Modify sched_balance_self() to: - update_shares() when walking up the domain tree, (it only called it for the top domain, but it should have done this anyway), which allows us to remove this ugly bit from try_to_wake_up(). - do wake_affine() on the smallest domain that contains both this (the waking) and the prev (the wakee) cpu for WAKE invocations. Then use the top-down balance steps it had to replace wake_idle(). This leads to the disappearance of SD_WAKE_BALANCE and SD_WAKE_IDLE_FAR, with SD_WAKE_IDLE replaced with SD_BALANCE_WAKE. SD_WAKE_AFFINE needs SD_BALANCE_WAKE to be effective. Touch all topology bits to replace the old with new SD flags -- platforms might need re-tuning, enabling SD_BALANCE_WAKE conditionally on a NUMA distance seems like a good additional feature, magny-core and small nehalem systems would want this enabled, systems with slow interconnects would not. 
Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/topology.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 054a16d6808..c6343313ff5 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -62,9 +62,8 @@ static inline int pcibus_to_node(struct pci_bus *bus) .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ | SD_BALANCE_NEWIDLE \ - | SD_WAKE_IDLE \ - | SD_SERIALIZE \ - | SD_WAKE_BALANCE, \ + | SD_BALANCE_WAKE \ + | SD_SERIALIZE, \ .last_balance = jiffies, \ .balance_interval = 1, \ .nr_balance_failed = 0, \ -- cgit v1.2.3-70-g09d2 From 78e7ed53c9f42f04f9401ada6f7047db60781676 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 3 Sep 2009 13:16:51 +0200 Subject: sched: Tweak wake_idx When merging select_task_rq_fair() and sched_balance_self() we lost the use of wake_idx, restore that and set them to 0 to make wake balancing more aggressive. 
Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/topology.h | 5 +++-- arch/powerpc/include/asm/topology.h | 3 ++- arch/sh/include/asm/topology.h | 2 +- arch/sparc/include/asm/topology_64.h | 2 +- arch/x86/include/asm/topology.h | 2 +- include/linux/topology.h | 4 ++-- kernel/sched_fair.c | 21 ++++++++++++++++++--- 7 files changed, 28 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index cf6053b226c..47f3c51d5e2 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -62,11 +62,12 @@ void build_cpu_to_node_map(void); .busy_idx = 2, \ .idle_idx = 1, \ .newidle_idx = 2, \ - .wake_idx = 1, \ + .wake_idx = 0, \ .forkexec_idx = 1, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_NEWIDLE \ | SD_BALANCE_EXEC \ + | SD_BALANCE_FORK \ | SD_BALANCE_WAKE \ | SD_WAKE_AFFINE, \ .last_balance = jiffies, \ @@ -87,7 +88,7 @@ void build_cpu_to_node_map(void); .busy_idx = 3, \ .idle_idx = 2, \ .newidle_idx = 2, \ - .wake_idx = 1, \ + .wake_idx = 0, \ .forkexec_idx = 1, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index c6343313ff5..a6b220ab56d 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -58,9 +58,10 @@ static inline int pcibus_to_node(struct pci_bus *bus) .busy_idx = 3, \ .idle_idx = 1, \ .newidle_idx = 2, \ - .wake_idx = 1, \ + .wake_idx = 0, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ + | SD_BALANCE_FORK \ | SD_BALANCE_NEWIDLE \ | SD_BALANCE_WAKE \ | SD_SERIALIZE, \ diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h index dc1531e2f25..9054e5c0ad5 100644 --- a/arch/sh/include/asm/topology.h +++ b/arch/sh/include/asm/topology.h @@ -16,7 +16,7 @@ .busy_idx = 3, \ .idle_idx = 2, \ .newidle_idx = 2, \ - .wake_idx = 1, \ + .wake_idx = 0, \ 
.forkexec_idx = 1, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_FORK \ diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index 1d091abd2d1..bc3a0930ed6 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h @@ -52,7 +52,7 @@ static inline int pcibus_to_node(struct pci_bus *pbus) .busy_idx = 3, \ .idle_idx = 2, \ .newidle_idx = 0, \ - .wake_idx = 1, \ + .wake_idx = 0, \ .forkexec_idx = 1, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_FORK \ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 966d58dc627..4b1b335097b 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -138,7 +138,7 @@ extern unsigned long node_remap_size[]; .busy_idx = 3, \ .idle_idx = SD_IDLE_IDX, \ .newidle_idx = SD_NEWIDLE_IDX, \ - .wake_idx = 1, \ + .wake_idx = 0, \ .forkexec_idx = SD_FORKEXEC_IDX, \ \ .flags = 1*SD_LOAD_BALANCE \ diff --git a/include/linux/topology.h b/include/linux/topology.h index 6a8cd15555b..fef57040a4e 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -120,7 +120,7 @@ int arch_update_cpu_topology(void); .imbalance_pct = 125, \ .cache_nice_tries = 1, \ .busy_idx = 2, \ - .wake_idx = 1, \ + .wake_idx = 0, \ .forkexec_idx = 1, \ \ .flags = 1*SD_LOAD_BALANCE \ @@ -152,7 +152,7 @@ int arch_update_cpu_topology(void); .busy_idx = 2, \ .idle_idx = 1, \ .newidle_idx = 2, \ - .wake_idx = 1, \ + .wake_idx = 0, \ .forkexec_idx = 1, \ \ .flags = 1*SD_LOAD_BALANCE \ diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 8b3eddbcf9a..19593568031 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1232,12 +1232,27 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) * domain. 
*/ static struct sched_group * -find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) +find_idlest_group(struct sched_domain *sd, struct task_struct *p, + int this_cpu, int flag) { struct sched_group *idlest = NULL, *this = NULL, *group = sd->groups; unsigned long min_load = ULONG_MAX, this_load = 0; - int load_idx = sd->forkexec_idx; int imbalance = 100 + (sd->imbalance_pct-100)/2; + int load_idx = 0; + + switch (flag) { + case SD_BALANCE_FORK: + case SD_BALANCE_EXEC: + load_idx = sd->forkexec_idx; + break; + + case SD_BALANCE_WAKE: + load_idx = sd->wake_idx; + break; + + default: + break; + } do { unsigned long load, avg_load; @@ -1392,7 +1407,7 @@ static int select_task_rq_fair(struct task_struct *p, int flag, int sync) continue; } - group = find_idlest_group(sd, p, cpu); + group = find_idlest_group(sd, p, cpu, flag); if (!group) { sd = sd->child; continue; -- cgit v1.2.3-70-g09d2 From 0ec9fab3d186d9cbb00c0f694d4a260d07c198d9 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Tue, 15 Sep 2009 15:07:03 +0200 Subject: sched: Improve latencies and throughput Make the idle balancer more aggressive, to improve a x264 encoding workload provided by Jason Garrett-Glaser: NEXT_BUDDY NO_LB_BIAS encoded 600 frames, 252.82 fps, 22096.60 kb/s encoded 600 frames, 250.69 fps, 22096.60 kb/s encoded 600 frames, 245.76 fps, 22096.60 kb/s NO_NEXT_BUDDY LB_BIAS encoded 600 frames, 344.44 fps, 22096.60 kb/s encoded 600 frames, 346.66 fps, 22096.60 kb/s encoded 600 frames, 352.59 fps, 22096.60 kb/s NO_NEXT_BUDDY NO_LB_BIAS encoded 600 frames, 425.75 fps, 22096.60 kb/s encoded 600 frames, 425.45 fps, 22096.60 kb/s encoded 600 frames, 422.49 fps, 22096.60 kb/s Peter pointed out that this is better done via newidle_idx, not via LB_BIAS, newidle balancing should look for where there is load _now_, not where there was load 2 ticks ago. Worst-case latencies are improved as well as no buddies means less vruntime spread. 
(as per prior lkml discussions) This change improves kbuild-peak parallelism as well. Reported-by: Jason Garrett-Glaser Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra LKML-Reference: <1253011667.9128.16.camel@marge.simson.net> Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/topology.h | 5 +++-- arch/powerpc/include/asm/topology.h | 2 +- arch/sh/include/asm/topology.h | 3 ++- arch/x86/include/asm/topology.h | 4 +--- include/linux/topology.h | 2 +- kernel/sched_features.h | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index 47f3c51d5e2..42f1673ec83 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -61,7 +61,7 @@ void build_cpu_to_node_map(void); .cache_nice_tries = 2, \ .busy_idx = 2, \ .idle_idx = 1, \ - .newidle_idx = 2, \ + .newidle_idx = 0, \ .wake_idx = 0, \ .forkexec_idx = 1, \ .flags = SD_LOAD_BALANCE \ @@ -87,10 +87,11 @@ void build_cpu_to_node_map(void); .cache_nice_tries = 2, \ .busy_idx = 3, \ .idle_idx = 2, \ - .newidle_idx = 2, \ + .newidle_idx = 0, \ .wake_idx = 0, \ .forkexec_idx = 1, \ .flags = SD_LOAD_BALANCE \ + | SD_BALANCE_NEWIDLE \ | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ | SD_BALANCE_WAKE \ diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index a6b220ab56d..1a2c9eb42a0 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -57,7 +57,7 @@ static inline int pcibus_to_node(struct pci_bus *bus) .cache_nice_tries = 1, \ .busy_idx = 3, \ .idle_idx = 1, \ - .newidle_idx = 2, \ + .newidle_idx = 0, \ .wake_idx = 0, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h index 9054e5c0ad5..c8436771e31 100644 --- a/arch/sh/include/asm/topology.h +++ b/arch/sh/include/asm/topology.h @@ -15,13 +15,14 @@ .cache_nice_tries = 2, \ 
.busy_idx = 3, \ .idle_idx = 2, \ - .newidle_idx = 2, \ + .newidle_idx = 0, \ .wake_idx = 0, \ .forkexec_idx = 1, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_FORK \ | SD_BALANCE_EXEC \ | SD_BALANCE_WAKE \ + | SD_BALANCE_NEWIDLE \ | SD_SERIALIZE, \ .last_balance = jiffies, \ .balance_interval = 1, \ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 4b1b335097b..7fafd1bc414 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -116,14 +116,12 @@ extern unsigned long node_remap_size[]; # define SD_CACHE_NICE_TRIES 1 # define SD_IDLE_IDX 1 -# define SD_NEWIDLE_IDX 2 # define SD_FORKEXEC_IDX 0 #else # define SD_CACHE_NICE_TRIES 2 # define SD_IDLE_IDX 2 -# define SD_NEWIDLE_IDX 2 # define SD_FORKEXEC_IDX 1 #endif @@ -137,7 +135,7 @@ extern unsigned long node_remap_size[]; .cache_nice_tries = SD_CACHE_NICE_TRIES, \ .busy_idx = 3, \ .idle_idx = SD_IDLE_IDX, \ - .newidle_idx = SD_NEWIDLE_IDX, \ + .newidle_idx = 0, \ .wake_idx = 0, \ .forkexec_idx = SD_FORKEXEC_IDX, \ \ diff --git a/include/linux/topology.h b/include/linux/topology.h index c87edcd8796..4298745615a 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -151,7 +151,7 @@ int arch_update_cpu_topology(void); .cache_nice_tries = 1, \ .busy_idx = 2, \ .idle_idx = 1, \ - .newidle_idx = 2, \ + .newidle_idx = 0, \ .wake_idx = 0, \ .forkexec_idx = 1, \ \ diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 891ea0f72b4..e98c2e8de1d 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h @@ -67,7 +67,7 @@ SCHED_FEAT(AFFINE_WAKEUPS, 1) * wakeup-preemption), since its likely going to consume data we * touched, increases cache locality. 
*/ -SCHED_FEAT(NEXT_BUDDY, 1) +SCHED_FEAT(NEXT_BUDDY, 0) /* * Prefer to schedule the task that ran last (when we did -- cgit v1.2.3-70-g09d2 From 182a85f8a119c789610a9d464f4129ded9f3c107 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Sep 2009 13:24:49 +0200 Subject: sched: Disable wakeup balancing Sysbench thinks SD_BALANCE_WAKE is too aggressive and kbuild doesn't really mind too much, SD_BALANCE_NEWIDLE picks up most of the slack. On a dual socket, quad core, dual thread nehalem system: sysbench (--num_threads=16): SD_BALANCE_WAKE-: 13982 tx/s SD_BALANCE_WAKE+: 15688 tx/s kbuild (-j16): SD_BALANCE_WAKE-: 47.648295846 seconds time elapsed ( +- 0.312% ) SD_BALANCE_WAKE+: 47.608607360 seconds time elapsed ( +- 0.026% ) (same within noise) Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/topology.h | 2 -- arch/mips/include/asm/mach-ip27/topology.h | 1 - arch/powerpc/include/asm/topology.h | 1 - arch/sh/include/asm/topology.h | 1 - arch/sparc/include/asm/topology_64.h | 1 - arch/x86/include/asm/topology.h | 2 +- include/linux/topology.h | 6 +++--- 7 files changed, 4 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index 569b9dafc78..d0141fbf51d 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -68,7 +68,6 @@ void build_cpu_to_node_map(void); | SD_BALANCE_NEWIDLE \ | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ - | SD_BALANCE_WAKE \ | SD_WAKE_AFFINE, \ .last_balance = jiffies, \ .balance_interval = 1, \ @@ -94,7 +93,6 @@ void build_cpu_to_node_map(void); | SD_BALANCE_NEWIDLE \ | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ - | SD_BALANCE_WAKE \ | SD_SERIALIZE, \ .last_balance = jiffies, \ .balance_interval = 64, \ diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h index d8332398f5b..23059170700 100644 --- 
a/arch/mips/include/asm/mach-ip27/topology.h +++ b/arch/mips/include/asm/mach-ip27/topology.h @@ -48,7 +48,6 @@ extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES]; .cache_nice_tries = 1, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ - | SD_BALANCE_WAKE, \ .last_balance = jiffies, \ .balance_interval = 1, \ .nr_balance_failed = 0, \ diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 1a2c9eb42a0..394edcbcce7 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -63,7 +63,6 @@ static inline int pcibus_to_node(struct pci_bus *bus) | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ | SD_BALANCE_NEWIDLE \ - | SD_BALANCE_WAKE \ | SD_SERIALIZE, \ .last_balance = jiffies, \ .balance_interval = 1, \ diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h index a8cc564b703..f8c40cc6505 100644 --- a/arch/sh/include/asm/topology.h +++ b/arch/sh/include/asm/topology.h @@ -21,7 +21,6 @@ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_FORK \ | SD_BALANCE_EXEC \ - | SD_BALANCE_WAKE \ | SD_BALANCE_NEWIDLE \ | SD_SERIALIZE, \ .last_balance = jiffies, \ diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index 10b979d1de2..26cd25c0839 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h @@ -57,7 +57,6 @@ static inline int pcibus_to_node(struct pci_bus *pbus) .flags = SD_LOAD_BALANCE \ | SD_BALANCE_FORK \ | SD_BALANCE_EXEC \ - | SD_BALANCE_WAKE \ | SD_SERIALIZE, \ .last_balance = jiffies, \ .balance_interval = 1, \ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 589f12383d7..6f0695d744b 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -141,7 +141,7 @@ extern unsigned long node_remap_size[]; | 1*SD_BALANCE_NEWIDLE \ | 1*SD_BALANCE_EXEC \ | 1*SD_BALANCE_FORK \ - | 1*SD_BALANCE_WAKE \ + | 0*SD_BALANCE_WAKE \ | 1*SD_WAKE_AFFINE \ | 
0*SD_SHARE_CPUPOWER \ | 0*SD_POWERSAVINGS_BALANCE \ diff --git a/include/linux/topology.h b/include/linux/topology.h index a6614b0242a..809b26c0709 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -95,7 +95,7 @@ int arch_update_cpu_topology(void); | 1*SD_BALANCE_NEWIDLE \ | 1*SD_BALANCE_EXEC \ | 1*SD_BALANCE_FORK \ - | 1*SD_BALANCE_WAKE \ + | 0*SD_BALANCE_WAKE \ | 1*SD_WAKE_AFFINE \ | 1*SD_SHARE_CPUPOWER \ | 0*SD_POWERSAVINGS_BALANCE \ @@ -127,7 +127,7 @@ int arch_update_cpu_topology(void); | 1*SD_BALANCE_NEWIDLE \ | 1*SD_BALANCE_EXEC \ | 1*SD_BALANCE_FORK \ - | 1*SD_BALANCE_WAKE \ + | 0*SD_BALANCE_WAKE \ | 1*SD_WAKE_AFFINE \ | 1*SD_PREFER_LOCAL \ | 0*SD_SHARE_CPUPOWER \ @@ -160,7 +160,7 @@ int arch_update_cpu_topology(void); | 1*SD_BALANCE_NEWIDLE \ | 1*SD_BALANCE_EXEC \ | 1*SD_BALANCE_FORK \ - | 1*SD_BALANCE_WAKE \ + | 0*SD_BALANCE_WAKE \ | 1*SD_WAKE_AFFINE \ | 1*SD_PREFER_LOCAL \ | 0*SD_SHARE_CPUPOWER \ -- cgit v1.2.3-70-g09d2