From 83cd4fe27ad8446619b2e030b171b858501de87d Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 21 May 2010 17:09:41 -0700 Subject: sched: Change nohz idle load balancing logic to push model In the new push model, all idle CPUs indeed go into nohz mode. There is still the concept of an idle load balancer (performing the load balancing on behalf of all the idle CPUs in the system). A busy CPU kicks the nohz balancer when any of the nohz CPUs need idle load balancing. The kickee CPU does the idle load balancing on behalf of all idle CPUs instead of the normal idle balance. This addresses the following two problems with the current nohz ilb logic: * the idle load balancer continued to have periodic ticks during idle and woke up frequently, even though it did not have any rebalancing to do on behalf of any of the idle CPUs. * On x86 and CPUs that have APIC timer stoppage on idle CPUs, this periodic wakeup can result in a periodic additional interrupt on a CPU doing the timer broadcast. Also currently we are migrating the unpinned timers from an idle cpu to the cpu doing idle load balancing (when all the cpus in the system are idle, there is no idle load balancing cpu and timers get added to the same idle cpu where the request was made. So the existing optimization works only on a semi-idle system). And in a semi-idle system, we no longer have periodic ticks on the idle load balancer CPU. Using that cpu will add more delays to the timers than intended (as that cpu's timer base may not be up to date wrt jiffies etc). This was causing mysterious slowdowns during boot etc. For now, in the semi-idle case, use the nearest busy cpu for migrating timers from an idle cpu. This is good for power-savings anyway. 
Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: Peter Zijlstra Cc: Thomas Gleixner LKML-Reference: <1274486981.2840.46.camel@sbs-t61.sc.intel.com> Signed-off-by: Ingo Molnar --- kernel/hrtimer.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'kernel/hrtimer.c') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 5c69e996bd0..e934339fbbe 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -144,12 +144,8 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, static int hrtimer_get_target(int this_cpu, int pinned) { #ifdef CONFIG_NO_HZ - if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) { - int preferred_cpu = get_nohz_load_balancer(); - - if (preferred_cpu >= 0) - return preferred_cpu; - } + if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) + return get_nohz_timer_target(); #endif return this_cpu; } -- cgit v1.2.3-70-g09d2 From 8ab4351a4c888016620f43bde605b3d0964af339 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 13 Jul 2010 17:56:25 -0700 Subject: hrtimer: Cleanup direct access to wall_to_monotonic Provides an accessor function to replace hrtimer.c's direct access of wall_to_monotonic. 
This will allow wall_to_monotonic to be made static as planned in Documentation/feature-removal-schedule.txt Signed-off-by: John Stultz LKML-Reference: <1279068988-21864-9-git-send-email-johnstul@us.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/time.h | 3 ++- kernel/hrtimer.c | 9 ++++----- kernel/time/timekeeping.c | 5 +++++ 3 files changed, 11 insertions(+), 6 deletions(-) (limited to 'kernel/hrtimer.c') diff --git a/include/linux/time.h b/include/linux/time.h index 9072df83de1..a57e0f67b3d 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -126,7 +126,8 @@ extern int timekeeping_suspended; unsigned long get_seconds(void); struct timespec current_kernel_time(void); -struct timespec __current_kernel_time(void); /* does not hold xtime_lock */ +struct timespec __current_kernel_time(void); /* does not take xtime_lock */ +struct timespec __get_wall_to_monotonic(void); /* does not take xtime_lock */ struct timespec get_monotonic_coarse(void); #define CURRENT_TIME (current_kernel_time()) diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 5c69e996bd0..809f48c7055 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -90,7 +90,7 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) do { seq = read_seqbegin(&xtime_lock); xts = __current_kernel_time(); - tom = wall_to_monotonic; + tom = __get_wall_to_monotonic(); } while (read_seqretry(&xtime_lock, seq)); xtim = timespec_to_ktime(xts); @@ -612,7 +612,7 @@ static int hrtimer_reprogram(struct hrtimer *timer, static void retrigger_next_event(void *arg) { struct hrtimer_cpu_base *base; - struct timespec realtime_offset; + struct timespec realtime_offset, wtm; unsigned long seq; if (!hrtimer_hres_active()) @@ -620,10 +620,9 @@ static void retrigger_next_event(void *arg) do { seq = read_seqbegin(&xtime_lock); - set_normalized_timespec(&realtime_offset, - -wall_to_monotonic.tv_sec, - -wall_to_monotonic.tv_nsec); + wtm = __get_wall_to_monotonic(); } while (read_seqretry(&xtime_lock, 
seq)); + set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); base = &__get_cpu_var(hrtimer_bases); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b15c3acafd5..fb61c2ed366 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -858,6 +858,11 @@ struct timespec __current_kernel_time(void) return xtime; } +struct timespec __get_wall_to_monotonic(void) +{ + return wall_to_monotonic; +} + struct timespec current_kernel_time(void) { struct timespec now; -- cgit v1.2.3-70-g09d2