From a2bd92023357e47f22a34d4cb1635453546662bc Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Wed, 30 Jul 2008 19:21:42 -0700 Subject: cpuidle: Do not use poll_idle unless user asks for it poll_idle was added to CPUIDLE, just as a low latency idle handler, to be used in cases when user desires CPUs not to enter any idle state at all. It was supposed to be a run time idle=poll option to the user. But, it was indeed getting used during normal menu and ladder governor default case, with no special user setting (Reported by Linus Torvalds). Change below ensures that poll_idle will not be used unless user explicitly asks pm_qos infrastructure for zero latency requirement. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Andi Kleen --- drivers/cpuidle/governors/ladder.c | 14 ++++++++++---- drivers/cpuidle/governors/menu.c | 11 +++++++++-- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c index ba7b9a6b17a..27ab3bfe375 100644 --- a/drivers/cpuidle/governors/ladder.c +++ b/drivers/cpuidle/governors/ladder.c @@ -67,10 +67,17 @@ static int ladder_select_state(struct cpuidle_device *dev) struct ladder_device *ldev = &__get_cpu_var(ladder_devices); struct ladder_device_state *last_state; int last_residency, last_idx = ldev->last_state_idx; + int latency_req = pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY); if (unlikely(!ldev)) return 0; + /* Special case when user has set very strict latency requirement */ + if (unlikely(latency_req == 0)) { + ladder_do_selection(ldev, last_idx, 0); + return 0; + } + last_state = &ldev->states[last_idx]; if (dev->states[last_idx].flags & CPUIDLE_FLAG_TIME_VALID) @@ -81,8 +88,7 @@ static int ladder_select_state(struct cpuidle_device *dev) /* consider promotion */ if (last_idx < dev->state_count - 1 && last_residency > last_state->threshold.promotion_time && - dev->states[last_idx + 1].exit_latency <= - pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) { + dev->states[last_idx + 1].exit_latency <= latency_req) { last_state->stats.promotion_count++; last_state->stats.demotion_count = 0; if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) { @@ -92,7 +98,7 @@ static int ladder_select_state(struct cpuidle_device *dev) } /* consider demotion */ - if (last_idx > 0 && + if (last_idx > CPUIDLE_DRIVER_STATE_START && last_residency < last_state->threshold.demotion_time) { last_state->stats.demotion_count++; last_state->stats.promotion_count = 0; @@ -117,7 +123,7 @@ static int ladder_enable_device(struct cpuidle_device *dev) struct ladder_device_state *lstate; struct cpuidle_state *state; - ldev->last_state_idx = 0; + ldev->last_state_idx = CPUIDLE_DRIVER_STATE_START; for (i = 0; i < dev->state_count; i++) { state = &dev->states[i]; diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 78d77c5dc35..b8f3e21530b 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -34,21 +34,28 @@ static DEFINE_PER_CPU(struct menu_device, menu_devices); static int menu_select(struct cpuidle_device *dev) { struct menu_device *data = &__get_cpu_var(menu_devices); + int latency_req = pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY); int i; + /* Special case when user has set very strict latency requirement */ + if (unlikely(latency_req == 0)) { + data->last_state_idx = 0; + return 0; + } + /* determine the expected residency time */ data->expected_us = (u32) ktime_to_ns(tick_nohz_get_sleep_length()) / 1000; /* find the deepest idle state that satisfies our constraints */ - for (i = 1; i < dev->state_count; i++) { + for (i = CPUIDLE_DRIVER_STATE_START + 1; i < dev->state_count; i++) { struct cpuidle_state *s = &dev->states[i]; if (s->target_residency > data->expected_us) break; if (s->target_residency > data->predicted_us) break; - if (s->exit_latency > pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) + if (s->exit_latency > latency_req) break; } -- cgit v1.2.3-70-g09d2 From 320eee776357db52d6fcfb11cff985b1976a4595 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Wed, 30 Jul 2008 19:21:43 -0700 Subject: cpuidle: Menu governor fix wrong usage of measured_us There is a bug in menu governor where we have if (data->elapsed_us < data->elapsed_us + measured_us) with measured_us already having elapsed_us added in tickless case here unsigned int measured_us = cpuidle_get_last_residency(dev) + data->elapsed_us; Also, it should be last_residency, not measured_us, that need to be used to do comparing and distinguish between expected & non-expected events. Refactor menu_reflect() to fix these two problems. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Wei Gang Signed-off-by: Andi Kleen --- drivers/cpuidle/governors/menu.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index b8f3e21530b..8d7cf3f3145 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -74,9 +74,9 @@ static void menu_reflect(struct cpuidle_device *dev) { struct menu_device *data = &__get_cpu_var(menu_devices); int last_idx = data->last_state_idx; - unsigned int measured_us = - cpuidle_get_last_residency(dev) + data->elapsed_us; + unsigned int last_idle_us = cpuidle_get_last_residency(dev); struct cpuidle_state *target = &dev->states[last_idx]; + unsigned int measured_us; /* * Ugh, this idle state doesn't support residency measurements, so we @@ -84,20 +84,27 @@ static void menu_reflect(struct cpuidle_device *dev) * for one full standard timer tick. However, be aware that this * could potentially result in a suboptimal state transition. */ - if (!(target->flags & CPUIDLE_FLAG_TIME_VALID)) - measured_us = USEC_PER_SEC / HZ; + if (unlikely(!(target->flags & CPUIDLE_FLAG_TIME_VALID))) + last_idle_us = USEC_PER_SEC / HZ; - /* Predict time remaining until next break event */ - if (measured_us + BREAK_FUZZ < data->expected_us - target->exit_latency) { - data->predicted_us = max(measured_us, data->last_measured_us); + /* + * measured_us and elapsed_us are the cumulative idle time, since the + * last time we were woken out of idle by an interrupt. + */ + if (data->elapsed_us <= data->elapsed_us + last_idle_us) + measured_us = data->elapsed_us + last_idle_us; + else + measured_us = -1; + + /* Predict time until next break event */ + data->predicted_us = max(measured_us, data->last_measured_us); + + if (last_idle_us + BREAK_FUZZ < + data->expected_us - target->exit_latency) { data->last_measured_us = measured_us; data->elapsed_us = 0; } else { - if (data->elapsed_us < data->elapsed_us + measured_us) - data->elapsed_us = measured_us; - else - data->elapsed_us = -1; - data->predicted_us = max(measured_us, data->last_measured_us); + data->elapsed_us = measured_us; } } -- cgit v1.2.3-70-g09d2 From 06d9e908b2248f983b186aaf569c58e1430db85d Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Wed, 30 Jul 2008 19:21:44 -0700 Subject: cpuidle: Make ladder governor honor latency requirements fully ladder governor only honored latency requirement when promoting C-states. Instead. it should check for latency requirement on each idle call, and demote to appropriate C-state when there is a latency requirement change. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Andi Kleen --- drivers/cpuidle/governors/ladder.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c index 27ab3bfe375..a4bec3f919a 100644 --- a/drivers/cpuidle/governors/ladder.c +++ b/drivers/cpuidle/governors/ladder.c @@ -98,6 +98,18 @@ static int ladder_select_state(struct cpuidle_device *dev) } /* consider demotion */ + if (last_idx > CPUIDLE_DRIVER_STATE_START && + dev->states[last_idx].exit_latency > latency_req) { + int i; + + for (i = last_idx - 1; i > CPUIDLE_DRIVER_STATE_START; i--) { + if (dev->states[i].exit_latency <= latency_req) + break; + } + ladder_do_selection(ldev, last_idx, i); + return i; + } + if (last_idx > CPUIDLE_DRIVER_STATE_START && last_residency < last_state->threshold.demotion_time) { last_state->stats.demotion_count++; -- cgit v1.2.3-70-g09d2