diff options
Diffstat (limited to 'drivers/cpuidle')
-rw-r--r-- | drivers/cpuidle/coupled.c | 2 | ||||
-rw-r--r-- | drivers/cpuidle/cpuidle-powernv.c | 107 | ||||
-rw-r--r-- | drivers/cpuidle/cpuidle-pseries.c | 6 | ||||
-rw-r--r-- | drivers/cpuidle/cpuidle.c | 108 | ||||
-rw-r--r-- | drivers/cpuidle/driver.c | 2 | ||||
-rw-r--r-- | drivers/cpuidle/governors/menu.c | 75 |
6 files changed, 207 insertions, 93 deletions
diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c index e952936418d..cb6654bfad7 100644 --- a/drivers/cpuidle/coupled.c +++ b/drivers/cpuidle/coupled.c @@ -323,7 +323,7 @@ static void cpuidle_coupled_poke(int cpu) struct call_single_data *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu); if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poke_pending)) - __smp_call_function_single(cpu, csd, 0); + smp_call_function_single_async(cpu, csd); } /** diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 78fd174c57e..719f6fb5b1c 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -11,9 +11,18 @@ #include <linux/cpuidle.h> #include <linux/cpu.h> #include <linux/notifier.h> +#include <linux/clockchips.h> +#include <linux/of.h> #include <asm/machdep.h> #include <asm/firmware.h> +#include <asm/runlatch.h> + +/* Flags and constants used in PowerNV platform */ + +#define MAX_POWERNV_IDLE_STATES 8 +#define IDLE_USE_INST_NAP 0x00010000 /* Use nap instruction */ +#define IDLE_USE_INST_SLEEP 0x00020000 /* Use sleep instruction */ struct cpuidle_driver powernv_idle_driver = { .name = "powernv_idle", @@ -30,12 +39,14 @@ static int snooze_loop(struct cpuidle_device *dev, local_irq_enable(); set_thread_flag(TIF_POLLING_NRFLAG); + ppc64_runlatch_off(); while (!need_resched()) { HMT_low(); HMT_very_low(); } HMT_medium(); + ppc64_runlatch_on(); clear_thread_flag(TIF_POLLING_NRFLAG); smp_mb(); return index; @@ -45,14 +56,42 @@ static int nap_loop(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { + ppc64_runlatch_off(); power7_idle(); + ppc64_runlatch_on(); + return index; +} + +static int fastsleep_loop(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + unsigned long old_lpcr = mfspr(SPRN_LPCR); + unsigned long new_lpcr; + + if (unlikely(system_state < SYSTEM_RUNNING)) + return index; + + new_lpcr = old_lpcr; + new_lpcr &= ~(LPCR_MER | LPCR_PECE); /* lpcr[mer] 
must be 0 */ + + /* exit powersave upon external interrupt, but not decrementer + * interrupt. + */ + new_lpcr |= LPCR_PECE0; + + mtspr(SPRN_LPCR, new_lpcr); + power7_sleep(); + + mtspr(SPRN_LPCR, old_lpcr); + return index; } /* * States for dedicated partition case. */ -static struct cpuidle_state powernv_states[] = { +static struct cpuidle_state powernv_states[MAX_POWERNV_IDLE_STATES] = { { /* Snooze */ .name = "snooze", .desc = "snooze", @@ -60,13 +99,6 @@ static struct cpuidle_state powernv_states[] = { .exit_latency = 0, .target_residency = 0, .enter = &snooze_loop }, - { /* NAP */ - .name = "NAP", - .desc = "NAP", - .flags = CPUIDLE_FLAG_TIME_VALID, - .exit_latency = 10, - .target_residency = 100, - .enter = &nap_loop }, }; static int powernv_cpuidle_add_cpu_notifier(struct notifier_block *n, @@ -127,19 +159,74 @@ static int powernv_cpuidle_driver_init(void) return 0; } +static int powernv_add_idle_states(void) +{ + struct device_node *power_mgt; + struct property *prop; + int nr_idle_states = 1; /* Snooze */ + int dt_idle_states; + u32 *flags; + int i; + + /* Currently we have snooze statically defined */ + + power_mgt = of_find_node_by_path("/ibm,opal/power-mgt"); + if (!power_mgt) { + pr_warn("opal: PowerMgmt Node not found\n"); + return nr_idle_states; + } + + prop = of_find_property(power_mgt, "ibm,cpu-idle-state-flags", NULL); + if (!prop) { + pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-flags\n"); + return nr_idle_states; + } + + dt_idle_states = prop->length / sizeof(u32); + flags = (u32 *) prop->value; + + for (i = 0; i < dt_idle_states; i++) { + + if (flags[i] & IDLE_USE_INST_NAP) { + /* Add NAP state */ + strcpy(powernv_states[nr_idle_states].name, "Nap"); + strcpy(powernv_states[nr_idle_states].desc, "Nap"); + powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIME_VALID; + powernv_states[nr_idle_states].exit_latency = 10; + powernv_states[nr_idle_states].target_residency = 100; + powernv_states[nr_idle_states].enter = &nap_loop; + 
nr_idle_states++; + } + + if (flags[i] & IDLE_USE_INST_SLEEP) { + /* Add FASTSLEEP state */ + strcpy(powernv_states[nr_idle_states].name, "FastSleep"); + strcpy(powernv_states[nr_idle_states].desc, "FastSleep"); + powernv_states[nr_idle_states].flags = + CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TIMER_STOP; + powernv_states[nr_idle_states].exit_latency = 300; + powernv_states[nr_idle_states].target_residency = 1000000; + powernv_states[nr_idle_states].enter = &fastsleep_loop; + nr_idle_states++; + } + } + + return nr_idle_states; +} + /* * powernv_idle_probe() * Choose state table for shared versus dedicated partition */ static int powernv_idle_probe(void) { - if (cpuidle_disable != IDLE_NO_OVERRIDE) return -ENODEV; if (firmware_has_feature(FW_FEATURE_OPALv3)) { cpuidle_state_table = powernv_states; - max_idle_state = ARRAY_SIZE(powernv_states); + /* Device tree can indicate more idle states */ + max_idle_state = powernv_add_idle_states(); } else return -ENODEV; diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c index 7ab564aa0b1..6f7b0195688 100644 --- a/drivers/cpuidle/cpuidle-pseries.c +++ b/drivers/cpuidle/cpuidle-pseries.c @@ -17,6 +17,7 @@ #include <asm/reg.h> #include <asm/machdep.h> #include <asm/firmware.h> +#include <asm/runlatch.h> #include <asm/plpar_wrappers.h> struct cpuidle_driver pseries_idle_driver = { @@ -29,6 +30,7 @@ static struct cpuidle_state *cpuidle_state_table; static inline void idle_loop_prolog(unsigned long *in_purr) { + ppc64_runlatch_off(); *in_purr = mfspr(SPRN_PURR); /* * Indicate to the HV that we are idle. 
Now would be @@ -45,6 +47,10 @@ static inline void idle_loop_epilog(unsigned long in_purr) wait_cycles += mfspr(SPRN_PURR) - in_purr; get_lppaca()->wait_state_cycles = cpu_to_be64(wait_cycles); get_lppaca()->idle = 0; + + if (irqs_disabled()) + local_irq_enable(); + ppc64_runlatch_on(); } static int snooze_loop(struct cpuidle_device *dev, diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index a55e68f2cfc..8236746e46b 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -65,6 +65,26 @@ int cpuidle_play_dead(void) } /** + * cpuidle_enabled - check if the cpuidle framework is ready + * @dev: cpuidle device for this cpu + * @drv: cpuidle driver for this cpu + * + * Return 0 on success, otherwise: + * -ENODEV : the cpuidle framework is not available + * -EBUSY : the cpuidle framework is not initialized + */ +int cpuidle_enabled(struct cpuidle_driver *drv, struct cpuidle_device *dev) +{ + if (off || !initialized) + return -ENODEV; + + if (!drv || !dev || !dev->enabled) + return -EBUSY; + + return 0; +} + +/** * cpuidle_enter_state - enter the state and update stats * @dev: cpuidle device for this cpu * @drv: cpuidle driver for this cpu @@ -85,7 +105,8 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, time_end = ktime_get(); - local_irq_enable(); + if (!cpuidle_state_is_coupled(dev, drv, entered_state)) + local_irq_enable(); diff = ktime_to_us(ktime_sub(time_end, time_start)); if (diff > INT_MAX) @@ -108,61 +129,48 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, } /** - * cpuidle_idle_call - the main idle loop + * cpuidle_select - ask the cpuidle framework to choose an idle state * - * NOTE: no locks or semaphores should be used here - * return non-zero on failure + * @drv: the cpuidle driver + * @dev: the cpuidle device + * + * Returns the index of the idle state.
*/ -int cpuidle_idle_call(void) +int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) { - struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); - struct cpuidle_driver *drv; - int next_state, entered_state; - bool broadcast; - - if (off || !initialized) - return -ENODEV; - - /* check if the device is ready */ - if (!dev || !dev->enabled) - return -EBUSY; - - drv = cpuidle_get_cpu_driver(dev); - - /* ask the governor for the next state */ - next_state = cpuidle_curr_governor->select(drv, dev); - if (need_resched()) { - dev->last_residency = 0; - /* give the governor an opportunity to reflect on the outcome */ - if (cpuidle_curr_governor->reflect) - cpuidle_curr_governor->reflect(dev, next_state); - local_irq_enable(); - return 0; - } - - trace_cpu_idle_rcuidle(next_state, dev->cpu); - - broadcast = !!(drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP); - - if (broadcast) - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu); - - if (cpuidle_state_is_coupled(dev, drv, next_state)) - entered_state = cpuidle_enter_state_coupled(dev, drv, - next_state); - else - entered_state = cpuidle_enter_state(dev, drv, next_state); - - if (broadcast) - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); + return cpuidle_curr_governor->select(drv, dev); +} - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); +/** + * cpuidle_enter - enter into the specified idle state + * + * @drv: the cpuidle driver tied with the cpu + * @dev: the cpuidle device + * @index: the index in the idle state table + * + * Returns the index in the idle state, < 0 in case of error. 
+ * The error code depends on the backend driver + */ +int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, + int index) +{ + if (cpuidle_state_is_coupled(dev, drv, index)) + return cpuidle_enter_state_coupled(dev, drv, index); + return cpuidle_enter_state(dev, drv, index); +} - /* give the governor an opportunity to reflect on the outcome */ +/** + * cpuidle_reflect - tell the underlying governor what was the state + * we were in + * + * @dev : the cpuidle device + * @index: the index in the idle state table + * + */ +void cpuidle_reflect(struct cpuidle_device *dev, int index) +{ if (cpuidle_curr_governor->reflect) - cpuidle_curr_governor->reflect(dev, entered_state); - - return 0; + cpuidle_curr_governor->reflect(dev, index); } /** diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index 06dbe7c8619..136d6a283e0 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -209,7 +209,7 @@ static void poll_idle_init(struct cpuidle_driver *drv) state->exit_latency = 0; state->target_residency = 0; state->power_usage = -1; - state->flags = 0; + state->flags = CPUIDLE_FLAG_TIME_VALID; state->enter = poll_idle; state->disabled = false; } diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index cf7f2f0e4ef..71b52329335 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -122,9 +122,8 @@ struct menu_device { int last_state_idx; int needs_update; - unsigned int expected_us; + unsigned int next_timer_us; unsigned int predicted_us; - unsigned int exit_us; unsigned int bucket; unsigned int correction_factor[BUCKETS]; unsigned int intervals[INTERVALS]; @@ -257,7 +256,7 @@ again: stddev = int_sqrt(stddev); if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3)) || stddev <= 20) { - if (data->expected_us > avg) + if (data->next_timer_us > avg) data->predicted_us = avg; return; } @@ -289,7 +288,7 @@ static int menu_select(struct cpuidle_driver *drv, struct 
cpuidle_device *dev) struct menu_device *data = &__get_cpu_var(menu_devices); int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY); int i; - int multiplier; + unsigned int interactivity_req; struct timespec t; if (data->needs_update) { @@ -298,7 +297,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) } data->last_state_idx = 0; - data->exit_us = 0; /* Special case when user has set very strict latency requirement */ if (unlikely(latency_req == 0)) @@ -306,13 +304,11 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) /* determine the expected residency time, round up */ t = ktime_to_timespec(tick_nohz_get_sleep_length()); - data->expected_us = + data->next_timer_us = t.tv_sec * USEC_PER_SEC + t.tv_nsec / NSEC_PER_USEC; - data->bucket = which_bucket(data->expected_us); - - multiplier = performance_multiplier(); + data->bucket = which_bucket(data->next_timer_us); /* * if the correction factor is 0 (eg first time init or cpu hotplug @@ -326,17 +322,26 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) * operands are 32 bits. * Make sure to round up for half microseconds. */ - data->predicted_us = div_round64((uint64_t)data->expected_us * + data->predicted_us = div_round64((uint64_t)data->next_timer_us * data->correction_factor[data->bucket], RESOLUTION * DECAY); get_typical_interval(data); /* + * Performance multiplier defines a minimum predicted idle + * duration / latency ratio. Adjust the latency limit if + * necessary. + */ + interactivity_req = data->predicted_us / performance_multiplier(); + if (latency_req > interactivity_req) + latency_req = interactivity_req; + + /* * We want to default to C1 (hlt), not to busy polling * unless the timer is happening really really soon. 
*/ - if (data->expected_us > 5 && + if (data->next_timer_us > 5 && !drv->states[CPUIDLE_DRIVER_STATE_START].disabled && dev->states_usage[CPUIDLE_DRIVER_STATE_START].disable == 0) data->last_state_idx = CPUIDLE_DRIVER_STATE_START; @@ -355,11 +360,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) continue; if (s->exit_latency > latency_req) continue; - if (s->exit_latency * multiplier > data->predicted_us) - continue; data->last_state_idx = i; - data->exit_us = s->exit_latency; } return data->last_state_idx; @@ -390,36 +392,47 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) { struct menu_device *data = &__get_cpu_var(menu_devices); int last_idx = data->last_state_idx; - unsigned int last_idle_us = cpuidle_get_last_residency(dev); struct cpuidle_state *target = &drv->states[last_idx]; unsigned int measured_us; unsigned int new_factor; /* - * Ugh, this idle state doesn't support residency measurements, so we - * are basically lost in the dark. As a compromise, assume we slept - * for the whole expected time. + * Try to figure out how much time passed between entry to low + * power state and occurrence of the wakeup event. + * + * If the entered idle state didn't support residency measurements, + * we are basically lost in the dark how much time passed. + * As a compromise, assume we slept for the whole expected time. + * + * Any measured amount of time will include the exit latency. + * Since we are interested in when the wakeup began, not when it + * was completed, we must subtract the exit latency. However, if + * the measured amount of time is less than the exit latency, + * assume the state was never reached and the exit latency is 0.
*/ - if (unlikely(!(target->flags & CPUIDLE_FLAG_TIME_VALID))) - last_idle_us = data->expected_us; + if (unlikely(!(target->flags & CPUIDLE_FLAG_TIME_VALID))) { + /* Use timer value as is */ + measured_us = data->next_timer_us; + } else { + /* Use measured value */ + measured_us = cpuidle_get_last_residency(dev); - measured_us = last_idle_us; - - /* - * We correct for the exit latency; we are assuming here that the - * exit latency happens after the event that we're interested in. - */ - if (measured_us > data->exit_us) - measured_us -= data->exit_us; + /* Deduct exit latency */ + if (measured_us > target->exit_latency) + measured_us -= target->exit_latency; + /* Make sure our coefficients do not exceed unity */ + if (measured_us > data->next_timer_us) + measured_us = data->next_timer_us; + } /* Update our correction ratio */ new_factor = data->correction_factor[data->bucket]; new_factor -= new_factor / DECAY; - if (data->expected_us > 0 && measured_us < MAX_INTERESTING) - new_factor += RESOLUTION * measured_us / data->expected_us; + if (data->next_timer_us > 0 && measured_us < MAX_INTERESTING) + new_factor += RESOLUTION * measured_us / data->next_timer_us; else /* * we were idle so long that we count it as a perfect @@ -439,7 +452,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) data->correction_factor[data->bucket] = new_factor; /* update the repeating-pattern data */ - data->intervals[data->interval_ptr++] = last_idle_us; + data->intervals[data->interval_ptr++] = measured_us; if (data->interval_ptr >= INTERVALS) data->interval_ptr = 0; } |