summaryrefslogtreecommitdiffstats
path: root/drivers/cpuidle
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/cpuidle')
-rw-r--r--drivers/cpuidle/coupled.c2
-rw-r--r--drivers/cpuidle/cpuidle-powernv.c107
-rw-r--r--drivers/cpuidle/cpuidle-pseries.c6
-rw-r--r--drivers/cpuidle/cpuidle.c108
-rw-r--r--drivers/cpuidle/driver.c2
-rw-r--r--drivers/cpuidle/governors/menu.c75
6 files changed, 207 insertions, 93 deletions
diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c
index e952936418d..cb6654bfad7 100644
--- a/drivers/cpuidle/coupled.c
+++ b/drivers/cpuidle/coupled.c
@@ -323,7 +323,7 @@ static void cpuidle_coupled_poke(int cpu)
struct call_single_data *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu);
if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poke_pending))
- __smp_call_function_single(cpu, csd, 0);
+ smp_call_function_single_async(cpu, csd);
}
/**
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index 78fd174c57e..719f6fb5b1c 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -11,9 +11,18 @@
#include <linux/cpuidle.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
+#include <linux/clockchips.h>
+#include <linux/of.h>
#include <asm/machdep.h>
#include <asm/firmware.h>
+#include <asm/runlatch.h>
+
+/* Flags and constants used in PowerNV platform */
+
+#define MAX_POWERNV_IDLE_STATES 8
+#define IDLE_USE_INST_NAP 0x00010000 /* Use nap instruction */
+#define IDLE_USE_INST_SLEEP 0x00020000 /* Use sleep instruction */
struct cpuidle_driver powernv_idle_driver = {
.name = "powernv_idle",
@@ -30,12 +39,14 @@ static int snooze_loop(struct cpuidle_device *dev,
local_irq_enable();
set_thread_flag(TIF_POLLING_NRFLAG);
+ ppc64_runlatch_off();
while (!need_resched()) {
HMT_low();
HMT_very_low();
}
HMT_medium();
+ ppc64_runlatch_on();
clear_thread_flag(TIF_POLLING_NRFLAG);
smp_mb();
return index;
@@ -45,14 +56,42 @@ static int nap_loop(struct cpuidle_device *dev,
struct cpuidle_driver *drv,
int index)
{
+ ppc64_runlatch_off();
power7_idle();
+ ppc64_runlatch_on();
+ return index;
+}
+
+static int fastsleep_loop(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
+{
+ unsigned long old_lpcr = mfspr(SPRN_LPCR);
+ unsigned long new_lpcr;
+
+ if (unlikely(system_state < SYSTEM_RUNNING))
+ return index;
+
+ new_lpcr = old_lpcr;
+ new_lpcr &= ~(LPCR_MER | LPCR_PECE); /* lpcr[mer] must be 0 */
+
+ /* exit powersave upon external interrupt, but not decrementer
+ * interrupt.
+ */
+ new_lpcr |= LPCR_PECE0;
+
+ mtspr(SPRN_LPCR, new_lpcr);
+ power7_sleep();
+
+ mtspr(SPRN_LPCR, old_lpcr);
+
return index;
}
/*
* States for dedicated partition case.
*/
-static struct cpuidle_state powernv_states[] = {
+static struct cpuidle_state powernv_states[MAX_POWERNV_IDLE_STATES] = {
{ /* Snooze */
.name = "snooze",
.desc = "snooze",
@@ -60,13 +99,6 @@ static struct cpuidle_state powernv_states[] = {
.exit_latency = 0,
.target_residency = 0,
.enter = &snooze_loop },
- { /* NAP */
- .name = "NAP",
- .desc = "NAP",
- .flags = CPUIDLE_FLAG_TIME_VALID,
- .exit_latency = 10,
- .target_residency = 100,
- .enter = &nap_loop },
};
static int powernv_cpuidle_add_cpu_notifier(struct notifier_block *n,
@@ -127,19 +159,74 @@ static int powernv_cpuidle_driver_init(void)
return 0;
}
+static int powernv_add_idle_states(void)
+{
+ struct device_node *power_mgt;
+ struct property *prop;
+ int nr_idle_states = 1; /* Snooze */
+ int dt_idle_states;
+ u32 *flags;
+ int i;
+
+ /* Currently we have snooze statically defined */
+
+ power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
+ if (!power_mgt) {
+ pr_warn("opal: PowerMgmt Node not found\n");
+ return nr_idle_states;
+ }
+
+ prop = of_find_property(power_mgt, "ibm,cpu-idle-state-flags", NULL);
+ if (!prop) {
+ pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-flags\n");
+ return nr_idle_states;
+ }
+
+ dt_idle_states = prop->length / sizeof(u32);
+ flags = (u32 *) prop->value;
+
+ for (i = 0; i < dt_idle_states; i++) {
+
+ if (flags[i] & IDLE_USE_INST_NAP) {
+ /* Add NAP state */
+ strcpy(powernv_states[nr_idle_states].name, "Nap");
+ strcpy(powernv_states[nr_idle_states].desc, "Nap");
+ powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIME_VALID;
+ powernv_states[nr_idle_states].exit_latency = 10;
+ powernv_states[nr_idle_states].target_residency = 100;
+ powernv_states[nr_idle_states].enter = &nap_loop;
+ nr_idle_states++;
+ }
+
+ if (flags[i] & IDLE_USE_INST_SLEEP) {
+ /* Add FASTSLEEP state */
+ strcpy(powernv_states[nr_idle_states].name, "FastSleep");
+ strcpy(powernv_states[nr_idle_states].desc, "FastSleep");
+ powernv_states[nr_idle_states].flags =
+ CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TIMER_STOP;
+ powernv_states[nr_idle_states].exit_latency = 300;
+ powernv_states[nr_idle_states].target_residency = 1000000;
+ powernv_states[nr_idle_states].enter = &fastsleep_loop;
+ nr_idle_states++;
+ }
+ }
+
+ return nr_idle_states;
+}
+
/*
* powernv_idle_probe()
* Choose state table for shared versus dedicated partition
*/
static int powernv_idle_probe(void)
{
-
if (cpuidle_disable != IDLE_NO_OVERRIDE)
return -ENODEV;
if (firmware_has_feature(FW_FEATURE_OPALv3)) {
cpuidle_state_table = powernv_states;
- max_idle_state = ARRAY_SIZE(powernv_states);
+ /* Device tree can indicate more idle states */
+ max_idle_state = powernv_add_idle_states();
} else
return -ENODEV;
diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
index 7ab564aa0b1..6f7b0195688 100644
--- a/drivers/cpuidle/cpuidle-pseries.c
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -17,6 +17,7 @@
#include <asm/reg.h>
#include <asm/machdep.h>
#include <asm/firmware.h>
+#include <asm/runlatch.h>
#include <asm/plpar_wrappers.h>
struct cpuidle_driver pseries_idle_driver = {
@@ -29,6 +30,7 @@ static struct cpuidle_state *cpuidle_state_table;
static inline void idle_loop_prolog(unsigned long *in_purr)
{
+ ppc64_runlatch_off();
*in_purr = mfspr(SPRN_PURR);
/*
* Indicate to the HV that we are idle. Now would be
@@ -45,6 +47,10 @@ static inline void idle_loop_epilog(unsigned long in_purr)
wait_cycles += mfspr(SPRN_PURR) - in_purr;
get_lppaca()->wait_state_cycles = cpu_to_be64(wait_cycles);
get_lppaca()->idle = 0;
+
+ if (irqs_disabled())
+ local_irq_enable();
+ ppc64_runlatch_on();
}
static int snooze_loop(struct cpuidle_device *dev,
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index a55e68f2cfc..8236746e46b 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -65,6 +65,26 @@ int cpuidle_play_dead(void)
}
/**
+ * cpuidle_enabled - check if the cpuidle framework is ready
+ * @dev: cpuidle device for this cpu
+ * @drv: cpuidle driver for this cpu
+ *
+ * Return 0 on success, otherwise:
+ * -NODEV : the cpuidle framework is not available
+ * -EBUSY : the cpuidle framework is not initialized
+ */
+int cpuidle_enabled(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+{
+ if (off || !initialized)
+ return -ENODEV;
+
+ if (!drv || !dev || !dev->enabled)
+ return -EBUSY;
+
+ return 0;
+}
+
+/**
* cpuidle_enter_state - enter the state and update stats
* @dev: cpuidle device for this cpu
* @drv: cpuidle driver for this cpu
@@ -85,7 +105,8 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
time_end = ktime_get();
- local_irq_enable();
+ if (!cpuidle_state_is_coupled(dev, drv, entered_state))
+ local_irq_enable();
diff = ktime_to_us(ktime_sub(time_end, time_start));
if (diff > INT_MAX)
@@ -108,61 +129,48 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
}
/**
- * cpuidle_idle_call - the main idle loop
+ * cpuidle_select - ask the cpuidle framework to choose an idle state
*
- * NOTE: no locks or semaphores should be used here
- * return non-zero on failure
+ * @drv: the cpuidle driver
+ * @dev: the cpuidle device
+ *
+ * Returns the index of the idle state.
*/
-int cpuidle_idle_call(void)
+int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
{
- struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
- struct cpuidle_driver *drv;
- int next_state, entered_state;
- bool broadcast;
-
- if (off || !initialized)
- return -ENODEV;
-
- /* check if the device is ready */
- if (!dev || !dev->enabled)
- return -EBUSY;
-
- drv = cpuidle_get_cpu_driver(dev);
-
- /* ask the governor for the next state */
- next_state = cpuidle_curr_governor->select(drv, dev);
- if (need_resched()) {
- dev->last_residency = 0;
- /* give the governor an opportunity to reflect on the outcome */
- if (cpuidle_curr_governor->reflect)
- cpuidle_curr_governor->reflect(dev, next_state);
- local_irq_enable();
- return 0;
- }
-
- trace_cpu_idle_rcuidle(next_state, dev->cpu);
-
- broadcast = !!(drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP);
-
- if (broadcast)
- clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
-
- if (cpuidle_state_is_coupled(dev, drv, next_state))
- entered_state = cpuidle_enter_state_coupled(dev, drv,
- next_state);
- else
- entered_state = cpuidle_enter_state(dev, drv, next_state);
-
- if (broadcast)
- clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+ return cpuidle_curr_governor->select(drv, dev);
+}
- trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
+/**
+ * cpuidle_enter - enter into the specified idle state
+ *
+ * @drv: the cpuidle driver tied with the cpu
+ * @dev: the cpuidle device
+ * @index: the index in the idle state table
+ *
+ * Returns the index in the idle state, < 0 in case of error.
+ * The error code depends on the backend driver
+ */
+int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+ int index)
+{
+ if (cpuidle_state_is_coupled(dev, drv, index))
+ return cpuidle_enter_state_coupled(dev, drv, index);
+ return cpuidle_enter_state(dev, drv, index);
+}
- /* give the governor an opportunity to reflect on the outcome */
+/**
+ * cpuidle_reflect - tell the underlying governor what was the state
+ * we were in
+ *
+ * @dev : the cpuidle device
+ * @index: the index in the idle state table
+ *
+ */
+void cpuidle_reflect(struct cpuidle_device *dev, int index)
+{
if (cpuidle_curr_governor->reflect)
- cpuidle_curr_governor->reflect(dev, entered_state);
-
- return 0;
+ cpuidle_curr_governor->reflect(dev, index);
}
/**
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index 06dbe7c8619..136d6a283e0 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -209,7 +209,7 @@ static void poll_idle_init(struct cpuidle_driver *drv)
state->exit_latency = 0;
state->target_residency = 0;
state->power_usage = -1;
- state->flags = 0;
+ state->flags = CPUIDLE_FLAG_TIME_VALID;
state->enter = poll_idle;
state->disabled = false;
}
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index cf7f2f0e4ef..71b52329335 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -122,9 +122,8 @@ struct menu_device {
int last_state_idx;
int needs_update;
- unsigned int expected_us;
+ unsigned int next_timer_us;
unsigned int predicted_us;
- unsigned int exit_us;
unsigned int bucket;
unsigned int correction_factor[BUCKETS];
unsigned int intervals[INTERVALS];
@@ -257,7 +256,7 @@ again:
stddev = int_sqrt(stddev);
if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3))
|| stddev <= 20) {
- if (data->expected_us > avg)
+ if (data->next_timer_us > avg)
data->predicted_us = avg;
return;
}
@@ -289,7 +288,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
struct menu_device *data = &__get_cpu_var(menu_devices);
int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
int i;
- int multiplier;
+ unsigned int interactivity_req;
struct timespec t;
if (data->needs_update) {
@@ -298,7 +297,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
}
data->last_state_idx = 0;
- data->exit_us = 0;
/* Special case when user has set very strict latency requirement */
if (unlikely(latency_req == 0))
@@ -306,13 +304,11 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
/* determine the expected residency time, round up */
t = ktime_to_timespec(tick_nohz_get_sleep_length());
- data->expected_us =
+ data->next_timer_us =
t.tv_sec * USEC_PER_SEC + t.tv_nsec / NSEC_PER_USEC;
- data->bucket = which_bucket(data->expected_us);
-
- multiplier = performance_multiplier();
+ data->bucket = which_bucket(data->next_timer_us);
/*
* if the correction factor is 0 (eg first time init or cpu hotplug
@@ -326,17 +322,26 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
* operands are 32 bits.
* Make sure to round up for half microseconds.
*/
- data->predicted_us = div_round64((uint64_t)data->expected_us *
+ data->predicted_us = div_round64((uint64_t)data->next_timer_us *
data->correction_factor[data->bucket],
RESOLUTION * DECAY);
get_typical_interval(data);
/*
+ * Performance multiplier defines a minimum predicted idle
+ * duration / latency ratio. Adjust the latency limit if
+ * necessary.
+ */
+ interactivity_req = data->predicted_us / performance_multiplier();
+ if (latency_req > interactivity_req)
+ latency_req = interactivity_req;
+
+ /*
* We want to default to C1 (hlt), not to busy polling
* unless the timer is happening really really soon.
*/
- if (data->expected_us > 5 &&
+ if (data->next_timer_us > 5 &&
!drv->states[CPUIDLE_DRIVER_STATE_START].disabled &&
dev->states_usage[CPUIDLE_DRIVER_STATE_START].disable == 0)
data->last_state_idx = CPUIDLE_DRIVER_STATE_START;
@@ -355,11 +360,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
continue;
if (s->exit_latency > latency_req)
continue;
- if (s->exit_latency * multiplier > data->predicted_us)
- continue;
data->last_state_idx = i;
- data->exit_us = s->exit_latency;
}
return data->last_state_idx;
@@ -390,36 +392,47 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
{
struct menu_device *data = &__get_cpu_var(menu_devices);
int last_idx = data->last_state_idx;
- unsigned int last_idle_us = cpuidle_get_last_residency(dev);
struct cpuidle_state *target = &drv->states[last_idx];
unsigned int measured_us;
unsigned int new_factor;
/*
- * Ugh, this idle state doesn't support residency measurements, so we
- * are basically lost in the dark. As a compromise, assume we slept
- * for the whole expected time.
+ * Try to figure out how much time passed between entry to low
+ * power state and occurrence of the wakeup event.
+ *
+ * If the entered idle state didn't support residency measurements,
+ * we are basically lost in the dark how much time passed.
+ * As a compromise, assume we slept for the whole expected time.
+ *
+ * Any measured amount of time will include the exit latency.
+ * Since we are interested in when the wakeup begun, not when it
+ * was completed, we must substract the exit latency. However, if
+ * the measured amount of time is less than the exit latency,
+ * assume the state was never reached and the exit latency is 0.
*/
- if (unlikely(!(target->flags & CPUIDLE_FLAG_TIME_VALID)))
- last_idle_us = data->expected_us;
+ if (unlikely(!(target->flags & CPUIDLE_FLAG_TIME_VALID))) {
+ /* Use timer value as is */
+ measured_us = data->next_timer_us;
+ } else {
+ /* Use measured value */
+ measured_us = cpuidle_get_last_residency(dev);
- measured_us = last_idle_us;
-
- /*
- * We correct for the exit latency; we are assuming here that the
- * exit latency happens after the event that we're interested in.
- */
- if (measured_us > data->exit_us)
- measured_us -= data->exit_us;
+ /* Deduct exit latency */
+ if (measured_us > target->exit_latency)
+ measured_us -= target->exit_latency;
+ /* Make sure our coefficients do not exceed unity */
+ if (measured_us > data->next_timer_us)
+ measured_us = data->next_timer_us;
+ }
/* Update our correction ratio */
new_factor = data->correction_factor[data->bucket];
new_factor -= new_factor / DECAY;
- if (data->expected_us > 0 && measured_us < MAX_INTERESTING)
- new_factor += RESOLUTION * measured_us / data->expected_us;
+ if (data->next_timer_us > 0 && measured_us < MAX_INTERESTING)
+ new_factor += RESOLUTION * measured_us / data->next_timer_us;
else
/*
* we were idle so long that we count it as a perfect
@@ -439,7 +452,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
data->correction_factor[data->bucket] = new_factor;
/* update the repeating-pattern data */
- data->intervals[data->interval_ptr++] = last_idle_us;
+ data->intervals[data->interval_ptr++] = measured_us;
if (data->interval_ptr >= INTERVALS)
data->interval_ptr = 0;
}