From b1beab48f6148d50fee4a56d741cc3168fe1b995 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 31 Jan 2013 19:55:37 -0500 Subject: intel_idle: stop using driver_data for static flags The commit, 4202735e8ab6ecfb0381631a0d0b58fefe0bd4e2 (cpuidle: Split cpuidle_state structure and move per-cpu statistics fields) observed that the MWAIT flags for Cn on every processor to date were the same, and created get_driver_data() to supply them. Unfortunately, that assumption is false, going forward. So here we restore the MWAIT flags to the cpuidle_state table. However, instead restoring the old "driver_data" field, we put the flags into the existing "flags" field, where they probalby should have lived all along. This patch does not change any operation. This patch removes 1 of the 3 users of cpuidle_state_usage.driver_data. Perhaps some day we'll get rid of the other 2. Signed-off-by: Len Brown --- drivers/idle/intel_idle.c | 75 ++++++++++++++++------------------------------- 1 file changed, 26 insertions(+), 49 deletions(-) (limited to 'drivers') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 2df9414a72f..b2cf489ba3e 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -108,6 +108,16 @@ static struct cpuidle_state *cpuidle_state_table; */ #define CPUIDLE_FLAG_TLB_FLUSHED 0x10000 +/* + * MWAIT takes an 8-bit "hint" in EAX "suggesting" + * the C-state (top nibble) and sub-state (bottom nibble) + * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. + * + * We store the hint at the top of our "flags" for each state. + */ +#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) +#define MWAIT2flg(eax) ((eax & 0xFF) << 24) + /* * States are indexed by the cstate number, * which is also the index into the MWAIT hint array. @@ -118,21 +128,21 @@ static struct cpuidle_state nehalem_cstates[MWAIT_MAX_NUM_CSTATES] = { { /* MWAIT C1 */ .name = "C1-NHM", .desc = "MWAIT 0x00", - .flags = CPUIDLE_FLAG_TIME_VALID, + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, .exit_latency = 3, .target_residency = 6, .enter = &intel_idle }, { /* MWAIT C2 */ .name = "C3-NHM", .desc = "MWAIT 0x10", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 20, .target_residency = 80, .enter = &intel_idle }, { /* MWAIT C3 */ .name = "C6-NHM", .desc = "MWAIT 0x20", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 200, .target_residency = 800, .enter = &intel_idle }, @@ -143,28 +153,28 @@ static struct cpuidle_state snb_cstates[MWAIT_MAX_NUM_CSTATES] = { { /* MWAIT C1 */ .name = "C1-SNB", .desc = "MWAIT 0x00", - .flags = CPUIDLE_FLAG_TIME_VALID, + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, .exit_latency = 1, .target_residency = 1, .enter = &intel_idle }, { /* MWAIT C2 */ .name = "C3-SNB", .desc = "MWAIT 0x10", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 80, .target_residency = 211, .enter = &intel_idle }, { /* MWAIT C3 */ .name = "C6-SNB", .desc = "MWAIT 0x20", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 104, .target_residency = 345, .enter = &intel_idle }, { /* MWAIT C4 */ .name = "C7-SNB", .desc = "MWAIT 0x30", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 109, .target_residency = 345, .enter = &intel_idle }, @@ -175,28 +185,28 @@ static struct cpuidle_state ivb_cstates[MWAIT_MAX_NUM_CSTATES] = { { /* MWAIT C1 */ .name = "C1-IVB", .desc = "MWAIT 0x00", - .flags = CPUIDLE_FLAG_TIME_VALID, + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, .exit_latency = 1, .target_residency = 1, .enter = &intel_idle }, { /* MWAIT C2 */ .name = "C3-IVB", .desc = "MWAIT 0x10", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 59, .target_residency = 156, .enter = &intel_idle }, { /* MWAIT C3 */ .name = "C6-IVB", .desc = "MWAIT 0x20", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 80, .target_residency = 300, .enter = &intel_idle }, { /* MWAIT C4 */ .name = "C7-IVB", .desc = "MWAIT 0x30", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 87, .target_residency = 300, .enter = &intel_idle }, @@ -207,14 +217,14 @@ static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = { { /* MWAIT C1 */ .name = "C1-ATM", .desc = "MWAIT 0x00", - .flags = CPUIDLE_FLAG_TIME_VALID, + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, .exit_latency = 1, .target_residency = 4, .enter = &intel_idle }, { /* MWAIT C2 */ .name = "C2-ATM", .desc = "MWAIT 0x10", - .flags = CPUIDLE_FLAG_TIME_VALID, + .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID, .exit_latency = 20, .target_residency = 80, .enter = &intel_idle }, @@ -222,7 +232,7 @@ static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = { { /* MWAIT C4 */ .name = "C4-ATM", .desc = "MWAIT 0x30", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 100, .target_residency = 400, .enter = &intel_idle }, @@ -230,41 +240,12 @@ static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = { { /* MWAIT C6 */ .name = "C6-ATM", .desc = "MWAIT 0x52", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 140, .target_residency = 560, .enter = &intel_idle }, }; -static long get_driver_data(int cstate) -{ - int driver_data; - switch (cstate) { - - case 1: /* MWAIT C1 */ - driver_data = 0x00; - break; - case 2: /* MWAIT C2 */ - driver_data = 0x10; - break; - case 3: /* MWAIT C3 */ - driver_data = 0x20; - break; - case 4: /* MWAIT C4 */ - driver_data = 0x30; - break; - case 5: /* MWAIT C5 */ - driver_data = 0x40; - break; - case 6: /* MWAIT C6 */ - driver_data = 0x52; - break; - default: - driver_data = 0x00; - } - return driver_data; -} - /** * intel_idle * @dev: cpuidle_device @@ -278,8 +259,7 @@ static int intel_idle(struct cpuidle_device *dev, { unsigned long ecx = 1; /* break on interrupt flag */ struct cpuidle_state *state = &drv->states[index]; - struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; - unsigned long eax = (unsigned long)cpuidle_get_statedata(state_usage); + unsigned long eax = flg2MWAIT(state->flags); unsigned int cstate; int cpu = smp_processor_id(); @@ -558,9 +538,6 @@ static int intel_idle_cpu_init(int cpu) if (cpuidle_state_table[cstate].enter == NULL) continue; - dev->states_usage[dev->state_count].driver_data = - (void *)get_driver_data(cstate); - dev->state_count += 1; } -- cgit v1.2.3-70-g09d2 From 41cdb0efc42ed3c10f56e0740db28eff086af62b Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 4 Feb 2013 22:44:40 +0000 Subject: ACPI / idle: remove unused definition The different definitions are not used anywhere in the code. Signed-off-by: Daniel Lezcano Signed-off-by: Len Brown --- drivers/acpi/processor_idle.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers') diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index ed9a1cc690b..5cb5f247d2b 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -64,10 +64,6 @@ #define ACPI_PROCESSOR_CLASS "processor" #define _COMPONENT ACPI_PROCESSOR_COMPONENT ACPI_MODULE_NAME("processor_idle"); -#define PM_TIMER_TICK_NS (1000000000ULL/PM_TIMER_FREQUENCY) -#define C2_OVERHEAD 1 /* 1us */ -#define C3_OVERHEAD 1 /* 1us */ -#define PM_TIMER_TICKS_TO_US(p) (((p) * 1000)/(PM_TIMER_FREQUENCY/1000)) static unsigned int max_cstate __read_mostly = ACPI_PROCESSOR_MAX_POWER; module_param(max_cstate, uint, 0000); -- cgit v1.2.3-70-g09d2 From e2668fb53ffd3e01c3116f65a9047a25e4c66399 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 4 Feb 2013 22:44:41 +0000 Subject: ACPI / idle : remove pointless headers These different headers are not needed. Signed-off-by: Daniel Lezcano Signed-off-by: Len Brown --- drivers/acpi/processor_idle.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 5cb5f247d2b..82626e9abe7 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -28,19 +28,12 @@ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -#include #include -#include -#include -#include #include #include -#include -#include /* need_resched() */ -#include +#include /* need_resched() */ #include #include -#include /* * Include the apic definitions for x86 to have the APIC timer related defines @@ -52,12 +45,8 @@ #include #endif -#include -#include - #include #include -#include #define PREFIX "ACPI: " -- cgit v1.2.3-70-g09d2 From 6ef0f086beafb3c7d81cd3de81d886d105ea25b2 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 4 Feb 2013 22:44:42 +0000 Subject: ACPI / idle: pass the cpuidle_device parameter The cpuidle_device is retrieved in the function by using directly the global variable. But the caller of this function already have this device and it can be passed as a parameter. That is one small step to encapsulate the code more. Signed-off-by: Daniel Lezcano Signed-off-by: Len Brown --- drivers/acpi/processor_idle.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 82626e9abe7..68370659df6 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -928,13 +928,14 @@ struct cpuidle_driver acpi_idle_driver = { * device i.e. per-cpu data * * @pr: the ACPI processor + * @dev : the cpuidle device */ -static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr) +static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr, + struct cpuidle_device *dev) { int i, count = CPUIDLE_DRIVER_STATE_START; struct acpi_processor_cx *cx; struct cpuidle_state_usage *state_usage; - struct cpuidle_device *dev = per_cpu(acpi_cpuidle_device, pr->id); if (!pr->flags.power_setup_done) return -EINVAL; @@ -1089,7 +1090,7 @@ int acpi_processor_hotplug(struct acpi_processor *pr) cpuidle_disable_device(dev); acpi_processor_get_power_info(pr); if (pr->flags.power) { - acpi_processor_setup_cpuidle_cx(pr); + acpi_processor_setup_cpuidle_cx(pr, dev); ret = cpuidle_enable_device(dev); } cpuidle_resume_and_unlock(); @@ -1147,8 +1148,8 @@ int acpi_processor_cst_has_changed(struct acpi_processor *pr) continue; acpi_processor_get_power_info(_pr); if (_pr->flags.power) { - acpi_processor_setup_cpuidle_cx(_pr); dev = per_cpu(acpi_cpuidle_device, cpu); + acpi_processor_setup_cpuidle_cx(_pr, dev); cpuidle_enable_device(dev); } } @@ -1217,7 +1218,7 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr) return -ENOMEM; per_cpu(acpi_cpuidle_device, pr->id) = dev; - acpi_processor_setup_cpuidle_cx(pr); + acpi_processor_setup_cpuidle_cx(pr, dev); /* Register per-cpu cpuidle_device. Cpuidle driver * must already be registered before registering device -- cgit v1.2.3-70-g09d2 From ac3ebafa81af76d65e4fb45c6388f08e90ddcc6d Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 4 Feb 2013 22:44:43 +0000 Subject: ACPI / idle: remove usage of the statedata Len Brown sent a patch to remove this field in the intel_idle driver. The other user of this field is the davinci cpuidle driver and a patch has been sent to remove the usage of it. This patch removes the last user of this field. Signed-off-by: Daniel Lezcano Signed-off-by: Len Brown --- drivers/acpi/processor_idle.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 68370659df6..8b433cb08a3 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -66,6 +66,8 @@ module_param(latency_factor, uint, 0644); static DEFINE_PER_CPU(struct cpuidle_device *, acpi_cpuidle_device); +static struct acpi_processor_cx *acpi_cstate[CPUIDLE_STATE_MAX]; + static int disabled_by_idle_boot_param(void) { return boot_option_idle_override == IDLE_POLL || @@ -721,8 +723,7 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { struct acpi_processor *pr; - struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; - struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage); + struct acpi_processor_cx *cx = acpi_cstate[index]; pr = __this_cpu_read(processors); @@ -745,8 +746,7 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev, */ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index) { - struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; - struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage); + struct acpi_processor_cx *cx = acpi_cstate[index]; ACPI_FLUSH_CPU_CACHE(); @@ -776,8 +776,7 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { struct acpi_processor *pr; - struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; - struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage); + struct acpi_processor_cx *cx = acpi_cstate[index]; pr = __this_cpu_read(processors); @@ -835,8 +834,7 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { struct acpi_processor *pr; - struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; - struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage); + struct acpi_processor_cx *cx = acpi_cstate[index]; pr = __this_cpu_read(processors); @@ -935,7 +933,6 @@ static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr, { int i, count = CPUIDLE_DRIVER_STATE_START; struct acpi_processor_cx *cx; - struct cpuidle_state_usage *state_usage; if (!pr->flags.power_setup_done) return -EINVAL; @@ -954,7 +951,6 @@ static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr, for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) { cx = &pr->power.states[i]; - state_usage = &dev->states_usage[count]; if (!cx->valid) continue; @@ -965,8 +961,7 @@ static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr, !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED)) continue; #endif - - cpuidle_set_statedata(state_usage, cx); + acpi_cstate[count] = cx; count++; if (count == CPUIDLE_STATE_MAX) -- cgit v1.2.3-70-g09d2 From 85a4d2d41dc6d1c0296326204a857a9fab864a31 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 31 Jan 2013 14:40:49 -0500 Subject: intel_idle: support Haswell This patch enables intel_idle to run on the next-generation Intel(R) Microarchitecture code named "Haswell". Signed-off-by: Len Brown --- drivers/idle/intel_idle.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'drivers') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index b2cf489ba3e..fa714774b96 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -212,6 +212,38 @@ static struct cpuidle_state ivb_cstates[MWAIT_MAX_NUM_CSTATES] = { .enter = &intel_idle }, }; +static struct cpuidle_state hsw_cstates[MWAIT_MAX_NUM_CSTATES] = { + { /* MWAIT C0 */ }, + { /* MWAIT C1 */ + .name = "C1-HSW", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 2, + .target_residency = 2, + .enter = &intel_idle }, + { /* MWAIT C2 */ + .name = "C3-HSW", + .desc = "MWAIT 0x10", + .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 33, + .target_residency = 100, + .enter = &intel_idle }, + { /* MWAIT C3 */ + .name = "C6-HSW", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 133, + .target_residency = 400, + .enter = &intel_idle }, + { /* MWAIT C4 */ + .name = "C7s-HSW", + .desc = "MWAIT 0x32", + .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 166, + .target_residency = 500, + .enter = &intel_idle }, +}; + static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = { { /* MWAIT C0 */ }, { /* MWAIT C1 */ @@ -365,6 +397,10 @@ static const struct idle_cpu idle_cpu_ivb = { .state_table = ivb_cstates, }; +static const struct idle_cpu idle_cpu_hsw = { + .state_table = hsw_cstates, +}; + #define ICPU(model, cpu) \ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, (unsigned long)&cpu } @@ -382,6 +418,9 @@ static const struct x86_cpu_id intel_idle_ids[] = { ICPU(0x2d, idle_cpu_snb), ICPU(0x3a, idle_cpu_ivb), ICPU(0x3e, idle_cpu_ivb), + ICPU(0x3c, idle_cpu_hsw), + ICPU(0x3f, idle_cpu_hsw), + ICPU(0x45, idle_cpu_hsw), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids); -- cgit v1.2.3-70-g09d2 From 137ecc779c80138723677209730738d76262e810 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 1 Feb 2013 21:35:35 -0500 Subject: intel_idle: remove use and definition of MWAIT_MAX_NUM_CSTATES Cosmetic only. Replace use of MWAIT_MAX_NUM_CSTATES with CPUIDLE_STATE_MAX. They are both 8, so this patch has no functional change. The reason to change is that intel_idle will soon be able to export more than the 8 "major" states supported by MWAIT. When we hit that limit, it is important to know where the limit comes from. Signed-off-by: Len Brown --- arch/x86/include/asm/mwait.h | 1 - drivers/idle/intel_idle.c | 16 ++++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index bcdff997668..3f447320ce8 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -4,7 +4,6 @@ #define MWAIT_SUBSTATE_MASK 0xf #define MWAIT_CSTATE_MASK 0xf #define MWAIT_SUBSTATE_SIZE 4 -#define MWAIT_MAX_NUM_CSTATES 8 #define CPUID_MWAIT_LEAF 5 #define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1 diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index fa714774b96..c949a6f25a8 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -74,7 +74,7 @@ static struct cpuidle_driver intel_idle_driver = { .en_core_tk_irqen = 1, }; /* intel_idle.max_cstate=0 disables driver */ -static int max_cstate = MWAIT_MAX_NUM_CSTATES - 1; +static int max_cstate = CPUIDLE_STATE_MAX - 1; static unsigned int mwait_substates; @@ -123,7 +123,7 @@ static struct cpuidle_state *cpuidle_state_table; * which is also the index into the MWAIT hint array. * Thus C0 is a dummy. */ -static struct cpuidle_state nehalem_cstates[MWAIT_MAX_NUM_CSTATES] = { +static struct cpuidle_state nehalem_cstates[CPUIDLE_STATE_MAX] = { { /* MWAIT C0 */ }, { /* MWAIT C1 */ .name = "C1-NHM", @@ -148,7 +148,7 @@ static struct cpuidle_state nehalem_cstates[MWAIT_MAX_NUM_CSTATES] = { .enter = &intel_idle }, }; -static struct cpuidle_state snb_cstates[MWAIT_MAX_NUM_CSTATES] = { +static struct cpuidle_state snb_cstates[CPUIDLE_STATE_MAX] = { { /* MWAIT C0 */ }, { /* MWAIT C1 */ .name = "C1-SNB", @@ -180,7 +180,7 @@ static struct cpuidle_state snb_cstates[MWAIT_MAX_NUM_CSTATES] = { .enter = &intel_idle }, }; -static struct cpuidle_state ivb_cstates[MWAIT_MAX_NUM_CSTATES] = { +static struct cpuidle_state ivb_cstates[CPUIDLE_STATE_MAX] = { { /* MWAIT C0 */ }, { /* MWAIT C1 */ .name = "C1-IVB", @@ -212,7 +212,7 @@ static struct cpuidle_state ivb_cstates[MWAIT_MAX_NUM_CSTATES] = { .enter = &intel_idle }, }; -static struct cpuidle_state hsw_cstates[MWAIT_MAX_NUM_CSTATES] = { +static struct cpuidle_state hsw_cstates[CPUIDLE_STATE_MAX] = { { /* MWAIT C0 */ }, { /* MWAIT C1 */ .name = "C1-HSW", @@ -244,7 +244,7 @@ static struct cpuidle_state hsw_cstates[MWAIT_MAX_NUM_CSTATES] = { .enter = &intel_idle }, }; -static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = { +static struct cpuidle_state atom_cstates[CPUIDLE_STATE_MAX] = { { /* MWAIT C0 */ }, { /* MWAIT C1 */ .name = "C1-ATM", @@ -503,7 +503,7 @@ static int intel_idle_cpuidle_driver_init(void) drv->state_count = 1; - for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) { + for (cstate = 1; cstate < CPUIDLE_STATE_MAX; ++cstate) { int num_substates; if (cstate > max_cstate) { @@ -560,7 +560,7 @@ static int intel_idle_cpu_init(int cpu) dev->state_count = 1; - for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) { + for (cstate = 1; cstate < CPUIDLE_STATE_MAX; ++cstate) { int num_substates; if (cstate > max_cstate) { -- cgit v1.2.3-70-g09d2 From e022e7eb90f3edb83f9ff77825eda3d1b3a2f2e0 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 1 Feb 2013 23:37:30 -0500 Subject: intel_idle: remove assumption of one C-state per MWAIT flag Remove the assumption that cstate_tables are indexed by MWAIT flag values. Each entry identifies itself via its own flags value. This change is needed to support multiple states that share the same MWAIT flags. Note that this can have an effect on what state is described by 'N' on cmdline intel_idle.max_cstate=N on some systems. intel_idle.max_cstate=0 still disables the driver intel_idle.max_cstate=1 still results in just C1(E) However, "place holders" in the sparse C-state name-space (eg. Atom) have been removed. Signed-off-by: Len Brown --- arch/x86/include/asm/mwait.h | 2 + drivers/idle/intel_idle.c | 110 +++++++++++++++++++++++-------------------- 2 files changed, 61 insertions(+), 51 deletions(-) (limited to 'drivers') diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index 3f447320ce8..2f366d0ac6b 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -4,6 +4,8 @@ #define MWAIT_SUBSTATE_MASK 0xf #define MWAIT_CSTATE_MASK 0xf #define MWAIT_SUBSTATE_SIZE 4 +#define MWAIT_HINT2CSTATE(hint) (((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) +#define MWAIT_HINT2SUBSTATE(hint) ((hint) & MWAIT_CSTATE_MASK) #define CPUID_MWAIT_LEAF 5 #define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1 diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index c949a6f25a8..927cfb4d66f 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -124,158 +124,161 @@ static struct cpuidle_state *cpuidle_state_table; * Thus C0 is a dummy. */ static struct cpuidle_state nehalem_cstates[CPUIDLE_STATE_MAX] = { - { /* MWAIT C0 */ }, - { /* MWAIT C1 */ + { .name = "C1-NHM", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, .exit_latency = 3, .target_residency = 6, .enter = &intel_idle }, - { /* MWAIT C2 */ + { .name = "C3-NHM", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 20, .target_residency = 80, .enter = &intel_idle }, - { /* MWAIT C3 */ + { .name = "C6-NHM", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 200, .target_residency = 800, .enter = &intel_idle }, + { + .enter = NULL } }; static struct cpuidle_state snb_cstates[CPUIDLE_STATE_MAX] = { - { /* MWAIT C0 */ }, - { /* MWAIT C1 */ + { .name = "C1-SNB", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, .exit_latency = 1, .target_residency = 1, .enter = &intel_idle }, - { /* MWAIT C2 */ + { .name = "C3-SNB", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 80, .target_residency = 211, .enter = &intel_idle }, - { /* MWAIT C3 */ + { .name = "C6-SNB", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 104, .target_residency = 345, .enter = &intel_idle }, - { /* MWAIT C4 */ + { .name = "C7-SNB", .desc = "MWAIT 0x30", .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 109, .target_residency = 345, .enter = &intel_idle }, + { + .enter = NULL } }; static struct cpuidle_state ivb_cstates[CPUIDLE_STATE_MAX] = { - { /* MWAIT C0 */ }, - { /* MWAIT C1 */ + { .name = "C1-IVB", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, .exit_latency = 1, .target_residency = 1, .enter = &intel_idle }, - { /* MWAIT C2 */ + { .name = "C3-IVB", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 59, .target_residency = 156, .enter = &intel_idle }, - { /* MWAIT C3 */ + { .name = "C6-IVB", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 80, .target_residency = 300, .enter = &intel_idle }, - { /* MWAIT C4 */ + { .name = "C7-IVB", .desc = "MWAIT 0x30", .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 87, .target_residency = 300, .enter = &intel_idle }, + { + .enter = NULL } }; static struct cpuidle_state hsw_cstates[CPUIDLE_STATE_MAX] = { - { /* MWAIT C0 */ }, - { /* MWAIT C1 */ + { .name = "C1-HSW", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, .exit_latency = 2, .target_residency = 2, .enter = &intel_idle }, - { /* MWAIT C2 */ + { .name = "C3-HSW", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 33, .target_residency = 100, .enter = &intel_idle }, - { /* MWAIT C3 */ + { .name = "C6-HSW", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 133, .target_residency = 400, .enter = &intel_idle }, - { /* MWAIT C4 */ + { .name = "C7s-HSW", .desc = "MWAIT 0x32", .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 166, .target_residency = 500, .enter = &intel_idle }, + { + .enter = NULL } }; static struct cpuidle_state atom_cstates[CPUIDLE_STATE_MAX] = { - { /* MWAIT C0 */ }, - { /* MWAIT C1 */ + { .name = "C1-ATM", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, .exit_latency = 1, .target_residency = 4, .enter = &intel_idle }, - { /* MWAIT C2 */ + { .name = "C2-ATM", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID, .exit_latency = 20, .target_residency = 80, .enter = &intel_idle }, - { /* MWAIT C3 */ }, - { /* MWAIT C4 */ + { .name = "C4-ATM", .desc = "MWAIT 0x30", .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 100, .target_residency = 400, .enter = &intel_idle }, - { /* MWAIT C5 */ }, - { /* MWAIT C6 */ + { .name = "C6-ATM", .desc = "MWAIT 0x52", .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 140, .target_residency = 560, .enter = &intel_idle }, + { + .enter = NULL } }; /** @@ -503,32 +506,31 @@ static int intel_idle_cpuidle_driver_init(void) drv->state_count = 1; - for (cstate = 1; cstate < CPUIDLE_STATE_MAX; ++cstate) { - int num_substates; + for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { + int num_substates, mwait_hint, mwait_cstate, mwait_substate; - if (cstate > max_cstate) { + if (cpuidle_state_table[cstate].enter == NULL) + break; + + if (cstate + 1 > max_cstate) { printk(PREFIX "max_cstate %d reached\n", max_cstate); break; } + mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags); + mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint); + mwait_substate = MWAIT_HINT2SUBSTATE(mwait_hint); + /* does the state exist in CPUID.MWAIT? */ - num_substates = (mwait_substates >> ((cstate) * 4)) + num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4)) & MWAIT_SUBSTATE_MASK; - if (num_substates == 0) - continue; - /* is the state not enabled? */ - if (cpuidle_state_table[cstate].enter == NULL) { - /* does the driver not know about the state? */ - if (*cpuidle_state_table[cstate].name == '\0') - pr_debug(PREFIX "unaware of model 0x%x" - " MWAIT %d please" - " contact lenb@kernel.org\n", - boot_cpu_data.x86_model, cstate); + + /* if sub-state in table is not enumerated by CPUID */ + if ((mwait_substate + 1) > num_substates) continue; - } - if ((cstate > 2) && + if (((mwait_cstate + 1) > 2) && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) mark_tsc_unstable("TSC halts in idle" " states deeper than C2"); @@ -560,21 +562,27 @@ static int intel_idle_cpu_init(int cpu) dev->state_count = 1; - for (cstate = 1; cstate < CPUIDLE_STATE_MAX; ++cstate) { - int num_substates; + for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { + int num_substates, mwait_hint, mwait_cstate, mwait_substate; - if (cstate > max_cstate) { + if (cpuidle_state_table[cstate].enter == NULL) + continue; + + if (cstate + 1 > max_cstate) { printk(PREFIX "max_cstate %d reached\n", max_cstate); break; } + mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags); + mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint); + mwait_substate = MWAIT_HINT2SUBSTATE(mwait_hint); + /* does the state exist in CPUID.MWAIT? */ - num_substates = (mwait_substates >> ((cstate) * 4)) - & MWAIT_SUBSTATE_MASK; - if (num_substates == 0) - continue; - /* is the state not enabled? */ - if (cpuidle_state_table[cstate].enter == NULL) + num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4)) + & MWAIT_SUBSTATE_MASK; + + /* if sub-state in table is not enumerated by CPUID */ + if ((mwait_substate + 1) > num_substates) continue; dev->state_count += 1; -- cgit v1.2.3-70-g09d2 From 69fb3676df3329a7142803bb3502fa59dc0db2e3 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 10 Feb 2013 01:38:39 -0500 Subject: x86 idle: remove mwait_idle() and "idle=mwait" cmdline param mwait_idle() is a C1-only idle loop intended to be more efficient than HLT, starting on Pentium-4 HT-enabled processors. But mwait_idle() has been replaced by the more general mwait_idle_with_hints(), which handles both C1 and deeper C-states. ACPI processor_idle and intel_idle use only mwait_idle_with_hints(), and no longer use mwait_idle(). Here we simplify the x86 native idle code by removing mwait_idle(), and the "idle=mwait" bootparam used to invoke it. Since Linux 3.0 there has been a boot-time warning when "idle=mwait" was invoked saying it would be removed in 2012. This removal was also noted in the (now removed:-) feature-removal-schedule.txt. After this change, kernels configured with (CONFIG_ACPI=n && CONFIG_INTEL_IDLE=n) when run on hardware that supports MWAIT will simply use HLT. If MWAIT is desired on those systems, cpuidle and the cpuidle drivers above can be enabled. Signed-off-by: Len Brown Cc: x86@kernel.org --- Documentation/kernel-parameters.txt | 7 +--- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/process.c | 79 +------------------------------------ arch/x86/kernel/smpboot.c | 2 +- drivers/acpi/processor_idle.c | 1 - 5 files changed, 4 insertions(+), 87 deletions(-) (limited to 'drivers') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 363e348bff9..3b0cd1e612d 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1039,16 +1039,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Claim all unknown PCI IDE storage controllers. idle= [X86] - Format: idle=poll, idle=mwait, idle=halt, idle=nomwait + Format: idle=poll, idle=halt, idle=nomwait Poll forces a polling idle loop that can slightly improve the performance of waking up a idle CPU, but will use a lot of power and make the system run hot. Not recommended. - idle=mwait: On systems which support MONITOR/MWAIT but - the kernel chose to not use it because it doesn't save - as much power as a normal idle loop, use the - MONITOR/MWAIT idle loop anyways. Performance should be - the same as idle=poll. idle=halt: Halt is forced to be used for CPU idle. In such case C2/C3 won't be used again. idle=nomwait: Disable mwait for CPU C-states diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c2f7f472275..8a28feae1c9 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -725,7 +725,7 @@ extern unsigned long boot_option_idle_override; extern bool amd_e400_c1e_detected; enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT, - IDLE_POLL, IDLE_FORCE_MWAIT}; + IDLE_POLL}; extern void enable_sep_cpu(void); extern int sysenter_setup(void); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7ed9f6b08ba..cd5a4c9ef83 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -421,27 +421,6 @@ void stop_this_cpu(void *dummy) } } -/* Default MONITOR/MWAIT with no hints, used for default C1 state */ -static void mwait_idle(void) -{ - if (!need_resched()) { - trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id()); - trace_cpu_idle_rcuidle(1, smp_processor_id()); - if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) - clflush((void *)¤t_thread_info()->flags); - - __monitor((void *)¤t_thread_info()->flags, 0, 0); - smp_mb(); - if (!need_resched()) - __sti_mwait(0, 0); - else - local_irq_enable(); - trace_power_end_rcuidle(smp_processor_id()); - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); - } else - local_irq_enable(); -} - /* * On SMP it's slightly faster (but much more power-consuming!) * to poll the ->work.need_resched flag instead of waiting for the @@ -458,53 +437,6 @@ static void poll_idle(void) trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } -/* - * mwait selection logic: - * - * It depends on the CPU. For AMD CPUs that support MWAIT this is - * wrong. Family 0x10 and 0x11 CPUs will enter C1 on HLT. Powersavings - * then depend on a clock divisor and current Pstate of the core. If - * all cores of a processor are in halt state (C1) the processor can - * enter the C1E (C1 enhanced) state. If mwait is used this will never - * happen. - * - * idle=mwait overrides this decision and forces the usage of mwait. - */ - -#define MWAIT_INFO 0x05 -#define MWAIT_ECX_EXTENDED_INFO 0x01 -#define MWAIT_EDX_C1 0xf0 - -int mwait_usable(const struct cpuinfo_x86 *c) -{ - u32 eax, ebx, ecx, edx; - - /* Use mwait if idle=mwait boot option is given */ - if (boot_option_idle_override == IDLE_FORCE_MWAIT) - return 1; - - /* - * Any idle= boot option other than idle=mwait means that we must not - * use mwait. Eg: idle=halt or idle=poll or idle=nomwait - */ - if (boot_option_idle_override != IDLE_NO_OVERRIDE) - return 0; - - if (c->cpuid_level < MWAIT_INFO) - return 0; - - cpuid(MWAIT_INFO, &eax, &ebx, &ecx, &edx); - /* Check, whether EDX has extended info about MWAIT */ - if (!(ecx & MWAIT_ECX_EXTENDED_INFO)) - return 1; - - /* - * edx enumeratios MONITOR/MWAIT extensions. Check, whether - * C1 supports MWAIT - */ - return (edx & MWAIT_EDX_C1); -} - bool amd_e400_c1e_detected; EXPORT_SYMBOL(amd_e400_c1e_detected); @@ -576,13 +508,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) if (pm_idle) return; - if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { - /* - * One CPU supports mwait => All CPUs supports mwait - */ - pr_info("using mwait in idle threads\n"); - pm_idle = mwait_idle; - } else if (cpu_has_amd_erratum(amd_erratum_400)) { + if (cpu_has_amd_erratum(amd_erratum_400)) { /* E400: APIC timer interrupt does not wake up CPU from C1e */ pr_info("using AMD E400 aware idle routine\n"); pm_idle = amd_e400_idle; @@ -606,9 +532,6 @@ static int __init idle_setup(char *str) pr_info("using polling idle threads\n"); pm_idle = poll_idle; boot_option_idle_override = IDLE_POLL; - } else if (!strcmp(str, "mwait")) { - boot_option_idle_override = IDLE_FORCE_MWAIT; - WARN_ONCE(1, "\"idle=mwait\" will be removed in 2012\n"); } else if (!strcmp(str, "halt")) { /* * When the boot option of idle=halt is added, halt is diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ed0fe385289..a6ceaedc396 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1369,7 +1369,7 @@ static inline void mwait_play_dead(void) void *mwait_ptr; struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info); - if (!(this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c))) + if (!this_cpu_has(X86_FEATURE_MWAIT)) return; if (!this_cpu_has(X86_FEATURE_CLFLSH)) return; diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index ed9a1cc690b..52a5e3a4cdc 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -84,7 +84,6 @@ static DEFINE_PER_CPU(struct cpuidle_device *, acpi_cpuidle_device); static int disabled_by_idle_boot_param(void) { return boot_option_idle_override == IDLE_POLL || - boot_option_idle_override == IDLE_FORCE_MWAIT || boot_option_idle_override == IDLE_HALT; } -- cgit v1.2.3-70-g09d2 From 32e9518005c8dd9ed668f40f98632c8186df4909 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 2 Feb 2013 01:31:56 -0500 Subject: intel_idle: export both C1 and C1E Here we disable HW promotion of C1 to C1E and export both C1 and C1E and distinct C-states. This allows a cpuidle governor to choose a lower latency C-state than C1E when necessary to satisfy performance and QOS constraints -- and still save power versus polling. This also corrects the erroneous latency previously reported for C1E -- it is 10usec, not 1usec. Note that if you use "intel_idle.max_cstate=N", then you must increment N by 1 to get the same behavior after this change. Signed-off-by: Len Brown --- drivers/idle/intel_idle.c | 54 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 49 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 927cfb4d66f..5d667501386 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -90,6 +90,7 @@ struct idle_cpu { * Indicate which enable bits to clear here. */ unsigned long auto_demotion_disable_flags; + bool disable_promotion_to_c1e; }; static const struct idle_cpu *icpu; @@ -131,6 +132,13 @@ static struct cpuidle_state nehalem_cstates[CPUIDLE_STATE_MAX] = { .exit_latency = 3, .target_residency = 6, .enter = &intel_idle }, + { + .name = "C1E-NHM", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 10, + .target_residency = 20, + .enter = &intel_idle }, { .name = "C3-NHM", .desc = "MWAIT 0x10", @@ -154,8 +162,15 @@ static struct cpuidle_state snb_cstates[CPUIDLE_STATE_MAX] = { .name = "C1-SNB", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, - .exit_latency = 1, - .target_residency = 1, + .exit_latency = 2, + .target_residency = 2, + .enter = &intel_idle }, + { + .name = "C1E-SNB", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 10, + .target_residency = 20, .enter = &intel_idle }, { .name = "C3-SNB", @@ -190,6 +205,13 @@ static struct cpuidle_state ivb_cstates[CPUIDLE_STATE_MAX] = { .exit_latency = 1, .target_residency = 1, .enter = &intel_idle }, + { + .name = "C1E-IVB", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 10, + .target_residency = 20, + .enter = &intel_idle }, { .name = "C3-IVB", .desc = "MWAIT 0x10", @@ -223,6 +245,13 @@ static struct cpuidle_state hsw_cstates[CPUIDLE_STATE_MAX] = { .exit_latency = 2, .target_residency = 2, .enter = &intel_idle }, + { + .name = "C1E-HSW", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 10, + .target_residency = 20, + .enter = &intel_idle }, { .name = "C3-HSW", .desc = "MWAIT 0x10", @@ -250,11 +279,11 @@ static struct cpuidle_state hsw_cstates[CPUIDLE_STATE_MAX] = { static struct cpuidle_state atom_cstates[CPUIDLE_STATE_MAX] = { { - .name = "C1-ATM", + .name = "C1E-ATM", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, - .exit_latency = 1, - .target_residency = 4, + .exit_latency = 10, + .target_residency = 20, .enter = &intel_idle }, { .name = "C2-ATM", @@ -377,10 +406,19 @@ static void auto_demotion_disable(void *dummy) msr_bits &= ~(icpu->auto_demotion_disable_flags); wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits); } +static void c1e_promotion_disable(void *dummy) +{ + unsigned long long msr_bits; + + rdmsrl(MSR_IA32_POWER_CTL, msr_bits); + msr_bits &= ~0x2; + wrmsrl(MSR_IA32_POWER_CTL, msr_bits); +} static const struct idle_cpu idle_cpu_nehalem = { .state_table = nehalem_cstates, .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, + .disable_promotion_to_c1e = true, }; static const struct idle_cpu idle_cpu_atom = { @@ -394,14 +432,17 @@ static const struct idle_cpu idle_cpu_lincroft = { static const struct idle_cpu idle_cpu_snb = { .state_table = snb_cstates, + .disable_promotion_to_c1e = true, }; static const struct idle_cpu idle_cpu_ivb = { .state_table = ivb_cstates, + .disable_promotion_to_c1e = true, }; static const struct idle_cpu idle_cpu_hsw = { .state_table = hsw_cstates, + .disable_promotion_to_c1e = true, }; #define ICPU(model, cpu) \ @@ -544,6 +585,9 @@ static int intel_idle_cpuidle_driver_init(void) if (icpu->auto_demotion_disable_flags) on_each_cpu(auto_demotion_disable, NULL, 1); + if (icpu->disable_promotion_to_c1e) /* each-cpu is redundant */ + on_each_cpu(c1e_promotion_disable, NULL, 1); + return 0; } -- cgit v1.2.3-70-g09d2