From b62b8ef906cdf7115af579ce7378886ce3e0ce00 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 29 Apr 2008 02:35:56 -0400 Subject: force offline the processor during hot-removal The ACPI device node for the cpu has already been unregistered when acpi_processor_handle_eject is called. Thus we should offline the cpu and continue, rather than a failure here. http://bugzilla.kernel.org/show_bug.cgi?id=9772 Signed-off-by: Zhang Rui Signed-off-by: Len Brown Signed-off-by: Andi Kleen --- drivers/acpi/processor_core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/acpi/processor_core.c') diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 9dd0fa93b9e..1c0008edccd 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -1014,9 +1014,9 @@ static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu) static int acpi_processor_handle_eject(struct acpi_processor *pr) { - if (cpu_online(pr->id)) { - return (-EINVAL); - } + if (cpu_online(pr->id)) + cpu_down(pr->id); + arch_unregister_cpu(pr->id); acpi_unmap_lsapic(pr->id); return (0); -- cgit v1.2.3-70-g09d2 From 9f1eb99c757939b0b1783f926130993e9c298bae Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 29 Apr 2008 02:36:07 -0400 Subject: create sysfs link from acpi device to sysdev for cpu Sys I/F under acpi device node and sysdev device node are both needed for cpu hot-removal. User space need this link so that they know they are poking the sys I/F for the same cpu. http://bugzilla.kernel.org/show_bug.cgi?id=9772 Signed-off-by: Zhang Rui Signed-off-by: Len Brown Signed-off-by: Andi Kleen --- drivers/acpi/processor_core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/acpi/processor_core.c') diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 1c0008edccd..feddc8ed870 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -621,7 +621,7 @@ static int __cpuinit acpi_processor_start(struct acpi_device *device) int result = 0; acpi_status status = AE_OK; struct acpi_processor *pr; - + struct sys_device *sysdev; pr = acpi_driver_data(device); @@ -652,6 +652,10 @@ static int __cpuinit acpi_processor_start(struct acpi_device *device) if (result) goto end; + sysdev = get_cpu_sysdev(pr->id); + if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev")) + return -EFAULT; + status = acpi_install_notify_handler(pr->handle, ACPI_DEVICE_NOTIFY, acpi_processor_notify, pr); @@ -810,6 +814,8 @@ static int acpi_processor_remove(struct acpi_device *device, int type) status = acpi_remove_notify_handler(pr->handle, ACPI_DEVICE_NOTIFY, acpi_processor_notify); + sysfs_remove_link(&device->dev.kobj, "sysdev"); + acpi_processor_remove_fs(device); if (pr->cdev) { -- cgit v1.2.3-70-g09d2 From 706546d02384b64e083bd9130c56eaa599c66038 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 9 Jun 2008 16:22:23 -0700 Subject: ACPI: change processors from array to per_cpu variable Change processors from an array sized by NR_CPUS to a per_cpu variable. Signed-off-by: Mike Travis Signed-off-by: Andrew Morton Signed-off-by: Len Brown Signed-off-by: Andi Kleen --- drivers/acpi/processor_core.c | 20 +++++++++----------- drivers/acpi/processor_idle.c | 8 ++++---- drivers/acpi/processor_perflib.c | 18 +++++++++--------- drivers/acpi/processor_throttling.c | 14 +++++++------- include/acpi/processor.h | 2 +- 5 files changed, 30 insertions(+), 32 deletions(-) (limited to 'drivers/acpi/processor_core.c') diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index feddc8ed870..9a803f85ccf 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -118,7 +118,7 @@ static const struct file_operations acpi_processor_info_fops = { .release = single_release, }; -struct acpi_processor *processors[NR_CPUS]; +DEFINE_PER_CPU(struct acpi_processor *, processors); struct acpi_processor_errata errata __read_mostly; /* -------------------------------------------------------------------------- @@ -614,7 +614,7 @@ static int acpi_processor_get_info(struct acpi_processor *pr, unsigned has_uid) return 0; } -static void *processor_device_array[NR_CPUS]; +static DEFINE_PER_CPU(void *, processor_device_array); static int __cpuinit acpi_processor_start(struct acpi_device *device) { @@ -638,15 +638,15 @@ static int __cpuinit acpi_processor_start(struct acpi_device *device) * ACPI id of processors can be reported wrongly by the BIOS. * Don't trust it blindly */ - if (processor_device_array[pr->id] != NULL && - processor_device_array[pr->id] != device) { + if (per_cpu(processor_device_array, pr->id) != NULL && + per_cpu(processor_device_array, pr->id) != device) { printk(KERN_WARNING "BIOS reported wrong ACPI id " "for the processor\n"); return -ENODEV; } - processor_device_array[pr->id] = device; + per_cpu(processor_device_array, pr->id) = device; - processors[pr->id] = pr; + per_cpu(processors, pr->id) = pr; result = acpi_processor_add_fs(device); if (result) @@ -753,7 +753,7 @@ static int acpi_cpu_soft_notify(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; - struct acpi_processor *pr = processors[cpu]; + struct acpi_processor *pr = per_cpu(processors, cpu); if (action == CPU_ONLINE && pr) { acpi_processor_ppc_has_changed(pr); @@ -825,8 +825,8 @@ static int acpi_processor_remove(struct acpi_device *device, int type) pr->cdev = NULL; } - processors[pr->id] = NULL; - processor_device_array[pr->id] = NULL; + per_cpu(processors, pr->id) = NULL; + per_cpu(processor_device_array, pr->id) = NULL; kfree(pr); return 0; @@ -1074,8 +1074,6 @@ static int __init acpi_processor_init(void) { int result = 0; - - memset(&processors, 0, sizeof(processors)); memset(&errata, 0, sizeof(errata)); #ifdef CONFIG_SMP diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 4976e5db2b3..0fc310e7dfd 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -401,7 +401,7 @@ static void acpi_processor_idle(void) */ local_irq_disable(); - pr = processors[smp_processor_id()]; + pr = __get_cpu_var(processors); if (!pr) { local_irq_enable(); return; @@ -1431,7 +1431,7 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev, struct acpi_processor *pr; struct acpi_processor_cx *cx = cpuidle_get_statedata(state); - pr = processors[smp_processor_id()]; + pr = __get_cpu_var(processors); if (unlikely(!pr)) return 0; @@ -1471,7 +1471,7 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev, u32 t1, t2; int sleep_ticks = 0; - pr = processors[smp_processor_id()]; + pr = __get_cpu_var(processors); if (unlikely(!pr)) return 0; @@ -1549,7 +1549,7 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, u32 t1, t2; int sleep_ticks = 0; - pr = processors[smp_processor_id()]; + pr = __get_cpu_var(processors); if (unlikely(!pr)) return 0; diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index d80b2d1441a..b4749969c6b 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -89,7 +89,7 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb, if (event != CPUFREQ_INCOMPATIBLE) goto out; - pr = processors[policy->cpu]; + pr = per_cpu(processors, policy->cpu); if (!pr || !pr->performance) goto out; @@ -572,7 +572,7 @@ int acpi_processor_preregister_performance( /* Call _PSD for all CPUs */ for_each_possible_cpu(i) { - pr = processors[i]; + pr = per_cpu(processors, i); if (!pr) { /* Look only at processors in ACPI namespace */ continue; @@ -603,7 +603,7 @@ int acpi_processor_preregister_performance( * domain info. */ for_each_possible_cpu(i) { - pr = processors[i]; + pr = per_cpu(processors, i); if (!pr) continue; @@ -624,7 +624,7 @@ int acpi_processor_preregister_performance( cpus_clear(covered_cpus); for_each_possible_cpu(i) { - pr = processors[i]; + pr = per_cpu(processors, i); if (!pr) continue; @@ -651,7 +651,7 @@ int acpi_processor_preregister_performance( if (i == j) continue; - match_pr = processors[j]; + match_pr = per_cpu(processors, j); if (!match_pr) continue; @@ -680,7 +680,7 @@ int acpi_processor_preregister_performance( if (i == j) continue; - match_pr = processors[j]; + match_pr = per_cpu(processors, j); if (!match_pr) continue; @@ -697,7 +697,7 @@ int acpi_processor_preregister_performance( err_ret: for_each_possible_cpu(i) { - pr = processors[i]; + pr = per_cpu(processors, i); if (!pr || !pr->performance) continue; @@ -728,7 +728,7 @@ acpi_processor_register_performance(struct acpi_processor_performance mutex_lock(&performance_mutex); - pr = processors[cpu]; + pr = per_cpu(processors, cpu); if (!pr) { mutex_unlock(&performance_mutex); return -ENODEV; @@ -766,7 +766,7 @@ acpi_processor_unregister_performance(struct acpi_processor_performance mutex_lock(&performance_mutex); - pr = processors[cpu]; + pr = per_cpu(processors, cpu); if (!pr) { mutex_unlock(&performance_mutex); return; diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index bb06738860c..8728782aad3 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -71,7 +71,7 @@ static int acpi_processor_update_tsd_coord(void) * coordination between all CPUs. */ for_each_possible_cpu(i) { - pr = processors[i]; + pr = per_cpu(processors, i); if (!pr) continue; @@ -93,7 +93,7 @@ static int acpi_processor_update_tsd_coord(void) cpus_clear(covered_cpus); for_each_possible_cpu(i) { - pr = processors[i]; + pr = per_cpu(processors, i); if (!pr) continue; @@ -119,7 +119,7 @@ static int acpi_processor_update_tsd_coord(void) if (i == j) continue; - match_pr = processors[j]; + match_pr = per_cpu(processors, j); if (!match_pr) continue; @@ -152,7 +152,7 @@ static int acpi_processor_update_tsd_coord(void) if (i == j) continue; - match_pr = processors[j]; + match_pr = per_cpu(processors, j); if (!match_pr) continue; @@ -172,7 +172,7 @@ static int acpi_processor_update_tsd_coord(void) err_ret: for_each_possible_cpu(i) { - pr = processors[i]; + pr = per_cpu(processors, i); if (!pr) continue; @@ -214,7 +214,7 @@ static int acpi_processor_throttling_notifier(unsigned long event, void *data) struct acpi_processor_throttling *p_throttling; cpu = p_tstate->cpu; - pr = processors[cpu]; + pr = per_cpu(processors, cpu); if (!pr) { ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Invalid pr pointer\n")); return 0; @@ -1035,7 +1035,7 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) * cpus. */ for_each_cpu_mask(i, online_throttling_cpus) { - match_pr = processors[i]; + match_pr = per_cpu(processors, i); /* * If the pointer is invalid, we will report the * error message and continue. diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 06ebb6ef72a..3795590e152 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -255,7 +255,7 @@ extern void acpi_processor_unregister_performance(struct int acpi_processor_notify_smm(struct module *calling_module); /* for communication between multiple parts of the processor kernel module */ -extern struct acpi_processor *processors[NR_CPUS]; +DECLARE_PER_CPU(struct acpi_processor *, processors); extern struct acpi_processor_errata errata; void arch_acpi_processor_init_pdc(struct acpi_processor *pr); -- cgit v1.2.3-70-g09d2 From da5e09a1b3e5a9fc0b15a3feb64e921ccc55ba74 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 24 Jun 2008 18:01:09 +0800 Subject: ACPI : Create "idle=nomwait" bootparam "idle=nomwait" disables the use of the MWAIT instruction from both C1 (C1_FFH) and deeper (C2C3_FFH) C-states. When MWAIT is unavailable, the BIOS and OS generally negotiate to use the HALT instruction for C1, and use IO accesses for deeper C-states. This option is useful for power and performance comparisons, and also to work around BIOS bugs where broken MWAIT support is advertised. http://bugzilla.kernel.org/show_bug.cgi?id=10807 http://bugzilla.kernel.org/show_bug.cgi?id=10914 Signed-off-by: Zhao Yakui Signed-off-by: Li Shaohua Signed-off-by: Len Brown Signed-off-by: Andi Kleen --- Documentation/kernel-parameters.txt | 3 ++- arch/ia64/kernel/process.c | 2 ++ arch/x86/kernel/process.c | 11 +++++++++++ drivers/acpi/processor_core.c | 13 +++++++++++++ drivers/acpi/processor_idle.c | 6 +++++- include/asm-ia64/processor.h | 1 + include/asm-x86/processor.h | 1 + 7 files changed, 35 insertions(+), 2 deletions(-) (limited to 'drivers/acpi/processor_core.c') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 65db7f4711a..5e497d16fb5 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -818,7 +818,7 @@ and is between 256 and 4096 characters. It is defined in the file See Documentation/ide/ide.txt. idle= [X86] - Format: idle=poll or idle=mwait, idle=halt + Format: idle=poll or idle=mwait, idle=halt, idle=nomwait Poll forces a polling idle loop that can slightly improves the performance of waking up a idle CPU, but will use a lot of power and make the system run hot. Not recommended. @@ -828,6 +828,7 @@ and is between 256 and 4096 characters. It is defined in the file as idle=poll. idle=halt. Halt is forced to be used for CPU idle. In such case C2/C3 won't be used again. + idle=nomwait. Disable mwait for CPU C-states ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem Claim all unknown PCI IDE storage controllers. diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 612b3c4a060..3ab8373103e 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -57,6 +57,8 @@ unsigned long boot_option_idle_override = 0; EXPORT_SYMBOL(boot_option_idle_override); unsigned long idle_halt; EXPORT_SYMBOL(idle_halt); +unsigned long idle_nomwait; +EXPORT_SYMBOL(idle_nomwait); void ia64_do_show_stack (struct unw_frame_info *info, void *arg) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7fc72949876..4d629c62f4f 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -11,6 +11,8 @@ unsigned long idle_halt; EXPORT_SYMBOL(idle_halt); +unsigned long idle_nomwait; +EXPORT_SYMBOL(idle_nomwait); struct kmem_cache *task_xstate_cachep; @@ -340,6 +342,15 @@ static int __init idle_setup(char *str) pm_idle = default_idle; idle_halt = 1; return 0; + } else if (!strcmp(str, "nomwait")) { + /* + * If the boot option of "idle=nomwait" is added, + * it means that mwait will be disabled for CPU C2/C3 + * states. In such case it won't touch the variable + * of boot_option_idle_override. + */ + idle_nomwait = 1; + return 0; } else return -1; diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 9a803f85ccf..4e1bb89fd6c 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -265,7 +265,20 @@ static int acpi_processor_set_pdc(struct acpi_processor *pr) if (!pdc_in) return status; + if (idle_nomwait) { + /* + * If mwait is disabled for CPU C-states, the C2C3_FFH access + * mode will be disabled in the parameter of _PDC object. + * Of course C1_FFH access mode will also be disabled. + */ + union acpi_object *obj; + u32 *buffer = NULL; + obj = pdc_in->pointer; + buffer = (u32 *)(obj->buffer.pointer); + buffer[2] &= ~(ACPI_PDC_C_C2C3_FFH | ACPI_PDC_C_C1_FFH); + + } status = acpi_evaluate_object(pr->handle, "_PDC", pdc_in, NULL); if (ACPI_FAILURE(status)) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index c75c7ace8c1..d592dbb1d12 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -957,13 +957,17 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr) } else { continue; } - if (cx.type == ACPI_STATE_C1 && idle_halt) { + if (cx.type == ACPI_STATE_C1 && + (idle_halt || idle_nomwait)) { /* * In most cases the C1 space_id obtained from * _CST object is FIXED_HARDWARE access mode. * But when the option of idle=halt is added, * the entry_method type should be changed from * CSTATE_FFH to CSTATE_HALT. + * When the option of idle=nomwait is added, + * the C1 entry_method type should be + * CSTATE_HALT. */ cx.entry_method = ACPI_CSTATE_HALT; snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT"); diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h index f36e28a5f61..f88fa054d01 100644 --- a/include/asm-ia64/processor.h +++ b/include/asm-ia64/processor.h @@ -764,6 +764,7 @@ prefetchw (const void *x) extern unsigned long boot_option_idle_override; extern unsigned long idle_halt; +extern unsigned long idle_nomwait; #endif /* !__ASSEMBLY__ */ diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index bc221623248..55402d2ab93 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -728,6 +728,7 @@ extern void select_idle_routine(const struct cpuinfo_x86 *c); extern unsigned long boot_option_idle_override; extern unsigned long idle_halt; +extern unsigned long idle_nomwait; extern void enable_sep_cpu(void); extern int sysenter_setup(void); -- cgit v1.2.3-70-g09d2 From 2a2a64714d9c40f7705c4de1e79a5b855c7211a9 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 24 Jun 2008 18:02:57 +0800 Subject: ACPI: Disable MWAIT via DMI on broken Compal board If a system matches in this DMI table, Linux will disable MWAIT support for idle. ie. "idle=nomwait" is automatically invoked and C1_FFH and C2C3_FFH access mode are disabled. http://bugzilla.kernel.org/show_bug.cgi?id=10807 http://bugzilla.kernel.org/show_bug.cgi?id=10914 Signed-off-by: Zhao Yakui Signed-off-by: Li Shaohua Signed-off-by: Len Brown Signed-off-by: Andi Kleen --- drivers/acpi/processor_core.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'drivers/acpi/processor_core.c') diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 4e1bb89fd6c..ec0f2d581ec 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -120,6 +120,29 @@ static const struct file_operations acpi_processor_info_fops = { DEFINE_PER_CPU(struct acpi_processor *, processors); struct acpi_processor_errata errata __read_mostly; +static int set_no_mwait(const struct dmi_system_id *id) +{ + printk(KERN_NOTICE PREFIX "%s detected - " + "disable mwait for CPU C-stetes\n", id->ident); + idle_nomwait = 1; + return 0; +} + +static struct dmi_system_id __cpuinitdata processor_idle_dmi_table[] = { + { + set_no_mwait, "IFL91 board", { + DMI_MATCH(DMI_BIOS_VENDOR, "COMPAL"), + DMI_MATCH(DMI_SYS_VENDOR, "ZEPTO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "3215W"), + DMI_MATCH(DMI_BOARD_NAME, "IFL91") }, NULL}, + { + set_no_mwait, "Extensa 5220", { + DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), + DMI_MATCH(DMI_SYS_VENDOR, "ACER"), + DMI_MATCH(DMI_PRODUCT_VERSION, "0100"), + DMI_MATCH(DMI_BOARD_NAME, "Columbia") }, NULL}, + {}, +}; /* -------------------------------------------------------------------------- Errata Handling @@ -1100,6 +1123,11 @@ static int __init acpi_processor_init(void) return -ENOMEM; acpi_processor_dir->owner = THIS_MODULE; + /* + * Check whether the system is DMI table. If yes, OSPM + * should not use mwait for CPU-states. + */ + dmi_check_system(processor_idle_dmi_table); result = cpuidle_register_driver(&acpi_idle_driver); if (result < 0) goto out_proc; -- cgit v1.2.3-70-g09d2