diff options
-rw-r--r-- | Documentation/ABI/testing/sysfs-devices-system-cpu | 11 | ||||
-rw-r--r-- | Documentation/cpu-freq/boost.txt | 93 | ||||
-rw-r--r-- | drivers/cpufreq/acpi-cpufreq.c | 177 |
3 files changed, 281 insertions, 0 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 5dab36448b4..6943133afcb 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -176,3 +176,14 @@ Description: Disable L3 cache indices All AMD processors with L3 caches provide this functionality. For details, see BKDGs at http://developer.amd.com/documentation/guides/Pages/default.aspx + + +What: /sys/devices/system/cpu/cpufreq/boost +Date: August 2012 +Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> +Description: Processor frequency boosting control + + This switch controls the boost setting for the whole system. + Boosting allows the CPU and the firmware to run at a frequency + beyound it's nominal limit. + More details can be found in Documentation/cpu-freq/boost.txt diff --git a/Documentation/cpu-freq/boost.txt b/Documentation/cpu-freq/boost.txt new file mode 100644 index 00000000000..9b4edfcf486 --- /dev/null +++ b/Documentation/cpu-freq/boost.txt @@ -0,0 +1,93 @@ +Processor boosting control + + - information for users - + +Quick guide for the impatient: +-------------------- +/sys/devices/system/cpu/cpufreq/boost +controls the boost setting for the whole system. You can read and write +that file with either "0" (boosting disabled) or "1" (boosting allowed). +Reading or writing 1 does not mean that the system is boosting at this +very moment, but only that the CPU _may_ raise the frequency at it's +discretion. +-------------------- + +Introduction +------------- +Some CPUs support a functionality to raise the operating frequency of +some cores in a multi-core package if certain conditions apply, mostly +if the whole chip is not fully utilized and below it's intended thermal +budget. This is done without operating system control by a combination +of hardware and firmware. +On Intel CPUs this is called "Turbo Boost", AMD calls it "Turbo-Core", +in technical documentation "Core performance boost". In Linux we use +the term "boost" for convenience. + +Rationale for disable switch +---------------------------- + +Though the idea is to just give better performance without any user +intervention, sometimes the need arises to disable this functionality. +Most systems offer a switch in the (BIOS) firmware to disable the +functionality at all, but a more fine-grained and dynamic control would +be desirable: +1. While running benchmarks, reproducible results are important. Since + the boosting functionality depends on the load of the whole package, + single thread performance can vary. By explicitly disabling the boost + functionality at least for the benchmark's run-time the system will run + at a fixed frequency and results are reproducible again. +2. To examine the impact of the boosting functionality it is helpful + to do tests with and without boosting. +3. Boosting means overclocking the processor, though under controlled + conditions. By raising the frequency and the voltage the processor + will consume more power than without the boosting, which may be + undesirable for instance for mobile users. Disabling boosting may + save power here, though this depends on the workload. + + +User controlled switch +---------------------- + +To allow the user to toggle the boosting functionality, the acpi-cpufreq +driver exports a sysfs knob to disable it. There is a file: +/sys/devices/system/cpu/cpufreq/boost +which can either read "0" (boosting disabled) or "1" (boosting enabled). +Reading the file is always supported, even if the processor does not +support boosting. In this case the file will be read-only and always +reads as "0". Explicitly changing the permissions and writing to that +file anyway will return EINVAL. + +On supported CPUs one can write either a "0" or a "1" into this file. +This will either disable the boost functionality on all cores in the +whole system (0) or will allow the hardware to boost at will (1). + +Writing a "1" does not explicitly boost the system, but just allows the +CPU (and the firmware) to boost at their discretion. Some implementations +take external factors like the chip's temperature into account, so +boosting once does not necessarily mean that it will occur every time +even using the exact same software setup. + + +AMD legacy cpb switch +--------------------- +The AMD powernow-k8 driver used to support a very similar switch to +disable or enable the "Core Performance Boost" feature of some AMD CPUs. +This switch was instantiated in each CPU's cpufreq directory +(/sys/devices/system/cpu[0-9]*/cpufreq) and was called "cpb". +Though the per CPU existence hints at a more fine grained control, the +actual implementation only supported a system-global switch semantics, +which was simply reflected into each CPU's file. Writing a 0 or 1 into it +would pull the other CPUs to the same state. +For compatibility reasons this file and its behavior is still supported +on AMD CPUs, though it is now protected by a config switch +(X86_ACPI_CPUFREQ_CPB). On Intel CPUs this file will never be created, +even with the config option set. +This functionality is considered legacy and will be removed in some future +kernel version. + +More fine grained boosting control +---------------------------------- + +Technically it is possible to switch the boosting functionality at least +on a per package basis, for some CPUs even per core. Currently the driver +does not support it, but this may be implemented in the future. diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 70e717305c2..dffa7af1db7 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -63,6 +63,8 @@ enum { #define INTEL_MSR_RANGE (0xffff) #define AMD_MSR_RANGE (0x7) +#define MSR_K7_HWCR_CPB_DIS (1ULL << 25) + struct acpi_cpufreq_data { struct acpi_processor_performance *acpi_data; struct cpufreq_frequency_table *freq_table; @@ -78,6 +80,96 @@ static struct acpi_processor_performance __percpu *acpi_perf_data; static struct cpufreq_driver acpi_cpufreq_driver; static unsigned int acpi_pstate_strict; +static bool boost_enabled, boost_supported; +static struct msr __percpu *msrs; + +static bool boost_state(unsigned int cpu) +{ + u32 lo, hi; + u64 msr; + + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + rdmsr_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &lo, &hi); + msr = lo | ((u64)hi << 32); + return !(msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); + case X86_VENDOR_AMD: + rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi); + msr = lo | ((u64)hi << 32); + return !(msr & MSR_K7_HWCR_CPB_DIS); + } + return false; +} + +static void boost_set_msrs(bool enable, const struct cpumask *cpumask) +{ + u32 cpu; + u32 msr_addr; + u64 msr_mask; + + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + msr_addr = MSR_IA32_MISC_ENABLE; + msr_mask = MSR_IA32_MISC_ENABLE_TURBO_DISABLE; + break; + case X86_VENDOR_AMD: + msr_addr = MSR_K7_HWCR; + msr_mask = MSR_K7_HWCR_CPB_DIS; + break; + default: + return; + } + + rdmsr_on_cpus(cpumask, msr_addr, msrs); + + for_each_cpu(cpu, cpumask) { + struct msr *reg = per_cpu_ptr(msrs, cpu); + if (enable) + reg->q &= ~msr_mask; + else + reg->q |= msr_mask; + } + + wrmsr_on_cpus(cpumask, msr_addr, msrs); +} + +static ssize_t store_global_boost(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + int ret; + unsigned long val = 0; + + if (!boost_supported) + return -EINVAL; + + ret = kstrtoul(buf, 10, &val); + if (ret || (val > 1)) + return -EINVAL; + + if ((val && boost_enabled) || (!val && !boost_enabled)) + return count; + + get_online_cpus(); + + boost_set_msrs(val, cpu_online_mask); + + put_online_cpus(); + + boost_enabled = val; + pr_debug("Core Boosting %sabled.\n", val ? "en" : "dis"); + + return count; +} + +static ssize_t show_global_boost(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", boost_enabled); +} + +static struct global_attr global_boost = __ATTR(boost, 0644, + show_global_boost, + store_global_boost); static int check_est_cpu(unsigned int cpuid) { @@ -448,6 +540,44 @@ static void free_acpi_perf_data(void) free_percpu(acpi_perf_data); } +static int boost_notify(struct notifier_block *nb, unsigned long action, + void *hcpu) +{ + unsigned cpu = (long)hcpu; + const struct cpumask *cpumask; + + cpumask = get_cpu_mask(cpu); + + /* + * Clear the boost-disable bit on the CPU_DOWN path so that + * this cpu cannot block the remaining ones from boosting. On + * the CPU_UP path we simply keep the boost-disable flag in + * sync with the current global state. + */ + + switch (action) { + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + boost_set_msrs(boost_enabled, cpumask); + break; + + case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: + boost_set_msrs(1, cpumask); + break; + + default: + break; + } + + return NOTIFY_OK; +} + + +static struct notifier_block boost_nb = { + .notifier_call = boost_notify, +}; + /* * acpi_cpufreq_early_init - initialize ACPI P-States library * @@ -774,6 +904,49 @@ static struct cpufreq_driver acpi_cpufreq_driver = { .attr = acpi_cpufreq_attr, }; +static void __init acpi_cpufreq_boost_init(void) +{ + if (boot_cpu_has(X86_FEATURE_CPB) || boot_cpu_has(X86_FEATURE_IDA)) { + msrs = msrs_alloc(); + + if (!msrs) + return; + + boost_supported = true; + boost_enabled = boost_state(0); + + get_online_cpus(); + + /* Force all MSRs to the same value */ + boost_set_msrs(boost_enabled, cpu_online_mask); + + register_cpu_notifier(&boost_nb); + + put_online_cpus(); + } else + global_boost.attr.mode = 0444; + + /* We create the boost file in any case, though for systems without + * hardware support it will be read-only and hardwired to return 0. + */ + if (sysfs_create_file(cpufreq_global_kobject, &(global_boost.attr))) + pr_warn(PFX "could not register global boost sysfs file\n"); + else + pr_debug("registered global boost sysfs file\n"); +} + +static void __exit acpi_cpufreq_boost_exit(void) +{ + sysfs_remove_file(cpufreq_global_kobject, &(global_boost.attr)); + + if (msrs) { + unregister_cpu_notifier(&boost_nb); + + msrs_free(msrs); + msrs = NULL; + } +} + static int __init acpi_cpufreq_init(void) { int ret; @@ -790,6 +963,8 @@ static int __init acpi_cpufreq_init(void) ret = cpufreq_register_driver(&acpi_cpufreq_driver); if (ret) free_acpi_perf_data(); + else + acpi_cpufreq_boost_init(); return ret; } @@ -798,6 +973,8 @@ static void __exit acpi_cpufreq_exit(void) { pr_debug("acpi_cpufreq_exit\n"); + acpi_cpufreq_boost_exit(); + cpufreq_unregister_driver(&acpi_cpufreq_driver); free_acpi_perf_data(); |