From 51315cdfa0521fff3059cec5fb8ffecc7f37cba7 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Sun, 19 Oct 2014 11:30:27 +0200 Subject: cpufreq: allow driver-specific data This commit extends the cpufreq_driver structure with an additional 'void *driver_data' field that can be filled by the ->probe() function of a cpufreq driver to pass additional custom information to the driver itself. A new function called cpufreq_get_driver_data() is added to allow a cpufreq driver to retrieve those driver data, since they are typically needed from a cpufreq_policy->init() callback, which does not have access to the cpufreq_driver structure. This function call is similar to the existing cpufreq_get_current_driver() function call. Signed-off-by: Thomas Petazzoni Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 15 +++++++++++++++ include/linux/cpufreq.h | 2 ++ 2 files changed, 17 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 24bf76fba14..058d6e084a6 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1731,6 +1731,21 @@ const char *cpufreq_get_current_driver(void) } EXPORT_SYMBOL_GPL(cpufreq_get_current_driver); +/** + * cpufreq_get_driver_data - return current driver data + * + * Return the private data of the currently loaded cpufreq + * driver, or NULL if no cpufreq driver is loaded. + */ +void *cpufreq_get_driver_data(void) +{ + if (cpufreq_driver) + return cpufreq_driver->driver_data; + + return NULL; +} +EXPORT_SYMBOL_GPL(cpufreq_get_driver_data); + /********************************************************************* * NOTIFIER LISTS INTERFACE * *********************************************************************/ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 138336b6bb0..503b085b783 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -219,6 +219,7 @@ __ATTR(_name, 0644, show_##_name, store_##_name) struct cpufreq_driver { char name[CPUFREQ_NAME_LEN]; u8 flags; + void *driver_data; /* needed by all drivers */ int (*init) (struct cpufreq_policy *policy); @@ -312,6 +313,7 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data); int cpufreq_unregister_driver(struct cpufreq_driver *driver_data); const char *cpufreq_get_current_driver(void); +void *cpufreq_get_driver_data(void); static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy, unsigned int min, unsigned int max) -- cgit v1.2.3-70-g09d2 From 34e5a5273d6aa0ee8836bd5d6111b135ffae6931 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Sun, 19 Oct 2014 11:30:28 +0200 Subject: cpufreq: cpufreq-dt: extend with platform_data This commit extends the cpufreq-dt driver to take a platform_data structure. This structure is for now used to tell the cpufreq-dt driver the layout of the clocks on the platform, i.e whether all CPUs share the same clock or whether each CPU has a separate clock. Signed-off-by: Thomas Petazzoni Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq-dt.c | 17 +++++++++++++++-- include/linux/cpufreq-dt.h | 22 ++++++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) create mode 100644 include/linux/cpufreq-dt.h diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 6bbb8b91344..52facdaf531 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -178,6 +179,7 @@ try_again: static int cpufreq_init(struct cpufreq_policy *policy) { + struct cpufreq_dt_platform_data *pd; struct cpufreq_frequency_table *freq_table; struct thermal_cooling_device *cdev; struct device_node *np; @@ -265,9 +267,18 @@ static int cpufreq_init(struct cpufreq_policy *policy) policy->driver_data = priv; policy->clk = cpu_clk; - ret = cpufreq_generic_init(policy, freq_table, transition_latency); - if (ret) + ret = cpufreq_table_validate_and_show(policy, freq_table); + if (ret) { + dev_err(cpu_dev, "%s: invalid frequency table: %d\n", __func__, + ret); goto out_cooling_unregister; + } + + policy->cpuinfo.transition_latency = transition_latency; + + pd = cpufreq_get_driver_data(); + if (pd && !pd->independent_clocks) + cpumask_setall(policy->cpus); of_node_put(np); @@ -335,6 +346,8 @@ static int dt_cpufreq_probe(struct platform_device *pdev) if (!IS_ERR(cpu_reg)) regulator_put(cpu_reg); + dt_cpufreq_driver.driver_data = dev_get_platdata(&pdev->dev); + ret = cpufreq_register_driver(&dt_cpufreq_driver); if (ret) dev_err(cpu_dev, "failed register driver: %d\n", ret); diff --git a/include/linux/cpufreq-dt.h b/include/linux/cpufreq-dt.h new file mode 100644 index 00000000000..0414009e2c3 --- /dev/null +++ b/include/linux/cpufreq-dt.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2014 Marvell + * Thomas Petazzoni + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __CPUFREQ_DT_H__ +#define __CPUFREQ_DT_H__ + +struct cpufreq_dt_platform_data { + /* + * True when each CPU has its own clock to control its + * frequency, false when all CPUs are controlled by a single + * clock. + */ + bool independent_clocks; +}; + +#endif /* __CPUFREQ_DT_H__ */ -- cgit v1.2.3-70-g09d2 From a00de1ab21acbd150db341cb56f1897550d6688c Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Sun, 19 Oct 2014 11:30:29 +0200 Subject: cpufreq: cpufreq-dt: adjust message related to regulators The cpufreq-dt driver tries to get a regulator for each CPU. This regulator is optional, but when not present, a scary message "failed to get cpuX regulator" is displayed. To solve this, we reduce the severity of the message from dev_warn() to dev_dbg() and we reword the message to not be as scary. Signed-off-by: Thomas Petazzoni Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq-dt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 52facdaf531..92c162af504 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -147,8 +147,8 @@ try_again: goto try_again; } - dev_warn(cpu_dev, "failed to get cpu%d regulator: %ld\n", - cpu, PTR_ERR(cpu_reg)); + dev_dbg(cpu_dev, "no regulator for cpu%d: %ld\n", + cpu, PTR_ERR(cpu_reg)); } cpu_clk = clk_get(cpu_dev, NULL); -- cgit v1.2.3-70-g09d2 From 74aa51b5ccd3975392e30d11820dc073c5f2cd32 Mon Sep 17 00:00:00 2001 From: "Preeti U. Murthy" Date: Tue, 14 Oct 2014 13:23:00 +0530 Subject: cpuidle: powernv: Populate cpuidle state details by querying the device-tree We hard code the metrics relevant for cpuidle states in the kernel today. Instead pick them up from the device tree so that they remain relevant and updated for the system that the kernel is running on. Signed-off-by: Preeti U. Murthy Signed-off-by: Shreyas B. Prabhu Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle-powernv.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index a64be578dab..7d3a3497dd4 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -163,7 +163,8 @@ static int powernv_add_idle_states(void) int nr_idle_states = 1; /* Snooze */ int dt_idle_states; const __be32 *idle_state_flags; - u32 len_flags, flags; + const __be32 *idle_state_latency; + u32 len_flags, flags, latency_ns; int i; /* Currently we have snooze statically defined */ @@ -180,18 +181,32 @@ static int powernv_add_idle_states(void) return nr_idle_states; } + idle_state_latency = of_get_property(power_mgt, + "ibm,cpu-idle-state-latencies-ns", NULL); + if (!idle_state_latency) { + pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-latencies-ns\n"); + return nr_idle_states; + } + dt_idle_states = len_flags / sizeof(u32); for (i = 0; i < dt_idle_states; i++) { flags = be32_to_cpu(idle_state_flags[i]); + + /* Cpuidle accepts exit_latency in us and we estimate + * target residency to be 10x exit_latency + */ + latency_ns = be32_to_cpu(idle_state_latency[i]); if (flags & IDLE_USE_INST_NAP) { /* Add NAP state */ strcpy(powernv_states[nr_idle_states].name, "Nap"); strcpy(powernv_states[nr_idle_states].desc, "Nap"); powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIME_VALID; - powernv_states[nr_idle_states].exit_latency = 10; - powernv_states[nr_idle_states].target_residency = 100; + powernv_states[nr_idle_states].exit_latency = + ((unsigned int)latency_ns) / 1000; + powernv_states[nr_idle_states].target_residency = + ((unsigned int)latency_ns / 100); powernv_states[nr_idle_states].enter = &nap_loop; nr_idle_states++; } @@ -202,8 +217,10 @@ static int powernv_add_idle_states(void) strcpy(powernv_states[nr_idle_states].desc, "FastSleep"); powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TIMER_STOP; - powernv_states[nr_idle_states].exit_latency = 300; - powernv_states[nr_idle_states].target_residency = 1000000; + powernv_states[nr_idle_states].exit_latency = + ((unsigned int)latency_ns) / 1000; + powernv_states[nr_idle_states].target_residency = + ((unsigned int)latency_ns / 100); powernv_states[nr_idle_states].enter = &fastsleep_loop; nr_idle_states++; } -- cgit v1.2.3-70-g09d2 From 36b4bed5cd8f6e17019fa7d380e0836872c7b367 Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Thu, 16 Oct 2014 01:16:51 +0200 Subject: cpufreq: intel_pstate: Fix setting max_perf_pct in performance policy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Code which changes policy to powersave changes also max_policy_pct based on max_freq. Code which change max_perf_pct has upper limit base on value max_policy_pct. When policy is changing from powersave back to performance then max_policy_pct is not changed. Which means that changing max_perf_pct is not possible to high values if max_freq was too low in powersave policy. Test case: $ cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_min_freq 800000 $ cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq 3300000 $ cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor performance $ cat /sys/devices/system/cpu/intel_pstate/max_perf_pct 100 $ echo powersave > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor $ echo 800000 > /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq $ echo 20 > /sys/devices/system/cpu/intel_pstate/max_perf_pct $ cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor powersave $ cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq 800000 $ cat /sys/devices/system/cpu/intel_pstate/max_perf_pct 20 $ echo performance > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor $ echo 3300000 > /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq $ echo 100 > /sys/devices/system/cpu/intel_pstate/max_perf_pct $ cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor performance $ cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq 3300000 $ cat /sys/devices/system/cpu/intel_pstate/max_perf_pct 24 And now intel_pstate driver allows to set maximal value for max_perf_pct based on max_policy_pct which is 24 for previous powersave max_freq 800000. This patch will set default value for max_policy_pct when setting policy to performance so it will allow to set also max value for max_perf_pct. Signed-off-by: Pali Rohár Cc: All applicable Acked-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 0668b389c51..7547ab559a9 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -714,6 +714,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { limits.min_perf_pct = 100; limits.min_perf = int_tofp(1); + limits.max_policy_pct = 100; limits.max_perf_pct = 100; limits.max_perf = int_tofp(1); limits.no_turbo = limits.turbo_disabled; -- cgit v1.2.3-70-g09d2 From c034b02e213d271b98c45c4a7b54af8f69aaac1e Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Mon, 13 Oct 2014 08:37:40 -0700 Subject: cpufreq: expose scaling_cur_freq sysfs file for set_policy() drivers Currently the core does not expose scaling_cur_freq for set_policy() drivers this breaks some userspace monitoring tools. Change the core to expose this file for all drivers and if the set_policy() driver supports the get() callback use it to retrieve the current frequency. Link: https://bugzilla.kernel.org/show_bug.cgi?id=73741 Cc: All applicable Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 058d6e084a6..644b54e1e7d 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -512,7 +512,18 @@ show_one(cpuinfo_max_freq, cpuinfo.max_freq); show_one(cpuinfo_transition_latency, cpuinfo.transition_latency); show_one(scaling_min_freq, min); show_one(scaling_max_freq, max); -show_one(scaling_cur_freq, cur); + +static ssize_t show_scaling_cur_freq( + struct cpufreq_policy *policy, char *buf) +{ + ssize_t ret; + + if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get) + ret = sprintf(buf, "%u\n", cpufreq_driver->get(policy->cpu)); + else + ret = sprintf(buf, "%u\n", policy->cur); + return ret; +} static int cpufreq_set_policy(struct cpufreq_policy *policy, struct cpufreq_policy *new_policy); @@ -906,11 +917,11 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy, if (ret) goto err_out_kobj_put; } - if (has_target()) { - ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr); - if (ret) - goto err_out_kobj_put; - } + + ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr); + if (ret) + goto err_out_kobj_put; + if (cpufreq_driver->bios_limit) { ret = sysfs_create_file(&policy->kobj, &bios_limit.attr); if (ret) -- cgit v1.2.3-70-g09d2 From 4521e1a0ce173daa23dfef8312d09051e057ff8e Mon Sep 17 00:00:00 2001 From: Gabriele Mazzotta Date: Mon, 13 Oct 2014 08:37:41 -0700 Subject: cpufreq: intel_pstate: Reflect current no_turbo state correctly Some BIOSes modify the state of MSR_IA32_MISC_ENABLE_TURBO_DISABLE based on the current power source for the system battery AC vs battery. Reflect the correct current state and ability to modify the no_turbo sysfs file based on current state of MSR_IA32_MISC_ENABLE_TURBO_DISABLE. Link: https://bugzilla.kernel.org/show_bug.cgi?id=83151 Cc: All applicable Signed-off-by: Gabriele Mazzotta Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 48 +++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 7547ab559a9..5f5f5ed1169 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -138,6 +138,7 @@ struct perf_limits { static struct perf_limits limits = { .no_turbo = 0, + .turbo_disabled = 0, .max_perf_pct = 100, .max_perf = int_tofp(1), .min_perf_pct = 0, @@ -218,6 +219,18 @@ static inline void intel_pstate_reset_all_pid(void) } } +static inline void update_turbo_state(void) +{ + u64 misc_en; + struct cpudata *cpu; + + cpu = all_cpu_data[0]; + rdmsrl(MSR_IA32_MISC_ENABLE, misc_en); + limits.turbo_disabled = + (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE || + cpu->pstate.max_pstate == cpu->pstate.turbo_pstate); +} + /************************** debugfs begin ************************/ static int pid_param_set(void *data, u64 val) { @@ -274,6 +287,20 @@ static void __init intel_pstate_debug_expose_params(void) return sprintf(buf, "%u\n", limits.object); \ } +static ssize_t show_no_turbo(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + ssize_t ret; + + update_turbo_state(); + if (limits.turbo_disabled) + ret = sprintf(buf, "%u\n", limits.turbo_disabled); + else + ret = sprintf(buf, "%u\n", limits.no_turbo); + + return ret; +} + static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, const char *buf, size_t count) { @@ -283,11 +310,14 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, ret = sscanf(buf, "%u", &input); if (ret != 1) return -EINVAL; - limits.no_turbo = clamp_t(int, input, 0 , 1); + + update_turbo_state(); if (limits.turbo_disabled) { pr_warn("Turbo disabled by BIOS or unavailable on processor\n"); - limits.no_turbo = limits.turbo_disabled; + return -EPERM; } + limits.no_turbo = clamp_t(int, input, 0, 1); + return count; } @@ -323,7 +353,6 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, return count; } -show_one(no_turbo, no_turbo); show_one(max_perf_pct, max_perf_pct); show_one(min_perf_pct, min_perf_pct); @@ -501,7 +530,7 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) int max_perf_adj; int min_perf; - if (limits.no_turbo) + if (limits.no_turbo || limits.turbo_disabled) max_perf = cpu->pstate.max_pstate; max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf)); @@ -516,6 +545,8 @@ static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate) { int max_perf, min_perf; + update_turbo_state(); + intel_pstate_get_min_max(cpu, &min_perf, &max_perf); pstate = clamp_t(int, pstate, min_perf, max_perf); @@ -717,7 +748,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) limits.max_policy_pct = 100; limits.max_perf_pct = 100; limits.max_perf = int_tofp(1); - limits.no_turbo = limits.turbo_disabled; + limits.no_turbo = 0; return 0; } limits.min_perf_pct = (policy->min * 100) / policy->cpuinfo.max_freq; @@ -760,7 +791,6 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy) { struct cpudata *cpu; int rc; - u64 misc_en; rc = intel_pstate_init_cpu(policy->cpu); if (rc) @@ -768,12 +798,6 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy) cpu = all_cpu_data[policy->cpu]; - rdmsrl(MSR_IA32_MISC_ENABLE, misc_en); - if (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE || - cpu->pstate.max_pstate == cpu->pstate.turbo_pstate) { - limits.turbo_disabled = 1; - limits.no_turbo = 1; - } if (limits.min_perf_pct == 100 && limits.max_perf_pct == 100) policy->policy = CPUFREQ_POLICY_PERFORMANCE; else -- cgit v1.2.3-70-g09d2 From c034871712730a33e0267095f48b62eae958499c Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Mon, 13 Oct 2014 08:37:42 -0700 Subject: intel_pstate: Don't lose sysfs settings during cpu offline The user may have custom settings don't destroy them during suspend. Link: https://bugzilla.kernel.org/show_bug.cgi?id=80651 Reported-by: Tobias Jakobi Cc: All applicable Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 5f5f5ed1169..97b349a6f5c 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -702,7 +702,9 @@ static int intel_pstate_init_cpu(unsigned int cpunum) { struct cpudata *cpu; - all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata), GFP_KERNEL); + if (!all_cpu_data[cpunum]) + all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata), + GFP_KERNEL); if (!all_cpu_data[cpunum]) return -ENOMEM; @@ -783,8 +785,6 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy) del_timer_sync(&all_cpu_data[cpu_num]->timer); intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate); - kfree(all_cpu_data[cpu_num]); - all_cpu_data[cpu_num] = NULL; } static int intel_pstate_cpu_init(struct cpufreq_policy *policy) -- cgit v1.2.3-70-g09d2 From b27580b05e6f5253228debc60b8ff4a786ff573a Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Mon, 13 Oct 2014 08:37:43 -0700 Subject: intel_pstate: Fix BYT frequency reporting BYT has a different conversion from P state to frequency than the core processors. This causes the min/max and current frequency to be misreported on some BYT SKUs. Tested on BYT N2820, Ivybridge and Haswell processors. Link: https://bugzilla.yoctoproject.org/show_bug.cgi?id=6663 Cc: All applicable Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 42 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 97b349a6f5c..98593e0344c 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -64,6 +64,7 @@ struct pstate_data { int current_pstate; int min_pstate; int max_pstate; + int scaling; int turbo_pstate; }; @@ -113,6 +114,7 @@ struct pstate_funcs { int (*get_max)(void); int (*get_min)(void); int (*get_turbo)(void); + int (*get_scaling)(void); void (*set)(struct cpudata*, int pstate); void (*get_vid)(struct cpudata *); }; @@ -433,6 +435,22 @@ static void byt_set_pstate(struct cpudata *cpudata, int pstate) wrmsrl(MSR_IA32_PERF_CTL, val); } +#define BYT_BCLK_FREQS 5 +static int byt_freq_table[BYT_BCLK_FREQS] = { 833, 1000, 1333, 1167, 800}; + +static int byt_get_scaling(void) +{ + u64 value; + int i; + + rdmsrl(MSR_FSB_FREQ, value); + i = value & 0x3; + + BUG_ON(i > BYT_BCLK_FREQS); + + return byt_freq_table[i] * 100; +} + static void byt_get_vid(struct cpudata *cpudata) { u64 value; @@ -478,6 +496,11 @@ static int core_get_turbo_pstate(void) return ret; } +static inline int core_get_scaling(void) +{ + return 100000; +} + static void core_set_pstate(struct cpudata *cpudata, int pstate) { u64 val; @@ -502,6 +525,7 @@ static struct cpu_defaults core_params = { .get_max = core_get_max_pstate, .get_min = core_get_min_pstate, .get_turbo = core_get_turbo_pstate, + .get_scaling = core_get_scaling, .set = core_set_pstate, }, }; @@ -520,6 +544,7 @@ static struct cpu_defaults byt_params = { .get_min = byt_get_min_pstate, .get_turbo = byt_get_turbo_pstate, .set = byt_set_pstate, + .get_scaling = byt_get_scaling, .get_vid = byt_get_vid, }, }; @@ -554,7 +579,7 @@ static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate) if (pstate == cpu->pstate.current_pstate) return; - trace_cpu_frequency(pstate * 100000, cpu->cpu); + trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu); cpu->pstate.current_pstate = pstate; @@ -566,6 +591,7 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) cpu->pstate.min_pstate = pstate_funcs.get_min(); cpu->pstate.max_pstate = pstate_funcs.get_max(); cpu->pstate.turbo_pstate = pstate_funcs.get_turbo(); + cpu->pstate.scaling = pstate_funcs.get_scaling(); if (pstate_funcs.get_vid) pstate_funcs.get_vid(cpu); @@ -581,7 +607,9 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu) core_pct = div64_u64(core_pct, int_tofp(sample->mperf)); sample->freq = fp_toint( - mul_fp(int_tofp(cpu->pstate.max_pstate * 1000), core_pct)); + mul_fp(int_tofp( + cpu->pstate.max_pstate * cpu->pstate.scaling / 100), + core_pct)); sample->core_pct_busy = (int32_t)core_pct; } @@ -803,12 +831,13 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy) else policy->policy = CPUFREQ_POLICY_POWERSAVE; - policy->min = cpu->pstate.min_pstate * 100000; - policy->max = cpu->pstate.turbo_pstate * 100000; + policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling; + policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling; /* cpuinfo and default policy values */ - policy->cpuinfo.min_freq = cpu->pstate.min_pstate * 100000; - policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate * 100000; + policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling; + policy->cpuinfo.max_freq = + cpu->pstate.turbo_pstate * cpu->pstate.scaling; policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; cpumask_set_cpu(policy->cpu, policy->cpus); @@ -866,6 +895,7 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs) pstate_funcs.get_max = funcs->get_max; pstate_funcs.get_min = funcs->get_min; pstate_funcs.get_turbo = funcs->get_turbo; + pstate_funcs.get_scaling = funcs->get_scaling; pstate_funcs.set = funcs->set; pstate_funcs.get_vid = funcs->get_vid; } -- cgit v1.2.3-70-g09d2 From d022a65ed2473fac4a600e3424503dc571160a3e Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Mon, 13 Oct 2014 08:37:44 -0700 Subject: intel_pstate: Correct BYT VID values. Using a VID value that is not high enough for the requested P state can cause machine checks. Add a ceiling function to ensure calulated VIDs with fractional values are set to the next highest integer VID value. The algorythm for calculating the non-trubo VID from the BIOS writers guide is: vid_ratio = (vid_max - vid_min) / (max_pstate - min_pstate) vid = ceiling(vid_min + (req_pstate - min_pstate) * vid_ratio) Cc: All applicable Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 98593e0344c..27bb6d3877e 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -52,6 +52,17 @@ static inline int32_t div_fp(int32_t x, int32_t y) return div_s64((int64_t)x << FRAC_BITS, y); } +static inline int ceiling_fp(int32_t x) +{ + int mask, ret; + + ret = fp_toint(x); + mask = (1 << FRAC_BITS) - 1; + if (x & mask) + ret += 1; + return ret; +} + struct sample { int32_t core_pct_busy; u64 aperf; @@ -425,7 +436,7 @@ static void byt_set_pstate(struct cpudata *cpudata, int pstate) cpudata->vid.ratio); vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max); - vid = fp_toint(vid_fp); + vid = ceiling_fp(vid_fp); if (pstate > cpudata->pstate.max_pstate) vid = cpudata->vid.turbo; -- cgit v1.2.3-70-g09d2