diff options
Diffstat (limited to 'drivers/cpufreq/cpufreq_governor.c')
-rw-r--r-- | drivers/cpufreq/cpufreq_governor.c | 291 |
1 files changed, 184 insertions, 107 deletions
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 5a76086ff09..443442df113 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -22,12 +22,29 @@ #include <linux/export.h> #include <linux/kernel_stat.h> #include <linux/mutex.h> +#include <linux/slab.h> #include <linux/tick.h> #include <linux/types.h> #include <linux/workqueue.h> #include "cpufreq_governor.h" +static struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy) +{ + if (have_governor_per_policy()) + return &policy->kobj; + else + return cpufreq_global_kobject; +} + +static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data) +{ + if (have_governor_per_policy()) + return dbs_data->cdata->attr_group_gov_pol; + else + return dbs_data->cdata->attr_group_gov_sys; +} + static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall) { u64 idle_time; @@ -50,13 +67,13 @@ static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall) return cputime_to_usecs(idle_time); } -u64 get_cpu_idle_time(unsigned int cpu, u64 *wall) +u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy) { - u64 idle_time = get_cpu_idle_time_us(cpu, NULL); + u64 idle_time = get_cpu_idle_time_us(cpu, io_busy ? wall : NULL); if (idle_time == -1ULL) return get_cpu_idle_time_jiffy(cpu, wall); - else + else if (!io_busy) idle_time += get_cpu_iowait_time_us(cpu, wall); return idle_time; @@ -65,7 +82,7 @@ EXPORT_SYMBOL_GPL(get_cpu_idle_time); void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) { - struct cpu_dbs_common_info *cdbs = dbs_data->get_cpu_cdbs(cpu); + struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); struct od_dbs_tuners *od_tuners = dbs_data->tuners; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; struct cpufreq_policy *policy; @@ -73,7 +90,7 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) unsigned int ignore_nice; unsigned int j; - if (dbs_data->governor == GOV_ONDEMAND) + if (dbs_data->cdata->governor == GOV_ONDEMAND) ignore_nice = od_tuners->ignore_nice; else ignore_nice = cs_tuners->ignore_nice; @@ -83,13 +100,22 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) /* Get Absolute Load (in terms of freq for ondemand gov) */ for_each_cpu(j, policy->cpus) { struct cpu_dbs_common_info *j_cdbs; - u64 cur_wall_time, cur_idle_time, cur_iowait_time; - unsigned int idle_time, wall_time, iowait_time; + u64 cur_wall_time, cur_idle_time; + unsigned int idle_time, wall_time; unsigned int load; + int io_busy = 0; - j_cdbs = dbs_data->get_cpu_cdbs(j); + j_cdbs = dbs_data->cdata->get_cpu_cdbs(j); - cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); + /* + * For the purpose of ondemand, waiting for disk IO is + * an indication that you're performance critical, and + * not that the system is actually idle. So do not add + * the iowait time to the cpu idle time. + */ + if (dbs_data->cdata->governor == GOV_ONDEMAND) + io_busy = od_tuners->io_is_busy; + cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy); wall_time = (unsigned int) (cur_wall_time - j_cdbs->prev_cpu_wall); @@ -117,35 +143,12 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) idle_time += jiffies_to_usecs(cur_nice_jiffies); } - if (dbs_data->governor == GOV_ONDEMAND) { - struct od_cpu_dbs_info_s *od_j_dbs_info = - dbs_data->get_cpu_dbs_info_s(cpu); - - cur_iowait_time = get_cpu_iowait_time_us(j, - &cur_wall_time); - if (cur_iowait_time == -1ULL) - cur_iowait_time = 0; - - iowait_time = (unsigned int) (cur_iowait_time - - od_j_dbs_info->prev_cpu_iowait); - od_j_dbs_info->prev_cpu_iowait = cur_iowait_time; - - /* - * For the purpose of ondemand, waiting for disk IO is - * an indication that you're performance critical, and - * not that the system is actually idle. So subtract the - * iowait time from the cpu idle time. - */ - if (od_tuners->io_is_busy && idle_time >= iowait_time) - idle_time -= iowait_time; - } - if (unlikely(!wall_time || wall_time < idle_time)) continue; load = 100 * (wall_time - idle_time) / wall_time; - if (dbs_data->governor == GOV_ONDEMAND) { + if (dbs_data->cdata->governor == GOV_ONDEMAND) { int freq_avg = __cpufreq_driver_getavg(policy, j); if (freq_avg <= 0) freq_avg = policy->cur; @@ -157,24 +160,42 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) max_load = load; } - dbs_data->gov_check_cpu(cpu, max_load); + dbs_data->cdata->gov_check_cpu(cpu, max_load); } EXPORT_SYMBOL_GPL(dbs_check_cpu); -static inline void dbs_timer_init(struct dbs_data *dbs_data, int cpu, - unsigned int sampling_rate) +static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data, + unsigned int delay) { - int delay = delay_for_sampling_rate(sampling_rate); - struct cpu_dbs_common_info *cdbs = dbs_data->get_cpu_cdbs(cpu); + struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); - schedule_delayed_work_on(cpu, &cdbs->work, delay); + mod_delayed_work_on(cpu, system_wq, &cdbs->work, delay); } -static inline void dbs_timer_exit(struct dbs_data *dbs_data, int cpu) +void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy, + unsigned int delay, bool all_cpus) { - struct cpu_dbs_common_info *cdbs = dbs_data->get_cpu_cdbs(cpu); + int i; - cancel_delayed_work_sync(&cdbs->work); + if (!all_cpus) { + __gov_queue_work(smp_processor_id(), dbs_data, delay); + } else { + for_each_cpu(i, policy->cpus) + __gov_queue_work(i, dbs_data, delay); + } +} +EXPORT_SYMBOL_GPL(gov_queue_work); + +static inline void gov_cancel_work(struct dbs_data *dbs_data, + struct cpufreq_policy *policy) +{ + struct cpu_dbs_common_info *cdbs; + int i; + + for_each_cpu(i, policy->cpus) { + cdbs = dbs_data->cdata->get_cpu_cdbs(i); + cancel_delayed_work_sync(&cdbs->work); + } } /* Will return if we need to evaluate cpu load again or not */ @@ -196,31 +217,130 @@ bool need_load_eval(struct cpu_dbs_common_info *cdbs, } EXPORT_SYMBOL_GPL(need_load_eval); -int cpufreq_governor_dbs(struct dbs_data *dbs_data, - struct cpufreq_policy *policy, unsigned int event) +static void set_sampling_rate(struct dbs_data *dbs_data, + unsigned int sampling_rate) { + if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + cs_tuners->sampling_rate = sampling_rate; + } else { + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + od_tuners->sampling_rate = sampling_rate; + } +} + +int cpufreq_governor_dbs(struct cpufreq_policy *policy, + struct common_dbs_data *cdata, unsigned int event) +{ + struct dbs_data *dbs_data; struct od_cpu_dbs_info_s *od_dbs_info = NULL; struct cs_cpu_dbs_info_s *cs_dbs_info = NULL; - struct cs_ops *cs_ops = NULL; struct od_ops *od_ops = NULL; - struct od_dbs_tuners *od_tuners = dbs_data->tuners; - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + struct od_dbs_tuners *od_tuners = NULL; + struct cs_dbs_tuners *cs_tuners = NULL; struct cpu_dbs_common_info *cpu_cdbs; - unsigned int *sampling_rate, latency, ignore_nice, j, cpu = policy->cpu; + unsigned int sampling_rate, latency, ignore_nice, j, cpu = policy->cpu; + int io_busy = 0; int rc; - cpu_cdbs = dbs_data->get_cpu_cdbs(cpu); + if (have_governor_per_policy()) + dbs_data = policy->governor_data; + else + dbs_data = cdata->gdbs_data; + + WARN_ON(!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT)); + + switch (event) { + case CPUFREQ_GOV_POLICY_INIT: + if (have_governor_per_policy()) { + WARN_ON(dbs_data); + } else if (dbs_data) { + policy->governor_data = dbs_data; + return 0; + } + + dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL); + if (!dbs_data) { + pr_err("%s: POLICY_INIT: kzalloc failed\n", __func__); + return -ENOMEM; + } + + dbs_data->cdata = cdata; + rc = cdata->init(dbs_data); + if (rc) { + pr_err("%s: POLICY_INIT: init() failed\n", __func__); + kfree(dbs_data); + return rc; + } + + rc = sysfs_create_group(get_governor_parent_kobj(policy), + get_sysfs_attr(dbs_data)); + if (rc) { + cdata->exit(dbs_data); + kfree(dbs_data); + return rc; + } + + policy->governor_data = dbs_data; - if (dbs_data->governor == GOV_CONSERVATIVE) { - cs_dbs_info = dbs_data->get_cpu_dbs_info_s(cpu); - sampling_rate = &cs_tuners->sampling_rate; + /* policy latency is in nS. Convert it to uS first */ + latency = policy->cpuinfo.transition_latency / 1000; + if (latency == 0) + latency = 1; + + /* Bring kernel and HW constraints together */ + dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate, + MIN_LATENCY_MULTIPLIER * latency); + set_sampling_rate(dbs_data, max(dbs_data->min_sampling_rate, + latency * LATENCY_MULTIPLIER)); + + if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { + struct cs_ops *cs_ops = dbs_data->cdata->gov_ops; + + cpufreq_register_notifier(cs_ops->notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } + + if (!have_governor_per_policy()) + cdata->gdbs_data = dbs_data; + + return 0; + case CPUFREQ_GOV_POLICY_EXIT: + if ((policy->governor->initialized == 1) || + have_governor_per_policy()) { + sysfs_remove_group(get_governor_parent_kobj(policy), + get_sysfs_attr(dbs_data)); + + if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { + struct cs_ops *cs_ops = dbs_data->cdata->gov_ops; + + cpufreq_unregister_notifier(cs_ops->notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } + + cdata->exit(dbs_data); + kfree(dbs_data); + cdata->gdbs_data = NULL; + } + + policy->governor_data = NULL; + return 0; + } + + cpu_cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); + + if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { + cs_tuners = dbs_data->tuners; + cs_dbs_info = dbs_data->cdata->get_cpu_dbs_info_s(cpu); + sampling_rate = cs_tuners->sampling_rate; ignore_nice = cs_tuners->ignore_nice; - cs_ops = dbs_data->gov_ops; } else { - od_dbs_info = dbs_data->get_cpu_dbs_info_s(cpu); - sampling_rate = &od_tuners->sampling_rate; + od_tuners = dbs_data->tuners; + od_dbs_info = dbs_data->cdata->get_cpu_dbs_info_s(cpu); + sampling_rate = od_tuners->sampling_rate; ignore_nice = od_tuners->ignore_nice; - od_ops = dbs_data->gov_ops; + od_ops = dbs_data->cdata->gov_ops; + io_busy = od_tuners->io_is_busy; } switch (event) { @@ -232,96 +352,53 @@ int cpufreq_governor_dbs(struct dbs_data *dbs_data, for_each_cpu(j, policy->cpus) { struct cpu_dbs_common_info *j_cdbs = - dbs_data->get_cpu_cdbs(j); + dbs_data->cdata->get_cpu_cdbs(j); j_cdbs->cpu = j; j_cdbs->cur_policy = policy; j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, - &j_cdbs->prev_cpu_wall); + &j_cdbs->prev_cpu_wall, io_busy); if (ignore_nice) j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; mutex_init(&j_cdbs->timer_mutex); INIT_DEFERRABLE_WORK(&j_cdbs->work, - dbs_data->gov_dbs_timer); - } - - if (!policy->governor->initialized) { - rc = sysfs_create_group(cpufreq_global_kobject, - dbs_data->attr_group); - if (rc) { - mutex_unlock(&dbs_data->mutex); - return rc; - } + dbs_data->cdata->gov_dbs_timer); } /* * conservative does not implement micro like ondemand * governor, thus we are bound to jiffes/HZ */ - if (dbs_data->governor == GOV_CONSERVATIVE) { + if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { cs_dbs_info->down_skip = 0; cs_dbs_info->enable = 1; cs_dbs_info->requested_freq = policy->cur; - - if (!policy->governor->initialized) { - cpufreq_register_notifier(cs_ops->notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - - dbs_data->min_sampling_rate = - MIN_SAMPLING_RATE_RATIO * - jiffies_to_usecs(10); - } } else { od_dbs_info->rate_mult = 1; od_dbs_info->sample_type = OD_NORMAL_SAMPLE; od_ops->powersave_bias_init_cpu(cpu); - - if (!policy->governor->initialized) - od_tuners->io_is_busy = od_ops->io_busy(); } - if (policy->governor->initialized) - goto unlock; - - /* policy latency is in nS. Convert it to uS first */ - latency = policy->cpuinfo.transition_latency / 1000; - if (latency == 0) - latency = 1; - - /* Bring kernel and HW constraints together */ - dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate, - MIN_LATENCY_MULTIPLIER * latency); - *sampling_rate = max(dbs_data->min_sampling_rate, latency * - LATENCY_MULTIPLIER); -unlock: mutex_unlock(&dbs_data->mutex); /* Initiate timer time stamp */ cpu_cdbs->time_stamp = ktime_get(); - for_each_cpu(j, policy->cpus) - dbs_timer_init(dbs_data, j, *sampling_rate); + gov_queue_work(dbs_data, policy, + delay_for_sampling_rate(sampling_rate), true); break; case CPUFREQ_GOV_STOP: - if (dbs_data->governor == GOV_CONSERVATIVE) + if (dbs_data->cdata->governor == GOV_CONSERVATIVE) cs_dbs_info->enable = 0; - for_each_cpu(j, policy->cpus) - dbs_timer_exit(dbs_data, j); + gov_cancel_work(dbs_data, policy); mutex_lock(&dbs_data->mutex); mutex_destroy(&cpu_cdbs->timer_mutex); - if (policy->governor->initialized == 1) { - sysfs_remove_group(cpufreq_global_kobject, - dbs_data->attr_group); - if (dbs_data->governor == GOV_CONSERVATIVE) - cpufreq_unregister_notifier(cs_ops->notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - } mutex_unlock(&dbs_data->mutex); break; |