From 7a6aedfac98c6d54ecf97ca1ffb1e6a1f3d26aea Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 25 Mar 2008 15:06:53 -0700 Subject: [CPUFREQ] change cpu freq arrays to per_cpu variables Change cpufreq_policy and cpufreq_governor pointer tables from arrays to per_cpu variables in the cpufreq subsystem. Also some minor complaints from checkpatch.pl fixed. Based on: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86.git Signed-off-by: Mike Travis Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 45 ++++++++++++++++++++++------------------- drivers/cpufreq/cpufreq_stats.c | 24 +++++++++++----------- drivers/cpufreq/freq_table.c | 12 +++++------ 3 files changed, 42 insertions(+), 39 deletions(-) (limited to 'drivers/cpufreq') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 7fce038fa57..6b73d163fa8 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -38,10 +38,10 @@ * also protects the cpufreq_cpu_data array. */ static struct cpufreq_driver *cpufreq_driver; -static struct cpufreq_policy *cpufreq_cpu_data[NR_CPUS]; +static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data); #ifdef CONFIG_HOTPLUG_CPU /* This one keeps track of the previously set governor of a removed CPU */ -static struct cpufreq_governor *cpufreq_cpu_governor[NR_CPUS]; +static DEFINE_PER_CPU(struct cpufreq_governor *, cpufreq_cpu_governor); #endif static DEFINE_SPINLOCK(cpufreq_driver_lock); @@ -135,7 +135,7 @@ struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) struct cpufreq_policy *data; unsigned long flags; - if (cpu >= NR_CPUS) + if (cpu >= nr_cpu_ids) goto err_out; /* get the cpufreq driver */ @@ -149,7 +149,7 @@ struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) /* get the CPU */ - data = cpufreq_cpu_data[cpu]; + data = per_cpu(cpufreq_cpu_data, cpu); if (!data) goto err_out_put_module; @@ -327,7 +327,7 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state) dprintk("notification %u of frequency transition to %u kHz\n", state, freqs->new); - policy = cpufreq_cpu_data[freqs->cpu]; + policy = per_cpu(cpufreq_cpu_data, freqs->cpu); switch (state) { case CPUFREQ_PRECHANGE: @@ -828,8 +828,8 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) #ifdef CONFIG_SMP #ifdef CONFIG_HOTPLUG_CPU - if (cpufreq_cpu_governor[cpu]){ - policy->governor = cpufreq_cpu_governor[cpu]; + if (per_cpu(cpufreq_cpu_governor, cpu)) { + policy->governor = per_cpu(cpufreq_cpu_governor, cpu); dprintk("Restoring governor %s for cpu %d\n", policy->governor->name, cpu); } @@ -854,7 +854,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) spin_lock_irqsave(&cpufreq_driver_lock, flags); managed_policy->cpus = policy->cpus; - cpufreq_cpu_data[cpu] = managed_policy; + per_cpu(cpufreq_cpu_data, cpu) = managed_policy; spin_unlock_irqrestore(&cpufreq_driver_lock, flags); dprintk("CPU already managed, adding link\n"); @@ -899,7 +899,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) spin_lock_irqsave(&cpufreq_driver_lock, flags); for_each_cpu_mask(j, policy->cpus) { - cpufreq_cpu_data[j] = policy; + per_cpu(cpufreq_cpu_data, j) = policy; per_cpu(policy_cpu, j) = policy->cpu; } spin_unlock_irqrestore(&cpufreq_driver_lock, flags); @@ -946,7 +946,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) err_out_unregister: spin_lock_irqsave(&cpufreq_driver_lock, flags); for_each_cpu_mask(j, policy->cpus) - cpufreq_cpu_data[j] = NULL; + per_cpu(cpufreq_cpu_data, j) = NULL; spin_unlock_irqrestore(&cpufreq_driver_lock, flags); kobject_put(&policy->kobj); @@ -989,7 +989,7 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) dprintk("unregistering CPU %u\n", cpu); spin_lock_irqsave(&cpufreq_driver_lock, flags); - data = cpufreq_cpu_data[cpu]; + data = per_cpu(cpufreq_cpu_data, cpu); if (!data) { spin_unlock_irqrestore(&cpufreq_driver_lock, flags); @@ -997,7 +997,7 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) unlock_policy_rwsem_write(cpu); return -EINVAL; } - cpufreq_cpu_data[cpu] = NULL; + per_cpu(cpufreq_cpu_data, cpu) = NULL; #ifdef CONFIG_SMP @@ -1019,19 +1019,19 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) #ifdef CONFIG_SMP #ifdef CONFIG_HOTPLUG_CPU - cpufreq_cpu_governor[cpu] = data->governor; + per_cpu(cpufreq_cpu_governor, cpu) = data->governor; #endif /* if we have other CPUs still registered, we need to unlink them, * or else wait_for_completion below will lock up. Clean the - * cpufreq_cpu_data[] while holding the lock, and remove the sysfs - * links afterwards. + * per_cpu(cpufreq_cpu_data) while holding the lock, and remove + * the sysfs links afterwards. */ if (unlikely(cpus_weight(data->cpus) > 1)) { for_each_cpu_mask(j, data->cpus) { if (j == cpu) continue; - cpufreq_cpu_data[j] = NULL; + per_cpu(cpufreq_cpu_data, j) = NULL; } } @@ -1043,7 +1043,7 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) continue; dprintk("removing link for cpu %u\n", j); #ifdef CONFIG_HOTPLUG_CPU - cpufreq_cpu_governor[j] = data->governor; + per_cpu(cpufreq_cpu_governor, j) = data->governor; #endif cpu_sys_dev = get_cpu_sysdev(j); sysfs_remove_link(&cpu_sys_dev->kobj, "cpufreq"); @@ -1153,7 +1153,7 @@ EXPORT_SYMBOL(cpufreq_quick_get); static unsigned int __cpufreq_get(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_data[cpu]; + struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); unsigned int ret_freq = 0; if (!cpufreq_driver->get) @@ -1822,16 +1822,19 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) cpufreq_driver = driver_data; spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - ret = sysdev_driver_register(&cpu_sysdev_class,&cpufreq_sysdev_driver); + ret = sysdev_driver_register(&cpu_sysdev_class, + &cpufreq_sysdev_driver); if ((!ret) && !(cpufreq_driver->flags & CPUFREQ_STICKY)) { int i; ret = -ENODEV; /* check for at least one working CPU */ - for (i=0; iinit() calls failed, unregister */ if (ret) { diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index ae70d63a8b2..c0ff97d375d 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -43,7 +43,7 @@ struct cpufreq_stats { #endif }; -static struct cpufreq_stats *cpufreq_stats_table[NR_CPUS]; +static DEFINE_PER_CPU(struct cpufreq_stats *, cpufreq_stats_table); struct cpufreq_stats_attribute { struct attribute attr; @@ -58,7 +58,7 @@ cpufreq_stats_update (unsigned int cpu) cur_time = get_jiffies_64(); spin_lock(&cpufreq_stats_lock); - stat = cpufreq_stats_table[cpu]; + stat = per_cpu(cpufreq_stats_table, cpu); if (stat->time_in_state) stat->time_in_state[stat->last_index] = cputime64_add(stat->time_in_state[stat->last_index], @@ -71,11 +71,11 @@ cpufreq_stats_update (unsigned int cpu) static ssize_t show_total_trans(struct cpufreq_policy *policy, char *buf) { - struct cpufreq_stats *stat = cpufreq_stats_table[policy->cpu]; + struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, policy->cpu); if (!stat) return 0; return sprintf(buf, "%d\n", - cpufreq_stats_table[stat->cpu]->total_trans); + per_cpu(cpufreq_stats_table, stat->cpu)->total_trans); } static ssize_t @@ -83,7 +83,7 @@ show_time_in_state(struct cpufreq_policy *policy, char *buf) { ssize_t len = 0; int i; - struct cpufreq_stats *stat = cpufreq_stats_table[policy->cpu]; + struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, policy->cpu); if (!stat) return 0; cpufreq_stats_update(stat->cpu); @@ -101,7 +101,7 @@ show_trans_table(struct cpufreq_policy *policy, char *buf) ssize_t len = 0; int i, j; - struct cpufreq_stats *stat = cpufreq_stats_table[policy->cpu]; + struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, policy->cpu); if (!stat) return 0; cpufreq_stats_update(stat->cpu); @@ -170,7 +170,7 @@ freq_table_get_index(struct cpufreq_stats *stat, unsigned int freq) static void cpufreq_stats_free_table(unsigned int cpu) { - struct cpufreq_stats *stat = cpufreq_stats_table[cpu]; + struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, cpu); struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); if (policy && policy->cpu == cpu) sysfs_remove_group(&policy->kobj, &stats_attr_group); @@ -178,7 +178,7 @@ static void cpufreq_stats_free_table(unsigned int cpu) kfree(stat->time_in_state); kfree(stat); } - cpufreq_stats_table[cpu] = NULL; + per_cpu(cpufreq_stats_table, cpu) = NULL; if (policy) cpufreq_cpu_put(policy); } @@ -192,7 +192,7 @@ cpufreq_stats_create_table (struct cpufreq_policy *policy, struct cpufreq_policy *data; unsigned int alloc_size; unsigned int cpu = policy->cpu; - if (cpufreq_stats_table[cpu]) + if (per_cpu(cpufreq_stats_table, cpu)) return -EBUSY; if ((stat = kzalloc(sizeof(struct cpufreq_stats), GFP_KERNEL)) == NULL) return -ENOMEM; @@ -207,7 +207,7 @@ cpufreq_stats_create_table (struct cpufreq_policy *policy, goto error_out; stat->cpu = cpu; - cpufreq_stats_table[cpu] = stat; + per_cpu(cpufreq_stats_table, cpu) = stat; for (i=0; table[i].frequency != CPUFREQ_TABLE_END; i++) { unsigned int freq = table[i].frequency; @@ -251,7 +251,7 @@ error_out: cpufreq_cpu_put(data); error_get_fail: kfree(stat); - cpufreq_stats_table[cpu] = NULL; + per_cpu(cpufreq_stats_table, cpu) = NULL; return ret; } @@ -284,7 +284,7 @@ cpufreq_stat_notifier_trans (struct notifier_block *nb, unsigned long val, if (val != CPUFREQ_POSTCHANGE) return 0; - stat = cpufreq_stats_table[freq->cpu]; + stat = per_cpu(cpufreq_stats_table, freq->cpu); if (!stat) return 0; diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index ae6cd60d5c1..fe485c9515d 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -169,7 +169,7 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy, } EXPORT_SYMBOL_GPL(cpufreq_frequency_table_target); -static struct cpufreq_frequency_table *show_table[NR_CPUS]; +static DEFINE_PER_CPU(struct cpufreq_frequency_table *, show_table); /** * show_available_freqs - show available frequencies for the specified CPU */ @@ -180,10 +180,10 @@ static ssize_t show_available_freqs (struct cpufreq_policy *policy, char *buf) ssize_t count = 0; struct cpufreq_frequency_table *table; - if (!show_table[cpu]) + if (!per_cpu(show_table, cpu)) return -ENODEV; - table = show_table[cpu]; + table = per_cpu(show_table, cpu); for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) { if (table[i].frequency == CPUFREQ_ENTRY_INVALID) @@ -212,20 +212,20 @@ void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table, unsigned int cpu) { dprintk("setting show_table for cpu %u to %p\n", cpu, table); - show_table[cpu] = table; + per_cpu(show_table, cpu) = table; } EXPORT_SYMBOL_GPL(cpufreq_frequency_table_get_attr); void cpufreq_frequency_table_put_attr(unsigned int cpu) { dprintk("clearing show_table for cpu %u\n", cpu); - show_table[cpu] = NULL; + per_cpu(show_table, cpu) = NULL; } EXPORT_SYMBOL_GPL(cpufreq_frequency_table_put_attr); struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu) { - return show_table[cpu]; + return per_cpu(show_table, cpu); } EXPORT_SYMBOL_GPL(cpufreq_frequency_get_table); -- cgit v1.2.3-70-g09d2 From 068b12772a64c2440ef2f64ac5d780688c06576f Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 12 May 2008 21:21:13 +0200 Subject: cpufreq: use performance variant for_each_cpu_mask_nr Change references from for_each_cpu_mask to for_each_cpu_mask_nr where appropriate Reviewed-by: Paul Jackson Reviewed-by: Christoph Lameter Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- drivers/cpufreq/cpufreq.c | 14 +++++++------- drivers/cpufreq/cpufreq_conservative.c | 2 +- drivers/cpufreq/cpufreq_ondemand.c | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/cpufreq') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 7fce038fa57..4937f2f97d8 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -589,7 +589,7 @@ static ssize_t show_cpus(cpumask_t mask, char *buf) ssize_t i = 0; unsigned int cpu; - for_each_cpu_mask(cpu, mask) { + for_each_cpu_mask_nr(cpu, mask) { if (i) i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " "); i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu); @@ -835,7 +835,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) } #endif - for_each_cpu_mask(j, policy->cpus) { + for_each_cpu_mask_nr(j, policy->cpus) { if (cpu == j) continue; @@ -898,14 +898,14 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) } spin_lock_irqsave(&cpufreq_driver_lock, flags); - for_each_cpu_mask(j, policy->cpus) { + for_each_cpu_mask_nr(j, policy->cpus) { cpufreq_cpu_data[j] = policy; per_cpu(policy_cpu, j) = policy->cpu; } spin_unlock_irqrestore(&cpufreq_driver_lock, flags); /* symlink affected CPUs */ - for_each_cpu_mask(j, policy->cpus) { + for_each_cpu_mask_nr(j, policy->cpus) { if (j == cpu) continue; if (!cpu_online(j)) @@ -945,7 +945,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) err_out_unregister: spin_lock_irqsave(&cpufreq_driver_lock, flags); - for_each_cpu_mask(j, policy->cpus) + for_each_cpu_mask_nr(j, policy->cpus) cpufreq_cpu_data[j] = NULL; spin_unlock_irqrestore(&cpufreq_driver_lock, flags); @@ -1028,7 +1028,7 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) * links afterwards. */ if (unlikely(cpus_weight(data->cpus) > 1)) { - for_each_cpu_mask(j, data->cpus) { + for_each_cpu_mask_nr(j, data->cpus) { if (j == cpu) continue; cpufreq_cpu_data[j] = NULL; @@ -1038,7 +1038,7 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) spin_unlock_irqrestore(&cpufreq_driver_lock, flags); if (unlikely(cpus_weight(data->cpus) > 1)) { - for_each_cpu_mask(j, data->cpus) { + for_each_cpu_mask_nr(j, data->cpus) { if (j == cpu) continue; dprintk("removing link for cpu %u\n", j); diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 5d3a04ba6ad..fe565ee4375 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -497,7 +497,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, return rc; } - for_each_cpu_mask(j, policy->cpus) { + for_each_cpu_mask_nr(j, policy->cpus) { struct cpu_dbs_info_s *j_dbs_info; j_dbs_info = &per_cpu(cpu_dbs_info, j); j_dbs_info->cur_policy = policy; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index d2af20dda38..33855cb3cf1 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -367,7 +367,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) /* Get Idle Time */ idle_ticks = UINT_MAX; - for_each_cpu_mask(j, policy->cpus) { + for_each_cpu_mask_nr(j, policy->cpus) { cputime64_t total_idle_ticks; unsigned int tmp_idle_ticks; struct cpu_dbs_info_s *j_dbs_info; @@ -521,7 +521,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, return rc; } - for_each_cpu_mask(j, policy->cpus) { + for_each_cpu_mask_nr(j, policy->cpus) { struct cpu_dbs_info_s *j_dbs_info; j_dbs_info = &per_cpu(cpu_dbs_info, j); j_dbs_info->cur_policy = policy; -- cgit v1.2.3-70-g09d2 From b38868aabeeb9c0c76a41ac5fa98c24bf0096f2a Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 18 Jul 2008 18:11:32 -0700 Subject: NR_CPUS: Replace NR_CPUS in cpufreq userspace routines * Replace arrays sized by NR_CPUS with percpu variables. Prior reference: http://marc.info/?l=linux-kernel&m=120251421825989&w=4 Subject: [PATCH 1/4] cpufreq: change cpu freq tables to per_cpu variables From: Mike Travis Date: 2008-02-08 23:37:39 Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- drivers/cpufreq/cpufreq_userspace.c | 79 ++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 36 deletions(-) (limited to 'drivers/cpufreq') diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c index cb2ac01a41a..32244aa7cc0 100644 --- a/drivers/cpufreq/cpufreq_userspace.c +++ b/drivers/cpufreq/cpufreq_userspace.c @@ -30,16 +30,18 @@ /** * A few values needed by the userspace governor */ -static unsigned int cpu_max_freq[NR_CPUS]; -static unsigned int cpu_min_freq[NR_CPUS]; -static unsigned int cpu_cur_freq[NR_CPUS]; /* current CPU freq */ -static unsigned int cpu_set_freq[NR_CPUS]; /* CPU freq desired by userspace */ -static unsigned int cpu_is_managed[NR_CPUS]; +static DEFINE_PER_CPU(unsigned int, cpu_max_freq); +static DEFINE_PER_CPU(unsigned int, cpu_min_freq); +static DEFINE_PER_CPU(unsigned int, cpu_cur_freq); /* current CPU freq */ +static DEFINE_PER_CPU(unsigned int, cpu_set_freq); /* CPU freq desired by + userspace */ +static DEFINE_PER_CPU(unsigned int, cpu_is_managed); static DEFINE_MUTEX (userspace_mutex); static int cpus_using_userspace_governor; -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_GOVERNOR, "userspace", msg) +#define dprintk(msg...) \ + cpufreq_debug_printk(CPUFREQ_DEBUG_GOVERNOR, "userspace", msg) /* keep track of frequency transitions */ static int @@ -48,12 +50,12 @@ userspace_cpufreq_notifier(struct notifier_block *nb, unsigned long val, { struct cpufreq_freqs *freq = data; - if (!cpu_is_managed[freq->cpu]) + if (!per_cpu(cpu_is_managed, freq->cpu)) return 0; dprintk("saving cpu_cur_freq of cpu %u to be %u kHz\n", freq->cpu, freq->new); - cpu_cur_freq[freq->cpu] = freq->new; + per_cpu(cpu_cur_freq, freq->cpu) = freq->new; return 0; } @@ -77,15 +79,15 @@ static int cpufreq_set(struct cpufreq_policy *policy, unsigned int freq) dprintk("cpufreq_set for cpu %u, freq %u kHz\n", policy->cpu, freq); mutex_lock(&userspace_mutex); - if (!cpu_is_managed[policy->cpu]) + if (!per_cpu(cpu_is_managed, policy->cpu)) goto err; - cpu_set_freq[policy->cpu] = freq; + per_cpu(cpu_set_freq, policy->cpu) = freq; - if (freq < cpu_min_freq[policy->cpu]) - freq = cpu_min_freq[policy->cpu]; - if (freq > cpu_max_freq[policy->cpu]) - freq = cpu_max_freq[policy->cpu]; + if (freq < per_cpu(cpu_min_freq, policy->cpu)) + freq = per_cpu(cpu_min_freq, policy->cpu); + if (freq > per_cpu(cpu_max_freq, policy->cpu)) + freq = per_cpu(cpu_max_freq, policy->cpu); /* * We're safe from concurrent calls to ->target() here @@ -104,7 +106,7 @@ static int cpufreq_set(struct cpufreq_policy *policy, unsigned int freq) static ssize_t show_speed(struct cpufreq_policy *policy, char *buf) { - return sprintf(buf, "%u\n", cpu_cur_freq[policy->cpu]); + return sprintf(buf, "%u\n", per_cpu(cpu_cur_freq, policy->cpu)); } static int cpufreq_governor_userspace(struct cpufreq_policy *policy, @@ -127,12 +129,17 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy, } cpus_using_userspace_governor++; - cpu_is_managed[cpu] = 1; - cpu_min_freq[cpu] = policy->min; - cpu_max_freq[cpu] = policy->max; - cpu_cur_freq[cpu] = policy->cur; - cpu_set_freq[cpu] = policy->cur; - dprintk("managing cpu %u started (%u - %u kHz, currently %u kHz)\n", cpu, cpu_min_freq[cpu], cpu_max_freq[cpu], cpu_cur_freq[cpu]); + per_cpu(cpu_is_managed, cpu) = 1; + per_cpu(cpu_min_freq, cpu) = policy->min; + per_cpu(cpu_max_freq, cpu) = policy->max; + per_cpu(cpu_cur_freq, cpu) = policy->cur; + per_cpu(cpu_set_freq, cpu) = policy->cur; + dprintk("managing cpu %u started " + "(%u - %u kHz, currently %u kHz)\n", + cpu, + per_cpu(cpu_min_freq, cpu), + per_cpu(cpu_max_freq, cpu), + per_cpu(cpu_cur_freq, cpu)); mutex_unlock(&userspace_mutex); break; @@ -145,34 +152,34 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy, CPUFREQ_TRANSITION_NOTIFIER); } - cpu_is_managed[cpu] = 0; - cpu_min_freq[cpu] = 0; - cpu_max_freq[cpu] = 0; - cpu_set_freq[cpu] = 0; + per_cpu(cpu_is_managed, cpu) = 0; + per_cpu(cpu_min_freq, cpu) = 0; + per_cpu(cpu_max_freq, cpu) = 0; + per_cpu(cpu_set_freq, cpu) = 0; dprintk("managing cpu %u stopped\n", cpu); mutex_unlock(&userspace_mutex); break; case CPUFREQ_GOV_LIMITS: mutex_lock(&userspace_mutex); - dprintk("limit event for cpu %u: %u - %u kHz," + dprintk("limit event for cpu %u: %u - %u kHz, " "currently %u kHz, last set to %u kHz\n", cpu, policy->min, policy->max, - cpu_cur_freq[cpu], cpu_set_freq[cpu]); - if (policy->max < cpu_set_freq[cpu]) { + per_cpu(cpu_cur_freq, cpu), + per_cpu(cpu_set_freq, cpu)); + if (policy->max < per_cpu(cpu_set_freq, cpu)) { __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); - } - else if (policy->min > cpu_set_freq[cpu]) { + } else if (policy->min > per_cpu(cpu_set_freq, cpu)) { __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); - } - else { - __cpufreq_driver_target(policy, cpu_set_freq[cpu], + } else { + __cpufreq_driver_target(policy, + per_cpu(cpu_set_freq, cpu), CPUFREQ_RELATION_L); } - cpu_min_freq[cpu] = policy->min; - cpu_max_freq[cpu] = policy->max; - cpu_cur_freq[cpu] = policy->cur; + per_cpu(cpu_min_freq, cpu) = policy->min; + per_cpu(cpu_max_freq, cpu) = policy->max; + per_cpu(cpu_cur_freq, cpu) = policy->cur; mutex_unlock(&userspace_mutex); break; } -- cgit v1.2.3-70-g09d2 From a1531acd43310a7e4571d52e8846640667f4c74b Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Tue, 29 Jul 2008 22:32:58 -0700 Subject: cpufreq acpi: only call _PPC after cpufreq ACPI init funcs got called already Ingo Molnar provided a fix to not call _PPC at processor driver initialization time in "[PATCH] ACPI: fix cpufreq regression" (git commit e4233dec749a3519069d9390561b5636a75c7579) But it can still happen that _PPC is called at processor driver initialization time. This patch should make sure that this is not possible anymore. Signed-off-by: Thomas Renninger Cc: Andi Kleen Cc: Len Brown Cc: Dave Jones Cc: Ingo Molnar Cc: Venkatesh Pallipadi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c | 6 ++++++ drivers/acpi/processor_perflib.c | 15 +++++++++++++-- drivers/cpufreq/cpufreq.c | 3 +++ include/linux/cpufreq.h | 1 + 4 files changed, 23 insertions(+), 2 deletions(-) (limited to 'drivers/cpufreq') diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c b/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c index 69288f65314..3233fe84d15 100644 --- a/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c +++ b/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c @@ -96,6 +96,12 @@ static int pmi_notifier(struct notifier_block *nb, struct cpufreq_frequency_table *cbe_freqs; u8 node; + /* Should this really be called for CPUFREQ_ADJUST, CPUFREQ_INCOMPATIBLE + * and CPUFREQ_NOTIFY policy events?) + */ + if (event == CPUFREQ_START) + return 0; + cbe_freqs = cpufreq_frequency_get_table(policy->cpu); node = cbe_cpu_to_node(policy->cpu); diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index b4749969c6b..e98071a6481 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -64,7 +64,13 @@ static DEFINE_MUTEX(performance_mutex); * policy is adjusted accordingly. */ -static unsigned int ignore_ppc = 0; +/* ignore_ppc: + * -1 -> cpufreq low level drivers not initialized -> _PSS, etc. not called yet + * ignore _PPC + * 0 -> cpufreq low level drivers initialized -> consider _PPC values + * 1 -> ignore _PPC totally -> forced by user through boot param + */ +static unsigned int ignore_ppc = -1; module_param(ignore_ppc, uint, 0644); MODULE_PARM_DESC(ignore_ppc, "If the frequency of your machine gets wrongly" \ "limited by BIOS, this should help"); @@ -72,7 +78,7 @@ MODULE_PARM_DESC(ignore_ppc, "If the frequency of your machine gets wrongly" \ #define PPC_REGISTERED 1 #define PPC_IN_USE 2 -static int acpi_processor_ppc_status = 0; +static int acpi_processor_ppc_status; static int acpi_processor_ppc_notifier(struct notifier_block *nb, unsigned long event, void *data) @@ -81,6 +87,11 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb, struct acpi_processor *pr; unsigned int ppc = 0; + if (event == CPUFREQ_START && ignore_ppc <= 0) { + ignore_ppc = 0; + return 0; + } + if (ignore_ppc) return 0; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 8d6a3ff0267..8a67f16987d 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -825,6 +825,9 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) policy->user_policy.min = policy->cpuinfo.min_freq; policy->user_policy.max = policy->cpuinfo.max_freq; + blocking_notifier_call_chain(&cpufreq_policy_notifier_list, + CPUFREQ_START, policy); + #ifdef CONFIG_SMP #ifdef CONFIG_HOTPLUG_CPU diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 2270ca5ec63..6fd5668aa57 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -106,6 +106,7 @@ struct cpufreq_policy { #define CPUFREQ_ADJUST (0) #define CPUFREQ_INCOMPATIBLE (1) #define CPUFREQ_NOTIFY (2) +#define CPUFREQ_START (3) #define CPUFREQ_SHARED_TYPE_NONE (0) /* None */ #define CPUFREQ_SHARED_TYPE_HW (1) /* HW does needed coordination */ -- cgit v1.2.3-70-g09d2 From f068c04ba6f308774fdd2ed5e113da7cf4ff2f2b Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 30 Jul 2008 12:59:56 -0400 Subject: [CPUFREQ] Fix -Wshadow warning in conservative governor. drivers/cpufreq/cpufreq_conservative.c:336:15: warning: symbol 'freq_step' shadows an earlier one Just rename the local variable. Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_conservative.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/cpufreq') diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index fe565ee4375..ac0bbf2d234 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -333,7 +333,7 @@ static void dbs_check_cpu(int cpu) { unsigned int idle_ticks, up_idle_ticks, down_idle_ticks; unsigned int tmp_idle_ticks, total_idle_ticks; - unsigned int freq_step; + unsigned int freq_target; unsigned int freq_down_sampling_rate; struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cpu_dbs_info, cpu); struct cpufreq_policy *policy; @@ -383,13 +383,13 @@ static void dbs_check_cpu(int cpu) if (this_dbs_info->requested_freq == policy->max) return; - freq_step = (dbs_tuners_ins.freq_step * policy->max) / 100; + freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100; /* max freq cannot be less than 100. But who knows.... */ - if (unlikely(freq_step == 0)) - freq_step = 5; + if (unlikely(freq_target == 0)) + freq_target = 5; - this_dbs_info->requested_freq += freq_step; + this_dbs_info->requested_freq += freq_target; if (this_dbs_info->requested_freq > policy->max) this_dbs_info->requested_freq = policy->max; @@ -425,19 +425,19 @@ static void dbs_check_cpu(int cpu) /* * if we are already at the lowest speed then break out early * or if we 'cannot' reduce the speed as the user might want - * freq_step to be zero + * freq_target to be zero */ if (this_dbs_info->requested_freq == policy->min || dbs_tuners_ins.freq_step == 0) return; - freq_step = (dbs_tuners_ins.freq_step * policy->max) / 100; + freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100; /* max freq cannot be less than 100. But who knows.... */ - if (unlikely(freq_step == 0)) - freq_step = 5; + if (unlikely(freq_target == 0)) + freq_target = 5; - this_dbs_info->requested_freq -= freq_step; + this_dbs_info->requested_freq -= freq_target; if (this_dbs_info->requested_freq < policy->min) this_dbs_info->requested_freq = policy->min; -- cgit v1.2.3-70-g09d2 From 888a794cacd8950ac6be701db20b62a4ab2ce90c Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 14 Jul 2008 12:00:45 +0900 Subject: [CPUFREQ] add error handling for cpufreq_register_governor() error Add error handling for cpufreq_register_governor() error Signed-off-by: Akinobu Mita Cc: cpufreq@lists.linux.org.uk Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_ondemand.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/cpufreq') diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 33855cb3cf1..67c9d4f9edc 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -589,12 +589,18 @@ EXPORT_SYMBOL(cpufreq_gov_ondemand); static int __init cpufreq_gov_dbs_init(void) { + int err; + kondemand_wq = create_workqueue("kondemand"); if (!kondemand_wq) { printk(KERN_ERR "Creation of kondemand failed\n"); return -EFAULT; } - return cpufreq_register_governor(&cpufreq_gov_ondemand); + err = cpufreq_register_governor(&cpufreq_gov_ondemand); + if (err) + destroy_workqueue(kondemand_wq); + + return err; } static void __exit cpufreq_gov_dbs_exit(void) -- cgit v1.2.3-70-g09d2 From f1829e4a371f26430185a9d5b97b8d9d19824e08 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 25 Jul 2008 22:44:53 +0200 Subject: [CPUFREQ] drivers/cpufreq/cpufreq.c: Adjust error handling code involving cpufreq_cpu_put After calling cpufreq_cpu_get, error handling code should call cpufreq_cpu_put. The semantic match that finds this problem is as follows: (http://www.emn.fr/x-info/coccinelle/) // @r@ expression x,E; statement S; position p1,p2,p3; @@ ( if ((x = cpufreq_cpu_get@p1(...)) == NULL || ...) S | x = cpufreq_cpu_get@p1(...) ... when != x if (x == NULL || ...) S ) <... if@p3 (...) { ... when != cpufreq_cpu_put(x) when != if (x) { ... cpufreq_cpu_put(x); ...} return@p2 ...; } ...> ( return x; | return 0; | x = E | E = x | cpufreq_cpu_put(x) ) @exists@ position r.p1,r.p2,r.p3; expression x; int ret != 0; statement S; @@ * x = cpufreq_cpu_get@p1(...) <... * if@p3 (...) S ...> * return@p2 \(NULL\|ret\); // Signed-off-by: Julia Lawall Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'drivers/cpufreq') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 8a67f16987d..9bbdc258624 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1467,20 +1467,22 @@ int cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { - int ret; + int ret = -EINVAL; policy = cpufreq_cpu_get(policy->cpu); if (!policy) - return -EINVAL; + goto no_policy; if (unlikely(lock_policy_rwsem_write(policy->cpu))) - return -EINVAL; + goto fail; ret = __cpufreq_driver_target(policy, target_freq, relation); unlock_policy_rwsem_write(policy->cpu); +fail: cpufreq_cpu_put(policy); +no_policy: return ret; } EXPORT_SYMBOL_GPL(cpufreq_driver_target); @@ -1717,13 +1719,17 @@ int cpufreq_update_policy(unsigned int cpu) { struct cpufreq_policy *data = cpufreq_cpu_get(cpu); struct cpufreq_policy policy; - int ret = 0; + int ret; - if (!data) - return -ENODEV; + if (!data) { + ret = -ENODEV; + goto no_policy; + } - if (unlikely(lock_policy_rwsem_write(cpu))) - return -EINVAL; + if (unlikely(lock_policy_rwsem_write(cpu))) { + ret = -EINVAL; + goto fail; + } dprintk("updating policy for CPU %u\n", cpu); memcpy(&policy, data, sizeof(struct cpufreq_policy)); @@ -1750,7 +1756,9 @@ int cpufreq_update_policy(unsigned int cpu) unlock_policy_rwsem_write(cpu); +fail: cpufreq_cpu_put(data); +no_policy: return ret; } EXPORT_SYMBOL(cpufreq_update_policy); -- cgit v1.2.3-70-g09d2 From 8217e4f4c93e5fb59bb3cd1e6135213889349f86 Mon Sep 17 00:00:00 2001 From: Ben Slusky Date: Mon, 7 Jul 2008 13:16:20 -0400 Subject: [CPUFREQ] use deferrable delayed work init in conservative governor Venki Pallipadi made a similar change to the ondemand governor a while back (in commit 28287033e12463c8ff89f1ea8038783d0360391c). It seems to work just as well in the conservative governor, leading to fewer wakeups as reported by powertop. Signed-off-by: Ben Slusky Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_conservative.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/cpufreq') diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index ac0bbf2d234..4ee53a4c6d2 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -460,6 +460,7 @@ static void do_dbs_timer(struct work_struct *work) static inline void dbs_timer_init(void) { + init_timer_deferrable(&dbs_work.timer); schedule_delayed_work(&dbs_work, usecs_to_jiffies(dbs_tuners_ins.sampling_rate)); return; -- cgit v1.2.3-70-g09d2 From bf0b90e357c883e8efd72954432efe652de74c76 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Mon, 4 Aug 2008 11:59:07 -0700 Subject: [CPUFREQ][1/6] cpufreq: Add cpu number parameter to __cpufreq_driver_getavg() Add a cpu parameter to __cpufreq_driver_getavg(). This is needed for software cpufreq coordination where policy->cpu may not be same as the CPU on which we want to getavg frequency. A follow-on patch will use this parameter to getavg freq from all cpus in policy->cpus. Change since last patch. Fix the offline/online and suspend/resume oops reported by Youquan Song Signed-off-by: Venkatesh Pallipadi Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 5 +++-- drivers/cpufreq/cpufreq.c | 6 +++--- drivers/cpufreq/cpufreq_ondemand.c | 2 +- include/linux/cpufreq.h | 7 +++++-- 4 files changed, 12 insertions(+), 8 deletions(-) (limited to 'drivers/cpufreq') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 1def5b06fa4..c24c4a487b7 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -256,7 +256,8 @@ static u32 get_cur_val(const cpumask_t *mask) * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and * no meaning should be associated with absolute values of these MSRs. */ -static unsigned int get_measured_perf(unsigned int cpu) +static unsigned int get_measured_perf(struct cpufreq_policy *policy, + unsigned int cpu) { union { struct { @@ -326,7 +327,7 @@ static unsigned int get_measured_perf(unsigned int cpu) #endif - retval = per_cpu(drv_data, cpu)->max_freq * perf_percent / 100; + retval = per_cpu(drv_data, policy->cpu)->max_freq * perf_percent / 100; put_cpu(); set_cpus_allowed_ptr(current, &saved_mask); diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 9bbdc258624..31d6f535a79 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1487,7 +1487,7 @@ no_policy: } EXPORT_SYMBOL_GPL(cpufreq_driver_target); -int __cpufreq_driver_getavg(struct cpufreq_policy *policy) +int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu) { int ret = 0; @@ -1495,8 +1495,8 @@ int __cpufreq_driver_getavg(struct cpufreq_policy *policy) if (!policy) return -EINVAL; - if (cpu_online(policy->cpu) && cpufreq_driver->getavg) - ret = cpufreq_driver->getavg(policy->cpu); + if (cpu_online(cpu) && cpufreq_driver->getavg) + ret = cpufreq_driver->getavg(policy, cpu); cpufreq_cpu_put(policy); return ret; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 67c9d4f9edc..f56debd9a8d 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -415,7 +415,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) if (load < (dbs_tuners_ins.up_threshold - 10)) { unsigned int freq_next, freq_cur; - freq_cur = __cpufreq_driver_getavg(policy); + freq_cur = __cpufreq_driver_getavg(policy, policy->cpu); if (!freq_cur) freq_cur = policy->cur; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 6fd5668aa57..1ee608fd7b7 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -187,7 +187,8 @@ extern int __cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int relation); -extern int __cpufreq_driver_getavg(struct cpufreq_policy *policy); +extern int __cpufreq_driver_getavg(struct cpufreq_policy *policy, + unsigned int cpu); int cpufreq_register_governor(struct cpufreq_governor *governor); void cpufreq_unregister_governor(struct cpufreq_governor *governor); @@ -226,7 +227,9 @@ struct cpufreq_driver { unsigned int (*get) (unsigned int cpu); /* optional */ - unsigned int (*getavg) (unsigned int cpu); + unsigned int (*getavg) (struct cpufreq_policy *policy, + unsigned int cpu); + int (*exit) (struct cpufreq_policy *policy); int (*suspend) (struct cpufreq_policy *policy, pm_message_t pmsg); int (*resume) (struct cpufreq_policy *policy); -- cgit v1.2.3-70-g09d2 From c43aa3bd99a67009a167430e80c5fde6f37288d8 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Mon, 4 Aug 2008 11:59:08 -0700 Subject: [CPUFREQ][2/6] cpufreq: Change load calculation in ondemand for software coordination Change the load calculation algorithm in ondemand to work well with software coordination of frequency across the dependent cpus. Multiply individual CPU utilization with the average freq of that logical CPU during the measurement interval (using getavg call). And find the max CPU utilization number in terms of CPU freq. That number is then used to get to the target freq for next sampling interval. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_ondemand.c | 65 ++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 30 deletions(-) (limited to 'drivers/cpufreq') diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index f56debd9a8d..42fcb146ba2 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -334,9 +334,7 @@ static struct attribute_group dbs_attr_group = { static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) { - unsigned int idle_ticks, total_ticks; - unsigned int load = 0; - cputime64_t cur_jiffies; + unsigned int max_load_freq; struct cpufreq_policy *policy; unsigned int j; @@ -346,13 +344,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) this_dbs_info->freq_lo = 0; policy = this_dbs_info->cur_policy; - cur_jiffies = jiffies64_to_cputime64(get_jiffies_64()); - total_ticks = (unsigned int) cputime64_sub(cur_jiffies, - this_dbs_info->prev_cpu_wall); - this_dbs_info->prev_cpu_wall = get_jiffies_64(); - if (!total_ticks) - return; /* * Every sampling_rate, we check, if current idle time is less * than 20% (default), then we try to increase frequency @@ -365,27 +357,46 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) * 5% (default) of current frequency */ - /* Get Idle Time */ - idle_ticks = UINT_MAX; + /* Get Absolute Load - in terms of freq */ + max_load_freq = 0; + for_each_cpu_mask_nr(j, policy->cpus) { - cputime64_t total_idle_ticks; - unsigned int tmp_idle_ticks; struct cpu_dbs_info_s *j_dbs_info; + cputime64_t cur_wall_time, cur_idle_time; + unsigned int idle_time, wall_time; + unsigned int load, load_freq; + int freq_avg; j_dbs_info = &per_cpu(cpu_dbs_info, j); - total_idle_ticks = get_cpu_idle_time(j); - tmp_idle_ticks = (unsigned int) cputime64_sub(total_idle_ticks, + cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); + wall_time = (unsigned int) cputime64_sub(cur_wall_time, + j_dbs_info->prev_cpu_wall); + j_dbs_info->prev_cpu_wall = cur_wall_time; + + cur_idle_time = get_cpu_idle_time(j); + idle_time = (unsigned int) cputime64_sub(cur_idle_time, j_dbs_info->prev_cpu_idle); - j_dbs_info->prev_cpu_idle = total_idle_ticks; + j_dbs_info->prev_cpu_idle = cur_idle_time; - if (tmp_idle_ticks < idle_ticks) - idle_ticks = tmp_idle_ticks; + if (unlikely(wall_time <= idle_time || + (cputime_to_msecs(wall_time) < + dbs_tuners_ins.sampling_rate / (2 * 1000)))) { + continue; + } + + load = 100 * (wall_time - idle_time) / wall_time; + + freq_avg = __cpufreq_driver_getavg(policy, j); + if (freq_avg <= 0) + freq_avg = policy->cur; + + load_freq = load * freq_avg; + if (load_freq > max_load_freq) + max_load_freq = load_freq; } - if (likely(total_ticks > idle_ticks)) - load = (100 * (total_ticks - idle_ticks)) / total_ticks; /* Check for frequency increase */ - if (load > dbs_tuners_ins.up_threshold) { + if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) { /* if we are already at full speed then break out early */ if (!dbs_tuners_ins.powersave_bias) { if (policy->cur == policy->max) @@ -412,15 +423,9 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) * can support the current CPU usage without triggering the up * policy. To be safe, we focus 10 points under the threshold. */ - if (load < (dbs_tuners_ins.up_threshold - 10)) { - unsigned int freq_next, freq_cur; - - freq_cur = __cpufreq_driver_getavg(policy, policy->cpu); - if (!freq_cur) - freq_cur = policy->cur; - - freq_next = (freq_cur * load) / - (dbs_tuners_ins.up_threshold - 10); + if (max_load_freq < (dbs_tuners_ins.up_threshold - 10) * policy->cur) { + unsigned int freq_next; + freq_next = max_load_freq / (dbs_tuners_ins.up_threshold - 10); if (!dbs_tuners_ins.powersave_bias) { __cpufreq_driver_target(policy, freq_next, -- cgit v1.2.3-70-g09d2 From 3430502d356284ff4f7782d75bb01a402fd3d45e Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Mon, 4 Aug 2008 11:59:09 -0700 Subject: [CPUFREQ][3/6] cpufreq: get_cpu_idle_time() changes in ondemand for idle-microaccounting Preparatory changes for doing idle micro-accounting in ondemand governor. get_cpu_idle_time() gets extra parameter and returns idle time and also the wall time that corresponds to the idle time measurement. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_ondemand.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'drivers/cpufreq') diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 42fcb146ba2..b935092aab2 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -94,13 +94,13 @@ static struct dbs_tuners { .powersave_bias = 0, }; -static inline cputime64_t get_cpu_idle_time(unsigned int cpu) +static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) { cputime64_t idle_time; - cputime64_t cur_jiffies; + cputime64_t cur_wall_time; cputime64_t busy_time; - cur_jiffies = jiffies64_to_cputime64(get_jiffies_64()); + cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user, kstat_cpu(cpu).cpustat.system); @@ -113,7 +113,10 @@ static inline cputime64_t get_cpu_idle_time(unsigned int cpu) kstat_cpu(cpu).cpustat.nice); } - idle_time = cputime64_sub(cur_jiffies, busy_time); + idle_time = cputime64_sub(cur_wall_time, busy_time); + if (wall) + *wall = cur_wall_time; + return idle_time; } @@ -277,8 +280,8 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy, for_each_online_cpu(j) { struct cpu_dbs_info_s *dbs_info; dbs_info = &per_cpu(cpu_dbs_info, j); - dbs_info->prev_cpu_idle = get_cpu_idle_time(j); - dbs_info->prev_cpu_wall = get_jiffies_64(); + dbs_info->prev_cpu_idle = get_cpu_idle_time(j, + &dbs_info->prev_cpu_wall); } mutex_unlock(&dbs_mutex); @@ -368,21 +371,19 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) int freq_avg; j_dbs_info = &per_cpu(cpu_dbs_info, j); - cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); + + cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); + wall_time = (unsigned int) cputime64_sub(cur_wall_time, j_dbs_info->prev_cpu_wall); j_dbs_info->prev_cpu_wall = cur_wall_time; - cur_idle_time = get_cpu_idle_time(j); idle_time = (unsigned int) cputime64_sub(cur_idle_time, j_dbs_info->prev_cpu_idle); j_dbs_info->prev_cpu_idle = cur_idle_time; - if (unlikely(wall_time <= idle_time || - (cputime_to_msecs(wall_time) < - dbs_tuners_ins.sampling_rate / (2 * 1000)))) { + if (unlikely(!wall_time || wall_time < idle_time)) continue; - } load = 100 * (wall_time - idle_time) / wall_time; @@ -531,8 +532,8 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, j_dbs_info = &per_cpu(cpu_dbs_info, j); j_dbs_info->cur_policy = policy; - j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j); - j_dbs_info->prev_cpu_wall = get_jiffies_64(); + j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j, + &j_dbs_info->prev_cpu_wall); } this_dbs_info->cpu = cpu; /* -- cgit v1.2.3-70-g09d2 From e9d95bf7eb929b9ddc9af9f4327b76c77ed4c7d6 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Mon, 4 Aug 2008 11:59:10 -0700 Subject: [CPUFREQ][4/6] cpufreq_ondemand: Parameterize down differential Use a parameter for down differential, instead of hardcoded 10%. Follow-on patch changes the down-differential dynamically, based on whether we are using idle micro-accounting or not. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_ondemand.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/cpufreq') diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index b935092aab2..3f898606e68 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -24,6 +24,7 @@ * It helps to keep variable names smaller, simpler */ +#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) #define DEF_FREQUENCY_UP_THRESHOLD (80) #define MIN_FREQUENCY_UP_THRESHOLD (11) #define MAX_FREQUENCY_UP_THRESHOLD (100) @@ -86,10 +87,12 @@ static struct workqueue_struct *kondemand_wq; static struct dbs_tuners { unsigned int sampling_rate; unsigned int up_threshold; + unsigned int down_differential; unsigned int ignore_nice; unsigned int powersave_bias; } dbs_tuners_ins = { .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, + .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL, .ignore_nice = 0, .powersave_bias = 0, }; @@ -424,9 +427,13 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) * can support the current CPU usage without triggering the up * policy. To be safe, we focus 10 points under the threshold. */ - if (max_load_freq < (dbs_tuners_ins.up_threshold - 10) * policy->cur) { + if (max_load_freq < + (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) * + policy->cur) { unsigned int freq_next; - freq_next = max_load_freq / (dbs_tuners_ins.up_threshold - 10); + freq_next = max_load_freq / + (dbs_tuners_ins.up_threshold - + dbs_tuners_ins.down_differential); if (!dbs_tuners_ins.powersave_bias) { __cpufreq_driver_target(policy, freq_next, -- cgit v1.2.3-70-g09d2 From 808009131046b62ac434dbc796c0fe8accaab415 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Mon, 4 Aug 2008 11:59:12 -0700 Subject: [CPUFREQ][6/6] cpufreq: Add idle microaccounting in ondemand governor Use get_cpu_idle_time_us() to get micro-accounted idle information. This enables ondemand to get more accurate idle and busy timings than the jiffy based calculation. As a result, we can decrease the ondemand safety gaurd band from 80-10 to 95-3. Results in more aggressive power savings. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_ondemand.c | 45 +++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) (limited to 'drivers/cpufreq') diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 3f898606e68..39253d6d51e 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -18,6 +18,9 @@ #include #include #include +#include +#include +#include /* * dbs is used in this file as a shortform for demandbased switching @@ -26,6 +29,8 @@ #define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) #define DEF_FREQUENCY_UP_THRESHOLD (80) +#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (3) +#define MICRO_FREQUENCY_UP_THRESHOLD (95) #define MIN_FREQUENCY_UP_THRESHOLD (11) #define MAX_FREQUENCY_UP_THRESHOLD (100) @@ -58,6 +63,7 @@ enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE}; struct cpu_dbs_info_s { cputime64_t prev_cpu_idle; cputime64_t prev_cpu_wall; + cputime64_t prev_cpu_nice; struct cpufreq_policy *cur_policy; struct delayed_work work; struct cpufreq_frequency_table *freq_table; @@ -97,7 +103,8 @@ static struct dbs_tuners { .powersave_bias = 0, }; -static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) +static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, + cputime64_t *wall) { cputime64_t idle_time; cputime64_t cur_wall_time; @@ -123,6 +130,33 @@ static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) return idle_time; } +static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) +{ + u64 idle_time = get_cpu_idle_time_us(cpu, wall); + + if (idle_time == -1ULL) + return get_cpu_idle_time_jiffy(cpu, wall); + + if (dbs_tuners_ins.ignore_nice) { + cputime64_t cur_nice; + unsigned long cur_nice_jiffies; + struct cpu_dbs_info_s *dbs_info; + + dbs_info = &per_cpu(cpu_dbs_info, cpu); + cur_nice = cputime64_sub(kstat_cpu(cpu).cpustat.nice, + dbs_info->prev_cpu_nice); + /* + * Assumption: nice time between sampling periods will be + * less than 2^32 jiffies for 32 bit sys + */ + cur_nice_jiffies = (unsigned long) + cputime64_to_jiffies64(cur_nice); + dbs_info->prev_cpu_nice = kstat_cpu(cpu).cpustat.nice; + return idle_time + jiffies_to_usecs(cur_nice_jiffies); + } + return idle_time; +} + /* * Find right freq to be set now with powersave_bias on. * Returns the freq_hi to be used right now and will set freq_hi_jiffies, @@ -603,6 +637,15 @@ EXPORT_SYMBOL(cpufreq_gov_ondemand); static int __init cpufreq_gov_dbs_init(void) { int err; + cputime64_t wall; + u64 idle_time = get_cpu_idle_time_us(smp_processor_id(), &wall); + + if (idle_time != -1ULL) { + /* Idle micro accounting is supported. Use finer thresholds */ + dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; + dbs_tuners_ins.down_differential = + MICRO_FREQUENCY_DOWN_DIFFERENTIAL; + } kondemand_wq = create_workqueue("kondemand"); if (!kondemand_wq) { -- cgit v1.2.3-70-g09d2 From c4d14bc0bb5d13e316890651ae4518b764c3344c Mon Sep 17 00:00:00 2001 From: Sven Wegener Date: Sat, 20 Sep 2008 16:50:08 +0200 Subject: [CPUFREQ] Don't export governors for default governor We don't need to export the governors for use as the default governor, because the default governor will be built-in anyway and we can access the symbol directly. This also fixes the following sparse warnings: drivers/cpufreq/cpufreq_conservative.c:578:25: warning: symbol 'cpufreq_gov_conservative' was not declared. Should it be static? drivers/cpufreq/cpufreq_ondemand.c:582:25: warning: symbol 'cpufreq_gov_ondemand' was not declared. Should it be static? drivers/cpufreq/cpufreq_performance.c:39:25: warning: symbol 'cpufreq_gov_performance' was not declared. Should it be static? drivers/cpufreq/cpufreq_powersave.c:38:25: warning: symbol 'cpufreq_gov_powersave' was not declared. Should it be static? drivers/cpufreq/cpufreq_userspace.c:190:25: warning: symbol 'cpufreq_gov_userspace' was not declared. Should it be static? Signed-off-by: Sven Wegener Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_conservative.c | 4 +++- drivers/cpufreq/cpufreq_ondemand.c | 4 +++- drivers/cpufreq/cpufreq_performance.c | 4 +++- drivers/cpufreq/cpufreq_powersave.c | 4 +++- drivers/cpufreq/cpufreq_userspace.c | 4 +++- 5 files changed, 15 insertions(+), 5 deletions(-) (limited to 'drivers/cpufreq') diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 4ee53a4c6d2..e2657837d95 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -576,13 +576,15 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, return 0; } +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE +static +#endif struct cpufreq_governor cpufreq_gov_conservative = { .name = "conservative", .governor = cpufreq_governor_dbs, .max_transition_latency = TRANSITION_LATENCY_LIMIT, .owner = THIS_MODULE, }; -EXPORT_SYMBOL(cpufreq_gov_conservative); static int __init cpufreq_gov_dbs_init(void) { diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 39253d6d51e..bd069349bca 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -626,13 +626,15 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, return 0; } +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND +static +#endif struct cpufreq_governor cpufreq_gov_ondemand = { .name = "ondemand", .governor = cpufreq_governor_dbs, .max_transition_latency = TRANSITION_LATENCY_LIMIT, .owner = THIS_MODULE, }; -EXPORT_SYMBOL(cpufreq_gov_ondemand); static int __init cpufreq_gov_dbs_init(void) { diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c index e8e1451ef1c..7e2e515087f 100644 --- a/drivers/cpufreq/cpufreq_performance.c +++ b/drivers/cpufreq/cpufreq_performance.c @@ -36,12 +36,14 @@ static int cpufreq_governor_performance(struct cpufreq_policy *policy, return 0; } +#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE_MODULE +static +#endif struct cpufreq_governor cpufreq_gov_performance = { .name = "performance", .governor = cpufreq_governor_performance, .owner = THIS_MODULE, }; -EXPORT_SYMBOL(cpufreq_gov_performance); static int __init cpufreq_gov_performance_init(void) diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c index 88d2f44fba4..e6db5faf3eb 100644 --- a/drivers/cpufreq/cpufreq_powersave.c +++ b/drivers/cpufreq/cpufreq_powersave.c @@ -35,12 +35,14 @@ static int cpufreq_governor_powersave(struct cpufreq_policy *policy, return 0; } +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE +static +#endif struct cpufreq_governor cpufreq_gov_powersave = { .name = "powersave", .governor = cpufreq_governor_powersave, .owner = THIS_MODULE, }; -EXPORT_SYMBOL(cpufreq_gov_powersave); static int __init cpufreq_gov_powersave_init(void) { diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c index 32244aa7cc0..1442bbada05 100644 --- a/drivers/cpufreq/cpufreq_userspace.c +++ b/drivers/cpufreq/cpufreq_userspace.c @@ -187,6 +187,9 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy, } +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE +static +#endif struct cpufreq_governor cpufreq_gov_userspace = { .name = "userspace", .governor = cpufreq_governor_userspace, @@ -194,7 +197,6 @@ struct cpufreq_governor cpufreq_gov_userspace = { .show_setspeed = show_speed, .owner = THIS_MODULE, }; -EXPORT_SYMBOL(cpufreq_gov_userspace); static int __init cpufreq_gov_userspace_init(void) { -- cgit v1.2.3-70-g09d2 From 4f6e6b9f97b0ce98a8d1da65adbaf743bd0486a9 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Thu, 18 Sep 2008 10:43:40 +0000 Subject: [CPUFREQ] Fix BUG: using smp_processor_id() in preemptible code Use get_cpu()/put_cpu() in cpufreq_ondemand init routine, instead of smp_processor_id() to avoid the following BUG: [ 35.313118] BUG: using smp_processor_id() in preemptible [00000000] code=: modprobe/4952 [ 35.313132] caller is cpufreq_gov_dbs_init+0xa/0x8f [cpufreq_ondemand] [ 35.313140] Pid: 4952, comm: modprobe Not tainted 2.6.27-rc5-mm1 #23 [ 35.313145] Call Trace: [ 35.313158] [] debug_smp_processor_id+0xd7/0xe0 [ 35.313167] [] cpufreq_gov_dbs_init+0xa/0x8f [cpufreq_ondemand] [ 35.313176] [] _stext+0x3b/0x160 [ 35.313185] [] __mutex_unlock_slowpath+0xe5/0x190 [ 35.313195] [] trace_hardirqs_on_caller+0xca/0x140 [ 35.313205] [] sys_init_module+0xdc/0x210 [ 35.313212] [] system_call_fastpath+0x16/0x1b Signed-off-by: Andrea Righi Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_ondemand.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/cpufreq') diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index bd069349bca..2ab3c12b88a 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -640,8 +640,11 @@ static int __init cpufreq_gov_dbs_init(void) { int err; cputime64_t wall; - u64 idle_time = get_cpu_idle_time_us(smp_processor_id(), &wall); + u64 idle_time; + int cpu = get_cpu(); + idle_time = get_cpu_idle_time_us(cpu, &wall); + put_cpu(); if (idle_time != -1ULL) { /* Idle micro accounting is supported. Use finer thresholds */ dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; -- cgit v1.2.3-70-g09d2