diff options
author | Tejun Heo <tj@kernel.org> | 2009-08-14 14:41:02 +0900 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2009-08-14 14:45:31 +0900 |
commit | 384be2b18a5f9475eab9ca2bdfa95cc1a04ef59c (patch) | |
tree | 04c93f391a1b65c8bf8d7ba8643c07d26c26590a /drivers/cpufreq | |
parent | a76761b621bcd8336065c4fe3a74f046858bc34c (diff) | |
parent | 142d44b0dd6741a64a7bdbe029110e7c1dcf1d23 (diff) |
Merge branch 'percpu-for-linus' into percpu-for-next
Conflicts:
arch/sparc/kernel/smp_64.c
arch/x86/kernel/cpu/perf_counter.c
arch/x86/kernel/setup_percpu.c
drivers/cpufreq/cpufreq_ondemand.c
mm/percpu.c
Conflicts in core and arch percpu codes are mostly from commit
ed78e1e078dd44249f88b1dd8c76dafb39567161 which substituted many
num_possible_cpus() with nr_cpu_ids. As for-next branch has moved all
the first chunk allocators into mm/percpu.c, the changes are moved
from arch code to mm/percpu.c.
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'drivers/cpufreq')
-rw-r--r-- | drivers/cpufreq/cpufreq.c | 99 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_conservative.c | 43 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_ondemand.c | 77 |
3 files changed, 118 insertions, 101 deletions
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 6e2ec0b1894..fd69086d08d 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -761,6 +761,10 @@ static struct kobj_type ktype_cpufreq = { * cpufreq_add_dev - add a CPU device * * Adds the cpufreq interface for a CPU device. + * + * The Oracle says: try running cpufreq registration/unregistration concurrently + * with with cpu hotplugging and all hell will break loose. Tried to clean this + * mess up, but more thorough testing is needed. - Mathieu */ static int cpufreq_add_dev(struct sys_device *sys_dev) { @@ -772,9 +776,6 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) struct sys_device *cpu_sys_dev; unsigned long flags; unsigned int j; -#ifdef CONFIG_SMP - struct cpufreq_policy *managed_policy; -#endif if (cpu_is_offline(cpu)) return 0; @@ -804,15 +805,12 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) goto nomem_out; } if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL)) { - kfree(policy); ret = -ENOMEM; - goto nomem_out; + goto err_free_policy; } if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) { - free_cpumask_var(policy->cpus); - kfree(policy); ret = -ENOMEM; - goto nomem_out; + goto err_free_cpumask; } policy->cpu = cpu; @@ -820,7 +818,8 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) /* Initially set CPU itself as the policy_cpu */ per_cpu(policy_cpu, cpu) = cpu; - lock_policy_rwsem_write(cpu); + ret = (lock_policy_rwsem_write(cpu) < 0); + WARN_ON(ret); init_completion(&policy->kobj_unregister); INIT_WORK(&policy->update, handle_update); @@ -833,7 +832,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) ret = cpufreq_driver->init(policy); if (ret) { dprintk("initialization failed\n"); - goto err_out; + goto err_unlock_policy; } policy->user_policy.min = policy->min; policy->user_policy.max = policy->max; @@ -852,21 +851,31 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) #endif for_each_cpu(j, policy->cpus) { + struct cpufreq_policy *managed_policy; + if (cpu == j) continue; /* Check for existing affected CPUs. * They may not be aware of it due to CPU Hotplug. + * cpufreq_cpu_put is called when the device is removed + * in __cpufreq_remove_dev() */ - managed_policy = cpufreq_cpu_get(j); /* FIXME: Where is this released? What about error paths? */ + managed_policy = cpufreq_cpu_get(j); if (unlikely(managed_policy)) { /* Set proper policy_cpu */ unlock_policy_rwsem_write(cpu); per_cpu(policy_cpu, cpu) = managed_policy->cpu; - if (lock_policy_rwsem_write(cpu) < 0) - goto err_out_driver_exit; + if (lock_policy_rwsem_write(cpu) < 0) { + /* Should not go through policy unlock path */ + if (cpufreq_driver->exit) + cpufreq_driver->exit(policy); + ret = -EBUSY; + cpufreq_cpu_put(managed_policy); + goto err_free_cpumask; + } spin_lock_irqsave(&cpufreq_driver_lock, flags); cpumask_copy(managed_policy->cpus, policy->cpus); @@ -878,11 +887,13 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) &managed_policy->kobj, "cpufreq"); if (ret) - goto err_out_driver_exit; - - cpufreq_debug_enable_ratelimit(); - ret = 0; - goto err_out_driver_exit; /* call driver->exit() */ + cpufreq_cpu_put(managed_policy); + /* + * Success. We only needed to be added to the mask. + * Call driver->exit() because only the cpu parent of + * the kobj needed to call init(). + */ + goto out_driver_exit; /* call driver->exit() */ } } #endif @@ -892,29 +903,31 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &sys_dev->kobj, "cpufreq"); if (ret) - goto err_out_driver_exit; + goto out_driver_exit; /* set up files for this cpu device */ drv_attr = cpufreq_driver->attr; while ((drv_attr) && (*drv_attr)) { ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr)); if (ret) - goto err_out_driver_exit; + goto err_out_kobj_put; drv_attr++; } if (cpufreq_driver->get) { ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr); if (ret) - goto err_out_driver_exit; + goto err_out_kobj_put; } if (cpufreq_driver->target) { ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr); if (ret) - goto err_out_driver_exit; + goto err_out_kobj_put; } spin_lock_irqsave(&cpufreq_driver_lock, flags); for_each_cpu(j, policy->cpus) { + if (!cpu_online(j)) + continue; per_cpu(cpufreq_cpu_data, j) = policy; per_cpu(policy_cpu, j) = policy->cpu; } @@ -922,18 +935,22 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) /* symlink affected CPUs */ for_each_cpu(j, policy->cpus) { + struct cpufreq_policy *managed_policy; + if (j == cpu) continue; if (!cpu_online(j)) continue; dprintk("CPU %u already managed, adding link\n", j); - cpufreq_cpu_get(cpu); + managed_policy = cpufreq_cpu_get(cpu); cpu_sys_dev = get_cpu_sysdev(j); ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj, "cpufreq"); - if (ret) + if (ret) { + cpufreq_cpu_put(managed_policy); goto err_out_unregister; + } } policy->governor = NULL; /* to assure that the starting sequence is @@ -965,17 +982,20 @@ err_out_unregister: per_cpu(cpufreq_cpu_data, j) = NULL; spin_unlock_irqrestore(&cpufreq_driver_lock, flags); +err_out_kobj_put: kobject_put(&policy->kobj); wait_for_completion(&policy->kobj_unregister); -err_out_driver_exit: +out_driver_exit: if (cpufreq_driver->exit) cpufreq_driver->exit(policy); -err_out: +err_unlock_policy: unlock_policy_rwsem_write(cpu); +err_free_cpumask: + free_cpumask_var(policy->cpus); +err_free_policy: kfree(policy); - nomem_out: module_put(cpufreq_driver->owner); module_out: @@ -1070,8 +1090,6 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) spin_unlock_irqrestore(&cpufreq_driver_lock, flags); #endif - unlock_policy_rwsem_write(cpu); - if (cpufreq_driver->target) __cpufreq_governor(data, CPUFREQ_GOV_STOP); @@ -1088,6 +1106,8 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) if (cpufreq_driver->exit) cpufreq_driver->exit(data); + unlock_policy_rwsem_write(cpu); + free_cpumask_var(data->related_cpus); free_cpumask_var(data->cpus); kfree(data); @@ -1228,13 +1248,22 @@ EXPORT_SYMBOL(cpufreq_get); static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg) { - int cpu = sysdev->id; int ret = 0; + +#ifdef __powerpc__ + int cpu = sysdev->id; unsigned int cur_freq = 0; struct cpufreq_policy *cpu_policy; dprintk("suspending cpu %u\n", cpu); + /* + * This whole bogosity is here because Powerbooks are made of fail. + * No sane platform should need any of the code below to be run. + * (it's entirely the wrong thing to do, as driver->get may + * reenable interrupts on some architectures). + */ + if (!cpu_online(cpu)) return 0; @@ -1293,6 +1322,7 @@ static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg) out: cpufreq_cpu_put(cpu_policy); +#endif /* __powerpc__ */ return ret; } @@ -1306,12 +1336,18 @@ out: */ static int cpufreq_resume(struct sys_device *sysdev) { - int cpu = sysdev->id; int ret = 0; + +#ifdef __powerpc__ + int cpu = sysdev->id; struct cpufreq_policy *cpu_policy; dprintk("resuming cpu %u\n", cpu); + /* As with the ->suspend method, all the code below is + * only necessary because Powerbooks suck. + * See commit 42d4dc3f4e1e for jokes. */ + if (!cpu_online(cpu)) return 0; @@ -1375,6 +1411,7 @@ out: schedule_work(&cpu_policy->update); fail: cpufreq_cpu_put(cpu_policy); +#endif /* __powerpc__ */ return ret; } diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index a7ef465c83b..bc33ddc9c97 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -64,21 +64,20 @@ struct cpu_dbs_info_s { unsigned int requested_freq; int cpu; unsigned int enable:1; + /* + * percpu mutex that serializes governor limit change with + * do_dbs_timer invocation. We do not want do_dbs_timer to run + * when user is changing the governor or limits. + */ + struct mutex timer_mutex; }; static DEFINE_PER_CPU(struct cpu_dbs_info_s, cs_cpu_dbs_info); static unsigned int dbs_enable; /* number of CPUs using this policy */ /* - * DEADLOCK ALERT! There is a ordering requirement between cpu_hotplug - * lock and dbs_mutex. cpu_hotplug lock should always be held before - * dbs_mutex. If any function that can potentially take cpu_hotplug lock - * (like __cpufreq_driver_target()) is being called with dbs_mutex taken, then - * cpu_hotplug lock should be taken before that. Note that cpu_hotplug lock - * is recursive for the same process. -Venki - * DEADLOCK ALERT! (2) : do_dbs_timer() must not take the dbs_mutex, because it - * would deadlock with cancel_delayed_work_sync(), which is needed for proper - * raceless workqueue teardown. + * dbs_mutex protects data in dbs_tuners_ins from concurrent changes on + * different CPUs. It protects dbs_enable in governor start/stop. */ static DEFINE_MUTEX(dbs_mutex); @@ -488,18 +487,12 @@ static void do_dbs_timer(struct work_struct *work) delay -= jiffies % delay; - if (lock_policy_rwsem_write(cpu) < 0) - return; - - if (!dbs_info->enable) { - unlock_policy_rwsem_write(cpu); - return; - } + mutex_lock(&dbs_info->timer_mutex); dbs_check_cpu(dbs_info); queue_delayed_work_on(cpu, kconservative_wq, &dbs_info->work, delay); - unlock_policy_rwsem_write(cpu); + mutex_unlock(&dbs_info->timer_mutex); } static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) @@ -535,9 +528,6 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, if ((!cpu_online(cpu)) || (!policy->cur)) return -EINVAL; - if (this_dbs_info->enable) /* Already enabled */ - break; - mutex_lock(&dbs_mutex); rc = sysfs_create_group(&policy->kobj, &dbs_attr_group); @@ -561,6 +551,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, this_dbs_info->down_skip = 0; this_dbs_info->requested_freq = policy->cur; + mutex_init(&this_dbs_info->timer_mutex); dbs_enable++; /* * Start the timerschedule work, when this governor @@ -590,17 +581,19 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, &dbs_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); } - dbs_timer_init(this_dbs_info); - mutex_unlock(&dbs_mutex); + dbs_timer_init(this_dbs_info); + break; case CPUFREQ_GOV_STOP: - mutex_lock(&dbs_mutex); dbs_timer_exit(this_dbs_info); + + mutex_lock(&dbs_mutex); sysfs_remove_group(&policy->kobj, &dbs_attr_group); dbs_enable--; + mutex_destroy(&this_dbs_info->timer_mutex); /* * Stop the timerschedule work, when this governor @@ -616,7 +609,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, break; case CPUFREQ_GOV_LIMITS: - mutex_lock(&dbs_mutex); + mutex_lock(&this_dbs_info->timer_mutex); if (policy->max < this_dbs_info->cur_policy->cur) __cpufreq_driver_target( this_dbs_info->cur_policy, @@ -625,7 +618,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, __cpufreq_driver_target( this_dbs_info->cur_policy, policy->min, CPUFREQ_RELATION_L); - mutex_unlock(&dbs_mutex); + mutex_unlock(&this_dbs_info->timer_mutex); break; } diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 36f292a7bd0..d7a528c80de 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -70,23 +70,21 @@ struct cpu_dbs_info_s { unsigned int freq_lo_jiffies; unsigned int freq_hi_jiffies; int cpu; - unsigned int enable:1, - sample_type:1; + unsigned int sample_type:1; + /* + * percpu mutex that serializes governor limit change with + * do_dbs_timer invocation. We do not want do_dbs_timer to run + * when user is changing the governor or limits. + */ + struct mutex timer_mutex; }; static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info); static unsigned int dbs_enable; /* number of CPUs using this policy */ /* - * DEADLOCK ALERT! There is a ordering requirement between cpu_hotplug - * lock and dbs_mutex. cpu_hotplug lock should always be held before - * dbs_mutex. If any function that can potentially take cpu_hotplug lock - * (like __cpufreq_driver_target()) is being called with dbs_mutex taken, then - * cpu_hotplug lock should be taken before that. Note that cpu_hotplug lock - * is recursive for the same process. -Venki - * DEADLOCK ALERT! (2) : do_dbs_timer() must not take the dbs_mutex, because it - * would deadlock with cancel_delayed_work_sync(), which is needed for proper - * raceless workqueue teardown. + * dbs_mutex protects data in dbs_tuners_ins from concurrent changes on + * different CPUs. It protects dbs_enable in governor start/stop. */ static DEFINE_MUTEX(dbs_mutex); @@ -193,13 +191,18 @@ static unsigned int powersave_bias_target(struct cpufreq_policy *policy, return freq_hi; } +static void ondemand_powersave_bias_init_cpu(int cpu) +{ + struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + dbs_info->freq_table = cpufreq_frequency_get_table(cpu); + dbs_info->freq_lo = 0; +} + static void ondemand_powersave_bias_init(void) { int i; for_each_online_cpu(i) { - struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, i); - dbs_info->freq_table = cpufreq_frequency_get_table(i); - dbs_info->freq_lo = 0; + ondemand_powersave_bias_init_cpu(i); } } @@ -241,12 +244,10 @@ static ssize_t store_sampling_rate(struct cpufreq_policy *unused, unsigned int input; int ret; ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; mutex_lock(&dbs_mutex); - if (ret != 1) { - mutex_unlock(&dbs_mutex); - return -EINVAL; - } dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate); mutex_unlock(&dbs_mutex); @@ -260,13 +261,12 @@ static ssize_t store_up_threshold(struct cpufreq_policy *unused, int ret; ret = sscanf(buf, "%u", &input); - mutex_lock(&dbs_mutex); if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD || input < MIN_FREQUENCY_UP_THRESHOLD) { - mutex_unlock(&dbs_mutex); return -EINVAL; } + mutex_lock(&dbs_mutex); dbs_tuners_ins.up_threshold = input; mutex_unlock(&dbs_mutex); @@ -364,9 +364,6 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) struct cpufreq_policy *policy; unsigned int j; - if (!this_dbs_info->enable) - return; - this_dbs_info->freq_lo = 0; policy = this_dbs_info->cur_policy; @@ -494,14 +491,7 @@ static void do_dbs_timer(struct work_struct *work) int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); delay -= jiffies % delay; - - if (lock_policy_rwsem_write(cpu) < 0) - return; - - if (!dbs_info->enable) { - unlock_policy_rwsem_write(cpu); - return; - } + mutex_lock(&dbs_info->timer_mutex); /* Common NORMAL_SAMPLE setup */ dbs_info->sample_type = DBS_NORMAL_SAMPLE; @@ -518,7 +508,7 @@ static void do_dbs_timer(struct work_struct *work) dbs_info->freq_lo, CPUFREQ_RELATION_H); } queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay); - unlock_policy_rwsem_write(cpu); + mutex_unlock(&dbs_info->timer_mutex); } static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) @@ -527,8 +517,6 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); delay -= jiffies % delay; - dbs_info->enable = 1; - ondemand_powersave_bias_init(); dbs_info->sample_type = DBS_NORMAL_SAMPLE; INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer); queue_delayed_work_on(dbs_info->cpu, kondemand_wq, &dbs_info->work, @@ -537,7 +525,6 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) { - dbs_info->enable = 0; cancel_delayed_work_sync(&dbs_info->work); } @@ -556,19 +543,15 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, if ((!cpu_online(cpu)) || (!policy->cur)) return -EINVAL; - if (this_dbs_info->enable) /* Already enabled */ - break; - mutex_lock(&dbs_mutex); - dbs_enable++; rc = sysfs_create_group(&policy->kobj, &dbs_attr_group); if (rc) { - dbs_enable--; mutex_unlock(&dbs_mutex); return rc; } + dbs_enable++; for_each_cpu(j, policy->cpus) { struct cpu_dbs_info_s *j_dbs_info; j_dbs_info = &per_cpu(od_cpu_dbs_info, j); @@ -582,6 +565,8 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, } } this_dbs_info->cpu = cpu; + ondemand_powersave_bias_init_cpu(cpu); + mutex_init(&this_dbs_info->timer_mutex); /* * Start the timerschedule work, when this governor * is used for first time @@ -599,29 +584,31 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, max(min_sampling_rate, latency * LATENCY_MULTIPLIER); } - dbs_timer_init(this_dbs_info); - mutex_unlock(&dbs_mutex); + + dbs_timer_init(this_dbs_info); break; case CPUFREQ_GOV_STOP: - mutex_lock(&dbs_mutex); dbs_timer_exit(this_dbs_info); + + mutex_lock(&dbs_mutex); sysfs_remove_group(&policy->kobj, &dbs_attr_group); + mutex_destroy(&this_dbs_info->timer_mutex); dbs_enable--; mutex_unlock(&dbs_mutex); break; case CPUFREQ_GOV_LIMITS: - mutex_lock(&dbs_mutex); + mutex_lock(&this_dbs_info->timer_mutex); if (policy->max < this_dbs_info->cur_policy->cur) __cpufreq_driver_target(this_dbs_info->cur_policy, policy->max, CPUFREQ_RELATION_H); else if (policy->min > this_dbs_info->cur_policy->cur) __cpufreq_driver_target(this_dbs_info->cur_policy, policy->min, CPUFREQ_RELATION_L); - mutex_unlock(&dbs_mutex); + mutex_unlock(&this_dbs_info->timer_mutex); break; } return 0; |