From 2fdf66b491ac706657946442789ec644cc317e1a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 31 Dec 2008 18:08:47 -0800 Subject: cpumask: convert shared_cpu_map in acpi_processor* structs to cpumask_var_t Impact: Reduce memory usage, use new API. This is part of an effort to reduce structure sizes for machines configured with large NR_CPUS. cpumask_t gets replaced by cpumask_var_t, which is either struct cpumask[1] (small NR_CPUS) or struct cpumask * (large NR_CPUS). (Changes to powernow-k* by .) Signed-off-by: Rusty Russell Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 88ea02dcb62..d0a001093b2 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -517,6 +517,17 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) } } +static void free_acpi_perf_data(void) +{ + unsigned int i; + + /* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */ + for_each_possible_cpu(i) + free_cpumask_var(per_cpu_ptr(acpi_perf_data, i) + ->shared_cpu_map); + free_percpu(acpi_perf_data); +} + /* * acpi_cpufreq_early_init - initialize ACPI P-States library * @@ -527,6 +538,7 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) */ static int __init acpi_cpufreq_early_init(void) { + unsigned int i; dprintk("acpi_cpufreq_early_init\n"); acpi_perf_data = alloc_percpu(struct acpi_processor_performance); @@ -534,6 +546,15 @@ static int __init acpi_cpufreq_early_init(void) dprintk("Memory allocation error for acpi_perf_data.\n"); return -ENOMEM; } + for_each_possible_cpu(i) { + if (!alloc_cpumask_var(&per_cpu_ptr(acpi_perf_data, i) + ->shared_cpu_map, GFP_KERNEL)) { + + /* Freeing a NULL pointer is OK: alloc_percpu zeroes. */ + free_acpi_perf_data(); + return -ENOMEM; + } + } /* Do initialization in ACPI core */ acpi_processor_preregister_performance(acpi_perf_data); @@ -604,9 +625,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) */ if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL || policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { - policy->cpus = perf->shared_cpu_map; + cpumask_copy(&policy->cpus, perf->shared_cpu_map); } - policy->related_cpus = perf->shared_cpu_map; + cpumask_copy(&policy->related_cpus, perf->shared_cpu_map); #ifdef CONFIG_SMP dmi_check_system(sw_any_bug_dmi_table); @@ -795,7 +816,7 @@ static int __init acpi_cpufreq_init(void) ret = cpufreq_register_driver(&acpi_cpufreq_driver); if (ret) - free_percpu(acpi_perf_data); + free_acpi_perf_data(); return ret; } -- cgit v1.2.3-70-g09d2 From 80855f7361eb68205e6bc1981928629d9b02d5c9 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Wed, 31 Dec 2008 18:08:47 -0800 Subject: cpumask: use alloc_cpumask_var_node where appropriate Impact: Reduce inter-node memory traffic. Reduces inter-node memory traffic (offloading the global system bus) by allocating referenced struct cpumasks on the same node as the referring struct. Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 5 +++-- arch/x86/kernel/io_apic.c | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index d0a001093b2..28102ad1a36 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -547,8 +547,9 @@ static int __init acpi_cpufreq_early_init(void) return -ENOMEM; } for_each_possible_cpu(i) { - if (!alloc_cpumask_var(&per_cpu_ptr(acpi_perf_data, i) - ->shared_cpu_map, GFP_KERNEL)) { + if (!alloc_cpumask_var_node( + &per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map, + GFP_KERNEL, cpu_to_node(i))) { /* Freeing a NULL pointer is OK: alloc_percpu zeroes. */ free_acpi_perf_data(); diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 3e070bb961d..a25c3f76b8a 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -212,11 +212,11 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu) cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); if (cfg) { - /* FIXME: needs alloc_cpumask_var_node() */ - if (!alloc_cpumask_var(&cfg->domain, GFP_ATOMIC)) { + if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) { kfree(cfg); cfg = NULL; - } else if (!alloc_cpumask_var(&cfg->old_domain, GFP_ATOMIC)) { + } else if (!alloc_cpumask_var_node(&cfg->old_domain, + GFP_ATOMIC, node)) { free_cpumask_var(cfg->domain); kfree(cfg); cfg = NULL; -- cgit v1.2.3-70-g09d2 From 835481d9bcd65720b473db6b38746a74a3964218 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sun, 4 Jan 2009 05:18:06 -0800 Subject: cpumask: convert struct cpufreq_policy to cpumask_var_t Impact: use new cpumask API to reduce memory usage This is part of an effort to reduce structure sizes for machines configured with large NR_CPUS. cpumask_t gets replaced by cpumask_var_t, which is either struct cpumask[1] (small NR_CPUS) or struct cpumask * (large NR_CPUS). Signed-off-by: Rusty Russell Signed-off-by: Mike Travis Acked-by: Dave Jones Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 10 +++--- arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 8 ++--- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 6 ++-- arch/x86/kernel/cpu/cpufreq/powernow-k8.h | 2 +- arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | 14 ++++---- arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 18 +++++----- drivers/cpufreq/cpufreq.c | 42 ++++++++++++++++-------- drivers/cpufreq/cpufreq_conservative.c | 2 +- drivers/cpufreq/cpufreq_ondemand.c | 4 +-- include/linux/cpufreq.h | 4 +-- 10 files changed, 62 insertions(+), 48 deletions(-) (limited to 'arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 28102ad1a36..0b31939862d 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -411,7 +411,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, #ifdef CONFIG_HOTPLUG_CPU /* cpufreq holds the hotplug lock, so we are safe from here on */ - cpus_and(online_policy_cpus, cpu_online_map, policy->cpus); + cpumask_and(&online_policy_cpus, cpu_online_mask, policy->cpus); #else online_policy_cpus = policy->cpus; #endif @@ -626,15 +626,15 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) */ if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL || policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { - cpumask_copy(&policy->cpus, perf->shared_cpu_map); + cpumask_copy(policy->cpus, perf->shared_cpu_map); } - cpumask_copy(&policy->related_cpus, perf->shared_cpu_map); + cpumask_copy(policy->related_cpus, perf->shared_cpu_map); #ifdef CONFIG_SMP dmi_check_system(sw_any_bug_dmi_table); - if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) { + if (bios_with_sw_any_bug && cpumask_weight(policy->cpus) == 1) { policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; - policy->cpus = per_cpu(cpu_core_map, cpu); + cpumask_copy(policy->cpus, cpu_core_mask(cpu)); } #endif diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index beea4466b06..b585e04cbc9 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -122,7 +122,7 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy, return 0; /* notifiers */ - for_each_cpu_mask_nr(i, policy->cpus) { + for_each_cpu(i, policy->cpus) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); } @@ -130,11 +130,11 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy, /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software * Developer's Manual, Volume 3 */ - for_each_cpu_mask_nr(i, policy->cpus) + for_each_cpu(i, policy->cpus) cpufreq_p4_setdc(i, p4clockmod_table[newstate].index); /* notifiers */ - for_each_cpu_mask_nr(i, policy->cpus) { + for_each_cpu(i, policy->cpus) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); } @@ -203,7 +203,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) unsigned int i; #ifdef CONFIG_SMP - policy->cpus = per_cpu(cpu_sibling_map, policy->cpu); + cpumask_copy(policy->cpus, &per_cpu(cpu_sibling_map, policy->cpu)); #endif /* Errata workaround */ diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index c3c9adbaa26..5c28b37dea1 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1199,10 +1199,10 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) set_cpus_allowed_ptr(current, &oldmask); if (cpu_family == CPU_HW_PSTATE) - pol->cpus = cpumask_of_cpu(pol->cpu); + cpumask_copy(pol->cpus, cpumask_of(pol->cpu)); else - pol->cpus = per_cpu(cpu_core_map, pol->cpu); - data->available_cores = &(pol->cpus); + cpumask_copy(pol->cpus, &per_cpu(cpu_core_map, pol->cpu)); + data->available_cores = pol->cpus; /* Take a crude guess here. * That guess was in microseconds, so multiply with 1000 */ diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index 65cfb5d7f77..8ecc75b6c7c 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h @@ -53,7 +53,7 @@ struct powernow_k8_data { /* we need to keep track of associated cores, but let cpufreq * handle hotplug events - so just point at cpufreq pol->cpus * structure */ - cpumask_t *available_cores; + struct cpumask *available_cores; }; diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index d2cc4991cba..f08998278a3 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -492,8 +492,8 @@ static int centrino_target (struct cpufreq_policy *policy, } first_cpu = 1; - for_each_cpu_mask_nr(j, policy->cpus) { - const cpumask_t *mask; + for_each_cpu(j, policy->cpus) { + const struct cpumask *mask; /* cpufreq holds the hotplug lock, so we are safe here */ if (!cpu_online(j)) @@ -504,9 +504,9 @@ static int centrino_target (struct cpufreq_policy *policy, * Make sure we are running on CPU that wants to change freq */ if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) - mask = &policy->cpus; + mask = policy->cpus; else - mask = &cpumask_of_cpu(j); + mask = cpumask_of(j); set_cpus_allowed_ptr(current, mask); preempt_disable(); @@ -538,7 +538,7 @@ static int centrino_target (struct cpufreq_policy *policy, dprintk("target=%dkHz old=%d new=%d msr=%04x\n", target_freq, freqs.old, freqs.new, msr); - for_each_cpu_mask_nr(k, policy->cpus) { + for_each_cpu(k, policy->cpus) { if (!cpu_online(k)) continue; freqs.cpu = k; @@ -563,7 +563,7 @@ static int centrino_target (struct cpufreq_policy *policy, preempt_enable(); } - for_each_cpu_mask_nr(k, policy->cpus) { + for_each_cpu(k, policy->cpus) { if (!cpu_online(k)) continue; freqs.cpu = k; @@ -586,7 +586,7 @@ static int centrino_target (struct cpufreq_policy *policy, tmp = freqs.new; freqs.new = freqs.old; freqs.old = tmp; - for_each_cpu_mask_nr(j, policy->cpus) { + for_each_cpu(j, policy->cpus) { if (!cpu_online(j)) continue; cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 04d0376b64b..dedc1e98f16 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -229,7 +229,7 @@ static unsigned int speedstep_detect_chipset (void) return 0; } -static unsigned int _speedstep_get(const cpumask_t *cpus) +static unsigned int _speedstep_get(const struct cpumask *cpus) { unsigned int speed; cpumask_t cpus_allowed; @@ -244,7 +244,7 @@ static unsigned int _speedstep_get(const cpumask_t *cpus) static unsigned int speedstep_get(unsigned int cpu) { - return _speedstep_get(&cpumask_of_cpu(cpu)); + return _speedstep_get(cpumask_of(cpu)); } /** @@ -267,7 +267,7 @@ static int speedstep_target (struct cpufreq_policy *policy, if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) return -EINVAL; - freqs.old = _speedstep_get(&policy->cpus); + freqs.old = _speedstep_get(policy->cpus); freqs.new = speedstep_freqs[newstate].frequency; freqs.cpu = policy->cpu; @@ -279,20 +279,20 @@ static int speedstep_target (struct cpufreq_policy *policy, cpus_allowed = current->cpus_allowed; - for_each_cpu_mask_nr(i, policy->cpus) { + for_each_cpu(i, policy->cpus) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); } /* switch to physical CPU where state is to be changed */ - set_cpus_allowed_ptr(current, &policy->cpus); + set_cpus_allowed_ptr(current, policy->cpus); speedstep_set_state(newstate); /* allow to be run on all CPUs */ set_cpus_allowed_ptr(current, &cpus_allowed); - for_each_cpu_mask_nr(i, policy->cpus) { + for_each_cpu(i, policy->cpus) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); } @@ -322,11 +322,11 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) /* only run on CPU to be set, or on its sibling */ #ifdef CONFIG_SMP - policy->cpus = per_cpu(cpu_sibling_map, policy->cpu); + cpumask_copy(policy->cpus, &per_cpu(cpu_sibling_map, policy->cpu)); #endif cpus_allowed = current->cpus_allowed; - set_cpus_allowed_ptr(current, &policy->cpus); + set_cpus_allowed_ptr(current, policy->cpus); /* detect low and high frequency and transition latency */ result = speedstep_get_freqs(speedstep_processor, @@ -339,7 +339,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) return result; /* get current speed setting */ - speed = _speedstep_get(&policy->cpus); + speed = _speedstep_get(policy->cpus); if (!speed) return -EIO; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 01dde80597f..b55cb67435b 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -584,12 +584,12 @@ out: return i; } -static ssize_t show_cpus(cpumask_t mask, char *buf) +static ssize_t show_cpus(const struct cpumask *mask, char *buf) { ssize_t i = 0; unsigned int cpu; - for_each_cpu_mask_nr(cpu, mask) { + for_each_cpu(cpu, mask) { if (i) i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " "); i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu); @@ -606,7 +606,7 @@ static ssize_t show_cpus(cpumask_t mask, char *buf) */ static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf) { - if (cpus_empty(policy->related_cpus)) + if (cpumask_empty(policy->related_cpus)) return show_cpus(policy->cpus, buf); return show_cpus(policy->related_cpus, buf); } @@ -806,9 +806,20 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) ret = -ENOMEM; goto nomem_out; } + if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL)) { + kfree(policy); + ret = -ENOMEM; + goto nomem_out; + } + if (!alloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) { + free_cpumask_var(policy->cpus); + kfree(policy); + ret = -ENOMEM; + goto nomem_out; + } policy->cpu = cpu; - policy->cpus = cpumask_of_cpu(cpu); + cpumask_copy(policy->cpus, cpumask_of(cpu)); /* Initially set CPU itself as the policy_cpu */ per_cpu(policy_cpu, cpu) = cpu; @@ -843,7 +854,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) } #endif - for_each_cpu_mask_nr(j, policy->cpus) { + for_each_cpu(j, policy->cpus) { if (cpu == j) continue; @@ -861,7 +872,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) goto err_out_driver_exit; spin_lock_irqsave(&cpufreq_driver_lock, flags); - managed_policy->cpus = policy->cpus; + cpumask_copy(managed_policy->cpus, policy->cpus); per_cpu(cpufreq_cpu_data, cpu) = managed_policy; spin_unlock_irqrestore(&cpufreq_driver_lock, flags); @@ -916,14 +927,14 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) } spin_lock_irqsave(&cpufreq_driver_lock, flags); - for_each_cpu_mask_nr(j, policy->cpus) { + for_each_cpu(j, policy->cpus) { per_cpu(cpufreq_cpu_data, j) = policy; per_cpu(policy_cpu, j) = policy->cpu; } spin_unlock_irqrestore(&cpufreq_driver_lock, flags); /* symlink affected CPUs */ - for_each_cpu_mask_nr(j, policy->cpus) { + for_each_cpu(j, policy->cpus) { if (j == cpu) continue; if (!cpu_online(j)) @@ -963,7 +974,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) err_out_unregister: spin_lock_irqsave(&cpufreq_driver_lock, flags); - for_each_cpu_mask_nr(j, policy->cpus) + for_each_cpu(j, policy->cpus) per_cpu(cpufreq_cpu_data, j) = NULL; spin_unlock_irqrestore(&cpufreq_driver_lock, flags); @@ -1024,7 +1035,7 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) */ if (unlikely(cpu != data->cpu)) { dprintk("removing link\n"); - cpu_clear(cpu, data->cpus); + cpumask_clear_cpu(cpu, data->cpus); spin_unlock_irqrestore(&cpufreq_driver_lock, flags); sysfs_remove_link(&sys_dev->kobj, "cpufreq"); cpufreq_cpu_put(data); @@ -1045,8 +1056,8 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) * per_cpu(cpufreq_cpu_data) while holding the lock, and remove * the sysfs links afterwards. */ - if (unlikely(cpus_weight(data->cpus) > 1)) { - for_each_cpu_mask_nr(j, data->cpus) { + if (unlikely(cpumask_weight(data->cpus) > 1)) { + for_each_cpu(j, data->cpus) { if (j == cpu) continue; per_cpu(cpufreq_cpu_data, j) = NULL; @@ -1055,8 +1066,8 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - if (unlikely(cpus_weight(data->cpus) > 1)) { - for_each_cpu_mask_nr(j, data->cpus) { + if (unlikely(cpumask_weight(data->cpus) > 1)) { + for_each_cpu(j, data->cpus) { if (j == cpu) continue; dprintk("removing link for cpu %u\n", j); @@ -1090,7 +1101,10 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) if (cpufreq_driver->exit) cpufreq_driver->exit(data); + free_cpumask_var(data->related_cpus); + free_cpumask_var(data->cpus); kfree(data); + per_cpu(cpufreq_cpu_data, cpu) = NULL; cpufreq_debug_enable_ratelimit(); return 0; diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index e2657837d95..0320962c4ec 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -498,7 +498,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, return rc; } - for_each_cpu_mask_nr(j, policy->cpus) { + for_each_cpu(j, policy->cpus) { struct cpu_dbs_info_s *j_dbs_info; j_dbs_info = &per_cpu(cpu_dbs_info, j); j_dbs_info->cur_policy = policy; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 2ab3c12b88a..6a2b036c938 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -400,7 +400,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) /* Get Absolute Load - in terms of freq */ max_load_freq = 0; - for_each_cpu_mask_nr(j, policy->cpus) { + for_each_cpu(j, policy->cpus) { struct cpu_dbs_info_s *j_dbs_info; cputime64_t cur_wall_time, cur_idle_time; unsigned int idle_time, wall_time; @@ -568,7 +568,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, return rc; } - for_each_cpu_mask_nr(j, policy->cpus) { + for_each_cpu(j, policy->cpus) { struct cpu_dbs_info_s *j_dbs_info; j_dbs_info = &per_cpu(cpu_dbs_info, j); j_dbs_info->cur_policy = policy; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 484b3abf61b..384b38d3e8e 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -80,8 +80,8 @@ struct cpufreq_real_policy { }; struct cpufreq_policy { - cpumask_t cpus; /* CPUs requiring sw coordination */ - cpumask_t related_cpus; /* CPUs with any coordination */ + cpumask_var_t cpus; /* CPUs requiring sw coordination */ + cpumask_var_t related_cpus; /* CPUs with any coordination */ unsigned int shared_type; /* ANY or ALL affected CPUs should set cpufreq */ unsigned int cpu; /* cpu nr of registered CPU */ -- cgit v1.2.3-70-g09d2 From 4d8bb5374924fc736ce275bfd1619b87a2106860 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sun, 4 Jan 2009 05:18:08 -0800 Subject: cpumask: use cpumask_var_t in acpi-cpufreq.c Impact: cleanup, reduce stack usage, use new cpumask API. Replace the cpumask_t in struct drv_cmd with a cpumask_var_t. Remove unneeded online_policy_cpus cpumask_t in acpi_cpufreq_target. Update refs to use new cpumask API. Signed-off-by: Mike Travis Acked-by: Rusty Russell Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 58 +++++++++++++++--------------- 1 file changed, 29 insertions(+), 29 deletions(-) (limited to 'arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 0b31939862d..fb594170dc5 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -145,7 +145,7 @@ typedef union { struct drv_cmd { unsigned int type; - cpumask_t mask; + cpumask_var_t mask; drv_addr_union addr; u32 val; }; @@ -193,7 +193,7 @@ static void drv_read(struct drv_cmd *cmd) cpumask_t saved_mask = current->cpus_allowed; cmd->val = 0; - set_cpus_allowed_ptr(current, &cmd->mask); + set_cpus_allowed_ptr(current, cmd->mask); do_drv_read(cmd); set_cpus_allowed_ptr(current, &saved_mask); } @@ -203,8 +203,8 @@ static void drv_write(struct drv_cmd *cmd) cpumask_t saved_mask = current->cpus_allowed; unsigned int i; - for_each_cpu_mask_nr(i, cmd->mask) { - set_cpus_allowed_ptr(current, &cpumask_of_cpu(i)); + for_each_cpu(i, cmd->mask) { + set_cpus_allowed_ptr(current, cpumask_of(i)); do_drv_write(cmd); } @@ -212,22 +212,22 @@ static void drv_write(struct drv_cmd *cmd) return; } -static u32 get_cur_val(const cpumask_t *mask) +static u32 get_cur_val(const struct cpumask *mask) { struct acpi_processor_performance *perf; struct drv_cmd cmd; - if (unlikely(cpus_empty(*mask))) + if (unlikely(cpumask_empty(mask))) return 0; - switch (per_cpu(drv_data, first_cpu(*mask))->cpu_feature) { + switch (per_cpu(drv_data, cpumask_first(mask))->cpu_feature) { case SYSTEM_INTEL_MSR_CAPABLE: cmd.type = SYSTEM_INTEL_MSR_CAPABLE; cmd.addr.msr.reg = MSR_IA32_PERF_STATUS; break; case SYSTEM_IO_CAPABLE: cmd.type = SYSTEM_IO_CAPABLE; - perf = per_cpu(drv_data, first_cpu(*mask))->acpi_data; + perf = per_cpu(drv_data, cpumask_first(mask))->acpi_data; cmd.addr.io.port = perf->control_register.address; cmd.addr.io.bit_width = perf->control_register.bit_width; break; @@ -235,7 +235,7 @@ static u32 get_cur_val(const cpumask_t *mask) return 0; } - cmd.mask = *mask; + cpumask_copy(cmd.mask, mask); drv_read(&cmd); @@ -386,7 +386,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu); struct acpi_processor_performance *perf; struct cpufreq_freqs freqs; - cpumask_t online_policy_cpus; struct drv_cmd cmd; unsigned int next_state = 0; /* Index into freq_table */ unsigned int next_perf_state = 0; /* Index into perf table */ @@ -401,20 +400,18 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, return -ENODEV; } + if (unlikely(!alloc_cpumask_var(&cmd.mask, GFP_KERNEL))) + return -ENOMEM; + perf = data->acpi_data; result = cpufreq_frequency_table_target(policy, data->freq_table, target_freq, relation, &next_state); - if (unlikely(result)) - return -ENODEV; - -#ifdef CONFIG_HOTPLUG_CPU - /* cpufreq holds the hotplug lock, so we are safe from here on */ - cpumask_and(&online_policy_cpus, cpu_online_mask, policy->cpus); -#else - online_policy_cpus = policy->cpus; -#endif + if (unlikely(result)) { + result = -ENODEV; + goto out; + } next_perf_state = data->freq_table[next_state].index; if (perf->state == next_perf_state) { @@ -425,7 +422,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, } else { dprintk("Already at target state (P%d)\n", next_perf_state); - return 0; + goto out; } } @@ -444,19 +441,19 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, cmd.val = (u32) perf->states[next_perf_state].control; break; default: - return -ENODEV; + result = -ENODEV; + goto out; } - cpus_clear(cmd.mask); - + /* cpufreq holds the hotplug lock, so we are safe from here on */ if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY) - cmd.mask = online_policy_cpus; + cpumask_and(cmd.mask, cpu_online_mask, policy->cpus); else - cpu_set(policy->cpu, cmd.mask); + cpumask_copy(cmd.mask, cpumask_of(policy->cpu)); freqs.old = perf->states[perf->state].core_frequency * 1000; freqs.new = data->freq_table[next_state].frequency; - for_each_cpu_mask_nr(i, cmd.mask) { + for_each_cpu(i, cmd.mask) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); } @@ -464,19 +461,22 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, drv_write(&cmd); if (acpi_pstate_strict) { - if (!check_freqs(&cmd.mask, freqs.new, data)) { + if (!check_freqs(cmd.mask, freqs.new, data)) { dprintk("acpi_cpufreq_target failed (%d)\n", policy->cpu); - return -EAGAIN; + result = -EAGAIN; + goto out; } } - for_each_cpu_mask_nr(i, cmd.mask) { + for_each_cpu(i, cmd.mask) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); } perf->state = next_perf_state; +out: + free_cpumask_var(cmd.mask); return result; } -- cgit v1.2.3-70-g09d2 From 7503bfbae89eba07b46441a5d1594647f6b8ab7d Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sun, 4 Jan 2009 05:18:09 -0800 Subject: cpumask: use work_on_cpu in acpi-cpufreq.c for drv_read and drv_write Impact: use new cpumask API to reduce stack usage Replace the saving of current->cpus_allowed and set_cpus_allowed_ptr() with a work_on_cpu function for drv_read() and drv_write(). Basically converts do_drv_{read,write} into "work_on_cpu" functions that are now called by drv_read and drv_write. Signed-off-by: Mike Travis Acked-by: Rusty Russell Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index fb594170dc5..4e4f2b04dac 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -150,8 +150,9 @@ struct drv_cmd { u32 val; }; -static void do_drv_read(struct drv_cmd *cmd) +static long do_drv_read(void *_cmd) { + struct drv_cmd *cmd = _cmd; u32 h; switch (cmd->type) { @@ -166,10 +167,12 @@ static void do_drv_read(struct drv_cmd *cmd) default: break; } + return 0; } -static void do_drv_write(struct drv_cmd *cmd) +static long do_drv_write(void *_cmd) { + struct drv_cmd *cmd = _cmd; u32 lo, hi; switch (cmd->type) { @@ -186,30 +189,23 @@ static void do_drv_write(struct drv_cmd *cmd) default: break; } + return 0; } static void drv_read(struct drv_cmd *cmd) { - cpumask_t saved_mask = current->cpus_allowed; cmd->val = 0; - set_cpus_allowed_ptr(current, cmd->mask); - do_drv_read(cmd); - set_cpus_allowed_ptr(current, &saved_mask); + work_on_cpu(cpumask_any(cmd->mask), do_drv_read, cmd); } static void drv_write(struct drv_cmd *cmd) { - cpumask_t saved_mask = current->cpus_allowed; unsigned int i; for_each_cpu(i, cmd->mask) { - set_cpus_allowed_ptr(current, cpumask_of(i)); - do_drv_write(cmd); + work_on_cpu(i, do_drv_write, cmd); } - - set_cpus_allowed_ptr(current, &saved_mask); - return; } static u32 get_cur_val(const struct cpumask *mask) @@ -235,10 +231,15 @@ static u32 get_cur_val(const struct cpumask *mask) return 0; } + if (unlikely(!alloc_cpumask_var(&cmd.mask, GFP_KERNEL))) + return 0; + cpumask_copy(cmd.mask, mask); drv_read(&cmd); + free_cpumask_var(cmd.mask); + dprintk("get_cur_val = %u\n", cmd.val); return cmd.val; -- cgit v1.2.3-70-g09d2 From e39ad415ac15116df213dfa2aa2a4f1b0857af9c Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sun, 4 Jan 2009 05:18:10 -0800 Subject: cpumask: use work_on_cpu in acpi-cpufreq.c for read_measured_perf_ctrs Impact: use new cpumask API to reduce stack usage Replace the saving of current->cpus_allowed and set_cpus_allowed_ptr() with a work_on_cpu function for read_measured_perf_ctrs(). Basically splits off the work function from get_measured_perf which is run on the designated cpu. Moves definition of struct perf_cur out of function local namespace, and is used as the work function argument. References in get_measured_perf use values in the perf_cur struct. Signed-off-by: Mike Travis Acked-by: Rusty Russell Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 83 ++++++++++++++++-------------- 1 file changed, 43 insertions(+), 40 deletions(-) (limited to 'arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 4e4f2b04dac..06fcd8f9323 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -245,6 +245,30 @@ static u32 get_cur_val(const struct cpumask *mask) return cmd.val; } +struct perf_cur { + union { + struct { + u32 lo; + u32 hi; + } split; + u64 whole; + } aperf_cur, mperf_cur; +}; + + +static long read_measured_perf_ctrs(void *_cur) +{ + struct perf_cur *cur = _cur; + + rdmsr(MSR_IA32_APERF, cur->aperf_cur.split.lo, cur->aperf_cur.split.hi); + rdmsr(MSR_IA32_MPERF, cur->mperf_cur.split.lo, cur->mperf_cur.split.hi); + + wrmsr(MSR_IA32_APERF, 0, 0); + wrmsr(MSR_IA32_MPERF, 0, 0); + + return 0; +} + /* * Return the measured active (C0) frequency on this CPU since last call * to this function. @@ -261,31 +285,12 @@ static u32 get_cur_val(const struct cpumask *mask) static unsigned int get_measured_perf(struct cpufreq_policy *policy, unsigned int cpu) { - union { - struct { - u32 lo; - u32 hi; - } split; - u64 whole; - } aperf_cur, mperf_cur; - - cpumask_t saved_mask; + struct perf_cur cur; unsigned int perf_percent; unsigned int retval; - saved_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - if (get_cpu() != cpu) { - /* We were not able to run on requested processor */ - put_cpu(); + if (!work_on_cpu(cpu, read_measured_perf_ctrs, &cur)) return 0; - } - - rdmsr(MSR_IA32_APERF, aperf_cur.split.lo, aperf_cur.split.hi); - rdmsr(MSR_IA32_MPERF, mperf_cur.split.lo, mperf_cur.split.hi); - - wrmsr(MSR_IA32_APERF, 0,0); - wrmsr(MSR_IA32_MPERF, 0,0); #ifdef __i386__ /* @@ -293,37 +298,39 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy, * Get an approximate value. Return failure in case we cannot get * an approximate value. */ - if (unlikely(aperf_cur.split.hi || mperf_cur.split.hi)) { + if (unlikely(cur.aperf_cur.split.hi || cur.mperf_cur.split.hi)) { int shift_count; u32 h; - h = max_t(u32, aperf_cur.split.hi, mperf_cur.split.hi); + h = max_t(u32, cur.aperf_cur.split.hi, cur.mperf_cur.split.hi); shift_count = fls(h); - aperf_cur.whole >>= shift_count; - mperf_cur.whole >>= shift_count; + cur.aperf_cur.whole >>= shift_count; + cur.mperf_cur.whole >>= shift_count; } - if (((unsigned long)(-1) / 100) < aperf_cur.split.lo) { + if (((unsigned long)(-1) / 100) < cur.aperf_cur.split.lo) { int shift_count = 7; - aperf_cur.split.lo >>= shift_count; - mperf_cur.split.lo >>= shift_count; + cur.aperf_cur.split.lo >>= shift_count; + cur.mperf_cur.split.lo >>= shift_count; } - if (aperf_cur.split.lo && mperf_cur.split.lo) - perf_percent = (aperf_cur.split.lo * 100) / mperf_cur.split.lo; + if (cur.aperf_cur.split.lo && cur.mperf_cur.split.lo) + perf_percent = (cur.aperf_cur.split.lo * 100) / + cur.mperf_cur.split.lo; else perf_percent = 0; #else - if (unlikely(((unsigned long)(-1) / 100) < aperf_cur.whole)) { + if (unlikely(((unsigned long)(-1) / 100) < cur.aperf_cur.whole)) { int shift_count = 7; - aperf_cur.whole >>= shift_count; - mperf_cur.whole >>= shift_count; + cur.aperf_cur.whole >>= shift_count; + cur.mperf_cur.whole >>= shift_count; } - if (aperf_cur.whole && mperf_cur.whole) - perf_percent = (aperf_cur.whole * 100) / mperf_cur.whole; + if (cur.aperf_cur.whole && cur.mperf_cur.whole) + perf_percent = (cur.aperf_cur.whole * 100) / + cur.mperf_cur.whole; else perf_percent = 0; @@ -331,10 +338,6 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy, retval = per_cpu(drv_data, policy->cpu)->max_freq * perf_percent / 100; - put_cpu(); - set_cpus_allowed_ptr(current, &saved_mask); - - dprintk("cpu %d: performance percent %d\n", cpu, perf_percent); return retval; } @@ -352,7 +355,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) } cached_freq = data->freq_table[data->acpi_data->state].frequency; - freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data); + freq = extract_freq(get_cur_val(cpumask_of(cpu)), data); if (freq != cached_freq) { /* * The dreaded BIOS frequency change behind our back. -- cgit v1.2.3-70-g09d2 From 50c668d678fd01284799a6e4f1b91829d83cb9ed Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 Jan 2009 10:49:53 +0100 Subject: Revert "cpumask: use work_on_cpu in acpi-cpufreq.c for drv_read and drv_write" This reverts commit 7503bfbae89eba07b46441a5d1594647f6b8ab7d. Dieter Ries reported bootup soft-hangs and bisected it back to this commit, and reverting this commit gave him a working system. The commit introduces work_on_cpu() use into the cpufreq code, but that is subtly problematic from a lock hierarchy POV: the hotplug-cpu lock is an highlevel lock that is taken before lowlevel locks, and in this codepath we are called with the policy lock taken. Dieter did not have lockdep enabled so we dont have a nice stack trace proof for this, but using work_on_cpu() in such a lowlevel place certainly looks wrong, so we revert the patch. work_on_cpu() needs to be reworked to be more generally usable. Reported-by: Dieter Ries Tested-by: Dieter Ries Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 06fcd8f9323..6f11e029e8c 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -150,9 +150,8 @@ struct drv_cmd { u32 val; }; -static long do_drv_read(void *_cmd) +static void do_drv_read(struct drv_cmd *cmd) { - struct drv_cmd *cmd = _cmd; u32 h; switch (cmd->type) { @@ -167,12 +166,10 @@ static long do_drv_read(void *_cmd) default: break; } - return 0; } -static long do_drv_write(void *_cmd) +static void do_drv_write(struct drv_cmd *cmd) { - struct drv_cmd *cmd = _cmd; u32 lo, hi; switch (cmd->type) { @@ -189,23 +186,30 @@ static long do_drv_write(void *_cmd) default: break; } - return 0; } static void drv_read(struct drv_cmd *cmd) { + cpumask_t saved_mask = current->cpus_allowed; cmd->val = 0; - work_on_cpu(cpumask_any(cmd->mask), do_drv_read, cmd); + set_cpus_allowed_ptr(current, cmd->mask); + do_drv_read(cmd); + set_cpus_allowed_ptr(current, &saved_mask); } static void drv_write(struct drv_cmd *cmd) { + cpumask_t saved_mask = current->cpus_allowed; unsigned int i; for_each_cpu(i, cmd->mask) { - work_on_cpu(i, do_drv_write, cmd); + set_cpus_allowed_ptr(current, cpumask_of(i)); + do_drv_write(cmd); } + + set_cpus_allowed_ptr(current, &saved_mask); + return; } static u32 get_cur_val(const struct cpumask *mask) @@ -231,15 +235,10 @@ static u32 get_cur_val(const struct cpumask *mask) return 0; } - if (unlikely(!alloc_cpumask_var(&cmd.mask, GFP_KERNEL))) - return 0; - cpumask_copy(cmd.mask, mask); drv_read(&cmd); - free_cpumask_var(cmd.mask); - dprintk("get_cur_val = %u\n", cmd.val); return cmd.val; -- cgit v1.2.3-70-g09d2 From 4a922a969cb0190ce4580d4b064e2ac35f3ac9bf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 13 Jan 2009 16:11:00 +0100 Subject: x86, cpufreq: remove leftover copymask_copy() Impact: fix potential boot crash on MAXSMP Remove code left over by: 50c668d: Revert "cpumask: use work_on_cpu in acpi-cpufreq.c for drv_read That cmd.cpumask is not allocated anymore. No impact on default !MAXSMP kernels. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 6f11e029e8c..8f3c95c7e61 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -235,8 +235,6 @@ static u32 get_cur_val(const struct cpumask *mask) return 0; } - cpumask_copy(cmd.mask, mask); - drv_read(&cmd); dprintk("get_cur_val = %u\n", cmd.val); -- cgit v1.2.3-70-g09d2