Diffstat (limited to 'arch/ia64/kernel')

-rw-r--r--  arch/ia64/kernel/Makefile                    3
-rw-r--r--  arch/ia64/kernel/cpufreq/Kconfig            29
-rw-r--r--  arch/ia64/kernel/cpufreq/Makefile            1
-rw-r--r--  arch/ia64/kernel/cpufreq/acpi-cpufreq.c    499
-rw-r--r--  arch/ia64/kernel/domain.c                  396
-rw-r--r--  arch/ia64/kernel/irq.c                      39
-rw-r--r--  arch/ia64/kernel/jprobes.S                   1
-rw-r--r--  arch/ia64/kernel/kprobes.c                 124
-rw-r--r--  arch/ia64/kernel/sys_ia64.c                  2
-rw-r--r--  arch/ia64/kernel/traps.c                     5
-rw-r--r--  arch/ia64/kernel/uncached.c                  4
-rw-r--r--  arch/ia64/kernel/vmlinux.lds.S               1
12 files changed, 637 insertions(+), 467 deletions(-)
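Most of the insertions come from the new ACPI P-state driver added under arch/ia64/kernel/cpufreq/. As an orientation aid before the full diff: the driver follows the standard cpufreq pattern of filling in a struct cpufreq_driver with verify/target/get/init/exit callbacks and registering it from an initcall. The fragment below is only a minimal sketch of that pattern; the example_* names are hypothetical and the callback bodies are elided, the real implementation is acpi-cpufreq.c in the diff that follows.

/*
 * Minimal sketch of the registration pattern the new driver follows.
 * All example_* names are hypothetical; the real callbacks live in
 * acpi-cpufreq.c below and do the ACPI/PAL work that is elided here.
 */
#include <linux/cpufreq.h>
#include <linux/init.h>
#include <linux/module.h>

static int example_verify(struct cpufreq_policy *policy)
{
	/* clamp policy->min/policy->max against the driver's frequency table */
	return 0;
}

static int example_target(struct cpufreq_policy *policy,
			  unsigned int target_freq, unsigned int relation)
{
	/* pick the closest table entry and program the new P-state */
	return 0;
}

static unsigned int example_get(unsigned int cpu)
{
	/* report the current frequency of @cpu in kHz */
	return 0;
}

static int example_cpu_init(struct cpufreq_policy *policy)
{
	/* read the ACPI performance data and build the per-CPU frequency table */
	return 0;
}

static int example_cpu_exit(struct cpufreq_policy *policy)
{
	/* free the per-CPU data allocated in ->init() */
	return 0;
}

static struct cpufreq_driver example_driver = {
	.verify	= example_verify,
	.target	= example_target,
	.get	= example_get,
	.init	= example_cpu_init,
	.exit	= example_cpu_exit,
	.name	= "example",
	.owner	= THIS_MODULE,
};

static int __init example_init(void)
{
	return cpufreq_register_driver(&example_driver);
}

static void __exit example_exit(void)
{
	cpufreq_unregister_driver(&example_driver);
}

late_initcall(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");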
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index e1fb68ddec2..307514f7a28 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -16,10 +16,11 @@ obj-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += acpi-ext.o obj-$(CONFIG_IA64_PALINFO) += palinfo.o obj-$(CONFIG_IOSAPIC) += iosapic.o obj-$(CONFIG_MODULES) += module.o -obj-$(CONFIG_SMP) += smp.o smpboot.o domain.o +obj-$(CONFIG_SMP) += smp.o smpboot.o obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o obj-$(CONFIG_IA64_CYCLONE) += cyclone.o +obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o diff --git a/arch/ia64/kernel/cpufreq/Kconfig b/arch/ia64/kernel/cpufreq/Kconfig new file mode 100644 index 00000000000..2d9d5279b98 --- /dev/null +++ b/arch/ia64/kernel/cpufreq/Kconfig @@ -0,0 +1,29 @@ + +# +# CPU Frequency scaling +# + +menu "CPU Frequency scaling" + +source "drivers/cpufreq/Kconfig" + +if CPU_FREQ + +comment "CPUFreq processor drivers" + +config IA64_ACPI_CPUFREQ + tristate "ACPI Processor P-States driver" + select CPU_FREQ_TABLE + depends on ACPI_PROCESSOR + help + This driver adds a CPUFreq driver which utilizes the ACPI + Processor Performance States. + + For details, take a look at <file:Documentation/cpu-freq/>. + + If in doubt, say N. + +endif # CPU_FREQ + +endmenu + diff --git a/arch/ia64/kernel/cpufreq/Makefile b/arch/ia64/kernel/cpufreq/Makefile new file mode 100644 index 00000000000..f748d34c02f --- /dev/null +++ b/arch/ia64/kernel/cpufreq/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_IA64_ACPI_CPUFREQ) += acpi-cpufreq.o diff --git a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c new file mode 100644 index 00000000000..da4d5cf80a4 --- /dev/null +++ b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c @@ -0,0 +1,499 @@ +/* + * arch/ia64/kernel/cpufreq/acpi-cpufreq.c + * This file provides the ACPI based P-state support. This + * module works with generic cpufreq infrastructure. Most of + * the code is based on i386 version + * (arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c) + * + * Copyright (C) 2005 Intel Corp + * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> + */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/cpufreq.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <asm/io.h> +#include <asm/uaccess.h> +#include <asm/pal.h> + +#include <linux/acpi.h> +#include <acpi/processor.h> + +#define dprintk(msg...) 
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg) + +MODULE_AUTHOR("Venkatesh Pallipadi"); +MODULE_DESCRIPTION("ACPI Processor P-States Driver"); +MODULE_LICENSE("GPL"); + + +struct cpufreq_acpi_io { + struct acpi_processor_performance acpi_data; + struct cpufreq_frequency_table *freq_table; + unsigned int resume; +}; + +static struct cpufreq_acpi_io *acpi_io_data[NR_CPUS]; + +static struct cpufreq_driver acpi_cpufreq_driver; + + +static int +processor_set_pstate ( + u32 value) +{ + s64 retval; + + dprintk("processor_set_pstate\n"); + + retval = ia64_pal_set_pstate((u64)value); + + if (retval) { + dprintk("Failed to set freq to 0x%x, with error 0x%x\n", + value, retval); + return -ENODEV; + } + return (int)retval; +} + + +static int +processor_get_pstate ( + u32 *value) +{ + u64 pstate_index = 0; + s64 retval; + + dprintk("processor_get_pstate\n"); + + retval = ia64_pal_get_pstate(&pstate_index); + *value = (u32) pstate_index; + + if (retval) + dprintk("Failed to get current freq with " + "error 0x%x, idx 0x%x\n", retval, *value); + + return (int)retval; +} + + +/* To be used only after data->acpi_data is initialized */ +static unsigned +extract_clock ( + struct cpufreq_acpi_io *data, + unsigned value, + unsigned int cpu) +{ + unsigned long i; + + dprintk("extract_clock\n"); + + for (i = 0; i < data->acpi_data.state_count; i++) { + if (value >= data->acpi_data.states[i].control) + return data->acpi_data.states[i].core_frequency; + } + return data->acpi_data.states[i-1].core_frequency; +} + + +static unsigned int +processor_get_freq ( + struct cpufreq_acpi_io *data, + unsigned int cpu) +{ + int ret = 0; + u32 value = 0; + cpumask_t saved_mask; + unsigned long clock_freq; + + dprintk("processor_get_freq\n"); + + saved_mask = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + if (smp_processor_id() != cpu) { + ret = -EAGAIN; + goto migrate_end; + } + + /* + * processor_get_pstate gets the average frequency since the + * last get. So, do two PAL_get_freq()... 
+ */ + ret = processor_get_pstate(&value); + ret = processor_get_pstate(&value); + + if (ret) { + set_cpus_allowed(current, saved_mask); + printk(KERN_WARNING "get performance failed with error %d\n", + ret); + ret = -EAGAIN; + goto migrate_end; + } + clock_freq = extract_clock(data, value, cpu); + ret = (clock_freq*1000); + +migrate_end: + set_cpus_allowed(current, saved_mask); + return ret; +} + + +static int +processor_set_freq ( + struct cpufreq_acpi_io *data, + unsigned int cpu, + int state) +{ + int ret = 0; + u32 value = 0; + struct cpufreq_freqs cpufreq_freqs; + cpumask_t saved_mask; + int retval; + + dprintk("processor_set_freq\n"); + + saved_mask = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + if (smp_processor_id() != cpu) { + retval = -EAGAIN; + goto migrate_end; + } + + if (state == data->acpi_data.state) { + if (unlikely(data->resume)) { + dprintk("Called after resume, resetting to P%d\n", state); + data->resume = 0; + } else { + dprintk("Already at target state (P%d)\n", state); + retval = 0; + goto migrate_end; + } + } + + dprintk("Transitioning from P%d to P%d\n", + data->acpi_data.state, state); + + /* cpufreq frequency struct */ + cpufreq_freqs.cpu = cpu; + cpufreq_freqs.old = data->freq_table[data->acpi_data.state].frequency; + cpufreq_freqs.new = data->freq_table[state].frequency; + + /* notify cpufreq */ + cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_PRECHANGE); + + /* + * First we write the target state's 'control' value to the + * control_register. + */ + + value = (u32) data->acpi_data.states[state].control; + + dprintk("Transitioning to state: 0x%08x\n", value); + + ret = processor_set_pstate(value); + if (ret) { + unsigned int tmp = cpufreq_freqs.new; + cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE); + cpufreq_freqs.new = cpufreq_freqs.old; + cpufreq_freqs.old = tmp; + cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE); + printk(KERN_WARNING "Transition failed with error %d\n", ret); + retval = -ENODEV; + goto migrate_end; + } + + cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE); + + data->acpi_data.state = state; + + retval = 0; + +migrate_end: + set_cpus_allowed(current, saved_mask); + return (retval); +} + + +static unsigned int +acpi_cpufreq_get ( + unsigned int cpu) +{ + struct cpufreq_acpi_io *data = acpi_io_data[cpu]; + + dprintk("acpi_cpufreq_get\n"); + + return processor_get_freq(data, cpu); +} + + +static int +acpi_cpufreq_target ( + struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu]; + unsigned int next_state = 0; + unsigned int result = 0; + + dprintk("acpi_cpufreq_setpolicy\n"); + + result = cpufreq_frequency_table_target(policy, + data->freq_table, target_freq, relation, &next_state); + if (result) + return (result); + + result = processor_set_freq(data, policy->cpu, next_state); + + return (result); +} + + +static int +acpi_cpufreq_verify ( + struct cpufreq_policy *policy) +{ + unsigned int result = 0; + struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu]; + + dprintk("acpi_cpufreq_verify\n"); + + result = cpufreq_frequency_table_verify(policy, + data->freq_table); + + return (result); +} + + +/* + * processor_init_pdc - let BIOS know about the SMP capabilities + * of this driver + * @perf: processor-specific acpi_io_data struct + * @cpu: CPU being initialized + * + * To avoid issues with legacy OSes, some BIOSes 
require to be informed of + * the SMP capabilities of OS P-state driver. Here we set the bits in _PDC + * accordingly. Actual call to _PDC is done in driver/acpi/processor.c + */ +static void +processor_init_pdc ( + struct acpi_processor_performance *perf, + unsigned int cpu, + struct acpi_object_list *obj_list + ) +{ + union acpi_object *obj; + u32 *buf; + + dprintk("processor_init_pdc\n"); + + perf->pdc = NULL; + /* Initialize pdc. It will be used later. */ + if (!obj_list) + return; + + if (!(obj_list->count && obj_list->pointer)) + return; + + obj = obj_list->pointer; + if ((obj->buffer.length == 12) && obj->buffer.pointer) { + buf = (u32 *)obj->buffer.pointer; + buf[0] = ACPI_PDC_REVISION_ID; + buf[1] = 1; + buf[2] = ACPI_PDC_EST_CAPABILITY_SMP; + perf->pdc = obj_list; + } + return; +} + + +static int +acpi_cpufreq_cpu_init ( + struct cpufreq_policy *policy) +{ + unsigned int i; + unsigned int cpu = policy->cpu; + struct cpufreq_acpi_io *data; + unsigned int result = 0; + + union acpi_object arg0 = {ACPI_TYPE_BUFFER}; + u32 arg0_buf[3]; + struct acpi_object_list arg_list = {1, &arg0}; + + dprintk("acpi_cpufreq_cpu_init\n"); + /* setup arg_list for _PDC settings */ + arg0.buffer.length = 12; + arg0.buffer.pointer = (u8 *) arg0_buf; + + data = kmalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL); + if (!data) + return (-ENOMEM); + + memset(data, 0, sizeof(struct cpufreq_acpi_io)); + + acpi_io_data[cpu] = data; + + processor_init_pdc(&data->acpi_data, cpu, &arg_list); + result = acpi_processor_register_performance(&data->acpi_data, cpu); + data->acpi_data.pdc = NULL; + + if (result) + goto err_free; + + /* capability check */ + if (data->acpi_data.state_count <= 1) { + dprintk("No P-States\n"); + result = -ENODEV; + goto err_unreg; + } + + if ((data->acpi_data.control_register.space_id != + ACPI_ADR_SPACE_FIXED_HARDWARE) || + (data->acpi_data.status_register.space_id != + ACPI_ADR_SPACE_FIXED_HARDWARE)) { + dprintk("Unsupported address space [%d, %d]\n", + (u32) (data->acpi_data.control_register.space_id), + (u32) (data->acpi_data.status_register.space_id)); + result = -ENODEV; + goto err_unreg; + } + + /* alloc freq_table */ + data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) * + (data->acpi_data.state_count + 1), + GFP_KERNEL); + if (!data->freq_table) { + result = -ENOMEM; + goto err_unreg; + } + + /* detect transition latency */ + policy->cpuinfo.transition_latency = 0; + for (i=0; i<data->acpi_data.state_count; i++) { + if ((data->acpi_data.states[i].transition_latency * 1000) > + policy->cpuinfo.transition_latency) { + policy->cpuinfo.transition_latency = + data->acpi_data.states[i].transition_latency * 1000; + } + } + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + + policy->cur = processor_get_freq(data, policy->cpu); + + /* table init */ + for (i = 0; i <= data->acpi_data.state_count; i++) + { + data->freq_table[i].index = i; + if (i < data->acpi_data.state_count) { + data->freq_table[i].frequency = + data->acpi_data.states[i].core_frequency * 1000; + } else { + data->freq_table[i].frequency = CPUFREQ_TABLE_END; + } + } + + result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table); + if (result) { + goto err_freqfree; + } + + /* notify BIOS that we exist */ + acpi_processor_notify_smm(THIS_MODULE); + + printk(KERN_INFO "acpi-cpufreq: CPU%u - ACPI performance management " + "activated.\n", cpu); + + for (i = 0; i < data->acpi_data.state_count; i++) + dprintk(" %cP%d: %d MHz, %d mW, %d uS, %d uS, 0x%x 0x%x\n", + (i == data->acpi_data.state?'*':' '), i, + 
(u32) data->acpi_data.states[i].core_frequency, + (u32) data->acpi_data.states[i].power, + (u32) data->acpi_data.states[i].transition_latency, + (u32) data->acpi_data.states[i].bus_master_latency, + (u32) data->acpi_data.states[i].status, + (u32) data->acpi_data.states[i].control); + + cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu); + + /* the first call to ->target() should result in us actually + * writing something to the appropriate registers. */ + data->resume = 1; + + return (result); + + err_freqfree: + kfree(data->freq_table); + err_unreg: + acpi_processor_unregister_performance(&data->acpi_data, cpu); + err_free: + kfree(data); + acpi_io_data[cpu] = NULL; + + return (result); +} + + +static int +acpi_cpufreq_cpu_exit ( + struct cpufreq_policy *policy) +{ + struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu]; + + dprintk("acpi_cpufreq_cpu_exit\n"); + + if (data) { + cpufreq_frequency_table_put_attr(policy->cpu); + acpi_io_data[policy->cpu] = NULL; + acpi_processor_unregister_performance(&data->acpi_data, + policy->cpu); + kfree(data); + } + + return (0); +} + + +static struct freq_attr* acpi_cpufreq_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + + +static struct cpufreq_driver acpi_cpufreq_driver = { + .verify = acpi_cpufreq_verify, + .target = acpi_cpufreq_target, + .get = acpi_cpufreq_get, + .init = acpi_cpufreq_cpu_init, + .exit = acpi_cpufreq_cpu_exit, + .name = "acpi-cpufreq", + .owner = THIS_MODULE, + .attr = acpi_cpufreq_attr, +}; + + +static int __init +acpi_cpufreq_init (void) +{ + dprintk("acpi_cpufreq_init\n"); + + return cpufreq_register_driver(&acpi_cpufreq_driver); +} + + +static void __exit +acpi_cpufreq_exit (void) +{ + dprintk("acpi_cpufreq_exit\n"); + + cpufreq_unregister_driver(&acpi_cpufreq_driver); + return; +} + + +late_initcall(acpi_cpufreq_init); +module_exit(acpi_cpufreq_exit); + diff --git a/arch/ia64/kernel/domain.c b/arch/ia64/kernel/domain.c deleted file mode 100644 index bbb8efe126b..00000000000 --- a/arch/ia64/kernel/domain.c +++ /dev/null @@ -1,396 +0,0 @@ -/* - * arch/ia64/kernel/domain.c - * Architecture specific sched-domains builder. - * - * Copyright (C) 2004 Jesse Barnes - * Copyright (C) 2004 Silicon Graphics, Inc. - */ - -#include <linux/sched.h> -#include <linux/percpu.h> -#include <linux/slab.h> -#include <linux/cpumask.h> -#include <linux/init.h> -#include <linux/topology.h> -#include <linux/nodemask.h> - -#define SD_NODES_PER_DOMAIN 16 - -#ifdef CONFIG_NUMA -/** - * find_next_best_node - find the next node to include in a sched_domain - * @node: node whose sched_domain we're building - * @used_nodes: nodes already in the sched_domain - * - * Find the next node to include in a given scheduling domain. Simply - * finds the closest node not already in the @used_nodes map. - * - * Should use nodemask_t. 
- */ -static int find_next_best_node(int node, unsigned long *used_nodes) -{ - int i, n, val, min_val, best_node = 0; - - min_val = INT_MAX; - - for (i = 0; i < MAX_NUMNODES; i++) { - /* Start at @node */ - n = (node + i) % MAX_NUMNODES; - - if (!nr_cpus_node(n)) - continue; - - /* Skip already used nodes */ - if (test_bit(n, used_nodes)) - continue; - - /* Simple min distance search */ - val = node_distance(node, n); - - if (val < min_val) { - min_val = val; - best_node = n; - } - } - - set_bit(best_node, used_nodes); - return best_node; -} - -/** - * sched_domain_node_span - get a cpumask for a node's sched_domain - * @node: node whose cpumask we're constructing - * @size: number of nodes to include in this span - * - * Given a node, construct a good cpumask for its sched_domain to span. It - * should be one that prevents unnecessary balancing, but also spreads tasks - * out optimally. - */ -static cpumask_t sched_domain_node_span(int node) -{ - int i; - cpumask_t span, nodemask; - DECLARE_BITMAP(used_nodes, MAX_NUMNODES); - - cpus_clear(span); - bitmap_zero(used_nodes, MAX_NUMNODES); - - nodemask = node_to_cpumask(node); - cpus_or(span, span, nodemask); - set_bit(node, used_nodes); - - for (i = 1; i < SD_NODES_PER_DOMAIN; i++) { - int next_node = find_next_best_node(node, used_nodes); - nodemask = node_to_cpumask(next_node); - cpus_or(span, span, nodemask); - } - - return span; -} -#endif - -/* - * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we - * can switch it on easily if needed. - */ -#ifdef CONFIG_SCHED_SMT -static DEFINE_PER_CPU(struct sched_domain, cpu_domains); -static struct sched_group sched_group_cpus[NR_CPUS]; -static int cpu_to_cpu_group(int cpu) -{ - return cpu; -} -#endif - -static DEFINE_PER_CPU(struct sched_domain, phys_domains); -static struct sched_group sched_group_phys[NR_CPUS]; -static int cpu_to_phys_group(int cpu) -{ -#ifdef CONFIG_SCHED_SMT - return first_cpu(cpu_sibling_map[cpu]); -#else - return cpu; -#endif -} - -#ifdef CONFIG_NUMA -/* - * The init_sched_build_groups can't handle what we want to do with node - * groups, so roll our own. Now each node has its own list of groups which - * gets dynamically allocated. - */ -static DEFINE_PER_CPU(struct sched_domain, node_domains); -static struct sched_group *sched_group_nodes[MAX_NUMNODES]; - -static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); -static struct sched_group sched_group_allnodes[MAX_NUMNODES]; - -static int cpu_to_allnodes_group(int cpu) -{ - return cpu_to_node(cpu); -} -#endif - -/* - * Build sched domains for a given set of cpus and attach the sched domains - * to the individual cpus - */ -void build_sched_domains(const cpumask_t *cpu_map) -{ - int i; - - /* - * Set up domains for cpus specified by the cpu_map. 
- */ - for_each_cpu_mask(i, *cpu_map) { - int group; - struct sched_domain *sd = NULL, *p; - cpumask_t nodemask = node_to_cpumask(cpu_to_node(i)); - - cpus_and(nodemask, nodemask, *cpu_map); - -#ifdef CONFIG_NUMA - if (num_online_cpus() - > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) { - sd = &per_cpu(allnodes_domains, i); - *sd = SD_ALLNODES_INIT; - sd->span = *cpu_map; - group = cpu_to_allnodes_group(i); - sd->groups = &sched_group_allnodes[group]; - p = sd; - } else - p = NULL; - - sd = &per_cpu(node_domains, i); - *sd = SD_NODE_INIT; - sd->span = sched_domain_node_span(cpu_to_node(i)); - sd->parent = p; - cpus_and(sd->span, sd->span, *cpu_map); -#endif - - p = sd; - sd = &per_cpu(phys_domains, i); - group = cpu_to_phys_group(i); - *sd = SD_CPU_INIT; - sd->span = nodemask; - sd->parent = p; - sd->groups = &sched_group_phys[group]; - -#ifdef CONFIG_SCHED_SMT - p = sd; - sd = &per_cpu(cpu_domains, i); - group = cpu_to_cpu_group(i); - *sd = SD_SIBLING_INIT; - sd->span = cpu_sibling_map[i]; - cpus_and(sd->span, sd->span, *cpu_map); - sd->parent = p; - sd->groups = &sched_group_cpus[group]; -#endif - } - -#ifdef CONFIG_SCHED_SMT - /* Set up CPU (sibling) groups */ - for_each_cpu_mask(i, *cpu_map) { - cpumask_t this_sibling_map = cpu_sibling_map[i]; - cpus_and(this_sibling_map, this_sibling_map, *cpu_map); - if (i != first_cpu(this_sibling_map)) - continue; - - init_sched_build_groups(sched_group_cpus, this_sibling_map, - &cpu_to_cpu_group); - } -#endif - - /* Set up physical groups */ - for (i = 0; i < MAX_NUMNODES; i++) { - cpumask_t nodemask = node_to_cpumask(i); - - cpus_and(nodemask, nodemask, *cpu_map); - if (cpus_empty(nodemask)) - continue; - - init_sched_build_groups(sched_group_phys, nodemask, - &cpu_to_phys_group); - } - -#ifdef CONFIG_NUMA - init_sched_build_groups(sched_group_allnodes, *cpu_map, - &cpu_to_allnodes_group); - - for (i = 0; i < MAX_NUMNODES; i++) { - /* Set up node groups */ - struct sched_group *sg, *prev; - cpumask_t nodemask = node_to_cpumask(i); - cpumask_t domainspan; - cpumask_t covered = CPU_MASK_NONE; - int j; - - cpus_and(nodemask, nodemask, *cpu_map); - if (cpus_empty(nodemask)) - continue; - - domainspan = sched_domain_node_span(i); - cpus_and(domainspan, domainspan, *cpu_map); - - sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL); - sched_group_nodes[i] = sg; - for_each_cpu_mask(j, nodemask) { - struct sched_domain *sd; - sd = &per_cpu(node_domains, j); - sd->groups = sg; - if (sd->groups == NULL) { - /* Turn off balancing if we have no groups */ - sd->flags = 0; - } - } - if (!sg) { - printk(KERN_WARNING - "Can not alloc domain group for node %d\n", i); - continue; - } - sg->cpu_power = 0; - sg->cpumask = nodemask; - cpus_or(covered, covered, nodemask); - prev = sg; - - for (j = 0; j < MAX_NUMNODES; j++) { - cpumask_t tmp, notcovered; - int n = (i + j) % MAX_NUMNODES; - - cpus_complement(notcovered, covered); - cpus_and(tmp, notcovered, *cpu_map); - cpus_and(tmp, tmp, domainspan); - if (cpus_empty(tmp)) - break; - - nodemask = node_to_cpumask(n); - cpus_and(tmp, tmp, nodemask); - if (cpus_empty(tmp)) - continue; - - sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL); - if (!sg) { - printk(KERN_WARNING - "Can not alloc domain group for node %d\n", j); - break; - } - sg->cpu_power = 0; - sg->cpumask = tmp; - cpus_or(covered, covered, tmp); - prev->next = sg; - prev = sg; - } - prev->next = sched_group_nodes[i]; - } -#endif - - /* Calculate CPU power for physical packages and nodes */ - for_each_cpu_mask(i, *cpu_map) { - int power; - struct 
sched_domain *sd; -#ifdef CONFIG_SCHED_SMT - sd = &per_cpu(cpu_domains, i); - power = SCHED_LOAD_SCALE; - sd->groups->cpu_power = power; -#endif - - sd = &per_cpu(phys_domains, i); - power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * - (cpus_weight(sd->groups->cpumask)-1) / 10; - sd->groups->cpu_power = power; - -#ifdef CONFIG_NUMA - sd = &per_cpu(allnodes_domains, i); - if (sd->groups) { - power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * - (cpus_weight(sd->groups->cpumask)-1) / 10; - sd->groups->cpu_power = power; - } -#endif - } - -#ifdef CONFIG_NUMA - for (i = 0; i < MAX_NUMNODES; i++) { - struct sched_group *sg = sched_group_nodes[i]; - int j; - - if (sg == NULL) - continue; -next_sg: - for_each_cpu_mask(j, sg->cpumask) { - struct sched_domain *sd; - int power; - - sd = &per_cpu(phys_domains, j); - if (j != first_cpu(sd->groups->cpumask)) { - /* - * Only add "power" once for each - * physical package. - */ - continue; - } - power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * - (cpus_weight(sd->groups->cpumask)-1) / 10; - - sg->cpu_power += power; - } - sg = sg->next; - if (sg != sched_group_nodes[i]) - goto next_sg; - } -#endif - - /* Attach the domains */ - for_each_cpu_mask(i, *cpu_map) { - struct sched_domain *sd; -#ifdef CONFIG_SCHED_SMT - sd = &per_cpu(cpu_domains, i); -#else - sd = &per_cpu(phys_domains, i); -#endif - cpu_attach_domain(sd, i); - } -} -/* - * Set up scheduler domains and groups. Callers must hold the hotplug lock. - */ -void arch_init_sched_domains(const cpumask_t *cpu_map) -{ - cpumask_t cpu_default_map; - - /* - * Setup mask for cpus without special case scheduling requirements. - * For now this just excludes isolated cpus, but could be used to - * exclude other special cases in the future. - */ - cpus_andnot(cpu_default_map, *cpu_map, cpu_isolated_map); - - build_sched_domains(&cpu_default_map); -} - -void arch_destroy_sched_domains(const cpumask_t *cpu_map) -{ -#ifdef CONFIG_NUMA - int i; - for (i = 0; i < MAX_NUMNODES; i++) { - cpumask_t nodemask = node_to_cpumask(i); - struct sched_group *oldsg, *sg = sched_group_nodes[i]; - - cpus_and(nodemask, nodemask, *cpu_map); - if (cpus_empty(nodemask)) - continue; - - if (sg == NULL) - continue; - sg = sg->next; -next_sg: - oldsg = sg; - sg = sg->next; - kfree(oldsg); - if (oldsg != sched_group_nodes[i]) - goto next_sg; - sched_group_nodes[i] = NULL; - } -#endif -} - diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index 28f2aadc38d..205d9802826 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -91,23 +91,8 @@ skip: } #ifdef CONFIG_SMP -/* - * This is updated when the user sets irq affinity via /proc - */ -static cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS]; -static unsigned long pending_irq_redir[BITS_TO_LONGS(NR_IRQS)]; - static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 }; -/* - * Arch specific routine for deferred write to iosapic rte to reprogram - * intr destination. 
- */ -void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) -{ - pending_irq_cpumask[irq] = mask_val; -} - void set_irq_affinity_info (unsigned int irq, int hwid, int redir) { cpumask_t mask = CPU_MASK_NONE; @@ -116,32 +101,10 @@ void set_irq_affinity_info (unsigned int irq, int hwid, int redir) if (irq < NR_IRQS) { irq_affinity[irq] = mask; + set_irq_info(irq, mask); irq_redir[irq] = (char) (redir & 0xff); } } - - -void move_irq(int irq) -{ - /* note - we hold desc->lock */ - cpumask_t tmp; - irq_desc_t *desc = irq_descp(irq); - int redir = test_bit(irq, pending_irq_redir); - - if (unlikely(!desc->handler->set_affinity)) - return; - - if (!cpus_empty(pending_irq_cpumask[irq])) { - cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map); - if (unlikely(!cpus_empty(tmp))) { - desc->handler->set_affinity(irq | (redir ? IA64_IRQ_REDIRECTED : 0), - pending_irq_cpumask[irq]); - } - cpus_clear(pending_irq_cpumask[irq]); - } -} - - #endif /* CONFIG_SMP */ #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/ia64/kernel/jprobes.S b/arch/ia64/kernel/jprobes.S index b7fa3ccd2b0..2323377e369 100644 --- a/arch/ia64/kernel/jprobes.S +++ b/arch/ia64/kernel/jprobes.S @@ -49,6 +49,7 @@ /* * void jprobe_break(void) */ + .section .kprobes.text, "ax" ENTRY(jprobe_break) break.m 0x80300 END(jprobe_break) diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 884f5cd27d8..471086b808a 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -87,12 +87,25 @@ static enum instruction_type bundle_encoding[32][3] = { * is IP relative instruction and update the kprobe * inst flag accordingly */ -static void update_kprobe_inst_flag(uint template, uint slot, uint major_opcode, - unsigned long kprobe_inst, struct kprobe *p) +static void __kprobes update_kprobe_inst_flag(uint template, uint slot, + uint major_opcode, + unsigned long kprobe_inst, + struct kprobe *p) { p->ainsn.inst_flag = 0; p->ainsn.target_br_reg = 0; + /* Check for Break instruction + * Bits 37:40 Major opcode to be zero + * Bits 27:32 X6 to be zero + * Bits 32:35 X3 to be zero + */ + if ((!major_opcode) && (!((kprobe_inst >> 27) & 0x1FF)) ) { + /* is a break instruction */ + p->ainsn.inst_flag |= INST_FLAG_BREAK_INST; + return; + } + if (bundle_encoding[template][slot] == B) { switch (major_opcode) { case INDIRECT_CALL_OPCODE: @@ -126,8 +139,10 @@ static void update_kprobe_inst_flag(uint template, uint slot, uint major_opcode * Returns 0 if supported * Returns -EINVAL if unsupported */ -static int unsupported_inst(uint template, uint slot, uint major_opcode, - unsigned long kprobe_inst, struct kprobe *p) +static int __kprobes unsupported_inst(uint template, uint slot, + uint major_opcode, + unsigned long kprobe_inst, + struct kprobe *p) { unsigned long addr = (unsigned long)p->addr; @@ -168,8 +183,9 @@ static int unsupported_inst(uint template, uint slot, uint major_opcode, * on which we are inserting kprobe is cmp instruction * with ctype as unc. */ -static uint is_cmp_ctype_unc_inst(uint template, uint slot, uint major_opcode, -unsigned long kprobe_inst) +static uint __kprobes is_cmp_ctype_unc_inst(uint template, uint slot, + uint major_opcode, + unsigned long kprobe_inst) { cmp_inst_t cmp_inst; uint ctype_unc = 0; @@ -201,8 +217,10 @@ out: * In this function we override the bundle with * the break instruction at the given slot. 
*/ -static void prepare_break_inst(uint template, uint slot, uint major_opcode, - unsigned long kprobe_inst, struct kprobe *p) +static void __kprobes prepare_break_inst(uint template, uint slot, + uint major_opcode, + unsigned long kprobe_inst, + struct kprobe *p) { unsigned long break_inst = BREAK_INST; bundle_t *bundle = &p->ainsn.insn.bundle; @@ -271,7 +289,8 @@ static inline int in_ivt_functions(unsigned long addr) && addr < (unsigned long)__end_ivt_text); } -static int valid_kprobe_addr(int template, int slot, unsigned long addr) +static int __kprobes valid_kprobe_addr(int template, int slot, + unsigned long addr) { if ((slot > 2) || ((bundle_encoding[template][1] == L) && slot > 1)) { printk(KERN_WARNING "Attempting to insert unaligned kprobe " @@ -323,7 +342,7 @@ static void kretprobe_trampoline(void) * - cleanup by marking the instance as unused * - long jump back to the original return address */ -int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head; @@ -381,7 +400,8 @@ int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) return 1; } -void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) +void __kprobes arch_prepare_kretprobe(struct kretprobe *rp, + struct pt_regs *regs) { struct kretprobe_instance *ri; @@ -399,7 +419,7 @@ void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) } } -int arch_prepare_kprobe(struct kprobe *p) +int __kprobes arch_prepare_kprobe(struct kprobe *p) { unsigned long addr = (unsigned long) p->addr; unsigned long *kprobe_addr = (unsigned long *)(addr & ~0xFULL); @@ -430,7 +450,7 @@ int arch_prepare_kprobe(struct kprobe *p) return 0; } -void arch_arm_kprobe(struct kprobe *p) +void __kprobes arch_arm_kprobe(struct kprobe *p) { unsigned long addr = (unsigned long)p->addr; unsigned long arm_addr = addr & ~0xFULL; @@ -439,7 +459,7 @@ void arch_arm_kprobe(struct kprobe *p) flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t)); } -void arch_disarm_kprobe(struct kprobe *p) +void __kprobes arch_disarm_kprobe(struct kprobe *p) { unsigned long addr = (unsigned long)p->addr; unsigned long arm_addr = addr & ~0xFULL; @@ -449,7 +469,7 @@ void arch_disarm_kprobe(struct kprobe *p) flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t)); } -void arch_remove_kprobe(struct kprobe *p) +void __kprobes arch_remove_kprobe(struct kprobe *p) { } @@ -461,7 +481,7 @@ void arch_remove_kprobe(struct kprobe *p) * to original stack address, handle the case where we need to fixup the * relative IP address and/or fixup branch register. 
*/ -static void resume_execution(struct kprobe *p, struct pt_regs *regs) +static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) { unsigned long bundle_addr = ((unsigned long) (&p->opcode.bundle)) & ~0xFULL; unsigned long resume_addr = (unsigned long)p->addr & ~0xFULL; @@ -528,13 +548,16 @@ turn_ss_off: ia64_psr(regs)->ss = 0; } -static void prepare_ss(struct kprobe *p, struct pt_regs *regs) +static void __kprobes prepare_ss(struct kprobe *p, struct pt_regs *regs) { unsigned long bundle_addr = (unsigned long) &p->opcode.bundle; unsigned long slot = (unsigned long)p->addr & 0xf; - /* Update instruction pointer (IIP) and slot number (IPSR.ri) */ - regs->cr_iip = bundle_addr & ~0xFULL; + /* single step inline if break instruction */ + if (p->ainsn.inst_flag == INST_FLAG_BREAK_INST) + regs->cr_iip = (unsigned long)p->addr & ~0xFULL; + else + regs->cr_iip = bundle_addr & ~0xFULL; if (slot > 2) slot = 0; @@ -545,7 +568,39 @@ static void prepare_ss(struct kprobe *p, struct pt_regs *regs) ia64_psr(regs)->ss = 1; } -static int pre_kprobes_handler(struct die_args *args) +static int __kprobes is_ia64_break_inst(struct pt_regs *regs) +{ + unsigned int slot = ia64_psr(regs)->ri; + unsigned int template, major_opcode; + unsigned long kprobe_inst; + unsigned long *kprobe_addr = (unsigned long *)regs->cr_iip; + bundle_t bundle; + + memcpy(&bundle, kprobe_addr, sizeof(bundle_t)); + template = bundle.quad0.template; + + /* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */ + if (slot == 1 && bundle_encoding[template][1] == L) + slot++; + + /* Get Kprobe probe instruction at given slot*/ + get_kprobe_inst(&bundle, slot, &kprobe_inst, &major_opcode); + + /* For break instruction, + * Bits 37:40 Major opcode to be zero + * Bits 27:32 X6 to be zero + * Bits 32:35 X3 to be zero + */ + if (major_opcode || ((kprobe_inst >> 27) & 0x1FF) ) { + /* Not a break instruction */ + return 0; + } + + /* Is a break instruction */ + return 1; +} + +static int __kprobes pre_kprobes_handler(struct die_args *args) { struct kprobe *p; int ret = 0; @@ -558,7 +613,9 @@ static int pre_kprobes_handler(struct die_args *args) if (kprobe_running()) { p = get_kprobe(addr); if (p) { - if (kprobe_status == KPROBE_HIT_SS) { + if ( (kprobe_status == KPROBE_HIT_SS) && + (p->ainsn.inst_flag == INST_FLAG_BREAK_INST)) { + ia64_psr(regs)->ss = 0; unlock_kprobes(); goto no_kprobe; } @@ -592,6 +649,19 @@ static int pre_kprobes_handler(struct die_args *args) p = get_kprobe(addr); if (!p) { unlock_kprobes(); + if (!is_ia64_break_inst(regs)) { + /* + * The breakpoint instruction was removed right + * after we hit it. Another cpu has removed + * either a probepoint or a debugger breakpoint + * at this address. In either case, no further + * handling of this interrupt is appropriate. 
+ */ + ret = 1; + + } + + /* Not one of our break, let kernel handle it */ goto no_kprobe; } @@ -616,7 +686,7 @@ no_kprobe: return ret; } -static int post_kprobes_handler(struct pt_regs *regs) +static int __kprobes post_kprobes_handler(struct pt_regs *regs) { if (!kprobe_running()) return 0; @@ -641,7 +711,7 @@ out: return 1; } -static int kprobes_fault_handler(struct pt_regs *regs, int trapnr) +static int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr) { if (!kprobe_running()) return 0; @@ -659,8 +729,8 @@ static int kprobes_fault_handler(struct pt_regs *regs, int trapnr) return 0; } -int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, - void *data) +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) { struct die_args *args = (struct die_args *)data; switch(val) { @@ -681,7 +751,7 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, return NOTIFY_DONE; } -int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct jprobe *jp = container_of(p, struct jprobe, kp); unsigned long addr = ((struct fnptr *)(jp->entry))->ip; @@ -703,7 +773,7 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) return 1; } -int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { *regs = jprobe_saved_regs; return 1; diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index 770fab37928..f2dbcd1db0d 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -35,7 +35,7 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len return -ENOMEM; #ifdef CONFIG_HUGETLB_PAGE - if (REGION_NUMBER(addr) == REGION_HPAGE) + if (REGION_NUMBER(addr) == RGN_HPAGE) addr = 0; #endif if (!addr) diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index 4440c8343fa..f970359e7ed 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -15,6 +15,7 @@ #include <linux/vt_kern.h> /* For unblank_screen() */ #include <linux/module.h> /* for EXPORT_SYMBOL */ #include <linux/hardirq.h> +#include <linux/kprobes.h> #include <asm/fpswa.h> #include <asm/ia32.h> @@ -122,7 +123,7 @@ die_if_kernel (char *str, struct pt_regs *regs, long err) } void -ia64_bad_break (unsigned long break_num, struct pt_regs *regs) +__kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs) { siginfo_t siginfo; int sig, code; @@ -444,7 +445,7 @@ ia64_illegal_op_fault (unsigned long ec, long arg1, long arg2, long arg3, return rv; } -void +void __kprobes ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, unsigned long iim, unsigned long itir, long arg5, long arg6, long arg7, struct pt_regs regs) diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c index 490dfc9ab47..4e9d06c48a8 100644 --- a/arch/ia64/kernel/uncached.c +++ b/arch/ia64/kernel/uncached.c @@ -184,7 +184,7 @@ uncached_free_page(unsigned long maddr) { int node; - node = nasid_to_cnodeid(NASID_GET(maddr)); + node = paddr_to_nid(maddr - __IA64_UNCACHED_OFFSET); dprintk(KERN_DEBUG "uncached_free_page(%lx) on node %i\n", maddr, node); @@ -217,7 +217,7 @@ uncached_build_memmap(unsigned long start, unsigned long end, void *arg) memset((char *)vstart, 0, length); - node = nasid_to_cnodeid(NASID_GET(start)); + node = paddr_to_nid(start); for (; vstart < vend ; vstart += 
PAGE_SIZE) {
 		dprintk(KERN_INFO "sticking %lx into the pool!\n", vstart);

diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index a676e79e068..30d8564e960 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -48,6 +48,7 @@ SECTIONS
 		*(.text)
 		SCHED_TEXT
 		LOCK_TEXT
+		KPROBES_TEXT
 		*(.gnu.linkonce.t*)
 	}
 	.text2 : AT(ADDR(.text2) - LOAD_OFFSET)
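The kprobes portion of the patch repeatedly relies on one encoding fact: an IA-64 instruction slot holds a break instruction when its major opcode (bits 37:40) and its x6/x3 fields (bits 27:35) are all zero, which is what the new is_ia64_break_inst() and the INST_FLAG_BREAK_INST handling test. The stand-alone fragment below merely restates that bit test outside the kernel; the two sample words are synthetic values chosen to exercise each branch, not real IA-64 encodings.

#include <stdio.h>

/* Same test the patch applies to a 41-bit instruction slot. */
static int is_break_slot(unsigned long long inst)
{
	unsigned int major_opcode = (inst >> 37) & 0xf;   /* bits 37..40 */
	unsigned int x6_x3        = (inst >> 27) & 0x1ff; /* bits 27..35 */

	return major_opcode == 0 && x6_x3 == 0;
}

int main(void)
{
	/* synthetic samples: low immediate bits only vs. a non-zero major opcode */
	unsigned long long break_like = 0x80300ULL << 6;
	unsigned long long other      = 0x8ULL << 37;

	printf("break_like -> %d\n", is_break_slot(break_like)); /* prints 1 */
	printf("other      -> %d\n", is_break_slot(other));      /* prints 0 */
	return 0;
}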