diff options
Diffstat (limited to 'arch/sparc/kernel')
-rw-r--r-- | arch/sparc/kernel/Makefile | 2 | ||||
-rw-r--r-- | arch/sparc/kernel/cpumap.c | 431 | ||||
-rw-r--r-- | arch/sparc/kernel/cpumap.h | 16 | ||||
-rw-r--r-- | arch/sparc/kernel/dma.c | 127 | ||||
-rw-r--r-- | arch/sparc/kernel/ds.c | 3 | ||||
-rw-r--r-- | arch/sparc/kernel/ftrace.c | 47 | ||||
-rw-r--r-- | arch/sparc/kernel/head_64.S | 22 | ||||
-rw-r--r-- | arch/sparc/kernel/iommu.c | 15 | ||||
-rw-r--r-- | arch/sparc/kernel/irq_64.c | 29 | ||||
-rw-r--r-- | arch/sparc/kernel/mdesc.c | 149 | ||||
-rw-r--r-- | arch/sparc/kernel/of_device_32.c | 195 | ||||
-rw-r--r-- | arch/sparc/kernel/of_device_64.c | 188 | ||||
-rw-r--r-- | arch/sparc/kernel/of_device_common.c | 174 | ||||
-rw-r--r-- | arch/sparc/kernel/of_device_common.h | 36 | ||||
-rw-r--r-- | arch/sparc/kernel/pci_sun4v.c | 15 | ||||
-rw-r--r-- | arch/sparc/kernel/prom.h | 1 | ||||
-rw-r--r-- | arch/sparc/kernel/prom_64.c | 232 | ||||
-rw-r--r-- | arch/sparc/kernel/prom_common.c | 2 | ||||
-rw-r--r-- | arch/sparc/kernel/smp_64.c | 196 | ||||
-rw-r--r-- | arch/sparc/kernel/systbls_32.S | 4 | ||||
-rw-r--r-- | arch/sparc/kernel/systbls_64.S | 6 | ||||
-rw-r--r-- | arch/sparc/kernel/traps_64.c | 170 |
22 files changed, 1234 insertions, 826 deletions
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 54742e58831..475ce4696ac 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -37,6 +37,7 @@ obj-y += una_asm_$(BITS).o obj-$(CONFIG_SPARC32) += muldiv.o obj-y += prom_common.o obj-y += prom_$(BITS).o +obj-y += of_device_common.o obj-y += of_device_$(BITS).o obj-$(CONFIG_SPARC64) += prom_irqtrans.o @@ -54,6 +55,7 @@ obj-$(CONFIG_SPARC64) += sstate.o obj-$(CONFIG_SPARC64) += mdesc.o obj-$(CONFIG_SPARC64) += pcr.o obj-$(CONFIG_SPARC64) += nmi.o +obj-$(CONFIG_SPARC64_SMP) += cpumap.o # sparc32 do not use GENERIC_HARDIRQS but uses the generic devres implementation obj-$(CONFIG_SPARC32) += devres.o diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c new file mode 100644 index 00000000000..7430ed080b2 --- /dev/null +++ b/arch/sparc/kernel/cpumap.c @@ -0,0 +1,431 @@ +/* cpumap.c: used for optimizing CPU assignment + * + * Copyright (C) 2009 Hong H. Pham <hong.pham@windriver.com> + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/cpumask.h> +#include <linux/spinlock.h> +#include <asm/cpudata.h> +#include "cpumap.h" + + +enum { + CPUINFO_LVL_ROOT = 0, + CPUINFO_LVL_NODE, + CPUINFO_LVL_CORE, + CPUINFO_LVL_PROC, + CPUINFO_LVL_MAX, +}; + +enum { + ROVER_NO_OP = 0, + /* Increment rover every time level is visited */ + ROVER_INC_ON_VISIT = 1 << 0, + /* Increment parent's rover every time rover wraps around */ + ROVER_INC_PARENT_ON_LOOP = 1 << 1, +}; + +struct cpuinfo_node { + int id; + int level; + int num_cpus; /* Number of CPUs in this hierarchy */ + int parent_index; + int child_start; /* Array index of the first child node */ + int child_end; /* Array index of the last child node */ + int rover; /* Child node iterator */ +}; + +struct cpuinfo_level { + int start_index; /* Index of first node of a level in a cpuinfo tree */ + int end_index; /* Index of last node of a level in a cpuinfo tree */ + int num_nodes; /* Number of nodes in a level in a cpuinfo tree */ +}; + +struct cpuinfo_tree { + int total_nodes; + + /* Offsets into nodes[] for each level of the tree */ + struct cpuinfo_level level[CPUINFO_LVL_MAX]; + struct cpuinfo_node nodes[0]; +}; + + +static struct cpuinfo_tree *cpuinfo_tree; + +static u16 cpu_distribution_map[NR_CPUS]; +static DEFINE_SPINLOCK(cpu_map_lock); + + +/* Niagara optimized cpuinfo tree traversal. */ +static const int niagara_iterate_method[] = { + [CPUINFO_LVL_ROOT] = ROVER_NO_OP, + + /* Strands (or virtual CPUs) within a core may not run concurrently + * on the Niagara, as instruction pipeline(s) are shared. Distribute + * work to strands in different cores first for better concurrency. + * Go to next NUMA node when all cores are used. + */ + [CPUINFO_LVL_NODE] = ROVER_INC_ON_VISIT|ROVER_INC_PARENT_ON_LOOP, + + /* Strands are grouped together by proc_id in cpuinfo_sparc, i.e. + * a proc_id represents an instruction pipeline. Distribute work to + * strands in different proc_id groups if the core has multiple + * instruction pipelines (e.g. the Niagara 2/2+ has two). + */ + [CPUINFO_LVL_CORE] = ROVER_INC_ON_VISIT, + + /* Pick the next strand in the proc_id group. */ + [CPUINFO_LVL_PROC] = ROVER_INC_ON_VISIT, +}; + +/* Generic cpuinfo tree traversal. Distribute work round robin across NUMA + * nodes. + */ +static const int generic_iterate_method[] = { + [CPUINFO_LVL_ROOT] = ROVER_INC_ON_VISIT, + [CPUINFO_LVL_NODE] = ROVER_NO_OP, + [CPUINFO_LVL_CORE] = ROVER_INC_PARENT_ON_LOOP, + [CPUINFO_LVL_PROC] = ROVER_INC_ON_VISIT|ROVER_INC_PARENT_ON_LOOP, +}; + + +static int cpuinfo_id(int cpu, int level) +{ + int id; + + switch (level) { + case CPUINFO_LVL_ROOT: + id = 0; + break; + case CPUINFO_LVL_NODE: + id = cpu_to_node(cpu); + break; + case CPUINFO_LVL_CORE: + id = cpu_data(cpu).core_id; + break; + case CPUINFO_LVL_PROC: + id = cpu_data(cpu).proc_id; + break; + default: + id = -EINVAL; + } + return id; +} + +/* + * Enumerate the CPU information in __cpu_data to determine the start index, + * end index, and number of nodes for each level in the cpuinfo tree. The + * total number of cpuinfo nodes required to build the tree is returned. + */ +static int enumerate_cpuinfo_nodes(struct cpuinfo_level *tree_level) +{ + int prev_id[CPUINFO_LVL_MAX]; + int i, n, num_nodes; + + for (i = CPUINFO_LVL_ROOT; i < CPUINFO_LVL_MAX; i++) { + struct cpuinfo_level *lv = &tree_level[i]; + + prev_id[i] = -1; + lv->start_index = lv->end_index = lv->num_nodes = 0; + } + + num_nodes = 1; /* Include the root node */ + + for (i = 0; i < num_possible_cpus(); i++) { + if (!cpu_online(i)) + continue; + + n = cpuinfo_id(i, CPUINFO_LVL_NODE); + if (n > prev_id[CPUINFO_LVL_NODE]) { + tree_level[CPUINFO_LVL_NODE].num_nodes++; + prev_id[CPUINFO_LVL_NODE] = n; + num_nodes++; + } + n = cpuinfo_id(i, CPUINFO_LVL_CORE); + if (n > prev_id[CPUINFO_LVL_CORE]) { + tree_level[CPUINFO_LVL_CORE].num_nodes++; + prev_id[CPUINFO_LVL_CORE] = n; + num_nodes++; + } + n = cpuinfo_id(i, CPUINFO_LVL_PROC); + if (n > prev_id[CPUINFO_LVL_PROC]) { + tree_level[CPUINFO_LVL_PROC].num_nodes++; + prev_id[CPUINFO_LVL_PROC] = n; + num_nodes++; + } + } + + tree_level[CPUINFO_LVL_ROOT].num_nodes = 1; + + n = tree_level[CPUINFO_LVL_NODE].num_nodes; + tree_level[CPUINFO_LVL_NODE].start_index = 1; + tree_level[CPUINFO_LVL_NODE].end_index = n; + + n++; + tree_level[CPUINFO_LVL_CORE].start_index = n; + n += tree_level[CPUINFO_LVL_CORE].num_nodes; + tree_level[CPUINFO_LVL_CORE].end_index = n - 1; + + tree_level[CPUINFO_LVL_PROC].start_index = n; + n += tree_level[CPUINFO_LVL_PROC].num_nodes; + tree_level[CPUINFO_LVL_PROC].end_index = n - 1; + + return num_nodes; +} + +/* Build a tree representation of the CPU hierarchy using the per CPU + * information in __cpu_data. Entries in __cpu_data[0..NR_CPUS] are + * assumed to be sorted in ascending order based on node, core_id, and + * proc_id (in order of significance). + */ +static struct cpuinfo_tree *build_cpuinfo_tree(void) +{ + struct cpuinfo_tree *new_tree; + struct cpuinfo_node *node; + struct cpuinfo_level tmp_level[CPUINFO_LVL_MAX]; + int num_cpus[CPUINFO_LVL_MAX]; + int level_rover[CPUINFO_LVL_MAX]; + int prev_id[CPUINFO_LVL_MAX]; + int n, id, cpu, prev_cpu, last_cpu, level; + + n = enumerate_cpuinfo_nodes(tmp_level); + + new_tree = kzalloc(sizeof(struct cpuinfo_tree) + + (sizeof(struct cpuinfo_node) * n), GFP_ATOMIC); + if (!new_tree) + return NULL; + + new_tree->total_nodes = n; + memcpy(&new_tree->level, tmp_level, sizeof(tmp_level)); + + prev_cpu = cpu = first_cpu(cpu_online_map); + + /* Initialize all levels in the tree with the first CPU */ + for (level = CPUINFO_LVL_PROC; level >= CPUINFO_LVL_ROOT; level--) { + n = new_tree->level[level].start_index; + + level_rover[level] = n; + node = &new_tree->nodes[n]; + + id = cpuinfo_id(cpu, level); + if (unlikely(id < 0)) { + kfree(new_tree); + return NULL; + } + node->id = id; + node->level = level; + node->num_cpus = 1; + + node->parent_index = (level > CPUINFO_LVL_ROOT) + ? new_tree->level[level - 1].start_index : -1; + + node->child_start = node->child_end = node->rover = + (level == CPUINFO_LVL_PROC) + ? cpu : new_tree->level[level + 1].start_index; + + prev_id[level] = node->id; + num_cpus[level] = 1; + } + + for (last_cpu = (num_possible_cpus() - 1); last_cpu >= 0; last_cpu--) { + if (cpu_online(last_cpu)) + break; + } + + while (++cpu <= last_cpu) { + if (!cpu_online(cpu)) + continue; + + for (level = CPUINFO_LVL_PROC; level >= CPUINFO_LVL_ROOT; + level--) { + id = cpuinfo_id(cpu, level); + if (unlikely(id < 0)) { + kfree(new_tree); + return NULL; + } + + if ((id != prev_id[level]) || (cpu == last_cpu)) { + prev_id[level] = id; + node = &new_tree->nodes[level_rover[level]]; + node->num_cpus = num_cpus[level]; + num_cpus[level] = 1; + + if (cpu == last_cpu) + node->num_cpus++; + + /* Connect tree node to parent */ + if (level == CPUINFO_LVL_ROOT) + node->parent_index = -1; + else + node->parent_index = + level_rover[level - 1]; + + if (level == CPUINFO_LVL_PROC) { + node->child_end = + (cpu == last_cpu) ? cpu : prev_cpu; + } else { + node->child_end = + level_rover[level + 1] - 1; + } + + /* Initialize the next node in the same level */ + n = ++level_rover[level]; + if (n <= new_tree->level[level].end_index) { + node = &new_tree->nodes[n]; + node->id = id; + node->level = level; + + /* Connect node to child */ + node->child_start = node->child_end = + node->rover = + (level == CPUINFO_LVL_PROC) + ? cpu : level_rover[level + 1]; + } + } else + num_cpus[level]++; + } + prev_cpu = cpu; + } + + return new_tree; +} + +static void increment_rover(struct cpuinfo_tree *t, int node_index, + int root_index, const int *rover_inc_table) +{ + struct cpuinfo_node *node = &t->nodes[node_index]; + int top_level, level; + + top_level = t->nodes[root_index].level; + for (level = node->level; level >= top_level; level--) { + node->rover++; + if (node->rover <= node->child_end) + return; + + node->rover = node->child_start; + /* If parent's rover does not need to be adjusted, stop here. */ + if ((level == top_level) || + !(rover_inc_table[level] & ROVER_INC_PARENT_ON_LOOP)) + return; + + node = &t->nodes[node->parent_index]; + } +} + +static int iterate_cpu(struct cpuinfo_tree *t, unsigned int root_index) +{ + const int *rover_inc_table; + int level, new_index, index = root_index; + + switch (sun4v_chip_type) { + case SUN4V_CHIP_NIAGARA1: + case SUN4V_CHIP_NIAGARA2: + rover_inc_table = niagara_iterate_method; + break; + default: + rover_inc_table = generic_iterate_method; + } + + for (level = t->nodes[root_index].level; level < CPUINFO_LVL_MAX; + level++) { + new_index = t->nodes[index].rover; + if (rover_inc_table[level] & ROVER_INC_ON_VISIT) + increment_rover(t, index, root_index, rover_inc_table); + + index = new_index; + } + return index; +} + +static void _cpu_map_rebuild(void) +{ + int i; + + if (cpuinfo_tree) { + kfree(cpuinfo_tree); + cpuinfo_tree = NULL; + } + + cpuinfo_tree = build_cpuinfo_tree(); + if (!cpuinfo_tree) + return; + + /* Build CPU distribution map that spans all online CPUs. No need + * to check if the CPU is online, as that is done when the cpuinfo + * tree is being built. + */ + for (i = 0; i < cpuinfo_tree->nodes[0].num_cpus; i++) + cpu_distribution_map[i] = iterate_cpu(cpuinfo_tree, 0); +} + +/* Fallback if the cpuinfo tree could not be built. CPU mapping is linear + * round robin. + */ +static int simple_map_to_cpu(unsigned int index) +{ + int i, end, cpu_rover; + + cpu_rover = 0; + end = index % num_online_cpus(); + for (i = 0; i < num_possible_cpus(); i++) { + if (cpu_online(cpu_rover)) { + if (cpu_rover >= end) + return cpu_rover; + + cpu_rover++; + } + } + + /* Impossible, since num_online_cpus() <= num_possible_cpus() */ + return first_cpu(cpu_online_map); +} + +static int _map_to_cpu(unsigned int index) +{ + struct cpuinfo_node *root_node; + + if (unlikely(!cpuinfo_tree)) { + _cpu_map_rebuild(); + if (!cpuinfo_tree) + return simple_map_to_cpu(index); + } + + root_node = &cpuinfo_tree->nodes[0]; +#ifdef CONFIG_HOTPLUG_CPU + if (unlikely(root_node->num_cpus != num_online_cpus())) { + _cpu_map_rebuild(); + if (!cpuinfo_tree) + return simple_map_to_cpu(index); + } +#endif + return cpu_distribution_map[index % root_node->num_cpus]; +} + +int map_to_cpu(unsigned int index) +{ + int mapped_cpu; + unsigned long flag; + + spin_lock_irqsave(&cpu_map_lock, flag); + mapped_cpu = _map_to_cpu(index); + +#ifdef CONFIG_HOTPLUG_CPU + while (unlikely(!cpu_online(mapped_cpu))) + mapped_cpu = _map_to_cpu(index); +#endif + spin_unlock_irqrestore(&cpu_map_lock, flag); + return mapped_cpu; +} +EXPORT_SYMBOL(map_to_cpu); + +void cpu_map_rebuild(void) +{ + unsigned long flag; + + spin_lock_irqsave(&cpu_map_lock, flag); + _cpu_map_rebuild(); + spin_unlock_irqrestore(&cpu_map_lock, flag); +} diff --git a/arch/sparc/kernel/cpumap.h b/arch/sparc/kernel/cpumap.h new file mode 100644 index 00000000000..e639880ab86 --- /dev/null +++ b/arch/sparc/kernel/cpumap.h @@ -0,0 +1,16 @@ +#ifndef _CPUMAP_H +#define _CPUMAP_H + +#ifdef CONFIG_SMP +extern void cpu_map_rebuild(void); +extern int map_to_cpu(unsigned int index); +#define cpu_map_init() cpu_map_rebuild() +#else +#define cpu_map_init() do {} while (0) +static inline int map_to_cpu(unsigned int index) +{ + return raw_smp_processor_id(); +} +#endif + +#endif diff --git a/arch/sparc/kernel/dma.c b/arch/sparc/kernel/dma.c index ebc8403b035..524c32f97c5 100644 --- a/arch/sparc/kernel/dma.c +++ b/arch/sparc/kernel/dma.c @@ -35,8 +35,8 @@ int dma_set_mask(struct device *dev, u64 dma_mask) } EXPORT_SYMBOL(dma_set_mask); -void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag) +static void *dma32_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag) { #ifdef CONFIG_PCI if (dev->bus == &pci_bus_type) @@ -44,10 +44,9 @@ void *dma_alloc_coherent(struct device *dev, size_t size, #endif return sbus_alloc_consistent(dev, size, dma_handle); } -EXPORT_SYMBOL(dma_alloc_coherent); -void dma_free_coherent(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_handle) +static void dma32_free_coherent(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_handle) { #ifdef CONFIG_PCI if (dev->bus == &pci_bus_type) { @@ -58,38 +57,10 @@ void dma_free_coherent(struct device *dev, size_t size, #endif sbus_free_consistent(dev, size, cpu_addr, dma_handle); } -EXPORT_SYMBOL(dma_free_coherent); -dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, - size_t size, enum dma_data_direction direction) -{ -#ifdef CONFIG_PCI - if (dev->bus == &pci_bus_type) - return pci_map_single(to_pci_dev(dev), cpu_addr, - size, (int)direction); -#endif - return sbus_map_single(dev, cpu_addr, size, (int)direction); -} -EXPORT_SYMBOL(dma_map_single); - -void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, - size_t size, - enum dma_data_direction direction) -{ -#ifdef CONFIG_PCI - if (dev->bus == &pci_bus_type) { - pci_unmap_single(to_pci_dev(dev), dma_addr, - size, (int)direction); - return; - } -#endif - sbus_unmap_single(dev, dma_addr, size, (int)direction); -} -EXPORT_SYMBOL(dma_unmap_single); - -dma_addr_t dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction direction) +static dma_addr_t dma32_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction) { #ifdef CONFIG_PCI if (dev->bus == &pci_bus_type) @@ -99,10 +70,9 @@ dma_addr_t dma_map_page(struct device *dev, struct page *page, return sbus_map_single(dev, page_address(page) + offset, size, (int)direction); } -EXPORT_SYMBOL(dma_map_page); -void dma_unmap_page(struct device *dev, dma_addr_t dma_address, - size_t size, enum dma_data_direction direction) +static void dma32_unmap_page(struct device *dev, dma_addr_t dma_address, + size_t size, enum dma_data_direction direction) { #ifdef CONFIG_PCI if (dev->bus == &pci_bus_type) { @@ -113,10 +83,9 @@ void dma_unmap_page(struct device *dev, dma_addr_t dma_address, #endif sbus_unmap_single(dev, dma_address, size, (int)direction); } -EXPORT_SYMBOL(dma_unmap_page); -int dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction direction) +static int dma32_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction) { #ifdef CONFIG_PCI if (dev->bus == &pci_bus_type) @@ -124,10 +93,9 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, #endif return sbus_map_sg(dev, sg, nents, direction); } -EXPORT_SYMBOL(dma_map_sg); -void dma_unmap_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction direction) +void dma32_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction) { #ifdef CONFIG_PCI if (dev->bus == &pci_bus_type) { @@ -137,10 +105,10 @@ void dma_unmap_sg(struct device *dev, struct scatterlist *sg, #endif sbus_unmap_sg(dev, sg, nents, (int)direction); } -EXPORT_SYMBOL(dma_unmap_sg); -void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction direction) +static void dma32_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, + size_t size, + enum dma_data_direction direction) { #ifdef CONFIG_PCI if (dev->bus == &pci_bus_type) { @@ -151,10 +119,10 @@ void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, #endif sbus_dma_sync_single_for_cpu(dev, dma_handle, size, (int) direction); } -EXPORT_SYMBOL(dma_sync_single_for_cpu); -void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction direction) +static void dma32_sync_single_for_device(struct device *dev, + dma_addr_t dma_handle, size_t size, + enum dma_data_direction direction) { #ifdef CONFIG_PCI if (dev->bus == &pci_bus_type) { @@ -165,28 +133,9 @@ void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, #endif sbus_dma_sync_single_for_device(dev, dma_handle, size, (int) direction); } -EXPORT_SYMBOL(dma_sync_single_for_device); - -void dma_sync_single_range_for_cpu(struct device *dev, - dma_addr_t dma_handle, - unsigned long offset, - size_t size, - enum dma_data_direction direction) -{ - dma_sync_single_for_cpu(dev, dma_handle+offset, size, direction); -} -EXPORT_SYMBOL(dma_sync_single_range_for_cpu); - -void dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle, - unsigned long offset, size_t size, - enum dma_data_direction direction) -{ - dma_sync_single_for_device(dev, dma_handle+offset, size, direction); -} -EXPORT_SYMBOL(dma_sync_single_range_for_device); -void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction direction) +static void dma32_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int nelems, enum dma_data_direction direction) { #ifdef CONFIG_PCI if (dev->bus == &pci_bus_type) { @@ -197,11 +146,10 @@ void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, #endif BUG(); } -EXPORT_SYMBOL(dma_sync_sg_for_cpu); -void dma_sync_sg_for_device(struct device *dev, - struct scatterlist *sg, int nelems, - enum dma_data_direction direction) +static void dma32_sync_sg_for_device(struct device *dev, + struct scatterlist *sg, int nelems, + enum dma_data_direction direction) { #ifdef CONFIG_PCI if (dev->bus == &pci_bus_type) { @@ -212,16 +160,19 @@ void dma_sync_sg_for_device(struct device *dev, #endif BUG(); } -EXPORT_SYMBOL(dma_sync_sg_for_device); -int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return (dma_addr == DMA_ERROR_CODE); -} -EXPORT_SYMBOL(dma_mapping_error); - -int dma_get_cache_alignment(void) -{ - return 32; -} -EXPORT_SYMBOL(dma_get_cache_alignment); +static const struct dma_ops dma32_dma_ops = { + .alloc_coherent = dma32_alloc_coherent, + .free_coherent = dma32_free_coherent, + .map_page = dma32_map_page, + .unmap_page = dma32_unmap_page, + .map_sg = dma32_map_sg, + .unmap_sg = dma32_unmap_sg, + .sync_single_for_cpu = dma32_sync_single_for_cpu, + .sync_single_for_device = dma32_sync_single_for_device, + .sync_sg_for_cpu = dma32_sync_sg_for_cpu, + .sync_sg_for_device = dma32_sync_sg_for_device, +}; + +const struct dma_ops *dma_ops = &dma32_dma_ops; +EXPORT_SYMBOL(dma_ops); diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c index 90350f838f0..4a700f4b79c 100644 --- a/arch/sparc/kernel/ds.c +++ b/arch/sparc/kernel/ds.c @@ -544,7 +544,8 @@ static int __cpuinit dr_cpu_configure(struct ds_info *dp, resp_len, ncpus, mask, DR_CPU_STAT_CONFIGURED); - mdesc_fill_in_cpu_data(*mask); + mdesc_populate_present_mask(mask); + mdesc_fill_in_cpu_data(mask); for_each_cpu_mask(cpu, *mask) { int err; diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c index d0218e73f98..d3b1a307656 100644 --- a/arch/sparc/kernel/ftrace.c +++ b/arch/sparc/kernel/ftrace.c @@ -7,14 +7,10 @@ #include <asm/ftrace.h> +#ifdef CONFIG_DYNAMIC_FTRACE static const u32 ftrace_nop = 0x01000000; -unsigned char *ftrace_nop_replace(void) -{ - return (char *)&ftrace_nop; -} - -unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) +static u32 ftrace_call_replace(unsigned long ip, unsigned long addr) { static u32 call; s32 off; @@ -22,15 +18,11 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) off = ((s32)addr - (s32)ip); call = 0x40000000 | ((u32)off >> 2); - return (unsigned char *) &call; + return call; } -int -ftrace_modify_code(unsigned long ip, unsigned char *old_code, - unsigned char *new_code) +static int ftrace_modify_code(unsigned long ip, u32 old, u32 new) { - u32 old = *(u32 *)old_code; - u32 new = *(u32 *)new_code; u32 replaced; int faulted; @@ -59,18 +51,43 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, return faulted; } +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long ip = rec->ip; + u32 old, new; + + old = ftrace_call_replace(ip, addr); + new = ftrace_nop; + return ftrace_modify_code(ip, old, new); +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long ip = rec->ip; + u32 old, new; + + old = ftrace_nop; + new = ftrace_call_replace(ip, addr); + return ftrace_modify_code(ip, old, new); +} + int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); - unsigned char old[MCOUNT_INSN_SIZE], *new; + u32 old, new; - memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE); + old = *(u32 *) &ftrace_call; new = ftrace_call_replace(ip, (unsigned long)func); return ftrace_modify_code(ip, old, new); } int __init ftrace_dyn_arch_init(void *data) { - ftrace_mcount_set(data); + unsigned long *p = data; + + *p = 0; + return 0; } +#endif + diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 91bf4c7f79b..f8f21050448 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -641,28 +641,6 @@ tlb_fixup_done: /* Not reached... */ 1: - /* If we boot on a non-zero cpu, all of the per-cpu - * variable references we make before setting up the - * per-cpu areas will use a bogus offset. Put a - * compensating factor into __per_cpu_base to handle - * this cleanly. - * - * What the per-cpu code calculates is: - * - * __per_cpu_base + (cpu << __per_cpu_shift) - * - * These two variables are zero initially, so to - * make it all cancel out to zero we need to put - * "0 - (cpu << 0)" into __per_cpu_base so that the - * above formula evaluates to zero. - * - * We cannot even perform a printk() until this stuff - * is setup as that calls cpu_clock() which uses - * per-cpu variables. - */ - sub %g0, %o0, %o1 - sethi %hi(__per_cpu_base), %o2 - stx %o1, [%o2 + %lo(__per_cpu_base)] #else mov 0, %o0 #endif diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index d8900e1d5aa..0aeaefe696b 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -351,8 +351,9 @@ static void dma_4u_free_coherent(struct device *dev, size_t size, free_pages((unsigned long)cpu, order); } -static dma_addr_t dma_4u_map_single(struct device *dev, void *ptr, size_t sz, - enum dma_data_direction direction) +static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t sz, + enum dma_data_direction direction) { struct iommu *iommu; struct strbuf *strbuf; @@ -368,7 +369,7 @@ static dma_addr_t dma_4u_map_single(struct device *dev, void *ptr, size_t sz, if (unlikely(direction == DMA_NONE)) goto bad_no_ctx; - oaddr = (unsigned long)ptr; + oaddr = (unsigned long)(page_address(page) + offset); npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK); npages >>= IO_PAGE_SHIFT; @@ -472,8 +473,8 @@ do_flush_sync: vaddr, ctx, npages); } -static void dma_4u_unmap_single(struct device *dev, dma_addr_t bus_addr, - size_t sz, enum dma_data_direction direction) +static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr, + size_t sz, enum dma_data_direction direction) { struct iommu *iommu; struct strbuf *strbuf; @@ -824,8 +825,8 @@ static void dma_4u_sync_sg_for_cpu(struct device *dev, static const struct dma_ops sun4u_dma_ops = { .alloc_coherent = dma_4u_alloc_coherent, .free_coherent = dma_4u_free_coherent, - .map_single = dma_4u_map_single, - .unmap_single = dma_4u_unmap_single, + .map_page = dma_4u_map_page, + .unmap_page = dma_4u_unmap_page, .map_sg = dma_4u_map_sg, .unmap_sg = dma_4u_unmap_sg, .sync_single_for_cpu = dma_4u_sync_single_for_cpu, diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index e5e78f9cfc9..bd075054942 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -45,6 +45,7 @@ #include <asm/cacheflush.h> #include "entry.h" +#include "cpumap.h" #define NUM_IVECS (IMAP_INR + 1) @@ -256,35 +257,13 @@ static int irq_choose_cpu(unsigned int virt_irq) int cpuid; cpumask_copy(&mask, irq_desc[virt_irq].affinity); - if (cpus_equal(mask, CPU_MASK_ALL)) { - static int irq_rover; - static DEFINE_SPINLOCK(irq_rover_lock); - unsigned long flags; - - /* Round-robin distribution... */ - do_round_robin: - spin_lock_irqsave(&irq_rover_lock, flags); - - while (!cpu_online(irq_rover)) { - if (++irq_rover >= nr_cpu_ids) - irq_rover = 0; - } - cpuid = irq_rover; - do { - if (++irq_rover >= nr_cpu_ids) - irq_rover = 0; - } while (!cpu_online(irq_rover)); - - spin_unlock_irqrestore(&irq_rover_lock, flags); + if (cpus_equal(mask, cpu_online_map)) { + cpuid = map_to_cpu(virt_irq); } else { cpumask_t tmp; cpus_and(tmp, cpu_online_map, mask); - - if (cpus_empty(tmp)) - goto do_round_robin; - - cpuid = first_cpu(tmp); + cpuid = cpus_empty(tmp) ? map_to_cpu(virt_irq) : first_cpu(tmp); } return cpuid; diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index f0e6ed23a46..938da19dc06 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -574,7 +574,7 @@ static void __init report_platform_properties(void) mdesc_release(hp); } -static void __devinit fill_in_one_cache(cpuinfo_sparc *c, +static void __cpuinit fill_in_one_cache(cpuinfo_sparc *c, struct mdesc_handle *hp, u64 mp) { @@ -619,8 +619,7 @@ static void __devinit fill_in_one_cache(cpuinfo_sparc *c, } } -static void __devinit mark_core_ids(struct mdesc_handle *hp, u64 mp, - int core_id) +static void __cpuinit mark_core_ids(struct mdesc_handle *hp, u64 mp, int core_id) { u64 a; @@ -653,7 +652,7 @@ static void __devinit mark_core_ids(struct mdesc_handle *hp, u64 mp, } } -static void __devinit set_core_ids(struct mdesc_handle *hp) +static void __cpuinit set_core_ids(struct mdesc_handle *hp) { int idx; u64 mp; @@ -678,8 +677,7 @@ static void __devinit set_core_ids(struct mdesc_handle *hp) } } -static void __devinit mark_proc_ids(struct mdesc_handle *hp, u64 mp, - int proc_id) +static void __cpuinit mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id) { u64 a; @@ -698,8 +696,7 @@ static void __devinit mark_proc_ids(struct mdesc_handle *hp, u64 mp, } } -static void __devinit __set_proc_ids(struct mdesc_handle *hp, - const char *exec_unit_name) +static void __cpuinit __set_proc_ids(struct mdesc_handle *hp, const char *exec_unit_name) { int idx; u64 mp; @@ -720,13 +717,13 @@ static void __devinit __set_proc_ids(struct mdesc_handle *hp, } } -static void __devinit set_proc_ids(struct mdesc_handle *hp) +static void __cpuinit set_proc_ids(struct mdesc_handle *hp) { __set_proc_ids(hp, "exec_unit"); __set_proc_ids(hp, "exec-unit"); } -static void __devinit get_one_mondo_bits(const u64 *p, unsigned int *mask, +static void __cpuinit get_one_mondo_bits(const u64 *p, unsigned int *mask, unsigned char def) { u64 val; @@ -745,7 +742,7 @@ use_default: *mask = ((1U << def) * 64U) - 1U; } -static void __devinit get_mondo_data(struct mdesc_handle *hp, u64 mp, +static void __cpuinit get_mondo_data(struct mdesc_handle *hp, u64 mp, struct trap_per_cpu *tb) { const u64 *val; @@ -763,23 +760,15 @@ static void __devinit get_mondo_data(struct mdesc_handle *hp, u64 mp, get_one_mondo_bits(val, &tb->nonresum_qmask, 2); } -void __cpuinit mdesc_fill_in_cpu_data(cpumask_t mask) +static void * __cpuinit mdesc_iterate_over_cpus(void *(*func)(struct mdesc_handle *, u64, int, void *), void *arg, cpumask_t *mask) { struct mdesc_handle *hp = mdesc_grab(); + void *ret = NULL; u64 mp; - ncpus_probed = 0; mdesc_for_each_node_by_name(hp, mp, "cpu") { const u64 *id = mdesc_get_property(hp, mp, "id", NULL); - const u64 *cfreq = mdesc_get_property(hp, mp, "clock-frequency", NULL); - struct trap_per_cpu *tb; - cpuinfo_sparc *c; - int cpuid; - u64 a; - - ncpus_probed++; - - cpuid = *id; + int cpuid = *id; #ifdef CONFIG_SMP if (cpuid >= NR_CPUS) { @@ -788,62 +777,104 @@ void __cpuinit mdesc_fill_in_cpu_data(cpumask_t mask) cpuid, NR_CPUS); continue; } - if (!cpu_isset(cpuid, mask)) + if (!cpu_isset(cpuid, *mask)) continue; -#else - /* On uniprocessor we only want the values for the - * real physical cpu the kernel booted onto, however - * cpu_data() only has one entry at index 0. - */ - if (cpuid != real_hard_smp_processor_id()) - continue; - cpuid = 0; #endif - c = &cpu_data(cpuid); - c->clock_tick = *cfreq; + ret = func(hp, mp, cpuid, arg); + if (ret) + goto out; + } +out: + mdesc_release(hp); + return ret; +} - tb = &trap_block[cpuid]; - get_mondo_data(hp, mp, tb); +static void * __cpuinit record_one_cpu(struct mdesc_handle *hp, u64 mp, int cpuid, void *arg) +{ + ncpus_probed++; +#ifdef CONFIG_SMP + set_cpu_present(cpuid, true); +#endif + return NULL; +} - mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) { - u64 j, t = mdesc_arc_target(hp, a); - const char *t_name; +void __cpuinit mdesc_populate_present_mask(cpumask_t *mask) +{ + if (tlb_type != hypervisor) + return; - t_name = mdesc_node_name(hp, t); - if (!strcmp(t_name, "cache")) { - fill_in_one_cache(c, hp, t); - continue; - } + ncpus_probed = 0; + mdesc_iterate_over_cpus(record_one_cpu, NULL, mask); +} - mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_FWD) { - u64 n = mdesc_arc_target(hp, j); - const char *n_name; +static void * __cpuinit fill_in_one_cpu(struct mdesc_handle *hp, u64 mp, int cpuid, void *arg) +{ + const u64 *cfreq = mdesc_get_property(hp, mp, "clock-frequency", NULL); + struct trap_per_cpu *tb; + cpuinfo_sparc *c; + u64 a; - n_name = mdesc_node_name(hp, n); - if (!strcmp(n_name, "cache")) - fill_in_one_cache(c, hp, n); - } +#ifndef CONFIG_SMP + /* On uniprocessor we only want the values for the + * real physical cpu the kernel booted onto, however + * cpu_data() only has one entry at index 0. + */ + if (cpuid != real_hard_smp_processor_id()) + return NULL; + cpuid = 0; +#endif + + c = &cpu_data(cpuid); + c->clock_tick = *cfreq; + + tb = &trap_block[cpuid]; + get_mondo_data(hp, mp, tb); + + mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) { + u64 j, t = mdesc_arc_target(hp, a); + const char *t_name; + + t_name = mdesc_node_name(hp, t); + if (!strcmp(t_name, "cache")) { + fill_in_one_cache(c, hp, t); + continue; } -#ifdef CONFIG_SMP - cpu_set(cpuid, cpu_present_map); -#endif + mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_FWD) { + u64 n = mdesc_arc_target(hp, j); + const char *n_name; - c->core_id = 0; - c->proc_id = -1; + n_name = mdesc_node_name(hp, n); + if (!strcmp(n_name, "cache")) + fill_in_one_cache(c, hp, n); + } } + c->core_id = 0; + c->proc_id = -1; + + return NULL; +} + +void __cpuinit mdesc_fill_in_cpu_data(cpumask_t *mask) +{ + struct mdesc_handle *hp; + + mdesc_iterate_over_cpus(fill_in_one_cpu, NULL, mask); + #ifdef CONFIG_SMP sparc64_multi_core = 1; #endif + hp = mdesc_grab(); + set_core_ids(hp); set_proc_ids(hp); - smp_fill_in_sib_core_maps(); - mdesc_release(hp); + + smp_fill_in_sib_core_maps(); } static ssize_t mdesc_read(struct file *file, char __user *buf, @@ -887,7 +918,6 @@ void __init sun4v_mdesc_init(void) { struct mdesc_handle *hp; unsigned long len, real_len, status; - cpumask_t mask; (void) sun4v_mach_desc(0UL, 0UL, &len); @@ -911,7 +941,4 @@ void __init sun4v_mdesc_init(void) cur_mdesc = hp; report_platform_properties(); - - cpus_setall(mask); - mdesc_fill_in_cpu_data(mask); } diff --git a/arch/sparc/kernel/of_device_32.c b/arch/sparc/kernel/of_device_32.c index c8f14c1dc52..90396702ea2 100644 --- a/arch/sparc/kernel/of_device_32.c +++ b/arch/sparc/kernel/of_device_32.c @@ -6,159 +6,11 @@ #include <linux/mod_devicetable.h> #include <linux/slab.h> #include <linux/errno.h> +#include <linux/irq.h> #include <linux/of_device.h> #include <linux/of_platform.h> -static int node_match(struct device *dev, void *data) -{ - struct of_device *op = to_of_device(dev); - struct device_node *dp = data; - - return (op->node == dp); -} - -struct of_device *of_find_device_by_node(struct device_node *dp) -{ - struct device *dev = bus_find_device(&of_platform_bus_type, NULL, - dp, node_match); - - if (dev) - return to_of_device(dev); - - return NULL; -} -EXPORT_SYMBOL(of_find_device_by_node); - -unsigned int irq_of_parse_and_map(struct device_node *node, int index) -{ - struct of_device *op = of_find_device_by_node(node); - - if (!op || index >= op->num_irqs) - return 0; - - return op->irqs[index]; -} -EXPORT_SYMBOL(irq_of_parse_and_map); - -/* Take the archdata values for IOMMU, STC, and HOSTDATA found in - * BUS and propagate to all child of_device objects. - */ -void of_propagate_archdata(struct of_device *bus) -{ - struct dev_archdata *bus_sd = &bus->dev.archdata; - struct device_node *bus_dp = bus->node; - struct device_node *dp; - - for (dp = bus_dp->child; dp; dp = dp->sibling) { - struct of_device *op = of_find_device_by_node(dp); - - op->dev.archdata.iommu = bus_sd->iommu; - op->dev.archdata.stc = bus_sd->stc; - op->dev.archdata.host_controller = bus_sd->host_controller; - op->dev.archdata.numa_node = bus_sd->numa_node; - - if (dp->child) - of_propagate_archdata(op); - } -} - -struct bus_type of_platform_bus_type; -EXPORT_SYMBOL(of_platform_bus_type); - -static inline u64 of_read_addr(const u32 *cell, int size) -{ - u64 r = 0; - while (size--) - r = (r << 32) | *(cell++); - return r; -} - -static void __init get_cells(struct device_node *dp, - int *addrc, int *sizec) -{ - if (addrc) - *addrc = of_n_addr_cells(dp); - if (sizec) - *sizec = of_n_size_cells(dp); -} - -/* Max address size we deal with */ -#define OF_MAX_ADDR_CELLS 4 - -struct of_bus { - const char *name; - const char *addr_prop_name; - int (*match)(struct device_node *parent); - void (*count_cells)(struct device_node *child, - int *addrc, int *sizec); - int (*map)(u32 *addr, const u32 *range, - int na, int ns, int pna); - unsigned long (*get_flags)(const u32 *addr, unsigned long); -}; - -/* - * Default translator (generic bus) - */ - -static void of_bus_default_count_cells(struct device_node *dev, - int *addrc, int *sizec) -{ - get_cells(dev, addrc, sizec); -} - -/* Make sure the least significant 64-bits are in-range. Even - * for 3 or 4 cell values it is a good enough approximation. - */ -static int of_out_of_range(const u32 *addr, const u32 *base, - const u32 *size, int na, int ns) -{ - u64 a = of_read_addr(addr, na); - u64 b = of_read_addr(base, na); - - if (a < b) - return 1; - - b += of_read_addr(size, ns); - if (a >= b) - return 1; - - return 0; -} - -static int of_bus_default_map(u32 *addr, const u32 *range, - int na, int ns, int pna) -{ - u32 result[OF_MAX_ADDR_CELLS]; - int i; - - if (ns > 2) { - printk("of_device: Cannot handle size cells (%d) > 2.", ns); - return -EINVAL; - } - - if (of_out_of_range(addr, range, range + na + pna, na, ns)) - return -EINVAL; - - /* Start with the parent range base. */ - memcpy(result, range + na, pna * 4); - - /* Add in the child address offset. */ - for (i = 0; i < na; i++) - result[pna - 1 - i] += - (addr[na - 1 - i] - - range[na - 1 - i]); - - memcpy(addr, result, pna * 4); - - return 0; -} - -static unsigned long of_bus_default_get_flags(const u32 *addr, unsigned long flags) -{ - if (flags) - return flags; - return IORESOURCE_MEM; -} +#include "of_device_common.h" /* * PCI bus specific translator @@ -240,47 +92,6 @@ static unsigned long of_bus_pci_get_flags(const u32 *addr, unsigned long flags) return flags; } -/* - * SBUS bus specific translator - */ - -static int of_bus_sbus_match(struct device_node *np) -{ - struct device_node *dp = np; - - while (dp) { - if (!strcmp(dp->name, "sbus") || - !strcmp(dp->name, "sbi")) - return 1; - - /* Have a look at use_1to1_mapping(). We're trying - * to match SBUS if that's the top-level bus and we - * don't have some intervening real bus that provides - * ranges based translations. - */ - if (of_find_property(dp, "ranges", NULL) != NULL) - break; - - dp = dp->parent; - } - - return 0; -} - -static void of_bus_sbus_count_cells(struct device_node *child, - int *addrc, int *sizec) -{ - if (addrc) - *addrc = 2; - if (sizec) - *sizec = 1; -} - -static int of_bus_sbus_map(u32 *addr, const u32 *range, int na, int ns, int pna) -{ - return of_bus_default_map(addr, range, na, ns, pna); -} - static unsigned long of_bus_sbus_get_flags(const u32 *addr, unsigned long flags) { return IORESOURCE_MEM; @@ -307,7 +118,7 @@ static struct of_bus of_busses[] = { .addr_prop_name = "reg", .match = of_bus_sbus_match, .count_cells = of_bus_sbus_count_cells, - .map = of_bus_sbus_map, + .map = of_bus_default_map, .get_flags = of_bus_sbus_get_flags, }, /* Default */ diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c index 5ac287ac03d..881947e59e9 100644 --- a/arch/sparc/kernel/of_device_64.c +++ b/arch/sparc/kernel/of_device_64.c @@ -10,6 +10,8 @@ #include <linux/of_device.h> #include <linux/of_platform.h> +#include "of_device_common.h" + void __iomem *of_ioremap(struct resource *res, unsigned long offset, unsigned long size, char *name) { unsigned long ret = res->start + offset; @@ -35,156 +37,6 @@ void of_iounmap(struct resource *res, void __iomem *base, unsigned long size) } EXPORT_SYMBOL(of_iounmap); -static int node_match(struct device *dev, void *data) -{ - struct of_device *op = to_of_device(dev); - struct device_node *dp = data; - - return (op->node == dp); -} - -struct of_device *of_find_device_by_node(struct device_node *dp) -{ - struct device *dev = bus_find_device(&of_platform_bus_type, NULL, - dp, node_match); - - if (dev) - return to_of_device(dev); - - return NULL; -} -EXPORT_SYMBOL(of_find_device_by_node); - -unsigned int irq_of_parse_and_map(struct device_node *node, int index) -{ - struct of_device *op = of_find_device_by_node(node); - - if (!op || index >= op->num_irqs) - return 0; - - return op->irqs[index]; -} -EXPORT_SYMBOL(irq_of_parse_and_map); - -/* Take the archdata values for IOMMU, STC, and HOSTDATA found in - * BUS and propagate to all child of_device objects. - */ -void of_propagate_archdata(struct of_device *bus) -{ - struct dev_archdata *bus_sd = &bus->dev.archdata; - struct device_node *bus_dp = bus->node; - struct device_node *dp; - - for (dp = bus_dp->child; dp; dp = dp->sibling) { - struct of_device *op = of_find_device_by_node(dp); - - op->dev.archdata.iommu = bus_sd->iommu; - op->dev.archdata.stc = bus_sd->stc; - op->dev.archdata.host_controller = bus_sd->host_controller; - op->dev.archdata.numa_node = bus_sd->numa_node; - - if (dp->child) - of_propagate_archdata(op); - } -} - -struct bus_type of_platform_bus_type; -EXPORT_SYMBOL(of_platform_bus_type); - -static inline u64 of_read_addr(const u32 *cell, int size) -{ - u64 r = 0; - while (size--) - r = (r << 32) | *(cell++); - return r; -} - -static void get_cells(struct device_node *dp, int *addrc, int *sizec) -{ - if (addrc) - *addrc = of_n_addr_cells(dp); - if (sizec) - *sizec = of_n_size_cells(dp); -} - -/* Max address size we deal with */ -#define OF_MAX_ADDR_CELLS 4 - -struct of_bus { - const char *name; - const char *addr_prop_name; - int (*match)(struct device_node *parent); - void (*count_cells)(struct device_node *child, - int *addrc, int *sizec); - int (*map)(u32 *addr, const u32 *range, - int na, int ns, int pna); - unsigned long (*get_flags)(const u32 *addr, unsigned long); -}; - -/* - * Default translator (generic bus) - */ - -static void of_bus_default_count_cells(struct device_node *dev, - int *addrc, int *sizec) -{ - get_cells(dev, addrc, sizec); -} - -/* Make sure the least significant 64-bits are in-range. Even - * for 3 or 4 cell values it is a good enough approximation. - */ -static int of_out_of_range(const u32 *addr, const u32 *base, - const u32 *size, int na, int ns) -{ - u64 a = of_read_addr(addr, na); - u64 b = of_read_addr(base, na); - - if (a < b) - return 1; - - b += of_read_addr(size, ns); - if (a >= b) - return 1; - - return 0; -} - -static int of_bus_default_map(u32 *addr, const u32 *range, - int na, int ns, int pna) -{ - u32 result[OF_MAX_ADDR_CELLS]; - int i; - - if (ns > 2) { - printk("of_device: Cannot handle size cells (%d) > 2.", ns); - return -EINVAL; - } - - if (of_out_of_range(addr, range, range + na + pna, na, ns)) - return -EINVAL; - - /* Start with the parent range base. */ - memcpy(result, range + na, pna * 4); - - /* Add in the child address offset. */ - for (i = 0; i < na; i++) - result[pna - 1 - i] += - (addr[na - 1 - i] - - range[na - 1 - i]); - - memcpy(addr, result, pna * 4); - - return 0; -} - -static unsigned long of_bus_default_get_flags(const u32 *addr, unsigned long flags) -{ - if (flags) - return flags; - return IORESOURCE_MEM; -} - /* * PCI bus specific translator */ @@ -295,42 +147,6 @@ static unsigned long of_bus_pci_get_flags(const u32 *addr, unsigned long flags) } /* - * SBUS bus specific translator - */ - -static int of_bus_sbus_match(struct device_node *np) -{ - struct device_node *dp = np; - - while (dp) { - if (!strcmp(dp->name, "sbus") || - !strcmp(dp->name, "sbi")) - return 1; - - /* Have a look at use_1to1_mapping(). We're trying - * to match SBUS if that's the top-level bus and we - * don't have some intervening real bus that provides - * ranges based translations. - */ - if (of_find_property(dp, "ranges", NULL) != NULL) - break; - - dp = dp->parent; - } - - return 0; -} - -static void of_bus_sbus_count_cells(struct device_node *child, - int *addrc, int *sizec) -{ - if (addrc) - *addrc = 2; - if (sizec) - *sizec = 1; -} - -/* * FHC/Central bus specific translator. * * This is just needed to hard-code the address and size cell diff --git a/arch/sparc/kernel/of_device_common.c b/arch/sparc/kernel/of_device_common.c new file mode 100644 index 00000000000..cb8eb799bb6 --- /dev/null +++ b/arch/sparc/kernel/of_device_common.c @@ -0,0 +1,174 @@ +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/of.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mod_devicetable.h> +#include <linux/slab.h> +#include <linux/errno.h> +#include <linux/irq.h> +#include <linux/of_device.h> +#include <linux/of_platform.h> + +#include "of_device_common.h" + +static int node_match(struct device *dev, void *data) +{ + struct of_device *op = to_of_device(dev); + struct device_node *dp = data; + + return (op->node == dp); +} + +struct of_device *of_find_device_by_node(struct device_node *dp) +{ + struct device *dev = bus_find_device(&of_platform_bus_type, NULL, + dp, node_match); + + if (dev) + return to_of_device(dev); + + return NULL; +} +EXPORT_SYMBOL(of_find_device_by_node); + +unsigned int irq_of_parse_and_map(struct device_node *node, int index) +{ + struct of_device *op = of_find_device_by_node(node); + + if (!op || index >= op->num_irqs) + return 0; + + return op->irqs[index]; +} +EXPORT_SYMBOL(irq_of_parse_and_map); + +/* Take the archdata values for IOMMU, STC, and HOSTDATA found in + * BUS and propagate to all child of_device objects. + */ +void of_propagate_archdata(struct of_device *bus) +{ + struct dev_archdata *bus_sd = &bus->dev.archdata; + struct device_node *bus_dp = bus->node; + struct device_node *dp; + + for (dp = bus_dp->child; dp; dp = dp->sibling) { + struct of_device *op = of_find_device_by_node(dp); + + op->dev.archdata.iommu = bus_sd->iommu; + op->dev.archdata.stc = bus_sd->stc; + op->dev.archdata.host_controller = bus_sd->host_controller; + op->dev.archdata.numa_node = bus_sd->numa_node; + + if (dp->child) + of_propagate_archdata(op); + } +} + +struct bus_type of_platform_bus_type; +EXPORT_SYMBOL(of_platform_bus_type); + +static void get_cells(struct device_node *dp, int *addrc, int *sizec) +{ + if (addrc) + *addrc = of_n_addr_cells(dp); + if (sizec) + *sizec = of_n_size_cells(dp); +} + +/* + * Default translator (generic bus) + */ + +void of_bus_default_count_cells(struct device_node *dev, int *addrc, int *sizec) +{ + get_cells(dev, addrc, sizec); +} + +/* Make sure the least significant 64-bits are in-range. Even + * for 3 or 4 cell values it is a good enough approximation. + */ +int of_out_of_range(const u32 *addr, const u32 *base, + const u32 *size, int na, int ns) +{ + u64 a = of_read_addr(addr, na); + u64 b = of_read_addr(base, na); + + if (a < b) + return 1; + + b += of_read_addr(size, ns); + if (a >= b) + return 1; + + return 0; +} + +int of_bus_default_map(u32 *addr, const u32 *range, int na, int ns, int pna) +{ + u32 result[OF_MAX_ADDR_CELLS]; + int i; + + if (ns > 2) { + printk("of_device: Cannot handle size cells (%d) > 2.", ns); + return -EINVAL; + } + + if (of_out_of_range(addr, range, range + na + pna, na, ns)) + return -EINVAL; + + /* Start with the parent range base. */ + memcpy(result, range + na, pna * 4); + + /* Add in the child address offset. */ + for (i = 0; i < na; i++) + result[pna - 1 - i] += + (addr[na - 1 - i] - + range[na - 1 - i]); + + memcpy(addr, result, pna * 4); + + return 0; +} + +unsigned long of_bus_default_get_flags(const u32 *addr, unsigned long flags) +{ + if (flags) + return flags; + return IORESOURCE_MEM; +} + +/* + * SBUS bus specific translator + */ + +int of_bus_sbus_match(struct device_node *np) +{ + struct device_node *dp = np; + + while (dp) { + if (!strcmp(dp->name, "sbus") || + !strcmp(dp->name, "sbi")) + return 1; + + /* Have a look at use_1to1_mapping(). We're trying + * to match SBUS if that's the top-level bus and we + * don't have some intervening real bus that provides + * ranges based translations. + */ + if (of_find_property(dp, "ranges", NULL) != NULL) + break; + + dp = dp->parent; + } + + return 0; +} + +void of_bus_sbus_count_cells(struct device_node *child, int *addrc, int *sizec) +{ + if (addrc) + *addrc = 2; + if (sizec) + *sizec = 1; +} diff --git a/arch/sparc/kernel/of_device_common.h b/arch/sparc/kernel/of_device_common.h new file mode 100644 index 00000000000..cdfd2399284 --- /dev/null +++ b/arch/sparc/kernel/of_device_common.h @@ -0,0 +1,36 @@ +#ifndef _OF_DEVICE_COMMON_H +#define _OF_DEVICE_COMMON_H + +static inline u64 of_read_addr(const u32 *cell, int size) +{ + u64 r = 0; + while (size--) + r = (r << 32) | *(cell++); + return r; +} + +void of_bus_default_count_cells(struct device_node *dev, int *addrc, + int *sizec); +int of_out_of_range(const u32 *addr, const u32 *base, + const u32 *size, int na, int ns); +int of_bus_default_map(u32 *addr, const u32 *range, int na, int ns, int pna); +unsigned long of_bus_default_get_flags(const u32 *addr, unsigned long flags); + +int of_bus_sbus_match(struct device_node *np); +void of_bus_sbus_count_cells(struct device_node *child, int *addrc, int *sizec); + +/* Max address size we deal with */ +#define OF_MAX_ADDR_CELLS 4 + +struct of_bus { + const char *name; + const char *addr_prop_name; + int (*match)(struct device_node *parent); + void (*count_cells)(struct device_node *child, + int *addrc, int *sizec); + int (*map)(u32 *addr, const u32 *range, + int na, int ns, int pna); + unsigned long (*get_flags)(const u32 *addr, unsigned long); +}; + +#endif /* _OF_DEVICE_COMMON_H */ diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 5db5ebed35d..2485eaa2310 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -230,8 +230,9 @@ static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu, free_pages((unsigned long)cpu, order); } -static dma_addr_t dma_4v_map_single(struct device *dev, void *ptr, size_t sz, - enum dma_data_direction direction) +static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t sz, + enum dma_data_direction direction) { struct iommu *iommu; unsigned long flags, npages, oaddr; @@ -245,7 +246,7 @@ static dma_addr_t dma_4v_map_single(struct device *dev, void *ptr, size_t sz, if (unlikely(direction == DMA_NONE)) goto bad; - oaddr = (unsigned long)ptr; + oaddr = (unsigned long)(page_address(page) + offset); npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK); npages >>= IO_PAGE_SHIFT; @@ -294,8 +295,8 @@ iommu_map_fail: return DMA_ERROR_CODE; } -static void dma_4v_unmap_single(struct device *dev, dma_addr_t bus_addr, - size_t sz, enum dma_data_direction direction) +static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr, + size_t sz, enum dma_data_direction direction) { struct pci_pbm_info *pbm; struct iommu *iommu; @@ -537,8 +538,8 @@ static void dma_4v_sync_sg_for_cpu(struct device *dev, static const struct dma_ops sun4v_dma_ops = { .alloc_coherent = dma_4v_alloc_coherent, .free_coherent = dma_4v_free_coherent, - .map_single = dma_4v_map_single, - .unmap_single = dma_4v_unmap_single, + .map_page = dma_4v_map_page, + .unmap_page = dma_4v_unmap_page, .map_sg = dma_4v_map_sg, .unmap_sg = dma_4v_unmap_sg, .sync_single_for_cpu = dma_4v_sync_single_for_cpu, diff --git a/arch/sparc/kernel/prom.h b/arch/sparc/kernel/prom.h index bb0f0fda6ca..453397fe5e1 100644 --- a/arch/sparc/kernel/prom.h +++ b/arch/sparc/kernel/prom.h @@ -22,7 +22,6 @@ static inline int is_root_node(const struct device_node *dp) extern char *build_path_component(struct device_node *dp); extern void of_console_init(void); -extern void of_fill_in_cpu_data(void); extern unsigned int prom_early_allocated; diff --git a/arch/sparc/kernel/prom_64.c b/arch/sparc/kernel/prom_64.c index ca55c7012f7..fb06ac2bd38 100644 --- a/arch/sparc/kernel/prom_64.c +++ b/arch/sparc/kernel/prom_64.c @@ -374,75 +374,26 @@ static const char *get_mid_prop(void) return (tlb_type == spitfire ? "upa-portid" : "portid"); } -struct device_node *of_find_node_by_cpuid(int cpuid) -{ - struct device_node *dp; - const char *mid_prop = get_mid_prop(); - - for_each_node_by_type(dp, "cpu") { - int id = of_getintprop_default(dp, mid_prop, -1); - const char *this_mid_prop = mid_prop; - - if (id < 0) { - this_mid_prop = "cpuid"; - id = of_getintprop_default(dp, this_mid_prop, -1); - } - - if (id < 0) { - prom_printf("OF: Serious problem, cpu lacks " - "%s property", this_mid_prop); - prom_halt(); - } - if (cpuid == id) - return dp; - } - return NULL; -} - -void __init of_fill_in_cpu_data(void) +static void *of_iterate_over_cpus(void *(*func)(struct device_node *, int, int), int arg) { struct device_node *dp; const char *mid_prop; - if (tlb_type == hypervisor) - return; - mid_prop = get_mid_prop(); - ncpus_probed = 0; for_each_node_by_type(dp, "cpu") { int cpuid = of_getintprop_default(dp, mid_prop, -1); const char *this_mid_prop = mid_prop; - struct device_node *portid_parent; - int portid = -1; + void *ret; - portid_parent = NULL; if (cpuid < 0) { this_mid_prop = "cpuid"; cpuid = of_getintprop_default(dp, this_mid_prop, -1); - if (cpuid >= 0) { - int limit = 2; - - portid_parent = dp; - while (limit--) { - portid_parent = portid_parent->parent; - if (!portid_parent) - break; - portid = of_getintprop_default(portid_parent, - "portid", -1); - if (portid >= 0) - break; - } - } } - if (cpuid < 0) { prom_printf("OF: Serious problem, cpu lacks " "%s property", this_mid_prop); prom_halt(); } - - ncpus_probed++; - #ifdef CONFIG_SMP if (cpuid >= NR_CPUS) { printk(KERN_WARNING "Ignoring CPU %d which is " @@ -450,79 +401,142 @@ void __init of_fill_in_cpu_data(void) cpuid, NR_CPUS); continue; } -#else - /* On uniprocessor we only want the values for the - * real physical cpu the kernel booted onto, however - * cpu_data() only has one entry at index 0. - */ - if (cpuid != real_hard_smp_processor_id()) - continue; - cpuid = 0; #endif + ret = func(dp, cpuid, arg); + if (ret) + return ret; + } + return NULL; +} - cpu_data(cpuid).clock_tick = - of_getintprop_default(dp, "clock-frequency", 0); - - if (portid_parent) { - cpu_data(cpuid).dcache_size = - of_getintprop_default(dp, "l1-dcache-size", - 16 * 1024); - cpu_data(cpuid).dcache_line_size = - of_getintprop_default(dp, "l1-dcache-line-size", - 32); - cpu_data(cpuid).icache_size = - of_getintprop_default(dp, "l1-icache-size", - 8 * 1024); - cpu_data(cpuid).icache_line_size = - of_getintprop_default(dp, "l1-icache-line-size", - 32); - cpu_data(cpuid).ecache_size = - of_getintprop_default(dp, "l2-cache-size", 0); - cpu_data(cpuid).ecache_line_size = - of_getintprop_default(dp, "l2-cache-line-size", 0); - if (!cpu_data(cpuid).ecache_size || - !cpu_data(cpuid).ecache_line_size) { - cpu_data(cpuid).ecache_size = - of_getintprop_default(portid_parent, - "l2-cache-size", - (4 * 1024 * 1024)); - cpu_data(cpuid).ecache_line_size = - of_getintprop_default(portid_parent, - "l2-cache-line-size", 64); - } - - cpu_data(cpuid).core_id = portid + 1; - cpu_data(cpuid).proc_id = portid; +static void *check_cpu_node(struct device_node *dp, int cpuid, int id) +{ + if (id == cpuid) + return dp; + return NULL; +} + +struct device_node *of_find_node_by_cpuid(int cpuid) +{ + return of_iterate_over_cpus(check_cpu_node, cpuid); +} + +static void *record_one_cpu(struct device_node *dp, int cpuid, int arg) +{ + ncpus_probed++; #ifdef CONFIG_SMP - sparc64_multi_core = 1; + set_cpu_present(cpuid, true); + set_cpu_possible(cpuid, true); #endif - } else { - cpu_data(cpuid).dcache_size = - of_getintprop_default(dp, "dcache-size", 16 * 1024); - cpu_data(cpuid).dcache_line_size = - of_getintprop_default(dp, "dcache-line-size", 32); + return NULL; +} - cpu_data(cpuid).icache_size = - of_getintprop_default(dp, "icache-size", 16 * 1024); - cpu_data(cpuid).icache_line_size = - of_getintprop_default(dp, "icache-line-size", 32); +void __init of_populate_present_mask(void) +{ + if (tlb_type == hypervisor) + return; + + ncpus_probed = 0; + of_iterate_over_cpus(record_one_cpu, 0); +} +static void *fill_in_one_cpu(struct device_node *dp, int cpuid, int arg) +{ + struct device_node *portid_parent = NULL; + int portid = -1; + + if (of_find_property(dp, "cpuid", NULL)) { + int limit = 2; + + portid_parent = dp; + while (limit--) { + portid_parent = portid_parent->parent; + if (!portid_parent) + break; + portid = of_getintprop_default(portid_parent, + "portid", -1); + if (portid >= 0) + break; + } + } + +#ifndef CONFIG_SMP + /* On uniprocessor we only want the values for the + * real physical cpu the kernel booted onto, however + * cpu_data() only has one entry at index 0. + */ + if (cpuid != real_hard_smp_processor_id()) + return NULL; + cpuid = 0; +#endif + + cpu_data(cpuid).clock_tick = + of_getintprop_default(dp, "clock-frequency", 0); + + if (portid_parent) { + cpu_data(cpuid).dcache_size = + of_getintprop_default(dp, "l1-dcache-size", + 16 * 1024); + cpu_data(cpuid).dcache_line_size = + of_getintprop_default(dp, "l1-dcache-line-size", + 32); + cpu_data(cpuid).icache_size = + of_getintprop_default(dp, "l1-icache-size", + 8 * 1024); + cpu_data(cpuid).icache_line_size = + of_getintprop_default(dp, "l1-icache-line-size", + 32); + cpu_data(cpuid).ecache_size = + of_getintprop_default(dp, "l2-cache-size", 0); + cpu_data(cpuid).ecache_line_size = + of_getintprop_default(dp, "l2-cache-line-size", 0); + if (!cpu_data(cpuid).ecache_size || + !cpu_data(cpuid).ecache_line_size) { cpu_data(cpuid).ecache_size = - of_getintprop_default(dp, "ecache-size", + of_getintprop_default(portid_parent, + "l2-cache-size", (4 * 1024 * 1024)); cpu_data(cpuid).ecache_line_size = - of_getintprop_default(dp, "ecache-line-size", 64); - - cpu_data(cpuid).core_id = 0; - cpu_data(cpuid).proc_id = -1; + of_getintprop_default(portid_parent, + "l2-cache-line-size", 64); } + cpu_data(cpuid).core_id = portid + 1; + cpu_data(cpuid).proc_id = portid; #ifdef CONFIG_SMP - set_cpu_present(cpuid, true); - set_cpu_possible(cpuid, true); + sparc64_multi_core = 1; #endif + } else { + cpu_data(cpuid).dcache_size = + of_getintprop_default(dp, "dcache-size", 16 * 1024); + cpu_data(cpuid).dcache_line_size = + of_getintprop_default(dp, "dcache-line-size", 32); + + cpu_data(cpuid).icache_size = + of_getintprop_default(dp, "icache-size", 16 * 1024); + cpu_data(cpuid).icache_line_size = + of_getintprop_default(dp, "icache-line-size", 32); + + cpu_data(cpuid).ecache_size = + of_getintprop_default(dp, "ecache-size", + (4 * 1024 * 1024)); + cpu_data(cpuid).ecache_line_size = + of_getintprop_default(dp, "ecache-line-size", 64); + + cpu_data(cpuid).core_id = 0; + cpu_data(cpuid).proc_id = -1; } + return NULL; +} + +void __init of_fill_in_cpu_data(void) +{ + if (tlb_type == hypervisor) + return; + + of_iterate_over_cpus(fill_in_one_cpu, 0); + smp_fill_in_sib_core_maps(); } diff --git a/arch/sparc/kernel/prom_common.c b/arch/sparc/kernel/prom_common.c index ff7b591c894..0fb5789d43c 100644 --- a/arch/sparc/kernel/prom_common.c +++ b/arch/sparc/kernel/prom_common.c @@ -313,6 +313,4 @@ void __init prom_build_devicetree(void) printk("PROM: Built device tree with %u bytes of memory.\n", prom_early_allocated); - - of_fill_in_cpu_data(); } diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index f7642e5a94d..fa44eaf8d89 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -20,7 +20,8 @@ #include <linux/cache.h> #include <linux/jiffies.h> #include <linux/profile.h> -#include <linux/lmb.h> +#include <linux/bootmem.h> +#include <linux/vmalloc.h> #include <linux/cpu.h> #include <asm/head.h> @@ -47,6 +48,8 @@ #include <asm/ldc.h> #include <asm/hypervisor.h> +#include "cpumap.h" + int sparc64_multi_core __read_mostly; DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE; @@ -278,7 +281,7 @@ static unsigned long kimage_addr_to_ra(void *p) return kern_base + (val - KERNBASE); } -static void __cpuinit ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg) +static void __cpuinit ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg, void **descrp) { extern unsigned long sparc64_ttable_tl0; extern unsigned long kern_locked_tte_data; @@ -298,12 +301,12 @@ static void __cpuinit ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread "hvtramp_descr.\n"); return; } + *descrp = hdesc; hdesc->cpu = cpu; hdesc->num_mappings = num_kernel_image_mappings; tb = &trap_block[cpu]; - tb->hdesc = hdesc; hdesc->fault_info_va = (unsigned long) &tb->fault_info; hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info); @@ -341,12 +344,12 @@ static struct thread_info *cpu_new_thread = NULL; static int __cpuinit smp_boot_one_cpu(unsigned int cpu) { - struct trap_per_cpu *tb = &trap_block[cpu]; unsigned long entry = (unsigned long)(&sparc64_cpu_startup); unsigned long cookie = (unsigned long)(&cpu_new_thread); struct task_struct *p; + void *descr = NULL; int timeout, ret; p = fork_idle(cpu); @@ -359,7 +362,8 @@ static int __cpuinit smp_boot_one_cpu(unsigned int cpu) #if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU) if (ldom_domaining_enabled) ldom_startcpu_cpuid(cpu, - (unsigned long) cpu_new_thread); + (unsigned long) cpu_new_thread, + &descr); else #endif prom_startcpu_cpuid(cpu, entry, cookie); @@ -383,10 +387,7 @@ static int __cpuinit smp_boot_one_cpu(unsigned int cpu) } cpu_new_thread = NULL; - if (tb->hdesc) { - kfree(tb->hdesc); - tb->hdesc = NULL; - } + kfree(descr); return ret; } @@ -1315,6 +1316,8 @@ int __cpu_disable(void) cpu_clear(cpu, cpu_online_map); ipi_call_unlock(); + cpu_map_rebuild(); + return 0; } @@ -1373,36 +1376,171 @@ void smp_send_stop(void) { } -unsigned long __per_cpu_base __read_mostly; -unsigned long __per_cpu_shift __read_mostly; +/** + * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu + * @cpu: cpu to allocate for + * @size: size allocation in bytes + * @align: alignment + * + * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper + * does the right thing for NUMA regardless of the current + * configuration. + * + * RETURNS: + * Pointer to the allocated area on success, NULL on failure. + */ +static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, + unsigned long align) +{ + const unsigned long goal = __pa(MAX_DMA_ADDRESS); +#ifdef CONFIG_NEED_MULTIPLE_NODES + int node = cpu_to_node(cpu); + void *ptr; + + if (!node_online(node) || !NODE_DATA(node)) { + ptr = __alloc_bootmem(size, align, goal); + pr_info("cpu %d has no node %d or node-local memory\n", + cpu, node); + pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n", + cpu, size, __pa(ptr)); + } else { + ptr = __alloc_bootmem_node(NODE_DATA(node), + size, align, goal); + pr_debug("per cpu data for cpu%d %lu bytes on node%d at " + "%016lx\n", cpu, size, node, __pa(ptr)); + } + return ptr; +#else + return __alloc_bootmem(size, align, goal); +#endif +} -EXPORT_SYMBOL(__per_cpu_base); -EXPORT_SYMBOL(__per_cpu_shift); +static size_t pcpur_size __initdata; +static void **pcpur_ptrs __initdata; -void __init real_setup_per_cpu_areas(void) +static struct page * __init pcpur_get_page(unsigned int cpu, int pageno) { - unsigned long paddr, goal, size, i; - char *ptr; + size_t off = (size_t)pageno << PAGE_SHIFT; - /* Copy section for each CPU (we discard the original) */ - goal = PERCPU_ENOUGH_ROOM; + if (off >= pcpur_size) + return NULL; - __per_cpu_shift = PAGE_SHIFT; - for (size = PAGE_SIZE; size < goal; size <<= 1UL) - __per_cpu_shift++; + return virt_to_page(pcpur_ptrs[cpu] + off); +} + +#define PCPU_CHUNK_SIZE (4UL * 1024UL * 1024UL) + +static void __init pcpu_map_range(unsigned long start, unsigned long end, + struct page *page) +{ + unsigned long pfn = page_to_pfn(page); + unsigned long pte_base; + + BUG_ON((pfn<<PAGE_SHIFT)&(PCPU_CHUNK_SIZE - 1UL)); + + pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4U | + _PAGE_CP_4U | _PAGE_CV_4U | + _PAGE_P_4U | _PAGE_W_4U); + if (tlb_type == hypervisor) + pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4V | + _PAGE_CP_4V | _PAGE_CV_4V | + _PAGE_P_4V | _PAGE_W_4V); + + while (start < end) { + pgd_t *pgd = pgd_offset_k(start); + unsigned long this_end; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + pud = pud_offset(pgd, start); + if (pud_none(*pud)) { + pmd_t *new; + + new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); + pud_populate(&init_mm, pud, new); + } + + pmd = pmd_offset(pud, start); + if (!pmd_present(*pmd)) { + pte_t *new; + + new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); + pmd_populate_kernel(&init_mm, pmd, new); + } - paddr = lmb_alloc(size * NR_CPUS, PAGE_SIZE); - if (!paddr) { - prom_printf("Cannot allocate per-cpu memory.\n"); - prom_halt(); + pte = pte_offset_kernel(pmd, start); + this_end = (start + PMD_SIZE) & PMD_MASK; + if (this_end > end) + this_end = end; + + while (start < this_end) { + unsigned long paddr = pfn << PAGE_SHIFT; + + pte_val(*pte) = (paddr | pte_base); + + start += PAGE_SIZE; + pte++; + pfn++; + } + } +} + +void __init setup_per_cpu_areas(void) +{ + size_t dyn_size, static_size = __per_cpu_end - __per_cpu_start; + static struct vm_struct vm; + unsigned long delta, cpu; + size_t pcpu_unit_size; + size_t ptrs_size; + + pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + + PERCPU_DYNAMIC_RESERVE); + dyn_size = pcpur_size - static_size - PERCPU_MODULE_RESERVE; + + + ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0])); + pcpur_ptrs = alloc_bootmem(ptrs_size); + + for_each_possible_cpu(cpu) { + pcpur_ptrs[cpu] = pcpu_alloc_bootmem(cpu, PCPU_CHUNK_SIZE, + PCPU_CHUNK_SIZE); + + free_bootmem(__pa(pcpur_ptrs[cpu] + pcpur_size), + PCPU_CHUNK_SIZE - pcpur_size); + + memcpy(pcpur_ptrs[cpu], __per_cpu_load, static_size); } - ptr = __va(paddr); - __per_cpu_base = ptr - __per_cpu_start; + /* allocate address and map */ + vm.flags = VM_ALLOC; + vm.size = num_possible_cpus() * PCPU_CHUNK_SIZE; + vm_area_register_early(&vm, PCPU_CHUNK_SIZE); + + for_each_possible_cpu(cpu) { + unsigned long start = (unsigned long) vm.addr; + unsigned long end; + + start += cpu * PCPU_CHUNK_SIZE; + end = start + PCPU_CHUNK_SIZE; + pcpu_map_range(start, end, virt_to_page(pcpur_ptrs[cpu])); + } - for (i = 0; i < NR_CPUS; i++, ptr += size) - memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + pcpu_unit_size = pcpu_setup_first_chunk(pcpur_get_page, static_size, + PERCPU_MODULE_RESERVE, dyn_size, + PCPU_CHUNK_SIZE, vm.addr, NULL); + + free_bootmem(__pa(pcpur_ptrs), ptrs_size); + + delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; + for_each_possible_cpu(cpu) { + __per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size; + } /* Setup %g5 for the boot cpu. */ __local_per_cpu_offset = __per_cpu_offset(smp_processor_id()); + + of_fill_in_cpu_data(); + if (tlb_type == hypervisor) + mdesc_fill_in_cpu_data(cpu_all_mask); } diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 00ec3b15f38..69090165729 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -81,4 +81,6 @@ sys_call_table: /*305*/ .long sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait /*310*/ .long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate /*315*/ .long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1 -/*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv, sys_pwritev +/*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv +/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo + diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 82b5bf85b9d..6b3ee88e253 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -82,7 +82,8 @@ sys_call_table32: .word compat_sys_set_mempolicy, compat_sys_kexec_load, compat_sys_move_pages, sys_getcpu, compat_sys_epoll_pwait /*310*/ .word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate .word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1 -/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv, compat_sys_pwritev +/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv + .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo #endif /* CONFIG_COMPAT */ @@ -156,4 +157,5 @@ sys_call_table: .word sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait /*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate .word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1 -/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv, sys_pwritev +/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv + .word sys_pwritev, sys_rt_tgsigqueueinfo diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index d809c4ebb48..10f7bb9fc14 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -2509,6 +2509,7 @@ void do_getpsr(struct pt_regs *regs) } struct trap_per_cpu trap_block[NR_CPUS]; +EXPORT_SYMBOL(trap_block); /* This can get invoked before sched_init() so play it super safe * and use hard_smp_processor_id(). @@ -2530,84 +2531,97 @@ extern void tsb_config_offsets_are_bolixed_dave(void); void __init trap_init(void) { /* Compile time sanity check. */ - if (TI_TASK != offsetof(struct thread_info, task) || - TI_FLAGS != offsetof(struct thread_info, flags) || - TI_CPU != offsetof(struct thread_info, cpu) || - TI_FPSAVED != offsetof(struct thread_info, fpsaved) || - TI_KSP != offsetof(struct thread_info, ksp) || - TI_FAULT_ADDR != offsetof(struct thread_info, fault_address) || - TI_KREGS != offsetof(struct thread_info, kregs) || - TI_UTRAPS != offsetof(struct thread_info, utraps) || - TI_EXEC_DOMAIN != offsetof(struct thread_info, exec_domain) || - TI_REG_WINDOW != offsetof(struct thread_info, reg_window) || - TI_RWIN_SPTRS != offsetof(struct thread_info, rwbuf_stkptrs) || - TI_GSR != offsetof(struct thread_info, gsr) || - TI_XFSR != offsetof(struct thread_info, xfsr) || - TI_USER_CNTD0 != offsetof(struct thread_info, user_cntd0) || - TI_USER_CNTD1 != offsetof(struct thread_info, user_cntd1) || - TI_KERN_CNTD0 != offsetof(struct thread_info, kernel_cntd0) || - TI_KERN_CNTD1 != offsetof(struct thread_info, kernel_cntd1) || - TI_PCR != offsetof(struct thread_info, pcr_reg) || - TI_PRE_COUNT != offsetof(struct thread_info, preempt_count) || - TI_NEW_CHILD != offsetof(struct thread_info, new_child) || - TI_SYS_NOERROR != offsetof(struct thread_info, syscall_noerror) || - TI_RESTART_BLOCK != offsetof(struct thread_info, restart_block) || - TI_KUNA_REGS != offsetof(struct thread_info, kern_una_regs) || - TI_KUNA_INSN != offsetof(struct thread_info, kern_una_insn) || - TI_FPREGS != offsetof(struct thread_info, fpregs) || - (TI_FPREGS & (64 - 1))) - thread_info_offsets_are_bolixed_dave(); - - if (TRAP_PER_CPU_THREAD != offsetof(struct trap_per_cpu, thread) || - (TRAP_PER_CPU_PGD_PADDR != - offsetof(struct trap_per_cpu, pgd_paddr)) || - (TRAP_PER_CPU_CPU_MONDO_PA != - offsetof(struct trap_per_cpu, cpu_mondo_pa)) || - (TRAP_PER_CPU_DEV_MONDO_PA != - offsetof(struct trap_per_cpu, dev_mondo_pa)) || - (TRAP_PER_CPU_RESUM_MONDO_PA != - offsetof(struct trap_per_cpu, resum_mondo_pa)) || - (TRAP_PER_CPU_RESUM_KBUF_PA != - offsetof(struct trap_per_cpu, resum_kernel_buf_pa)) || - (TRAP_PER_CPU_NONRESUM_MONDO_PA != - offsetof(struct trap_per_cpu, nonresum_mondo_pa)) || - (TRAP_PER_CPU_NONRESUM_KBUF_PA != - offsetof(struct trap_per_cpu, nonresum_kernel_buf_pa)) || - (TRAP_PER_CPU_FAULT_INFO != - offsetof(struct trap_per_cpu, fault_info)) || - (TRAP_PER_CPU_CPU_MONDO_BLOCK_PA != - offsetof(struct trap_per_cpu, cpu_mondo_block_pa)) || - (TRAP_PER_CPU_CPU_LIST_PA != - offsetof(struct trap_per_cpu, cpu_list_pa)) || - (TRAP_PER_CPU_TSB_HUGE != - offsetof(struct trap_per_cpu, tsb_huge)) || - (TRAP_PER_CPU_TSB_HUGE_TEMP != - offsetof(struct trap_per_cpu, tsb_huge_temp)) || - (TRAP_PER_CPU_IRQ_WORKLIST_PA != - offsetof(struct trap_per_cpu, irq_worklist_pa)) || - (TRAP_PER_CPU_CPU_MONDO_QMASK != - offsetof(struct trap_per_cpu, cpu_mondo_qmask)) || - (TRAP_PER_CPU_DEV_MONDO_QMASK != - offsetof(struct trap_per_cpu, dev_mondo_qmask)) || - (TRAP_PER_CPU_RESUM_QMASK != - offsetof(struct trap_per_cpu, resum_qmask)) || - (TRAP_PER_CPU_NONRESUM_QMASK != - offsetof(struct trap_per_cpu, nonresum_qmask))) - trap_per_cpu_offsets_are_bolixed_dave(); - - if ((TSB_CONFIG_TSB != - offsetof(struct tsb_config, tsb)) || - (TSB_CONFIG_RSS_LIMIT != - offsetof(struct tsb_config, tsb_rss_limit)) || - (TSB_CONFIG_NENTRIES != - offsetof(struct tsb_config, tsb_nentries)) || - (TSB_CONFIG_REG_VAL != - offsetof(struct tsb_config, tsb_reg_val)) || - (TSB_CONFIG_MAP_VADDR != - offsetof(struct tsb_config, tsb_map_vaddr)) || - (TSB_CONFIG_MAP_PTE != - offsetof(struct tsb_config, tsb_map_pte))) - tsb_config_offsets_are_bolixed_dave(); + BUILD_BUG_ON(TI_TASK != offsetof(struct thread_info, task) || + TI_FLAGS != offsetof(struct thread_info, flags) || + TI_CPU != offsetof(struct thread_info, cpu) || + TI_FPSAVED != offsetof(struct thread_info, fpsaved) || + TI_KSP != offsetof(struct thread_info, ksp) || + TI_FAULT_ADDR != offsetof(struct thread_info, + fault_address) || + TI_KREGS != offsetof(struct thread_info, kregs) || + TI_UTRAPS != offsetof(struct thread_info, utraps) || + TI_EXEC_DOMAIN != offsetof(struct thread_info, + exec_domain) || + TI_REG_WINDOW != offsetof(struct thread_info, + reg_window) || + TI_RWIN_SPTRS != offsetof(struct thread_info, + rwbuf_stkptrs) || + TI_GSR != offsetof(struct thread_info, gsr) || + TI_XFSR != offsetof(struct thread_info, xfsr) || + TI_USER_CNTD0 != offsetof(struct thread_info, + user_cntd0) || + TI_USER_CNTD1 != offsetof(struct thread_info, + user_cntd1) || + TI_KERN_CNTD0 != offsetof(struct thread_info, + kernel_cntd0) || + TI_KERN_CNTD1 != offsetof(struct thread_info, + kernel_cntd1) || + TI_PCR != offsetof(struct thread_info, pcr_reg) || + TI_PRE_COUNT != offsetof(struct thread_info, + preempt_count) || + TI_NEW_CHILD != offsetof(struct thread_info, new_child) || + TI_SYS_NOERROR != offsetof(struct thread_info, + syscall_noerror) || + TI_RESTART_BLOCK != offsetof(struct thread_info, + restart_block) || + TI_KUNA_REGS != offsetof(struct thread_info, + kern_una_regs) || + TI_KUNA_INSN != offsetof(struct thread_info, + kern_una_insn) || + TI_FPREGS != offsetof(struct thread_info, fpregs) || + (TI_FPREGS & (64 - 1))); + + BUILD_BUG_ON(TRAP_PER_CPU_THREAD != offsetof(struct trap_per_cpu, + thread) || + (TRAP_PER_CPU_PGD_PADDR != + offsetof(struct trap_per_cpu, pgd_paddr)) || + (TRAP_PER_CPU_CPU_MONDO_PA != + offsetof(struct trap_per_cpu, cpu_mondo_pa)) || + (TRAP_PER_CPU_DEV_MONDO_PA != + offsetof(struct trap_per_cpu, dev_mondo_pa)) || + (TRAP_PER_CPU_RESUM_MONDO_PA != + offsetof(struct trap_per_cpu, resum_mondo_pa)) || + (TRAP_PER_CPU_RESUM_KBUF_PA != + offsetof(struct trap_per_cpu, resum_kernel_buf_pa)) || + (TRAP_PER_CPU_NONRESUM_MONDO_PA != + offsetof(struct trap_per_cpu, nonresum_mondo_pa)) || + (TRAP_PER_CPU_NONRESUM_KBUF_PA != + offsetof(struct trap_per_cpu, nonresum_kernel_buf_pa)) || + (TRAP_PER_CPU_FAULT_INFO != + offsetof(struct trap_per_cpu, fault_info)) || + (TRAP_PER_CPU_CPU_MONDO_BLOCK_PA != + offsetof(struct trap_per_cpu, cpu_mondo_block_pa)) || + (TRAP_PER_CPU_CPU_LIST_PA != + offsetof(struct trap_per_cpu, cpu_list_pa)) || + (TRAP_PER_CPU_TSB_HUGE != + offsetof(struct trap_per_cpu, tsb_huge)) || + (TRAP_PER_CPU_TSB_HUGE_TEMP != + offsetof(struct trap_per_cpu, tsb_huge_temp)) || + (TRAP_PER_CPU_IRQ_WORKLIST_PA != + offsetof(struct trap_per_cpu, irq_worklist_pa)) || + (TRAP_PER_CPU_CPU_MONDO_QMASK != + offsetof(struct trap_per_cpu, cpu_mondo_qmask)) || + (TRAP_PER_CPU_DEV_MONDO_QMASK != + offsetof(struct trap_per_cpu, dev_mondo_qmask)) || + (TRAP_PER_CPU_RESUM_QMASK != + offsetof(struct trap_per_cpu, resum_qmask)) || + (TRAP_PER_CPU_NONRESUM_QMASK != + offsetof(struct trap_per_cpu, nonresum_qmask)) || + (TRAP_PER_CPU_PER_CPU_BASE != + offsetof(struct trap_per_cpu, __per_cpu_base))); + + BUILD_BUG_ON((TSB_CONFIG_TSB != + offsetof(struct tsb_config, tsb)) || + (TSB_CONFIG_RSS_LIMIT != + offsetof(struct tsb_config, tsb_rss_limit)) || + (TSB_CONFIG_NENTRIES != + offsetof(struct tsb_config, tsb_nentries)) || + (TSB_CONFIG_REG_VAL != + offsetof(struct tsb_config, tsb_reg_val)) || + (TSB_CONFIG_MAP_VADDR != + offsetof(struct tsb_config, tsb_map_vaddr)) || + (TSB_CONFIG_MAP_PTE != + offsetof(struct tsb_config, tsb_map_pte))); /* Attach to the address space of init_task. On SMP we * do this in smp.c:smp_callin for other cpus. |