From 1411e0ec3123ae4c4ead6bfc9fe3ee5a3ae5c327 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 27 Dec 2010 16:48:17 -0800 Subject: x86-64, numa: Put pgtable to local node memory Introduce init_memory_mapping_high(), and use it with 64bit. It will go with every memory segment above 4g to create page table to the memory range itself. before this patch all page tables was on one node. with this patch, one RED-PEN is killed debug out for 8 sockets system after patch [ 0.000000] initial memory mapped : 0 - 20000000 [ 0.000000] init_memory_mapping: [0x00000000000000-0x0000007f74ffff] [ 0.000000] 0000000000 - 007f600000 page 2M [ 0.000000] 007f600000 - 007f750000 page 4k [ 0.000000] kernel direct mapping tables up to 7f750000 @ [0x7f74c000-0x7f74ffff] [ 0.000000] RAMDISK: 7bc84000 - 7f745000 .... [ 0.000000] Adding active range (0, 0x10, 0x95) 0 entries of 3200 used [ 0.000000] Adding active range (0, 0x100, 0x7f750) 1 entries of 3200 used [ 0.000000] Adding active range (0, 0x100000, 0x1080000) 2 entries of 3200 used [ 0.000000] Adding active range (1, 0x1080000, 0x2080000) 3 entries of 3200 used [ 0.000000] Adding active range (2, 0x2080000, 0x3080000) 4 entries of 3200 used [ 0.000000] Adding active range (3, 0x3080000, 0x4080000) 5 entries of 3200 used [ 0.000000] Adding active range (4, 0x4080000, 0x5080000) 6 entries of 3200 used [ 0.000000] Adding active range (5, 0x5080000, 0x6080000) 7 entries of 3200 used [ 0.000000] Adding active range (6, 0x6080000, 0x7080000) 8 entries of 3200 used [ 0.000000] Adding active range (7, 0x7080000, 0x8080000) 9 entries of 3200 used [ 0.000000] init_memory_mapping: [0x00000100000000-0x0000107fffffff] [ 0.000000] 0100000000 - 1080000000 page 2M [ 0.000000] kernel direct mapping tables up to 1080000000 @ [0x107ffbd000-0x107fffffff] [ 0.000000] memblock_x86_reserve_range: [0x107ffc2000-0x107fffffff] PGTABLE [ 0.000000] init_memory_mapping: [0x00001080000000-0x0000207fffffff] [ 0.000000] 1080000000 - 2080000000 page 2M [ 0.000000] kernel direct mapping tables up to 2080000000 @ [0x207ff7d000-0x207fffffff] [ 0.000000] memblock_x86_reserve_range: [0x207ffc0000-0x207fffffff] PGTABLE [ 0.000000] init_memory_mapping: [0x00002080000000-0x0000307fffffff] [ 0.000000] 2080000000 - 3080000000 page 2M [ 0.000000] kernel direct mapping tables up to 3080000000 @ [0x307ff3d000-0x307fffffff] [ 0.000000] memblock_x86_reserve_range: [0x307ffc0000-0x307fffffff] PGTABLE [ 0.000000] init_memory_mapping: [0x00003080000000-0x0000407fffffff] [ 0.000000] 3080000000 - 4080000000 page 2M [ 0.000000] kernel direct mapping tables up to 4080000000 @ [0x407fefd000-0x407fffffff] [ 0.000000] memblock_x86_reserve_range: [0x407ffc0000-0x407fffffff] PGTABLE [ 0.000000] init_memory_mapping: [0x00004080000000-0x0000507fffffff] [ 0.000000] 4080000000 - 5080000000 page 2M [ 0.000000] kernel direct mapping tables up to 5080000000 @ [0x507febd000-0x507fffffff] [ 0.000000] memblock_x86_reserve_range: [0x507ffc0000-0x507fffffff] PGTABLE [ 0.000000] init_memory_mapping: [0x00005080000000-0x0000607fffffff] [ 0.000000] 5080000000 - 6080000000 page 2M [ 0.000000] kernel direct mapping tables up to 6080000000 @ [0x607fe7d000-0x607fffffff] [ 0.000000] memblock_x86_reserve_range: [0x607ffc0000-0x607fffffff] PGTABLE [ 0.000000] init_memory_mapping: [0x00006080000000-0x0000707fffffff] [ 0.000000] 6080000000 - 7080000000 page 2M [ 0.000000] kernel direct mapping tables up to 7080000000 @ [0x707fe3d000-0x707fffffff] [ 0.000000] memblock_x86_reserve_range: [0x707ffc0000-0x707fffffff] PGTABLE [ 0.000000] init_memory_mapping: [0x00007080000000-0x0000807fffffff] [ 0.000000] 7080000000 - 8080000000 page 2M [ 0.000000] kernel direct mapping tables up to 8080000000 @ [0x807fdfc000-0x807fffffff] [ 0.000000] memblock_x86_reserve_range: [0x807ffbf000-0x807fffffff] PGTABLE [ 0.000000] Initmem setup node 0 [0000000000000000-000000107fffffff] [ 0.000000] NODE_DATA [0x0000107ffbd000-0x0000107ffc1fff] [ 0.000000] Initmem setup node 1 [0000001080000000-000000207fffffff] [ 0.000000] NODE_DATA [0x0000207ffbb000-0x0000207ffbffff] [ 0.000000] Initmem setup node 2 [0000002080000000-000000307fffffff] [ 0.000000] NODE_DATA [0x0000307ffbb000-0x0000307ffbffff] [ 0.000000] Initmem setup node 3 [0000003080000000-000000407fffffff] [ 0.000000] NODE_DATA [0x0000407ffbb000-0x0000407ffbffff] [ 0.000000] Initmem setup node 4 [0000004080000000-000000507fffffff] [ 0.000000] NODE_DATA [0x0000507ffbb000-0x0000507ffbffff] [ 0.000000] Initmem setup node 5 [0000005080000000-000000607fffffff] [ 0.000000] NODE_DATA [0x0000607ffbb000-0x0000607ffbffff] [ 0.000000] Initmem setup node 6 [0000006080000000-000000707fffffff] [ 0.000000] NODE_DATA [0x0000707ffbb000-0x0000707ffbffff] [ 0.000000] Initmem setup node 7 [0000007080000000-000000807fffffff] [ 0.000000] NODE_DATA [0x0000807ffba000-0x0000807ffbefff] Signed-off-by: Yinghai Lu LKML-Reference: <4D1933D1.9020609@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/mm/amdtopology_64.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86/mm/amdtopology_64.c') diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c index 51fae9cfdec..ae6ad691a14 100644 --- a/arch/x86/mm/amdtopology_64.c +++ b/arch/x86/mm/amdtopology_64.c @@ -221,12 +221,14 @@ int __init amd_scan_nodes(void) apicid_base = boot_cpu_physical_apicid; } - for_each_node_mask(i, node_possible_map) { - int j; - + for_each_node_mask(i, node_possible_map) memblock_x86_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, nodes[i].end >> PAGE_SHIFT); + init_memory_mapping_high(); + for_each_node_mask(i, node_possible_map) { + int j; + for (j = apicid_base; j < cores + apicid_base; j++) apicid_to_node[(i << bits) + j] = i; setup_node_bootmem(i, nodes[i].start, nodes[i].end); -- cgit v1.2.3-70-g09d2 From bbc9e2f452d9c4b166d1f9a78d941d80173312fe Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 23 Jan 2011 14:37:39 +0100 Subject: x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit The mapping between cpu/apicid and node is done via apicid_to_node[] on 64bit and apicid_2_node[] + apic->x86_32_numa_cpu_node() on 32bit. This difference makes it difficult to further unify 32 and 64bit NUMA handling. This patch unifies it by replacing both apicid_to_node[] and apicid_2_node[] with __apicid_to_node[] array, which is accessed by two accessors - set_apicid_to_node() and numa_cpu_node(). On 64bit, numa_cpu_node() always consults __apicid_to_node[] directly while 32bit goes through apic->numa_cpu_node() method to allow apic implementations to override it. srat_detect_node() for amd cpus contains workaround for broken NUMA configuration which assumes relationship between APIC ID, HT node ID and NUMA topology. Leave it to access __apicid_to_node[] directly as mapping through CPU might result in undesirable behavior change. The comment is reformatted and updated to note the ugliness. Signed-off-by: Tejun Heo Reviewed-by: Pekka Enberg Cc: eric.dumazet@gmail.com Cc: yinghai@kernel.org Cc: brgerst@gmail.com Cc: gorcunov@gmail.com Cc: shaohui.zheng@intel.com Cc: rientjes@google.com LKML-Reference: <1295789862-25482-14-git-send-email-tj@kernel.org> Signed-off-by: Ingo Molnar Cc: David Rientjes --- arch/x86/include/asm/mpspec.h | 1 - arch/x86/include/asm/numa.h | 28 +++++++++++++++++++++++++ arch/x86/include/asm/numa_32.h | 6 ++++++ arch/x86/include/asm/numa_64.h | 5 ++--- arch/x86/kernel/acpi/boot.c | 3 +-- arch/x86/kernel/apic/apic.c | 2 +- arch/x86/kernel/cpu/amd.c | 47 +++++++++++++++++++++++++++--------------- arch/x86/kernel/cpu/intel.c | 3 +-- arch/x86/kernel/smpboot.c | 6 +----- arch/x86/mm/amdtopology_64.c | 4 ++-- arch/x86/mm/numa.c | 6 +++++- arch/x86/mm/numa_32.c | 6 ++++++ arch/x86/mm/numa_64.c | 26 ++++++++++------------- arch/x86/mm/srat_32.c | 2 +- arch/x86/mm/srat_64.c | 12 +++++------ 15 files changed, 101 insertions(+), 56 deletions(-) (limited to 'arch/x86/mm/amdtopology_64.c') diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index edc2a455b72..9c7d95f6174 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -25,7 +25,6 @@ extern int pic_mode; #define MAX_IRQ_SOURCES 256 extern unsigned int def_to_bigsmp; -extern u8 apicid_2_node[]; #ifdef CONFIG_X86_NUMAQ extern int mp_bus_id_to_node[MAX_MP_BUSSES]; diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h index 27da400d313..5e01c768a57 100644 --- a/arch/x86/include/asm/numa.h +++ b/arch/x86/include/asm/numa.h @@ -1,5 +1,33 @@ +#ifndef _ASM_X86_NUMA_H +#define _ASM_X86_NUMA_H + +#include + +#ifdef CONFIG_NUMA +/* + * __apicid_to_node[] stores the raw mapping between physical apicid and + * node and is used to initialize cpu_to_node mapping. + * + * The mapping may be overridden by apic->numa_cpu_node() on 32bit and thus + * should be accessed by the accessors - set_apicid_to_node() and + * numa_cpu_node(). + */ +extern s16 __apicid_to_node[MAX_LOCAL_APIC]; + +static inline void set_apicid_to_node(int apicid, s16 node) +{ + __apicid_to_node[apicid] = node; +} +#else /* CONFIG_NUMA */ +static inline void set_apicid_to_node(int apicid, s16 node) +{ +} +#endif /* CONFIG_NUMA */ + #ifdef CONFIG_X86_32 # include "numa_32.h" #else # include "numa_64.h" #endif + +#endif /* _ASM_X86_NUMA_H */ diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h index b0ef2b449a9..cdf8043d7a1 100644 --- a/arch/x86/include/asm/numa_32.h +++ b/arch/x86/include/asm/numa_32.h @@ -6,6 +6,12 @@ extern int numa_off; extern int pxm_to_nid(int pxm); extern void numa_remove_cpu(int cpu); +#ifdef CONFIG_NUMA +extern int __cpuinit numa_cpu_node(int apicid); +#else /* CONFIG_NUMA */ +static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; } +#endif /* CONFIG_NUMA */ + #ifdef CONFIG_HIGHMEM extern void set_highmem_pages_init(void); #else diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index 0493be39607..4982a9c08c2 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h @@ -2,7 +2,6 @@ #define _ASM_X86_NUMA_64_H #include -#include struct bootnode { u64 start; @@ -17,8 +16,6 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks, extern void numa_init_array(void); extern int numa_off; -extern s16 apicid_to_node[MAX_LOCAL_APIC]; - extern unsigned long numa_free_all_bootmem(void); extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end); @@ -32,6 +29,7 @@ extern void setup_node_bootmem(int nodeid, unsigned long start, #define NODE_MIN_SIZE (4*1024*1024) extern void __init init_cpu_to_node(void); +extern int __cpuinit numa_cpu_node(int cpu); extern void __cpuinit numa_set_node(int cpu, int node); extern void __cpuinit numa_clear_node(int cpu); extern void __cpuinit numa_add_cpu(int cpu); @@ -44,6 +42,7 @@ void numa_emu_cmdline(char *); #endif /* CONFIG_NUMA_EMU */ #else static inline void init_cpu_to_node(void) { } +static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; } static inline void numa_set_node(int cpu, int node) { } static inline void numa_clear_node(int cpu) { } static inline void numa_add_cpu(int cpu, int node) { } diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index b3a71137983..a7bca59ec59 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -589,11 +589,10 @@ static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) nid = acpi_get_node(handle); if (nid == -1 || !node_online(nid)) return; + set_apicid_to_node(physid, nid); #ifdef CONFIG_X86_64 - apicid_to_node[physid] = nid; numa_set_node(cpu, nid); #else /* CONFIG_X86_32 */ - apicid_2_node[physid] = nid; cpu_to_node_map[cpu] = nid; #endif diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 0f4f3c15231..4686ea59b7a 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2026,7 +2026,7 @@ int default_x86_32_numa_cpu_node(int cpu) int apicid = early_per_cpu(x86_cpu_to_apicid, cpu); if (apicid != BAD_APICID) - return apicid_2_node[apicid]; + return __apicid_to_node[apicid]; return NUMA_NO_NODE; #else return 0; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 7c7bedb83c5..3cce8f2bb2e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -234,17 +234,21 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) #endif #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) +/* + * To workaround broken NUMA config. Read the comment in + * srat_detect_node(). + */ static int __cpuinit nearby_node(int apicid) { int i, node; for (i = apicid - 1; i >= 0; i--) { - node = apicid_to_node[i]; + node = __apicid_to_node[i]; if (node != NUMA_NO_NODE && node_online(node)) return node; } for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { - node = apicid_to_node[i]; + node = __apicid_to_node[i]; if (node != NUMA_NO_NODE && node_online(node)) return node; } @@ -339,26 +343,35 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) int node; unsigned apicid = c->apicid; - node = per_cpu(cpu_llc_id, cpu); + node = numa_cpu_node(cpu); + if (node == NUMA_NO_NODE) + node = per_cpu(cpu_llc_id, cpu); - if (apicid_to_node[apicid] != NUMA_NO_NODE) - node = apicid_to_node[apicid]; if (!node_online(node)) { - /* Two possibilities here: - - The CPU is missing memory and no node was created. - In that case try picking one from a nearby CPU - - The APIC IDs differ from the HyperTransport node IDs - which the K8 northbridge parsing fills in. - Assume they are all increased by a constant offset, - but in the same order as the HT nodeids. - If that doesn't result in a usable node fall back to the - path for the previous case. */ - + /* + * Two possibilities here: + * + * - The CPU is missing memory and no node was created. In + * that case try picking one from a nearby CPU. + * + * - The APIC IDs differ from the HyperTransport node IDs + * which the K8 northbridge parsing fills in. Assume + * they are all increased by a constant offset, but in + * the same order as the HT nodeids. If that doesn't + * result in a usable node fall back to the path for the + * previous case. + * + * This workaround operates directly on the mapping between + * APIC ID and NUMA node, assuming certain relationship + * between APIC ID, HT node ID and NUMA topology. As going + * through CPU mapping may alter the outcome, directly + * access __apicid_to_node[]. + */ int ht_nodeid = c->initial_apicid; if (ht_nodeid >= 0 && - apicid_to_node[ht_nodeid] != NUMA_NO_NODE) - node = apicid_to_node[ht_nodeid]; + __apicid_to_node[ht_nodeid] != NUMA_NO_NODE) + node = __apicid_to_node[ht_nodeid]; /* Pick a nearby node */ if (!node_online(node)) node = nearby_node(apicid); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index d16c2c53d6b..6052004bf4f 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -279,11 +279,10 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) unsigned node; int cpu = smp_processor_id(); - int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid; /* Don't do the funky fallback heuristics the AMD version employs for now. */ - node = apicid_to_node[apicid]; + node = numa_cpu_node(cpu); if (node == NUMA_NO_NODE || !node_online(node)) { /* reuse the value from init_cpu_to_node() */ node = cpu_to_node(cpu); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 5319cdd5376..b7cfce535cb 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -71,10 +71,6 @@ #include #include -#ifdef CONFIG_X86_32 -u8 apicid_2_node[MAX_LOCAL_APIC]; -#endif - /* State of each CPU */ DEFINE_PER_CPU(int, cpu_state) = { 0 }; @@ -170,7 +166,7 @@ static void map_cpu_to_logical_apicid(void) int cpu = smp_processor_id(); int node; - node = apic->x86_32_numa_cpu_node(cpu); + node = numa_cpu_node(cpu); if (!node_online(node)) node = first_online_node; diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c index f21962c435e..c7fae38c408 100644 --- a/arch/x86/mm/amdtopology_64.c +++ b/arch/x86/mm/amdtopology_64.c @@ -247,7 +247,7 @@ void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes) __acpi_map_pxm_to_node(nid, i); #endif } - memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node)); + memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node)); } #endif /* CONFIG_NUMA_EMU */ @@ -285,7 +285,7 @@ int __init amd_scan_nodes(void) nodes[i].start >> PAGE_SHIFT, nodes[i].end >> PAGE_SHIFT); for (j = apicid_base; j < cores + apicid_base; j++) - apicid_to_node[(i << bits) + j] = i; + set_apicid_to_node((i << bits) + j, i); setup_node_bootmem(i, nodes[i].start, nodes[i].end); } diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index ebf6d7887a3..480b3571c8b 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -26,8 +26,12 @@ static __init int numa_setup(char *opt) early_param("numa", numa_setup); /* - * Which logical CPUs are on which nodes + * apicid, cpu, node mappings */ +s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { + [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE +}; + cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; EXPORT_SYMBOL(node_to_cpumask_map); diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 84a3e4c9f27..8d91d227be0 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -110,6 +110,12 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); static unsigned long kva_start_pfn; static unsigned long kva_pages; + +int __cpuinit numa_cpu_node(int cpu) +{ + return apic->x86_32_numa_cpu_node(cpu); +} + /* * FLAT - support for basic PC memory model with discontig enabled, essentially * a single node with all available processors in it with a flat diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 95ea1551eeb..1e1026f61a5 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -26,10 +26,6 @@ EXPORT_SYMBOL(node_data); struct memnode memnode; -s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { - [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE -}; - static unsigned long __initdata nodemap_addr; static unsigned long __initdata nodemap_size; @@ -716,12 +712,8 @@ void __init init_cpu_to_node(void) BUG_ON(cpu_to_apicid == NULL); for_each_possible_cpu(cpu) { - int node; - u16 apicid = cpu_to_apicid[cpu]; + int node = numa_cpu_node(cpu); - if (apicid == BAD_APICID) - continue; - node = apicid_to_node[apicid]; if (node == NUMA_NO_NODE) continue; if (!node_online(node)) @@ -731,6 +723,14 @@ void __init init_cpu_to_node(void) } #endif +int __cpuinit numa_cpu_node(int cpu) +{ + int apicid = early_per_cpu(x86_cpu_to_apicid, cpu); + + if (apicid != BAD_APICID) + return __apicid_to_node[apicid]; + return NUMA_NO_NODE; +} void __cpuinit numa_set_node(int cpu, int node) { @@ -776,13 +776,9 @@ void __cpuinit numa_remove_cpu(int cpu) void __cpuinit numa_add_cpu(int cpu) { unsigned long addr; - u16 apicid; - int physnid; - int nid = NUMA_NO_NODE; + int physnid, nid; - apicid = early_per_cpu(x86_cpu_to_apicid, cpu); - if (apicid != BAD_APICID) - nid = apicid_to_node[apicid]; + nid = numa_cpu_node(cpu); if (nid == NUMA_NO_NODE) nid = early_cpu_to_node(cpu); BUG_ON(nid == NUMA_NO_NODE || !node_online(nid)); diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c index 6027a481000..48651c6f657 100644 --- a/arch/x86/mm/srat_32.c +++ b/arch/x86/mm/srat_32.c @@ -255,7 +255,7 @@ int __init get_memcfg_from_srat(void) num_memory_chunks); for (i = 0; i < MAX_LOCAL_APIC; i++) - apicid_2_node[i] = pxm_to_node(apicid_to_pxm[i]); + set_apicid_to_node(i, pxm_to_node(apicid_to_pxm[i])); for (j = 0; j < num_memory_chunks; j++){ struct node_memory_chunk_s * chunk = &node_memory_chunk[j]; diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 603d285d1da..9a97261a241 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -79,7 +79,7 @@ static __init void bad_srat(void) printk(KERN_ERR "SRAT: SRAT not used.\n"); acpi_numa = -1; for (i = 0; i < MAX_LOCAL_APIC; i++) - apicid_to_node[i] = NUMA_NO_NODE; + set_apicid_to_node(i, NUMA_NO_NODE); for (i = 0; i < MAX_NUMNODES; i++) { nodes[i].start = nodes[i].end = 0; nodes_add[i].start = nodes_add[i].end = 0; @@ -138,7 +138,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node); return; } - apicid_to_node[apic_id] = node; + set_apicid_to_node(apic_id, node); node_set(node, cpu_nodes_parsed); acpi_numa = 1; printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n", @@ -178,7 +178,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) return; } - apicid_to_node[apic_id] = node; + set_apicid_to_node(apic_id, node); node_set(node, cpu_nodes_parsed); acpi_numa = 1; printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n", @@ -521,7 +521,7 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) * node, it must now point to the fake node ID. */ for (j = 0; j < MAX_LOCAL_APIC; j++) - if (apicid_to_node[j] == nid && + if (__apicid_to_node[j] == nid && fake_apicid_to_node[j] == NUMA_NO_NODE) fake_apicid_to_node[j] = i; } @@ -532,13 +532,13 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) * value. */ for (i = 0; i < MAX_LOCAL_APIC; i++) - if (apicid_to_node[i] != NUMA_NO_NODE && + if (__apicid_to_node[i] != NUMA_NO_NODE && fake_apicid_to_node[i] == NUMA_NO_NODE) fake_apicid_to_node[i] = 0; for (i = 0; i < num_nodes; i++) __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i); - memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node)); + memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node)); nodes_clear(nodes_parsed); for (i = 0; i < num_nodes; i++) -- cgit v1.2.3-70-g09d2 From 940fed2e79a15cf0d006c860d7811adbe5c19882 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Feb 2011 12:13:06 +0100 Subject: x86-64, NUMA: Unify {acpi|amd}_{numa_init|scan_nodes}() arguments and return values The functions used during NUMA initialization - *_numa_init() and *_scan_nodes() - have different arguments and return values. Unify them such that they all take no argument and return 0 on success and -errno on failure. This is in preparation for further NUMA init cleanups. Signed-off-by: Tejun Heo Cc: Yinghai Lu Cc: Brian Gerst Cc: Cyrill Gorcunov Cc: Shaohui Zheng Cc: David Rientjes Cc: Ingo Molnar Cc: H. Peter Anvin --- arch/x86/include/asm/acpi.h | 2 +- arch/x86/include/asm/amd_nb.h | 2 +- arch/x86/kernel/setup.c | 4 ++-- arch/x86/mm/amdtopology_64.c | 18 +++++++++--------- arch/x86/mm/numa_64.c | 2 +- arch/x86/mm/srat_64.c | 4 ++-- drivers/acpi/numa.c | 9 ++++++--- 7 files changed, 22 insertions(+), 19 deletions(-) (limited to 'arch/x86/mm/amdtopology_64.c') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 211ca3f7fd1..4e5dff9e0b3 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -187,7 +187,7 @@ struct bootnode; extern int acpi_numa; extern void acpi_get_nodes(struct bootnode *physnodes, unsigned long start, unsigned long end); -extern int acpi_scan_nodes(unsigned long start, unsigned long end); +extern int acpi_scan_nodes(void); #define NR_NODE_MEMBLKS (MAX_NUMNODES*2) #ifdef CONFIG_NUMA_EMU diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 2b33c4df979..dc3c6e34da1 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -16,7 +16,7 @@ struct bootnode; extern int early_is_amd_nb(u32 value); extern int amd_cache_northbridges(void); extern void amd_flush_garts(void); -extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn); +extern int amd_numa_init(void); extern int amd_scan_nodes(void); extern int amd_get_subcaches(int); extern int amd_set_subcaches(int, int); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 756d640723f..96810a3c600 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -995,12 +995,12 @@ void __init setup_arch(char **cmdline_p) /* * Parse SRAT to discover nodes. */ - acpi = acpi_numa_init(); + acpi = !acpi_numa_init(); #endif #ifdef CONFIG_AMD_NUMA if (!acpi) - amd = !amd_numa_init(0, max_pfn); + amd = !amd_numa_init(); #endif initmem_init(acpi, amd); diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c index 2523c3554de..655ccffc6ee 100644 --- a/arch/x86/mm/amdtopology_64.c +++ b/arch/x86/mm/amdtopology_64.c @@ -51,7 +51,7 @@ static __init int find_northbridge(void) return num; } - return -1; + return -ENOENT; } static __init void early_get_boot_cpu_id(void) @@ -69,17 +69,17 @@ static __init void early_get_boot_cpu_id(void) #endif } -int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn) +int __init amd_numa_init(void) { - unsigned long start = PFN_PHYS(start_pfn); - unsigned long end = PFN_PHYS(end_pfn); + unsigned long start = PFN_PHYS(0); + unsigned long end = PFN_PHYS(max_pfn); unsigned numnodes; unsigned long prevbase; int i, nb, found = 0; u32 nodeid, reg; if (!early_pci_allowed()) - return -1; + return -EINVAL; nb = find_northbridge(); if (nb < 0) @@ -90,7 +90,7 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn) reg = read_pci_config(0, nb, 0, 0x60); numnodes = ((reg >> 4) & 0xF) + 1; if (numnodes <= 1) - return -1; + return -ENOENT; pr_info("Number of physical nodes %d\n", numnodes); @@ -121,7 +121,7 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn) if ((base >> 8) & 3 || (limit >> 8) & 3) { pr_err("Node %d using interleaving mode %lx/%lx\n", nodeid, (base >> 8) & 3, (limit >> 8) & 3); - return -1; + return -EINVAL; } if (node_isset(nodeid, nodes_parsed)) { pr_info("Node %d already present, skipping\n", @@ -160,7 +160,7 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn) if (prevbase > base) { pr_err("Node map not sorted %lx,%lx\n", prevbase, base); - return -1; + return -EINVAL; } pr_info("Node %d MemBase %016lx Limit %016lx\n", @@ -177,7 +177,7 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn) } if (!found) - return -1; + return -ENOENT; return 0; } diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index d7e4aafd075..a083f515f00 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -596,7 +596,7 @@ void __init initmem_init(int acpi, int amd) #endif #ifdef CONFIG_ACPI_NUMA - if (!numa_off && acpi && !acpi_scan_nodes(0, max_pfn << PAGE_SHIFT)) + if (!numa_off && acpi && !acpi_scan_nodes()) return; nodes_clear(node_possible_map); nodes_clear(node_online_map); diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 988b0b70ff3..4f9dbf066ca 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -359,7 +359,7 @@ void __init acpi_get_nodes(struct bootnode *physnodes, unsigned long start, #endif /* CONFIG_NUMA_EMU */ /* Use the information discovered above to actually set up the nodes. */ -int __init acpi_scan_nodes(unsigned long start, unsigned long end) +int __init acpi_scan_nodes(void) { int i; @@ -368,7 +368,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) /* First clean up the node list */ for (i = 0; i < MAX_NUMNODES; i++) - cutoff_node(i, start, end); + cutoff_node(i, 0, max_pfn << PAGE_SHIFT); /* * Join together blocks on the same node, holes between diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 5eb25eb3ea4..3b5c3189fd9 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -274,7 +274,7 @@ acpi_table_parse_srat(enum acpi_srat_type id, int __init acpi_numa_init(void) { - int ret = 0; + int cnt = 0; /* * Should not limit number with cpu num that is from NR_CPUS or nr_cpus= @@ -288,7 +288,7 @@ int __init acpi_numa_init(void) acpi_parse_x2apic_affinity, 0); acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY, acpi_parse_processor_affinity, 0); - ret = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, + cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, acpi_parse_memory_affinity, NR_NODE_MEMBLKS); } @@ -297,7 +297,10 @@ int __init acpi_numa_init(void) acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit); acpi_numa_arch_fixup(); - return ret; + + if (cnt <= 0) + return cnt ?: -ENOENT; + return 0; } int acpi_get_pxm(acpi_handle h) -- cgit v1.2.3-70-g09d2 From ec8cf29b1d39aeb6ef98bc589f0c9a33a8f94c49 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Feb 2011 12:13:07 +0100 Subject: x86-64, NUMA: Use common {cpu|mem}_nodes_parsed ACPI and amd are using separate nodes_parsed masks. Add {cpu|mem}_nodes_parsed and use them in all NUMA init methods. Initialization of the masks and building node_possible_map are now handled commonly by initmem_init(). dummy_numa_init() is updated to set node 0 on both masks. While at it, move the info messages from scan to init. Signed-off-by: Tejun Heo Cc: Yinghai Lu Cc: Brian Gerst Cc: Cyrill Gorcunov Cc: Shaohui Zheng Cc: David Rientjes Cc: Ingo Molnar Cc: H. Peter Anvin --- arch/x86/include/asm/numa_64.h | 3 +++ arch/x86/mm/amdtopology_64.c | 10 ++++------ arch/x86/mm/numa_64.c | 25 ++++++++++++++++++------- arch/x86/mm/srat_64.c | 17 ++++++----------- 4 files changed, 31 insertions(+), 24 deletions(-) (limited to 'arch/x86/mm/amdtopology_64.c') diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index 2819afa3363..de459365d52 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h @@ -27,6 +27,9 @@ extern void setup_node_bootmem(int nodeid, unsigned long start, */ #define NODE_MIN_SIZE (4*1024*1024) +extern nodemask_t cpu_nodes_parsed __initdata; +extern nodemask_t mem_nodes_parsed __initdata; + extern int __cpuinit numa_cpu_node(int cpu); #ifdef CONFIG_NUMA_EMU diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c index 655ccffc6ee..4f822a24712 100644 --- a/arch/x86/mm/amdtopology_64.c +++ b/arch/x86/mm/amdtopology_64.c @@ -28,7 +28,6 @@ static struct bootnode __initdata nodes[8]; static unsigned char __initdata nodeids[8]; -static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE; static __init int find_northbridge(void) { @@ -123,7 +122,7 @@ int __init amd_numa_init(void) nodeid, (base >> 8) & 3, (limit >> 8) & 3); return -EINVAL; } - if (node_isset(nodeid, nodes_parsed)) { + if (node_isset(nodeid, mem_nodes_parsed)) { pr_info("Node %d already present, skipping\n", nodeid); continue; @@ -173,7 +172,8 @@ int __init amd_numa_init(void) prevbase = base; - node_set(nodeid, nodes_parsed); + node_set(nodeid, mem_nodes_parsed); + node_set(nodeid, cpu_nodes_parsed); } if (!found) @@ -190,7 +190,7 @@ void __init amd_get_nodes(struct bootnode *physnodes) { int i; - for_each_node_mask(i, nodes_parsed) { + for_each_node_mask(i, mem_nodes_parsed) { physnodes[i].start = nodes[i].start; physnodes[i].end = nodes[i].end; } @@ -258,8 +258,6 @@ int __init amd_scan_nodes(void) unsigned int apicid_base; int i; - BUG_ON(nodes_empty(nodes_parsed)); - node_possible_map = nodes_parsed; memnode_shift = compute_hash_shift(nodes, 8, NULL); if (memnode_shift < 0) { pr_err("No NUMA node hash function found. Contact maintainer\n"); diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index c984e3431cc..4404e1d649a 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -25,6 +25,9 @@ struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); +nodemask_t cpu_nodes_parsed __initdata; +nodemask_t mem_nodes_parsed __initdata; + struct memnode memnode; static unsigned long __initdata nodemap_addr; @@ -581,23 +584,24 @@ static int __init numa_emulation(unsigned long start_pfn, #endif /* CONFIG_NUMA_EMU */ static int dummy_numa_init(void) -{ - return 0; -} - -static int dummy_scan_nodes(void) { printk(KERN_INFO "%s\n", numa_off ? "NUMA turned off" : "No NUMA configuration found"); printk(KERN_INFO "Faking a node at %016lx-%016lx\n", 0LU, max_pfn << PAGE_SHIFT); + node_set(0, cpu_nodes_parsed); + node_set(0, mem_nodes_parsed); + + return 0; +} + +static int dummy_scan_nodes(void) +{ /* setup dummy node covering all memory */ memnode_shift = 63; memnodemap = memnode.embedded_map; memnodemap[0] = 0; - node_set_online(0); - node_set(0, node_possible_map); memblock_x86_register_active_regions(0, 0, max_pfn); init_memory_mapping_high(); setup_node_bootmem(0, 0, max_pfn << PAGE_SHIFT); @@ -630,6 +634,8 @@ void __init initmem_init(void) for (j = 0; j < MAX_LOCAL_APIC; j++) set_apicid_to_node(j, NUMA_NO_NODE); + nodes_clear(cpu_nodes_parsed); + nodes_clear(mem_nodes_parsed); nodes_clear(node_possible_map); nodes_clear(node_online_map); @@ -643,6 +649,11 @@ void __init initmem_init(void) nodes_clear(node_possible_map); nodes_clear(node_online_map); #endif + /* Account for nodes with cpus and no memory */ + nodes_or(node_possible_map, mem_nodes_parsed, cpu_nodes_parsed); + if (WARN_ON(nodes_empty(node_possible_map))) + continue; + if (!scan_nodes[i]()) return; } diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 597e011cfb5..33e72ec4fa4 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -28,8 +28,6 @@ int acpi_numa __initdata; static struct acpi_table_slit *acpi_slit; -static nodemask_t nodes_parsed __initdata; -static nodemask_t cpu_nodes_parsed __initdata; static struct bootnode nodes[MAX_NUMNODES] __initdata; static struct bootnode nodes_add[MAX_NUMNODES]; @@ -293,7 +291,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)) { nd = &nodes[node]; - if (!node_test_and_set(node, nodes_parsed)) { + if (!node_test_and_set(node, mem_nodes_parsed)) { nd->start = start; nd->end = end; } else { @@ -319,7 +317,7 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) unsigned long pxmram, e820ram; pxmram = 0; - for_each_node_mask(i, nodes_parsed) { + for_each_node_mask(i, mem_nodes_parsed) { unsigned long s = nodes[i].start >> PAGE_SHIFT; unsigned long e = nodes[i].end >> PAGE_SHIFT; pxmram += e - s; @@ -348,7 +346,7 @@ void __init acpi_get_nodes(struct bootnode *physnodes, unsigned long start, { int i; - for_each_node_mask(i, nodes_parsed) { + for_each_node_mask(i, mem_nodes_parsed) { cutoff_node(i, start, end); physnodes[i].start = nodes[i].start; physnodes[i].end = nodes[i].end; @@ -449,9 +447,6 @@ int __init acpi_scan_nodes(void) init_memory_mapping_high(); - /* Account for nodes with cpus and no memory */ - nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed); - /* Finally register nodes */ for_each_node_mask(i, node_possible_map) setup_node_bootmem(i, nodes[i].start, nodes[i].end); @@ -485,7 +480,7 @@ static int __init find_node_by_addr(unsigned long addr) int ret = NUMA_NO_NODE; int i; - for_each_node_mask(i, nodes_parsed) { + for_each_node_mask(i, mem_nodes_parsed) { /* * Find the real node that this emulated node appears on. For * the sake of simplicity, we only use a real node's starting @@ -545,10 +540,10 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i); memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node)); - nodes_clear(nodes_parsed); + nodes_clear(mem_nodes_parsed); for (i = 0; i < num_nodes; i++) if (fake_nodes[i].start != fake_nodes[i].end) - node_set(i, nodes_parsed); + node_set(i, mem_nodes_parsed); } static int null_slit_node_compare(int a, int b) -- cgit v1.2.3-70-g09d2 From 99df738cd28cc39054cd1a77685d4a94ed2193a4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Feb 2011 12:13:07 +0100 Subject: x86-64, NUMA: Remove local variable found from amd_numa_init() Use weight count on mem_nodes_parsed instead. Signed-off-by: Tejun Heo Cc: Yinghai Lu Cc: Brian Gerst Cc: Cyrill Gorcunov Cc: Shaohui Zheng Cc: David Rientjes Cc: Ingo Molnar Cc: H. Peter Anvin --- arch/x86/mm/amdtopology_64.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86/mm/amdtopology_64.c') diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c index 4f822a24712..7d85cf7e032 100644 --- a/arch/x86/mm/amdtopology_64.c +++ b/arch/x86/mm/amdtopology_64.c @@ -74,7 +74,7 @@ int __init amd_numa_init(void) unsigned long end = PFN_PHYS(max_pfn); unsigned numnodes; unsigned long prevbase; - int i, nb, found = 0; + int i, nb; u32 nodeid, reg; if (!early_pci_allowed()) @@ -165,8 +165,6 @@ int __init amd_numa_init(void) pr_info("Node %d MemBase %016lx Limit %016lx\n", nodeid, base, limit); - found++; - nodes[nodeid].start = base; nodes[nodeid].end = limit; @@ -176,7 +174,7 @@ int __init amd_numa_init(void) node_set(nodeid, cpu_nodes_parsed); } - if (!found) + if (!nodes_weight(mem_nodes_parsed)) return -ENOENT; return 0; } -- cgit v1.2.3-70-g09d2 From 45fe6c78c4ccc384044d1b4877eebe7acf359e76 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Feb 2011 12:13:07 +0100 Subject: x86-64, NUMA: Move apicid to numa mapping initialization from amd_scan_nodes() to amd_numa_init() This brings amd initialization behavior closer to that of acpi. Signed-off-by: Tejun Heo Cc: Yinghai Lu Cc: Brian Gerst Cc: Cyrill Gorcunov Cc: Shaohui Zheng Cc: David Rientjes Cc: Ingo Molnar Cc: H. Peter Anvin --- arch/x86/mm/amdtopology_64.c | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) (limited to 'arch/x86/mm/amdtopology_64.c') diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c index 7d85cf7e032..b6029a6b129 100644 --- a/arch/x86/mm/amdtopology_64.c +++ b/arch/x86/mm/amdtopology_64.c @@ -74,8 +74,9 @@ int __init amd_numa_init(void) unsigned long end = PFN_PHYS(max_pfn); unsigned numnodes; unsigned long prevbase; - int i, nb; + int i, j, nb; u32 nodeid, reg; + unsigned int bits, cores, apicid_base; if (!early_pci_allowed()) return -EINVAL; @@ -176,6 +177,26 @@ int __init amd_numa_init(void) if (!nodes_weight(mem_nodes_parsed)) return -ENOENT; + + /* + * We seem to have valid NUMA configuration. Map apicids to nodes + * using the coreid bits from early_identify_cpu. + */ + bits = boot_cpu_data.x86_coreid_bits; + cores = 1 << bits; + apicid_base = 0; + + /* get the APIC ID of the BSP early for systems with apicid lifting */ + early_get_boot_cpu_id(); + if (boot_cpu_physical_apicid > 0) { + pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid); + apicid_base = boot_cpu_physical_apicid; + } + + for_each_node_mask(i, cpu_nodes_parsed) + for (j = apicid_base; j < cores + apicid_base; j++) + set_apicid_to_node((i << bits) + j, i); + return 0; } @@ -251,9 +272,6 @@ void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes) int __init amd_scan_nodes(void) { - unsigned int bits; - unsigned int cores; - unsigned int apicid_base; int i; memnode_shift = compute_hash_shift(nodes, 8, NULL); @@ -264,28 +282,13 @@ int __init amd_scan_nodes(void) pr_info("Using node hash shift of %d\n", memnode_shift); /* use the coreid bits from early_identify_cpu */ - bits = boot_cpu_data.x86_coreid_bits; - cores = (1< 0) { - pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid); - apicid_base = boot_cpu_physical_apicid; - } - for_each_node_mask(i, node_possible_map) memblock_x86_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, nodes[i].end >> PAGE_SHIFT); init_memory_mapping_high(); - for_each_node_mask(i, node_possible_map) { - int j; - - for (j = apicid_base; j < cores + apicid_base; j++) - set_apicid_to_node((i << bits) + j, i); + for_each_node_mask(i, node_possible_map) setup_node_bootmem(i, nodes[i].start, nodes[i].end); - } numa_init_array(); return 0; -- cgit v1.2.3-70-g09d2 From 206e42087a037fa3adca8908fd318a0cb64d4dee Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Feb 2011 12:13:07 +0100 Subject: x86-64, NUMA: Use common numa_nodes[] ACPI and amd are using separate nodes[] array. Add numa_nodes[] and use them in all NUMA init methods. cutoff_node() cleanup is moved from srat_64.c to numa_64.c and applied in initmem_init() regardless of init methods. Signed-off-by: Tejun Heo Cc: Yinghai Lu Cc: Brian Gerst Cc: Cyrill Gorcunov Cc: Shaohui Zheng Cc: David Rientjes Cc: Ingo Molnar Cc: H. Peter Anvin --- arch/x86/include/asm/numa_64.h | 1 + arch/x86/mm/amdtopology_64.c | 19 +++++++++---------- arch/x86/mm/numa_64.c | 24 +++++++++++++++++++++++ arch/x86/mm/srat_64.c | 43 +++++++++++------------------------------- 4 files changed, 45 insertions(+), 42 deletions(-) (limited to 'arch/x86/mm/amdtopology_64.c') diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index de459365d52..d3a45147d09 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h @@ -29,6 +29,7 @@ extern void setup_node_bootmem(int nodeid, unsigned long start, extern nodemask_t cpu_nodes_parsed __initdata; extern nodemask_t mem_nodes_parsed __initdata; +extern struct bootnode numa_nodes[MAX_NUMNODES] __initdata; extern int __cpuinit numa_cpu_node(int cpu); diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c index b6029a6b129..f049fa67ed7 100644 --- a/arch/x86/mm/amdtopology_64.c +++ b/arch/x86/mm/amdtopology_64.c @@ -26,7 +26,6 @@ #include #include -static struct bootnode __initdata nodes[8]; static unsigned char __initdata nodeids[8]; static __init int find_northbridge(void) @@ -166,8 +165,8 @@ int __init amd_numa_init(void) pr_info("Node %d MemBase %016lx Limit %016lx\n", nodeid, base, limit); - nodes[nodeid].start = base; - nodes[nodeid].end = limit; + numa_nodes[nodeid].start = base; + numa_nodes[nodeid].end = limit; prevbase = base; @@ -210,8 +209,8 @@ void __init amd_get_nodes(struct bootnode *physnodes) int i; for_each_node_mask(i, mem_nodes_parsed) { - physnodes[i].start = nodes[i].start; - physnodes[i].end = nodes[i].end; + physnodes[i].start = numa_nodes[i].start; + physnodes[i].end = numa_nodes[i].end; } } @@ -221,7 +220,7 @@ static int __init find_node_by_addr(unsigned long addr) int i; for (i = 0; i < 8; i++) - if (addr >= nodes[i].start && addr < nodes[i].end) { + if (addr >= numa_nodes[i].start && addr < numa_nodes[i].end) { ret = i; break; } @@ -274,7 +273,7 @@ int __init amd_scan_nodes(void) { int i; - memnode_shift = compute_hash_shift(nodes, 8, NULL); + memnode_shift = compute_hash_shift(numa_nodes, 8, NULL); if (memnode_shift < 0) { pr_err("No NUMA node hash function found. Contact maintainer\n"); return -1; @@ -284,11 +283,11 @@ int __init amd_scan_nodes(void) /* use the coreid bits from early_identify_cpu */ for_each_node_mask(i, node_possible_map) memblock_x86_register_active_regions(i, - nodes[i].start >> PAGE_SHIFT, - nodes[i].end >> PAGE_SHIFT); + numa_nodes[i].start >> PAGE_SHIFT, + numa_nodes[i].end >> PAGE_SHIFT); init_memory_mapping_high(); for_each_node_mask(i, node_possible_map) - setup_node_bootmem(i, nodes[i].start, nodes[i].end); + setup_node_bootmem(i, numa_nodes[i].start, numa_nodes[i].end); numa_init_array(); return 0; diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 4404e1d649a..a6b899f7ddd 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -33,6 +33,8 @@ struct memnode memnode; static unsigned long __initdata nodemap_addr; static unsigned long __initdata nodemap_size; +struct bootnode numa_nodes[MAX_NUMNODES] __initdata; + /* * Given a shift value, try to populate memnodemap[] * Returns : @@ -182,6 +184,22 @@ static void * __init early_node_mem(int nodeid, unsigned long start, return NULL; } +static __init void cutoff_node(int i, unsigned long start, unsigned long end) +{ + struct bootnode *nd = &numa_nodes[i]; + + if (nd->start < start) { + nd->start = start; + if (nd->end < nd->start) + nd->start = nd->end; + } + if (nd->end > end) { + nd->end = end; + if (nd->start > nd->end) + nd->start = nd->end; + } +} + /* Initialize bootmem allocator for a node */ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) @@ -638,9 +656,15 @@ void __init initmem_init(void) nodes_clear(mem_nodes_parsed); nodes_clear(node_possible_map); nodes_clear(node_online_map); + memset(numa_nodes, 0, sizeof(numa_nodes)); if (numa_init[i]() < 0) continue; + + /* clean up the node list */ + for (j = 0; j < MAX_NUMNODES; j++) + cutoff_node(j, 0, max_pfn << PAGE_SHIFT); + #ifdef CONFIG_NUMA_EMU setup_physnodes(0, max_pfn << PAGE_SHIFT, i == 0, i == 1); if (cmdline && !numa_emulation(0, max_pfn, i == 0, i == 1)) diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 33e72ec4fa4..bfa4a6af5cf 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -28,7 +28,6 @@ int acpi_numa __initdata; static struct acpi_table_slit *acpi_slit; -static struct bootnode nodes[MAX_NUMNODES] __initdata; static struct bootnode nodes_add[MAX_NUMNODES]; static int num_node_memblks __initdata; @@ -55,29 +54,13 @@ static __init int conflicting_memblks(unsigned long start, unsigned long end) return -1; } -static __init void cutoff_node(int i, unsigned long start, unsigned long end) -{ - struct bootnode *nd = &nodes[i]; - - if (nd->start < start) { - nd->start = start; - if (nd->end < nd->start) - nd->start = nd->end; - } - if (nd->end > end) { - nd->end = end; - if (nd->start > nd->end) - nd->start = nd->end; - } -} - static __init void bad_srat(void) { int i; printk(KERN_ERR "SRAT: SRAT not used.\n"); acpi_numa = -1; for (i = 0; i < MAX_NUMNODES; i++) { - nodes[i].start = nodes[i].end = 0; + numa_nodes[i].start = numa_nodes[i].end = 0; nodes_add[i].start = nodes_add[i].end = 0; } remove_all_active_ranges(); @@ -276,12 +259,12 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) if (i == node) { printk(KERN_WARNING "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n", - pxm, start, end, nodes[i].start, nodes[i].end); + pxm, start, end, numa_nodes[i].start, numa_nodes[i].end); } else if (i >= 0) { printk(KERN_ERR "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n", pxm, start, end, node_to_pxm(i), - nodes[i].start, nodes[i].end); + numa_nodes[i].start, numa_nodes[i].end); bad_srat(); return; } @@ -290,7 +273,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) start, end); if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)) { - nd = &nodes[node]; + nd = &numa_nodes[node]; if (!node_test_and_set(node, mem_nodes_parsed)) { nd->start = start; nd->end = end; @@ -347,9 +330,8 @@ void __init acpi_get_nodes(struct bootnode *physnodes, unsigned long start, int i; for_each_node_mask(i, mem_nodes_parsed) { - cutoff_node(i, start, end); - physnodes[i].start = nodes[i].start; - physnodes[i].end = nodes[i].end; + physnodes[i].start = numa_nodes[i].start; + physnodes[i].end = numa_nodes[i].end; } } #endif /* CONFIG_NUMA_EMU */ @@ -372,10 +354,6 @@ int __init acpi_scan_nodes(void) if (acpi_numa <= 0) return -1; - /* First clean up the node list */ - for (i = 0; i < MAX_NUMNODES; i++) - cutoff_node(i, 0, max_pfn << PAGE_SHIFT); - /* * Join together blocks on the same node, holes between * which don't overlap with memory on other nodes. @@ -440,7 +418,7 @@ int __init acpi_scan_nodes(void) /* for out of order entries in SRAT */ sort_node_map(); - if (!nodes_cover_memory(nodes)) { + if (!nodes_cover_memory(numa_nodes)) { bad_srat(); return -1; } @@ -449,12 +427,13 @@ int __init acpi_scan_nodes(void) /* Finally register nodes */ for_each_node_mask(i, node_possible_map) - setup_node_bootmem(i, nodes[i].start, nodes[i].end); + setup_node_bootmem(i, numa_nodes[i].start, numa_nodes[i].end); /* Try again in case setup_node_bootmem missed one due to missing bootmem */ for_each_node_mask(i, node_possible_map) if (!node_online(i)) - setup_node_bootmem(i, nodes[i].start, nodes[i].end); + setup_node_bootmem(i, numa_nodes[i].start, + numa_nodes[i].end); for (i = 0; i < nr_cpu_ids; i++) { int node = early_cpu_to_node(i); @@ -486,7 +465,7 @@ static int __init find_node_by_addr(unsigned long addr) * the sake of simplicity, we only use a real node's starting * address to determine which emulated node it appears on. */ - if (addr >= nodes[i].start && addr < nodes[i].end) { + if (addr >= numa_nodes[i].start && addr < numa_nodes[i].end) { ret = i; break; } -- cgit v1.2.3-70-g09d2 From 19095548704ecd0f32fd5deba01d56430ad7a344 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Feb 2011 12:13:07 +0100 Subject: x86-64, NUMA: Kill {acpi|amd}_get_nodes() With common numa_nodes[], common code in numa_64.c can access it directly. Copy directly and kill {acpi|amd}_get_nodes(). Signed-off-by: Tejun Heo Cc: Yinghai Lu Cc: Brian Gerst Cc: Cyrill Gorcunov Cc: Shaohui Zheng Cc: David Rientjes Cc: Ingo Molnar Cc: H. Peter Anvin --- arch/x86/include/asm/acpi.h | 2 -- arch/x86/include/asm/amd_nb.h | 1 - arch/x86/mm/amdtopology_64.c | 10 ---------- arch/x86/mm/numa_64.c | 23 ++++++++++------------- arch/x86/mm/srat_64.c | 13 ------------- 5 files changed, 10 insertions(+), 39 deletions(-) (limited to 'arch/x86/mm/amdtopology_64.c') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 06fb7865eff..446a5b9a1d5 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -185,8 +185,6 @@ struct bootnode; #ifdef CONFIG_ACPI_NUMA extern int acpi_numa; -extern void acpi_get_nodes(struct bootnode *physnodes, unsigned long start, - unsigned long end); extern int x86_acpi_numa_init(void); extern int acpi_scan_nodes(void); #define NR_NODE_MEMBLKS (MAX_NUMNODES*2) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index dc3c6e34da1..246cdc6ca9b 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -23,7 +23,6 @@ extern int amd_set_subcaches(int, int); #ifdef CONFIG_NUMA_EMU extern void amd_fake_nodes(const struct bootnode *nodes, int nr_nodes); -extern void amd_get_nodes(struct bootnode *nodes); #endif struct amd_northbridge { diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c index f049fa67ed7..cf29527885f 100644 --- a/arch/x86/mm/amdtopology_64.c +++ b/arch/x86/mm/amdtopology_64.c @@ -204,16 +204,6 @@ static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = { [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE }; -void __init amd_get_nodes(struct bootnode *physnodes) -{ - int i; - - for_each_node_mask(i, mem_nodes_parsed) { - physnodes[i].start = numa_nodes[i].start; - physnodes[i].end = numa_nodes[i].end; - } -} - static int __init find_node_by_addr(unsigned long addr) { int ret = NUMA_NO_NODE; diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index a6b899f7ddd..82ee3083b09 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -257,21 +257,18 @@ void __init numa_emu_cmdline(char *str) cmdline = str; } -static int __init setup_physnodes(unsigned long start, unsigned long end, - int acpi, int amd) +static int __init setup_physnodes(unsigned long start, unsigned long end) { int ret = 0; int i; memset(physnodes, 0, sizeof(physnodes)); -#ifdef CONFIG_ACPI_NUMA - if (acpi) - acpi_get_nodes(physnodes, start, end); -#endif -#ifdef CONFIG_AMD_NUMA - if (amd) - amd_get_nodes(physnodes); -#endif + + for_each_node_mask(i, mem_nodes_parsed) { + physnodes[i].start = numa_nodes[i].start; + physnodes[i].end = numa_nodes[i].end; + } + /* * Basic sanity checking on the physical node map: there may be errors * if the SRAT or AMD code incorrectly reported the topology or the mem= @@ -594,7 +591,7 @@ static int __init numa_emulation(unsigned long start_pfn, init_memory_mapping_high(); for_each_node_mask(i, node_possible_map) setup_node_bootmem(i, nodes[i].start, nodes[i].end); - setup_physnodes(addr, max_addr, acpi, amd); + setup_physnodes(addr, max_addr); fake_physnodes(acpi, amd, num_nodes); numa_init_array(); return 0; @@ -666,10 +663,10 @@ void __init initmem_init(void) cutoff_node(j, 0, max_pfn << PAGE_SHIFT); #ifdef CONFIG_NUMA_EMU - setup_physnodes(0, max_pfn << PAGE_SHIFT, i == 0, i == 1); + setup_physnodes(0, max_pfn << PAGE_SHIFT); if (cmdline && !numa_emulation(0, max_pfn, i == 0, i == 1)) return; - setup_physnodes(0, max_pfn << PAGE_SHIFT, i == 0, i == 1); + setup_physnodes(0, max_pfn << PAGE_SHIFT); nodes_clear(node_possible_map); nodes_clear(node_online_map); #endif diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index bfa4a6af5cf..82b1087963a 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -323,19 +323,6 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) void __init acpi_numa_arch_fixup(void) {} -#ifdef CONFIG_NUMA_EMU -void __init acpi_get_nodes(struct bootnode *physnodes, unsigned long start, - unsigned long end) -{ - int i; - - for_each_node_mask(i, mem_nodes_parsed) { - physnodes[i].start = numa_nodes[i].start; - physnodes[i].end = numa_nodes[i].end; - } -} -#endif /* CONFIG_NUMA_EMU */ - int __init x86_acpi_numa_init(void) { int ret; -- cgit v1.2.3-70-g09d2 From 43a662f04f731c331706456c9852ef7146ba5d85 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Feb 2011 17:11:08 +0100 Subject: x86-64, NUMA: Unify use of memblk in all init methods Make both amd and dummy use numa_add_memblk() to describe the detected memory blocks. This allows initmem_init() to call numa_register_memblk() regardless of init method in use. Drop custom memory registration codes from amd and dummy. After this change, memblk merge/cleanup in numa_register_memblks() is applied to all init methods. As this makes compute_hash_shift() and numa_register_memblks() used only inside numa_64.c, make them static. Signed-off-by: Tejun Heo Cc: Yinghai Lu Cc: Brian Gerst Cc: Cyrill Gorcunov Cc: Shaohui Zheng Cc: David Rientjes Cc: Ingo Molnar Cc: H. Peter Anvin --- arch/x86/include/asm/numa_64.h | 4 ---- arch/x86/mm/amdtopology_64.c | 13 +------------ arch/x86/mm/numa_64.c | 15 +++++++-------- arch/x86/mm/srat_64.c | 5 ----- 4 files changed, 8 insertions(+), 29 deletions(-) (limited to 'arch/x86/mm/amdtopology_64.c') diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index 3306a2b99ec..e925605150a 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h @@ -8,9 +8,6 @@ struct bootnode { u64 end; }; -extern int compute_hash_shift(struct bootnode *nodes, int numblks, - int *nodeids); - #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) extern int numa_off; @@ -33,7 +30,6 @@ extern struct bootnode numa_nodes[MAX_NUMNODES] __initdata; extern int __cpuinit numa_cpu_node(int cpu); extern int __init numa_add_memblk(int nodeid, u64 start, u64 end); -extern int __init numa_register_memblks(void); #ifdef CONFIG_NUMA_EMU #define FAKE_NODE_MIN_SIZE ((u64)32 << 20) diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c index cf29527885f..d6d7aa4b98c 100644 --- a/arch/x86/mm/amdtopology_64.c +++ b/arch/x86/mm/amdtopology_64.c @@ -167,6 +167,7 @@ int __init amd_numa_init(void) numa_nodes[nodeid].start = base; numa_nodes[nodeid].end = limit; + numa_add_memblk(nodeid, base, limit); prevbase = base; @@ -263,18 +264,6 @@ int __init amd_scan_nodes(void) { int i; - memnode_shift = compute_hash_shift(numa_nodes, 8, NULL); - if (memnode_shift < 0) { - pr_err("No NUMA node hash function found. Contact maintainer\n"); - return -1; - } - pr_info("Using node hash shift of %d\n", memnode_shift); - - /* use the coreid bits from early_identify_cpu */ - for_each_node_mask(i, node_possible_map) - memblock_x86_register_active_regions(i, - numa_nodes[i].start >> PAGE_SHIFT, - numa_nodes[i].end >> PAGE_SHIFT); init_memory_mapping_high(); for_each_node_mask(i, node_possible_map) setup_node_bootmem(i, numa_nodes[i].start, numa_nodes[i].end); diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index a1d702d2584..552080e8472 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -131,8 +131,8 @@ static int __init extract_lsb_from_nodes(const struct bootnode *nodes, return i; } -int __init compute_hash_shift(struct bootnode *nodes, int numnodes, - int *nodeids) +static int __init compute_hash_shift(struct bootnode *nodes, int numnodes, + int *nodeids) { int shift; @@ -287,7 +287,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) node_set_online(nodeid); } -int __init numa_register_memblks(void) +static int __init numa_register_memblks(void) { int i; @@ -713,17 +713,13 @@ static int dummy_numa_init(void) node_set(0, cpu_nodes_parsed); node_set(0, mem_nodes_parsed); + numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT); return 0; } static int dummy_scan_nodes(void) { - /* setup dummy node covering all memory */ - memnode_shift = 63; - memnodemap = memnode.embedded_map; - memnodemap[0] = 0; - memblock_x86_register_active_regions(0, 0, max_pfn); init_memory_mapping_high(); setup_node_bootmem(0, 0, max_pfn << PAGE_SHIFT); numa_init_array(); @@ -784,6 +780,9 @@ void __init initmem_init(void) if (WARN_ON(nodes_empty(node_possible_map))) continue; + if (numa_register_memblks() < 0) + continue; + if (!scan_nodes[i]()) return; } diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 341b37193c7..69f147116da 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -308,11 +308,6 @@ int __init acpi_scan_nodes(void) if (acpi_numa <= 0) return -1; - if (numa_register_memblks() < 0) { - bad_srat(); - return -1; - } - /* for out of order entries in SRAT */ sort_node_map(); if (!nodes_cover_memory(numa_nodes)) { -- cgit v1.2.3-70-g09d2 From fd0435d8fb1d4e5771f9ae3af71f2a77c1f4bd09 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Feb 2011 17:11:08 +0100 Subject: x86-64, NUMA: Unify the rest of memblk registration Move the remaining memblk registration logic from acpi_scan_nodes() to numa_register_memblks() and initmem_init(). This applies nodes_cover_memory() sanity check, memory node sorting and node_online() checking, which were only applied to acpi, to all init methods. As all memblk registration is moved to common code, active range clearing is moved to initmem_init() too and removed from bad_srat(). Signed-off-by: Tejun Heo Cc: Yinghai Lu Cc: Brian Gerst Cc: Cyrill Gorcunov Cc: Shaohui Zheng Cc: David Rientjes Cc: Ingo Molnar Cc: H. Peter Anvin --- arch/x86/mm/amdtopology_64.c | 7 ----- arch/x86/mm/numa_64.c | 74 ++++++++++++++++++++++++++++++++++++++++---- arch/x86/mm/srat_64.c | 61 ------------------------------------ 3 files changed, 68 insertions(+), 74 deletions(-) (limited to 'arch/x86/mm/amdtopology_64.c') diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c index d6d7aa4b98c..9c9f46adf41 100644 --- a/arch/x86/mm/amdtopology_64.c +++ b/arch/x86/mm/amdtopology_64.c @@ -262,12 +262,5 @@ void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes) int __init amd_scan_nodes(void) { - int i; - - init_memory_mapping_high(); - for_each_node_mask(i, node_possible_map) - setup_node_bootmem(i, numa_nodes[i].start, numa_nodes[i].end); - - numa_init_array(); return 0; } diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 552080e8472..748c6b5bff6 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -287,6 +287,37 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) node_set_online(nodeid); } +/* + * Sanity check to catch more bad NUMA configurations (they are amazingly + * common). Make sure the nodes cover all memory. + */ +static int __init nodes_cover_memory(const struct bootnode *nodes) +{ + unsigned long numaram, e820ram; + int i; + + numaram = 0; + for_each_node_mask(i, mem_nodes_parsed) { + unsigned long s = nodes[i].start >> PAGE_SHIFT; + unsigned long e = nodes[i].end >> PAGE_SHIFT; + numaram += e - s; + numaram -= __absent_pages_in_range(i, s, e); + if ((long)numaram < 0) + numaram = 0; + } + + e820ram = max_pfn - + (memblock_x86_hole_size(0, max_pfn<> PAGE_SHIFT); + /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ + if ((long)(e820ram - numaram) >= (1<<(20 - PAGE_SHIFT))) { + printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n", + (numaram << PAGE_SHIFT) >> 20, + (e820ram << PAGE_SHIFT) >> 20); + return 0; + } + return 1; +} + static int __init numa_register_memblks(void) { int i; @@ -349,6 +380,27 @@ static int __init numa_register_memblks(void) memblock_x86_register_active_regions(memblk_nodeid[i], node_memblk_range[i].start >> PAGE_SHIFT, node_memblk_range[i].end >> PAGE_SHIFT); + + /* for out of order entries */ + sort_node_map(); + if (!nodes_cover_memory(numa_nodes)) + return -EINVAL; + + init_memory_mapping_high(); + + /* Finally register nodes. */ + for_each_node_mask(i, node_possible_map) + setup_node_bootmem(i, numa_nodes[i].start, numa_nodes[i].end); + + /* + * Try again in case setup_node_bootmem missed one due to missing + * bootmem. + */ + for_each_node_mask(i, node_possible_map) + if (!node_online(i)) + setup_node_bootmem(i, numa_nodes[i].start, + numa_nodes[i].end); + return 0; } @@ -714,16 +766,14 @@ static int dummy_numa_init(void) node_set(0, cpu_nodes_parsed); node_set(0, mem_nodes_parsed); numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT); + numa_nodes[0].start = 0; + numa_nodes[0].end = (u64)max_pfn << PAGE_SHIFT; return 0; } static int dummy_scan_nodes(void) { - init_memory_mapping_high(); - setup_node_bootmem(0, 0, max_pfn << PAGE_SHIFT); - numa_init_array(); - return 0; } @@ -759,6 +809,7 @@ void __init initmem_init(void) memset(node_memblk_range, 0, sizeof(node_memblk_range)); memset(memblk_nodeid, 0, sizeof(memblk_nodeid)); memset(numa_nodes, 0, sizeof(numa_nodes)); + remove_all_active_ranges(); if (numa_init[i]() < 0) continue; @@ -783,8 +834,19 @@ void __init initmem_init(void) if (numa_register_memblks() < 0) continue; - if (!scan_nodes[i]()) - return; + if (scan_nodes[i]() < 0) + continue; + + for (j = 0; j < nr_cpu_ids; j++) { + int nid = early_cpu_to_node(j); + + if (nid == NUMA_NO_NODE) + continue; + if (!node_online(nid)) + numa_clear_node(j); + } + numa_init_array(); + return; } BUG(); } diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 69f147116da..4a2c33b0a48 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -44,7 +44,6 @@ static __init void bad_srat(void) numa_nodes[i].start = numa_nodes[i].end = 0; nodes_add[i].start = nodes_add[i].end = 0; } - remove_all_active_ranges(); } static __init inline int srat_disabled(void) @@ -259,35 +258,6 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) update_nodes_add(node, start, end); } -/* Sanity check to catch more bad SRATs (they are amazingly common). - Make sure the PXMs cover all memory. */ -static int __init nodes_cover_memory(const struct bootnode *nodes) -{ - int i; - unsigned long pxmram, e820ram; - - pxmram = 0; - for_each_node_mask(i, mem_nodes_parsed) { - unsigned long s = nodes[i].start >> PAGE_SHIFT; - unsigned long e = nodes[i].end >> PAGE_SHIFT; - pxmram += e - s; - pxmram -= __absent_pages_in_range(i, s, e); - if ((long)pxmram < 0) - pxmram = 0; - } - - e820ram = max_pfn - (memblock_x86_hole_size(0, max_pfn<>PAGE_SHIFT); - /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ - if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) { - printk(KERN_ERR - "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n", - (pxmram << PAGE_SHIFT) >> 20, - (e820ram << PAGE_SHIFT) >> 20); - return 0; - } - return 1; -} - void __init acpi_numa_arch_fixup(void) {} int __init x86_acpi_numa_init(void) @@ -303,39 +273,8 @@ int __init x86_acpi_numa_init(void) /* Use the information discovered above to actually set up the nodes. */ int __init acpi_scan_nodes(void) { - int i; - if (acpi_numa <= 0) return -1; - - /* for out of order entries in SRAT */ - sort_node_map(); - if (!nodes_cover_memory(numa_nodes)) { - bad_srat(); - return -1; - } - - init_memory_mapping_high(); - - /* Finally register nodes */ - for_each_node_mask(i, node_possible_map) - setup_node_bootmem(i, numa_nodes[i].start, numa_nodes[i].end); - /* Try again in case setup_node_bootmem missed one due - to missing bootmem */ - for_each_node_mask(i, node_possible_map) - if (!node_online(i)) - setup_node_bootmem(i, numa_nodes[i].start, - numa_nodes[i].end); - - for (i = 0; i < nr_cpu_ids; i++) { - int node = early_cpu_to_node(i); - - if (node == NUMA_NO_NODE) - continue; - if (!node_online(node)) - numa_clear_node(i); - } - numa_init_array(); return 0; } -- cgit v1.2.3-70-g09d2 From 5d371b08fea80c4fb7450d31e5a4e35b438ef850 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Feb 2011 17:11:08 +0100 Subject: x86-64, NUMA: Kill {acpi|amd|dummy}_scan_nodes() They are empty now. Kill them. Signed-off-by: Tejun Heo Cc: Yinghai Lu Cc: Brian Gerst Cc: Cyrill Gorcunov Cc: Shaohui Zheng Cc: David Rientjes Cc: Ingo Molnar Cc: H. Peter Anvin --- arch/x86/include/asm/acpi.h | 1 - arch/x86/include/asm/amd_nb.h | 1 - arch/x86/mm/amdtopology_64.c | 5 ----- arch/x86/mm/numa_64.c | 11 ----------- arch/x86/mm/srat_64.c | 8 -------- 5 files changed, 26 deletions(-) (limited to 'arch/x86/mm/amdtopology_64.c') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 12bd1fdd206..cfa3d5c3144 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -186,7 +186,6 @@ struct bootnode; #ifdef CONFIG_ACPI_NUMA extern int acpi_numa; extern int x86_acpi_numa_init(void); -extern int acpi_scan_nodes(void); #ifdef CONFIG_NUMA_EMU extern void acpi_fake_nodes(const struct bootnode *fake_nodes, diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 246cdc6ca9b..384d1188e78 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -17,7 +17,6 @@ extern int early_is_amd_nb(u32 value); extern int amd_cache_northbridges(void); extern void amd_flush_garts(void); extern int amd_numa_init(void); -extern int amd_scan_nodes(void); extern int amd_get_subcaches(int); extern int amd_set_subcaches(int, int); diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c index 9c9f46adf41..90cf297b3b5 100644 --- a/arch/x86/mm/amdtopology_64.c +++ b/arch/x86/mm/amdtopology_64.c @@ -259,8 +259,3 @@ void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes) memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node)); } #endif /* CONFIG_NUMA_EMU */ - -int __init amd_scan_nodes(void) -{ - return 0; -} diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 748c6b5bff6..e211c005f5e 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -772,25 +772,17 @@ static int dummy_numa_init(void) return 0; } -static int dummy_scan_nodes(void) -{ - return 0; -} - void __init initmem_init(void) { int (*numa_init[])(void) = { [2] = dummy_numa_init }; - int (*scan_nodes[])(void) = { [2] = dummy_scan_nodes }; int i, j; if (!numa_off) { #ifdef CONFIG_ACPI_NUMA numa_init[0] = x86_acpi_numa_init; - scan_nodes[0] = acpi_scan_nodes; #endif #ifdef CONFIG_AMD_NUMA numa_init[1] = amd_numa_init; - scan_nodes[1] = amd_scan_nodes; #endif } @@ -834,9 +826,6 @@ void __init initmem_init(void) if (numa_register_memblks() < 0) continue; - if (scan_nodes[i]() < 0) - continue; - for (j = 0; j < nr_cpu_ids; j++) { int nid = early_cpu_to_node(j); diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 4a2c33b0a48..d56eff811cd 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -270,14 +270,6 @@ int __init x86_acpi_numa_init(void) return srat_disabled() ? -EINVAL : 0; } -/* Use the information discovered above to actually set up the nodes. */ -int __init acpi_scan_nodes(void) -{ - if (acpi_numa <= 0) - return -1; - return 0; -} - #ifdef CONFIG_NUMA_EMU static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = { [0 ... MAX_NUMNODES-1] = PXM_INVAL -- cgit v1.2.3-70-g09d2 From a844ef46fa3055165c28feede6114a711b8375ad Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Feb 2011 17:11:09 +0100 Subject: x86-64, NUMA: Add common find_node_by_addr() srat_64.c and amdtopology_64.c had their own versions of find_node_by_addr() which were basically the same. Add common one in numa_64.c and remove the duplicates. Signed-off-by: Tejun Heo Cc: Yinghai Lu Cc: Brian Gerst Cc: Cyrill Gorcunov Cc: Shaohui Zheng Cc: David Rientjes Cc: Ingo Molnar Cc: H. Peter Anvin --- arch/x86/include/asm/numa_64.h | 1 + arch/x86/mm/amdtopology_64.c | 13 ------------- arch/x86/mm/numa_64.c | 19 +++++++++++++++++++ arch/x86/mm/srat_64.c | 18 ------------------ 4 files changed, 20 insertions(+), 31 deletions(-) (limited to 'arch/x86/mm/amdtopology_64.c') diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index e925605150a..925ade9d67e 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h @@ -35,6 +35,7 @@ extern int __init numa_add_memblk(int nodeid, u64 start, u64 end); #define FAKE_NODE_MIN_SIZE ((u64)32 << 20) #define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) void numa_emu_cmdline(char *); +int __init find_node_by_addr(unsigned long addr); #endif /* CONFIG_NUMA_EMU */ #else static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; } diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c index 90cf297b3b5..8f7a5eb4bd3 100644 --- a/arch/x86/mm/amdtopology_64.c +++ b/arch/x86/mm/amdtopology_64.c @@ -205,19 +205,6 @@ static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = { [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE }; -static int __init find_node_by_addr(unsigned long addr) -{ - int ret = NUMA_NO_NODE; - int i; - - for (i = 0; i < 8; i++) - if (addr >= numa_nodes[i].start && addr < numa_nodes[i].end) { - ret = i; - break; - } - return ret; -} - /* * For NUMA emulation, fake proximity domain (_PXM) to node id mappings must be * setup to represent the physical topology but reflect the emulated diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 20aa1d31e16..681bc0d59db 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -430,6 +430,25 @@ void __init numa_emu_cmdline(char *str) cmdline = str; } +int __init find_node_by_addr(unsigned long addr) +{ + int ret = NUMA_NO_NODE; + int i; + + for_each_node_mask(i, mem_nodes_parsed) { + /* + * Find the real node that this emulated node appears on. For + * the sake of simplicity, we only use a real node's starting + * address to determine which emulated node it appears on. + */ + if (addr >= numa_nodes[i].start && addr < numa_nodes[i].end) { + ret = i; + break; + } + } + return ret; +} + static int __init setup_physnodes(unsigned long start, unsigned long end) { int ret = 0; diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index d56eff811cd..51d07338d2e 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -277,24 +277,6 @@ static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = { static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = { [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE }; -static int __init find_node_by_addr(unsigned long addr) -{ - int ret = NUMA_NO_NODE; - int i; - - for_each_node_mask(i, mem_nodes_parsed) { - /* - * Find the real node that this emulated node appears on. For - * the sake of simplicity, we only use a real node's starting - * address to determine which emulated node it appears on. - */ - if (addr >= numa_nodes[i].start && addr < numa_nodes[i].end) { - ret = i; - break; - } - } - return ret; -} /* * In NUMA emulation, we need to setup proximity domain (_PXM) to node ID -- cgit v1.2.3-70-g09d2 From 91556237ec872e1029e3036174bae3b1a8df65eb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Feb 2011 17:11:09 +0100 Subject: x86-64, NUMA: Kill numa_nodes[] numa_nodes[] doesn't carry any information which isn't present in numa_meminfo. Each entry is simply min/max range of all the memblks for the node. This is not only redundant but also inaccurate when memblks for different nodes interleave - for example, find_node_by_addr() can return the wrong nodeid. Kill numa_nodes[] and always use numa_meminfo instead. * nodes_cover_memory() is renamed to numa_meminfo_cover_memory() and now operations on numa_meminfo and returns bool. * setup_node_bootmem() needs min/max range. Compute the range on the fly. setup_node_bootmem() invocation is restructured to use outer loop instead of hardcoding the double invocations. * find_node_by_addr() now operates on numa_meminfo. * setup_physnodes() builds physnodes[] from memblks. This will go away when emulation code is updated to use struct numa_meminfo. This patch also makes the following misc changes. * Clearing of nodes_add[] clearing is converted to memset(). * numa_add_memblk() in amd_numa_init() is moved down a bit for consistency. Signed-off-by: Tejun Heo Cc: Yinghai Lu Cc: Brian Gerst Cc: Cyrill Gorcunov Cc: Shaohui Zheng Cc: David Rientjes Cc: Ingo Molnar Cc: H. Peter Anvin --- arch/x86/include/asm/numa_64.h | 1 - arch/x86/mm/amdtopology_64.c | 6 +--- arch/x86/mm/numa_64.c | 82 ++++++++++++++++++++++++------------------ arch/x86/mm/srat_64.c | 22 +++--------- 4 files changed, 53 insertions(+), 58 deletions(-) (limited to 'arch/x86/mm/amdtopology_64.c') diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index 925ade9d67e..20b69a98f37 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h @@ -26,7 +26,6 @@ extern void setup_node_bootmem(int nodeid, unsigned long start, extern nodemask_t cpu_nodes_parsed __initdata; extern nodemask_t mem_nodes_parsed __initdata; -extern struct bootnode numa_nodes[MAX_NUMNODES] __initdata; extern int __cpuinit numa_cpu_node(int cpu); extern int __init numa_add_memblk(int nodeid, u64 start, u64 end); diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c index 8f7a5eb4bd3..0cb59e58200 100644 --- a/arch/x86/mm/amdtopology_64.c +++ b/arch/x86/mm/amdtopology_64.c @@ -165,12 +165,8 @@ int __init amd_numa_init(void) pr_info("Node %d MemBase %016lx Limit %016lx\n", nodeid, base, limit); - numa_nodes[nodeid].start = base; - numa_nodes[nodeid].end = limit; - numa_add_memblk(nodeid, base, limit); - prevbase = base; - + numa_add_memblk(nodeid, base, limit); node_set(nodeid, mem_nodes_parsed); node_set(nodeid, cpu_nodes_parsed); } diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 681bc0d59db..c490448d716 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -46,8 +46,6 @@ static unsigned long __initdata nodemap_size; static struct numa_meminfo numa_meminfo __initdata; -struct bootnode numa_nodes[MAX_NUMNODES] __initdata; - /* * Given a shift value, try to populate memnodemap[] * Returns : @@ -349,17 +347,17 @@ static int __init numa_cleanup_meminfo(struct numa_meminfo *mi) * Sanity check to catch more bad NUMA configurations (they are amazingly * common). Make sure the nodes cover all memory. */ -static int __init nodes_cover_memory(const struct bootnode *nodes) +static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) { unsigned long numaram, e820ram; int i; numaram = 0; - for_each_node_mask(i, mem_nodes_parsed) { - unsigned long s = nodes[i].start >> PAGE_SHIFT; - unsigned long e = nodes[i].end >> PAGE_SHIFT; + for (i = 0; i < mi->nr_blks; i++) { + unsigned long s = mi->blk[i].start >> PAGE_SHIFT; + unsigned long e = mi->blk[i].end >> PAGE_SHIFT; numaram += e - s; - numaram -= __absent_pages_in_range(i, s, e); + numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e); if ((long)numaram < 0) numaram = 0; } @@ -371,14 +369,14 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n", (numaram << PAGE_SHIFT) >> 20, (e820ram << PAGE_SHIFT) >> 20); - return 0; + return false; } - return 1; + return true; } static int __init numa_register_memblks(struct numa_meminfo *mi) { - int i; + int i, j, nid; /* Account for nodes with cpus and no memory */ nodes_or(node_possible_map, mem_nodes_parsed, cpu_nodes_parsed); @@ -398,23 +396,34 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) /* for out of order entries */ sort_node_map(); - if (!nodes_cover_memory(numa_nodes)) + if (!numa_meminfo_cover_memory(mi)) return -EINVAL; init_memory_mapping_high(); - /* Finally register nodes. */ - for_each_node_mask(i, node_possible_map) - setup_node_bootmem(i, numa_nodes[i].start, numa_nodes[i].end); - /* - * Try again in case setup_node_bootmem missed one due to missing - * bootmem. + * Finally register nodes. Do it twice in case setup_node_bootmem + * missed one due to missing bootmem. */ - for_each_node_mask(i, node_possible_map) - if (!node_online(i)) - setup_node_bootmem(i, numa_nodes[i].start, - numa_nodes[i].end); + for (i = 0; i < 2; i++) { + for_each_node_mask(nid, node_possible_map) { + u64 start = (u64)max_pfn << PAGE_SHIFT; + u64 end = 0; + + if (node_online(nid)) + continue; + + for (j = 0; j < mi->nr_blks; j++) { + if (nid != mi->blk[j].nid) + continue; + start = min(mi->blk[j].start, start); + end = max(mi->blk[j].end, end); + } + + if (start < end) + setup_node_bootmem(nid, start, end); + } + } return 0; } @@ -432,33 +441,41 @@ void __init numa_emu_cmdline(char *str) int __init find_node_by_addr(unsigned long addr) { - int ret = NUMA_NO_NODE; + const struct numa_meminfo *mi = &numa_meminfo; int i; - for_each_node_mask(i, mem_nodes_parsed) { + for (i = 0; i < mi->nr_blks; i++) { /* * Find the real node that this emulated node appears on. For * the sake of simplicity, we only use a real node's starting * address to determine which emulated node it appears on. */ - if (addr >= numa_nodes[i].start && addr < numa_nodes[i].end) { - ret = i; - break; - } + if (addr >= mi->blk[i].start && addr < mi->blk[i].end) + return mi->blk[i].nid; } - return ret; + return NUMA_NO_NODE; } static int __init setup_physnodes(unsigned long start, unsigned long end) { + const struct numa_meminfo *mi = &numa_meminfo; int ret = 0; int i; memset(physnodes, 0, sizeof(physnodes)); - for_each_node_mask(i, mem_nodes_parsed) { - physnodes[i].start = numa_nodes[i].start; - physnodes[i].end = numa_nodes[i].end; + for (i = 0; i < mi->nr_blks; i++) { + int nid = mi->blk[i].nid; + + if (physnodes[nid].start == physnodes[nid].end) { + physnodes[nid].start = mi->blk[i].start; + physnodes[nid].end = mi->blk[i].end; + } else { + physnodes[nid].start = min(physnodes[nid].start, + mi->blk[i].start); + physnodes[nid].end = max(physnodes[nid].end, + mi->blk[i].end); + } } /* @@ -809,8 +826,6 @@ static int dummy_numa_init(void) node_set(0, cpu_nodes_parsed); node_set(0, mem_nodes_parsed); numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT); - numa_nodes[0].start = 0; - numa_nodes[0].end = (u64)max_pfn << PAGE_SHIFT; return 0; } @@ -841,7 +856,6 @@ void __init initmem_init(void) nodes_clear(node_possible_map); nodes_clear(node_online_map); memset(&numa_meminfo, 0, sizeof(numa_meminfo)); - memset(numa_nodes, 0, sizeof(numa_nodes)); remove_all_active_ranges(); if (numa_init[i]() < 0) diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 51d07338d2e..e8b3b3cb2c2 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -37,13 +37,9 @@ static __init int setup_node(int pxm) static __init void bad_srat(void) { - int i; printk(KERN_ERR "SRAT: SRAT not used.\n"); acpi_numa = -1; - for (i = 0; i < MAX_NUMNODES; i++) { - numa_nodes[i].start = numa_nodes[i].end = 0; - nodes_add[i].start = nodes_add[i].end = 0; - } + memset(nodes_add, 0, sizeof(nodes_add)); } static __init inline int srat_disabled(void) @@ -210,7 +206,6 @@ update_nodes_add(int node, unsigned long start, unsigned long end) void __init acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) { - struct bootnode *nd; unsigned long start, end; int node, pxm; @@ -243,18 +238,9 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, start, end); - if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)) { - nd = &numa_nodes[node]; - if (!node_test_and_set(node, mem_nodes_parsed)) { - nd->start = start; - nd->end = end; - } else { - if (start < nd->start) - nd->start = start; - if (nd->end < end) - nd->end = end; - } - } else + if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)) + node_set(node, mem_nodes_parsed); + else update_nodes_add(node, start, end); } -- cgit v1.2.3-70-g09d2 From 92d4a4371eeb89e1e12b9ebbed0956f499b6c2c0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Feb 2011 17:11:09 +0100 Subject: x86-64, NUMA: Rename cpu_nodes_parsed to numa_nodes_parsed It's no longer necessary to keep both cpu_nodes_parsed and mem_nodes_parsed. In preparation for merge, rename cpu_nodes_parsed to numa_nodes_parsed. Signed-off-by: Tejun Heo Cc: Yinghai Lu Cc: Brian Gerst Cc: Cyrill Gorcunov Cc: Shaohui Zheng Cc: David Rientjes Cc: Ingo Molnar Cc: H. Peter Anvin --- arch/x86/include/asm/numa_64.h | 2 +- arch/x86/mm/amdtopology_64.c | 4 ++-- arch/x86/mm/numa_64.c | 8 ++++---- arch/x86/mm/srat_64.c | 6 +++--- 4 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/x86/mm/amdtopology_64.c') diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index 20b69a98f37..6e944696144 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h @@ -24,7 +24,7 @@ extern void setup_node_bootmem(int nodeid, unsigned long start, */ #define NODE_MIN_SIZE (4*1024*1024) -extern nodemask_t cpu_nodes_parsed __initdata; +extern nodemask_t numa_nodes_parsed __initdata; extern nodemask_t mem_nodes_parsed __initdata; extern int __cpuinit numa_cpu_node(int cpu); diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c index 0cb59e58200..e76bffabc09 100644 --- a/arch/x86/mm/amdtopology_64.c +++ b/arch/x86/mm/amdtopology_64.c @@ -168,7 +168,7 @@ int __init amd_numa_init(void) prevbase = base; numa_add_memblk(nodeid, base, limit); node_set(nodeid, mem_nodes_parsed); - node_set(nodeid, cpu_nodes_parsed); + node_set(nodeid, numa_nodes_parsed); } if (!nodes_weight(mem_nodes_parsed)) @@ -189,7 +189,7 @@ int __init amd_numa_init(void) apicid_base = boot_cpu_physical_apicid; } - for_each_node_mask(i, cpu_nodes_parsed) + for_each_node_mask(i, numa_nodes_parsed) for (j = apicid_base; j < cores + apicid_base; j++) set_apicid_to_node((i << bits) + j, i); diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index c490448d716..6e4fbd77756 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -36,7 +36,7 @@ struct numa_meminfo { struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); -nodemask_t cpu_nodes_parsed __initdata; +nodemask_t numa_nodes_parsed __initdata; nodemask_t mem_nodes_parsed __initdata; struct memnode memnode; @@ -379,7 +379,7 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) int i, j, nid; /* Account for nodes with cpus and no memory */ - nodes_or(node_possible_map, mem_nodes_parsed, cpu_nodes_parsed); + nodes_or(node_possible_map, mem_nodes_parsed, numa_nodes_parsed); if (WARN_ON(nodes_empty(node_possible_map))) return -EINVAL; @@ -823,7 +823,7 @@ static int dummy_numa_init(void) printk(KERN_INFO "Faking a node at %016lx-%016lx\n", 0LU, max_pfn << PAGE_SHIFT); - node_set(0, cpu_nodes_parsed); + node_set(0, numa_nodes_parsed); node_set(0, mem_nodes_parsed); numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT); @@ -851,7 +851,7 @@ void __init initmem_init(void) for (j = 0; j < MAX_LOCAL_APIC; j++) set_apicid_to_node(j, NUMA_NO_NODE); - nodes_clear(cpu_nodes_parsed); + nodes_clear(numa_nodes_parsed); nodes_clear(mem_nodes_parsed); nodes_clear(node_possible_map); nodes_clear(node_online_map); diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index e8b3b3cb2c2..8185189d34a 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -94,7 +94,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) return; } set_apicid_to_node(apic_id, node); - node_set(node, cpu_nodes_parsed); + node_set(node, numa_nodes_parsed); acpi_numa = 1; printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n", pxm, apic_id, node); @@ -134,7 +134,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) } set_apicid_to_node(apic_id, node); - node_set(node, cpu_nodes_parsed); + node_set(node, numa_nodes_parsed); acpi_numa = 1; printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n", pxm, apic_id, node); @@ -196,7 +196,7 @@ update_nodes_add(int node, unsigned long start, unsigned long end) } if (changed) { - node_set(node, cpu_nodes_parsed); + node_set(node, numa_nodes_parsed); printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end); } -- cgit v1.2.3-70-g09d2 From 4697bdcc945c094d2c8a4876a24faeaf31a283e0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Feb 2011 17:11:09 +0100 Subject: x86-64, NUMA: Kill mem_nodes_parsed With all memory configuration information now carried in numa_meminfo, there's no need to keep mem_nodes_parsed separate. Drop it and use numa_nodes_parsed for CPU / memory-less nodes. A new helper numa_nodemask_from_meminfo() is added to calculate memnode mask on the fly which is currently used to set node_possible_map. This simplifies NUMA init methods a bit and removes a source of possible inconsistencies. Signed-off-by: Tejun Heo Cc: Yinghai Lu Cc: Brian Gerst Cc: Cyrill Gorcunov Cc: Shaohui Zheng Cc: David Rientjes Cc: Ingo Molnar Cc: H. Peter Anvin --- arch/x86/include/asm/numa_64.h | 1 - arch/x86/mm/amdtopology_64.c | 5 ++--- arch/x86/mm/numa_64.c | 20 ++++++++++++++++---- arch/x86/mm/srat_64.c | 7 ++----- 4 files changed, 20 insertions(+), 13 deletions(-) (limited to 'arch/x86/mm/amdtopology_64.c') diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index 6e944696144..f42710bd3e7 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h @@ -25,7 +25,6 @@ extern void setup_node_bootmem(int nodeid, unsigned long start, #define NODE_MIN_SIZE (4*1024*1024) extern nodemask_t numa_nodes_parsed __initdata; -extern nodemask_t mem_nodes_parsed __initdata; extern int __cpuinit numa_cpu_node(int cpu); extern int __init numa_add_memblk(int nodeid, u64 start, u64 end); diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c index e76bffabc09..fd7b609025b 100644 --- a/arch/x86/mm/amdtopology_64.c +++ b/arch/x86/mm/amdtopology_64.c @@ -122,7 +122,7 @@ int __init amd_numa_init(void) nodeid, (base >> 8) & 3, (limit >> 8) & 3); return -EINVAL; } - if (node_isset(nodeid, mem_nodes_parsed)) { + if (node_isset(nodeid, numa_nodes_parsed)) { pr_info("Node %d already present, skipping\n", nodeid); continue; @@ -167,11 +167,10 @@ int __init amd_numa_init(void) prevbase = base; numa_add_memblk(nodeid, base, limit); - node_set(nodeid, mem_nodes_parsed); node_set(nodeid, numa_nodes_parsed); } - if (!nodes_weight(mem_nodes_parsed)) + if (!nodes_weight(numa_nodes_parsed)) return -ENOENT; /* diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 6e4fbd77756..8b1f178a866 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -37,7 +37,6 @@ struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); nodemask_t numa_nodes_parsed __initdata; -nodemask_t mem_nodes_parsed __initdata; struct memnode memnode; @@ -343,6 +342,20 @@ static int __init numa_cleanup_meminfo(struct numa_meminfo *mi) return 0; } +/* + * Set nodes, which have memory in @mi, in *@nodemask. + */ +static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask, + const struct numa_meminfo *mi) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mi->blk); i++) + if (mi->blk[i].start != mi->blk[i].end && + mi->blk[i].nid != NUMA_NO_NODE) + node_set(mi->blk[i].nid, *nodemask); +} + /* * Sanity check to catch more bad NUMA configurations (they are amazingly * common). Make sure the nodes cover all memory. @@ -379,7 +392,8 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) int i, j, nid; /* Account for nodes with cpus and no memory */ - nodes_or(node_possible_map, mem_nodes_parsed, numa_nodes_parsed); + node_possible_map = numa_nodes_parsed; + numa_nodemask_from_meminfo(&node_possible_map, mi); if (WARN_ON(nodes_empty(node_possible_map))) return -EINVAL; @@ -824,7 +838,6 @@ static int dummy_numa_init(void) 0LU, max_pfn << PAGE_SHIFT); node_set(0, numa_nodes_parsed); - node_set(0, mem_nodes_parsed); numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT); return 0; @@ -852,7 +865,6 @@ void __init initmem_init(void) set_apicid_to_node(j, NUMA_NO_NODE); nodes_clear(numa_nodes_parsed); - nodes_clear(mem_nodes_parsed); nodes_clear(node_possible_map); nodes_clear(node_online_map); memset(&numa_meminfo, 0, sizeof(numa_meminfo)); diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 8185189d34a..4f8e6cde9bf 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -238,9 +238,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, start, end); - if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)) - node_set(node, mem_nodes_parsed); - else + if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) update_nodes_add(node, start, end); } @@ -310,10 +308,9 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i); memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node)); - nodes_clear(mem_nodes_parsed); for (i = 0; i < num_nodes; i++) if (fake_nodes[i].start != fake_nodes[i].end) - node_set(i, mem_nodes_parsed); + node_set(i, numa_nodes_parsed); } static int null_slit_node_compare(int a, int b) -- cgit v1.2.3-70-g09d2 From 6b78cb549b4105cbf7c6f7461f27a21f00c44997 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Feb 2011 17:11:10 +0100 Subject: x86-64, NUMA: Unify emulated apicid -> node mapping transformation NUMA emulation changes node mappings and thus apicid -> node mapping needs to be updated accordingly. srat_64 and amdtopology_64 did this separately; however, all the necessary information is the mapping from emulated nodes to physical nodes which is available in emu_nid_to_phys[]. Implement common __apicid_to_node[] transformation in numa_emulation() and drop duplicate implementations. Signed-off-by: Tejun Heo Cc: Yinghai Lu Cc: Brian Gerst Cc: Cyrill Gorcunov Cc: Shaohui Zheng Cc: David Rientjes Cc: Ingo Molnar Cc: H. Peter Anvin --- arch/x86/mm/amdtopology_64.c | 9 --------- arch/x86/mm/numa_64.c | 16 +++++++++++++++- arch/x86/mm/srat_64.c | 24 +----------------------- 3 files changed, 16 insertions(+), 33 deletions(-) (limited to 'arch/x86/mm/amdtopology_64.c') diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c index fd7b609025b..f37ea2fe85e 100644 --- a/arch/x86/mm/amdtopology_64.c +++ b/arch/x86/mm/amdtopology_64.c @@ -196,10 +196,6 @@ int __init amd_numa_init(void) } #ifdef CONFIG_NUMA_EMU -static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = { - [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE -}; - /* * For NUMA emulation, fake proximity domain (_PXM) to node id mappings must be * setup to represent the physical topology but reflect the emulated @@ -224,20 +220,15 @@ void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes) for (i = 0; i < nr_nodes; i++) { int index; int nid; - int j; nid = find_node_by_addr(nodes[i].start); if (nid == NUMA_NO_NODE) continue; index = nodeids[nid] << bits; - if (fake_apicid_to_node[index + apicid_base] == NUMA_NO_NODE) - for (j = apicid_base; j < cores + apicid_base; j++) - fake_apicid_to_node[index + j] = i; #ifdef CONFIG_ACPI_NUMA __acpi_map_pxm_to_node(nid, i); #endif } - memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node)); } #endif /* CONFIG_NUMA_EMU */ diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index bd086ebc0ff..722039e0948 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -858,7 +858,7 @@ static bool __init numa_emulation(int acpi, int amd) static struct numa_meminfo ei __initdata; static struct numa_meminfo pi __initdata; const u64 max_addr = max_pfn << PAGE_SHIFT; - int i, ret; + int i, j, ret; memset(&ei, 0, sizeof(ei)); pi = numa_meminfo; @@ -894,6 +894,20 @@ static bool __init numa_emulation(int acpi, int amd) /* commit */ numa_meminfo = ei; + /* + * Transform __apicid_to_node table to use emulated nids by + * reverse-mapping phys_nid. The maps should always exist but fall + * back to zero just in case. + */ + for (i = 0; i < ARRAY_SIZE(__apicid_to_node); i++) { + if (__apicid_to_node[i] == NUMA_NO_NODE) + continue; + for (j = 0; j < ARRAY_SIZE(emu_nid_to_phys); j++) + if (__apicid_to_node[i] == emu_nid_to_phys[j]) + break; + __apicid_to_node[i] = j < ARRAY_SIZE(emu_nid_to_phys) ? j : 0; + } + /* make sure all emulated nodes are mapped to a physical node */ for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++) if (emu_nid_to_phys[i] == NUMA_NO_NODE) diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index d2f53f35d86..d4fbfea5354 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -265,9 +265,6 @@ int __init x86_acpi_numa_init(void) static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = { [0 ... MAX_NUMNODES-1] = PXM_INVAL }; -static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = { - [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE -}; /* * In NUMA emulation, we need to setup proximity domain (_PXM) to node ID @@ -279,7 +276,7 @@ static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = { */ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) { - int i, j; + int i; for (i = 0; i < num_nodes; i++) { int nid, pxm; @@ -291,29 +288,10 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) if (pxm == PXM_INVAL) continue; fake_node_to_pxm_map[i] = pxm; - /* - * For each apicid_to_node mapping that exists for this real - * node, it must now point to the fake node ID. - */ - for (j = 0; j < MAX_LOCAL_APIC; j++) - if (__apicid_to_node[j] == nid && - fake_apicid_to_node[j] == NUMA_NO_NODE) - fake_apicid_to_node[j] = i; } - /* - * If there are apicid-to-node mappings for physical nodes that do not - * have a corresponding emulated node, it should default to a guaranteed - * value. - */ - for (i = 0; i < MAX_LOCAL_APIC; i++) - if (__apicid_to_node[i] != NUMA_NO_NODE && - fake_apicid_to_node[i] == NUMA_NO_NODE) - fake_apicid_to_node[i] = 0; - for (i = 0; i < num_nodes; i++) __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i); - memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node)); for (i = 0; i < num_nodes; i++) if (fake_nodes[i].start != fake_nodes[i].end) -- cgit v1.2.3-70-g09d2 From e23bba604433a202cd301a976454a90ea6b783ef Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Feb 2011 17:11:10 +0100 Subject: x86-64, NUMA: Unify emulated distance mapping NUMA emulation needs to update node distance information. It did it by remapping apicid to PXM mapping, even when amdtopology is being used. There is no reason to go through such convolution. The generic code has all the information necessary to transform the distance table to the emulated nid space. Implement generic distance table transformation in numa_emulation() and drop private implementations in srat_64 and amdtopology_64. This makes find_node_by_addr() and fake_physnodes() and related functions unnecessary, drop them. Signed-off-by: Tejun Heo Cc: Yinghai Lu Cc: Brian Gerst Cc: Cyrill Gorcunov Cc: Shaohui Zheng Cc: David Rientjes Cc: Ingo Molnar Cc: H. Peter Anvin --- arch/x86/include/asm/acpi.h | 6 --- arch/x86/include/asm/amd_nb.h | 4 -- arch/x86/include/asm/numa_64.h | 1 - arch/x86/mm/amdtopology_64.c | 38 --------------- arch/x86/mm/numa_64.c | 102 ++++++++++++++++------------------------- arch/x86/mm/srat_64.c | 65 -------------------------- 6 files changed, 40 insertions(+), 176 deletions(-) (limited to 'arch/x86/mm/amdtopology_64.c') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 9c9fe1b0bc4..a37da6df07f 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -186,12 +186,6 @@ struct bootnode; #ifdef CONFIG_ACPI_NUMA extern int acpi_numa; extern int x86_acpi_numa_init(void); - -#ifdef CONFIG_NUMA_EMU -extern void acpi_fake_nodes(const struct bootnode *fake_nodes, - int num_nodes); -extern int acpi_emu_node_distance(int a, int b); -#endif #endif /* CONFIG_ACPI_NUMA */ #define acpi_unlazy_tlb(x) leave_mm(x) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 384d1188e78..e264ae5a144 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -20,10 +20,6 @@ extern int amd_numa_init(void); extern int amd_get_subcaches(int); extern int amd_set_subcaches(int, int); -#ifdef CONFIG_NUMA_EMU -extern void amd_fake_nodes(const struct bootnode *nodes, int nr_nodes); -#endif - struct amd_northbridge { struct pci_dev *misc; struct pci_dev *link; diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index 5361c594798..344eb1790b4 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h @@ -34,7 +34,6 @@ extern void __init numa_set_distance(int from, int to, int distance); #define FAKE_NODE_MIN_SIZE ((u64)32 << 20) #define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) void numa_emu_cmdline(char *); -int __init find_node_by_addr(unsigned long addr); #endif /* CONFIG_NUMA_EMU */ #else static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; } diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c index f37ea2fe85e..0919c26820d 100644 --- a/arch/x86/mm/amdtopology_64.c +++ b/arch/x86/mm/amdtopology_64.c @@ -194,41 +194,3 @@ int __init amd_numa_init(void) return 0; } - -#ifdef CONFIG_NUMA_EMU -/* - * For NUMA emulation, fake proximity domain (_PXM) to node id mappings must be - * setup to represent the physical topology but reflect the emulated - * environment. For each emulated node, the real node which it appears on is - * found and a fake pxm to nid mapping is created which mirrors the actual - * locality. node_distance() then represents the correct distances between - * emulated nodes by using the fake acpi mappings to pxms. - */ -void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes) -{ - unsigned int bits; - unsigned int cores; - unsigned int apicid_base = 0; - int i; - - bits = boot_cpu_data.x86_coreid_bits; - cores = 1 << bits; - early_get_boot_cpu_id(); - if (boot_cpu_physical_apicid > 0) - apicid_base = boot_cpu_physical_apicid; - - for (i = 0; i < nr_nodes; i++) { - int index; - int nid; - - nid = find_node_by_addr(nodes[i].start); - if (nid == NUMA_NO_NODE) - continue; - - index = nodeids[nid] << bits; -#ifdef CONFIG_ACPI_NUMA - __acpi_map_pxm_to_node(nid, i); -#endif - } -} -#endif /* CONFIG_NUMA_EMU */ diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 722039e0948..8ce61773590 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -48,10 +48,6 @@ static struct numa_meminfo numa_meminfo __initdata; static int numa_distance_cnt; static u8 *numa_distance; -#ifdef CONFIG_NUMA_EMU -static bool numa_emu_dist; -#endif - /* * Given a shift value, try to populate memnodemap[] * Returns : @@ -443,10 +439,6 @@ void __init numa_set_distance(int from, int to, int distance) int __node_distance(int from, int to) { -#if defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA_EMU) - if (numa_emu_dist) - return acpi_emu_node_distance(from, to); -#endif if (from >= numa_distance_cnt || to >= numa_distance_cnt) return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE; return numa_distance[from * numa_distance_cnt + to]; @@ -559,56 +551,6 @@ static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi) return -ENOENT; } -int __init find_node_by_addr(unsigned long addr) -{ - const struct numa_meminfo *mi = &numa_meminfo; - int i; - - for (i = 0; i < mi->nr_blks; i++) { - /* - * Find the real node that this emulated node appears on. For - * the sake of simplicity, we only use a real node's starting - * address to determine which emulated node it appears on. - */ - if (addr >= mi->blk[i].start && addr < mi->blk[i].end) - return mi->blk[i].nid; - } - return NUMA_NO_NODE; -} - -static void __init fake_physnodes(int acpi, int amd, - const struct numa_meminfo *ei) -{ - static struct bootnode nodes[MAX_NUMNODES] __initdata; - int i, nr_nodes = 0; - - for (i = 0; i < ei->nr_blks; i++) { - int nid = ei->blk[i].nid; - - if (nodes[nid].start == nodes[nid].end) { - nodes[nid].start = ei->blk[i].start; - nodes[nid].end = ei->blk[i].end; - nr_nodes++; - } else { - nodes[nid].start = min(ei->blk[i].start, nodes[nid].start); - nodes[nid].end = max(ei->blk[i].end, nodes[nid].end); - } - } - - BUG_ON(acpi && amd); -#ifdef CONFIG_ACPI_NUMA - if (acpi) - acpi_fake_nodes(nodes, nr_nodes); -#endif -#ifdef CONFIG_AMD_NUMA - if (amd) - amd_fake_nodes(nodes, nr_nodes); -#endif - if (!acpi && !amd) - for (i = 0; i < nr_cpu_ids; i++) - numa_set_node(i, 0); -} - /* * Sets up nid to range from @start to @end. The return value is -errno if * something went wrong, 0 otherwise. @@ -853,11 +795,13 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei, * Sets up the system RAM area from start_pfn to last_pfn according to the * numa=fake command-line option. */ -static bool __init numa_emulation(int acpi, int amd) +static bool __init numa_emulation(void) { static struct numa_meminfo ei __initdata; static struct numa_meminfo pi __initdata; const u64 max_addr = max_pfn << PAGE_SHIFT; + int phys_dist_cnt = numa_distance_cnt; + u8 *phys_dist = NULL; int i, j, ret; memset(&ei, 0, sizeof(ei)); @@ -891,6 +835,25 @@ static bool __init numa_emulation(int acpi, int amd) return false; } + /* + * Copy the original distance table. It's temporary so no need to + * reserve it. + */ + if (phys_dist_cnt) { + size_t size = phys_dist_cnt * sizeof(numa_distance[0]); + u64 phys; + + phys = memblock_find_in_range(0, + (u64)max_pfn_mapped << PAGE_SHIFT, + size, PAGE_SIZE); + if (phys == MEMBLOCK_ERROR) { + pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n"); + return false; + } + phys_dist = __va(phys); + memcpy(phys_dist, numa_distance, size); + } + /* commit */ numa_meminfo = ei; @@ -913,8 +876,23 @@ static bool __init numa_emulation(int acpi, int amd) if (emu_nid_to_phys[i] == NUMA_NO_NODE) emu_nid_to_phys[i] = 0; - fake_physnodes(acpi, amd, &ei); - numa_emu_dist = true; + /* transform distance table */ + numa_reset_distance(); + for (i = 0; i < MAX_NUMNODES; i++) { + for (j = 0; j < MAX_NUMNODES; j++) { + int physi = emu_nid_to_phys[i]; + int physj = emu_nid_to_phys[j]; + int dist; + + if (physi >= phys_dist_cnt || physj >= phys_dist_cnt) + dist = physi == physj ? + LOCAL_DISTANCE : REMOTE_DISTANCE; + else + dist = phys_dist[physi * phys_dist_cnt + physj]; + + numa_set_distance(i, j, dist); + } + } return true; } #endif /* CONFIG_NUMA_EMU */ @@ -970,7 +948,7 @@ void __init initmem_init(void) * If requested, try emulation. If emulation is not used, * build identity emu_nid_to_phys[] for numa_add_cpu() */ - if (!emu_cmdline || !numa_emulation(i == 0, i == 1)) + if (!emu_cmdline || !numa_emulation()) for (j = 0; j < ARRAY_SIZE(emu_nid_to_phys); j++) emu_nid_to_phys[j] = j; #endif diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index d4fbfea5354..8e9d3394f6d 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -26,8 +26,6 @@ int acpi_numa __initdata; -static struct acpi_table_slit *acpi_slit; - static struct bootnode nodes_add[MAX_NUMNODES]; static __init int setup_node(int pxm) @@ -51,25 +49,11 @@ static __init inline int srat_disabled(void) void __init acpi_numa_slit_init(struct acpi_table_slit *slit) { int i, j; - unsigned length; - unsigned long phys; for (i = 0; i < slit->locality_count; i++) for (j = 0; j < slit->locality_count; j++) numa_set_distance(pxm_to_node(i), pxm_to_node(j), slit->entry[slit->locality_count * i + j]); - - /* acpi_slit is used only by emulation */ - length = slit->header.length; - phys = memblock_find_in_range(0, max_pfn_mapped< x2APIC mapping */ @@ -261,55 +245,6 @@ int __init x86_acpi_numa_init(void) return srat_disabled() ? -EINVAL : 0; } -#ifdef CONFIG_NUMA_EMU -static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = { - [0 ... MAX_NUMNODES-1] = PXM_INVAL -}; - -/* - * In NUMA emulation, we need to setup proximity domain (_PXM) to node ID - * mappings that respect the real ACPI topology but reflect our emulated - * environment. For each emulated node, we find which real node it appears on - * and create PXM to NID mappings for those fake nodes which mirror that - * locality. SLIT will now represent the correct distances between emulated - * nodes as a result of the real topology. - */ -void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) -{ - int i; - - for (i = 0; i < num_nodes; i++) { - int nid, pxm; - - nid = find_node_by_addr(fake_nodes[i].start); - if (nid == NUMA_NO_NODE) - continue; - pxm = node_to_pxm(nid); - if (pxm == PXM_INVAL) - continue; - fake_node_to_pxm_map[i] = pxm; - } - - for (i = 0; i < num_nodes; i++) - __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i); - - for (i = 0; i < num_nodes; i++) - if (fake_nodes[i].start != fake_nodes[i].end) - node_set(i, numa_nodes_parsed); -} - -int acpi_emu_node_distance(int a, int b) -{ - int index; - - if (!acpi_slit) - return node_to_pxm(a) == node_to_pxm(b) ? - LOCAL_DISTANCE : REMOTE_DISTANCE; - index = acpi_slit->locality_count * node_to_pxm(a); - return acpi_slit->entry[index + node_to_pxm(b)]; -} -#endif /* CONFIG_NUMA_EMU */ - #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY) int memory_add_physaddr_to_nid(u64 start) { -- cgit v1.2.3-70-g09d2