From f1157141636848f52c5f74040bed0ba355cf59b7 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 7 Dec 2010 00:55:29 -0800 Subject: x86, apic: Remove early_init_lapic_mapping() It is almost the same as smp_register_lapic_addr(). We just need to let smp_read_mpc() call smp_register_lapic_addr() when early==1. Add the apic_printk to smp_register_lapic_address() Signed-off-by: Yinghai Lu Cc: Suresh Siddha Cc: "Eric W. Biederman" LKML-Reference: <4CFDF681.3030509@kernel.org> Signed-off-by: Thomas Gleixner --- arch/x86/mm/amdtopology_64.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/mm/amdtopology_64.c') diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c index 51fae9cfdec..08a0069b87a 100644 --- a/arch/x86/mm/amdtopology_64.c +++ b/arch/x86/mm/amdtopology_64.c @@ -66,7 +66,6 @@ static __init void early_get_boot_cpu_id(void) if (smp_found_config) early_get_smp_config(); #endif - early_init_lapic_mapping(); } int __init amd_get_nodes(struct bootnode *physnodes) -- cgit v1.2.3-70-g09d2 From 4e76f4e67a106ed827ca721b4c8b622047cd2f6d Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 22 Dec 2010 17:23:47 -0800 Subject: x86, numa: Avoid compiling NUMA emulation functions without CONFIG_NUMA_EMU Both acpi_get_nodes() and amd_get_nodes() are only necessary when CONFIG_NUMA_EMU is enabled, so avoid compiling them when the option is disabled. Signed-off-by: David Rientjes LKML-Reference: Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/acpi.h | 5 ++++- arch/x86/include/asm/amd_nb.h | 5 ++++- arch/x86/mm/amdtopology_64.c | 2 ++ arch/x86/mm/srat_64.c | 2 ++ 4 files changed, 12 insertions(+), 2 deletions(-) (limited to 'arch/x86/mm/amdtopology_64.c') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 55d106b5e31..b326fa99db5 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -188,14 +188,17 @@ extern int acpi_numa; extern int acpi_get_nodes(struct bootnode *physnodes); extern int acpi_scan_nodes(unsigned long start, unsigned long end); #define NR_NODE_MEMBLKS (MAX_NUMNODES*2) + +#ifdef CONFIG_NUMA_EMU extern void acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes); +#endif #else static inline void acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) { } -#endif +#endif /* CONFIG_ACPI_NUMA */ #define acpi_unlazy_tlb(x) leave_mm(x) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 6aee50d655d..9c16cde63f0 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -9,10 +9,13 @@ struct bootnode; extern int early_is_amd_nb(u32 value); extern int amd_cache_northbridges(void); extern void amd_flush_garts(void); -extern int amd_get_nodes(struct bootnode *nodes); extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn); extern int amd_scan_nodes(void); +#ifdef CONFIG_NUMA_EMU +extern int amd_get_nodes(struct bootnode *nodes); +#endif + struct amd_northbridge { struct pci_dev *misc; }; diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c index 51fae9cfdec..fe050af614e 100644 --- a/arch/x86/mm/amdtopology_64.c +++ b/arch/x86/mm/amdtopology_64.c @@ -69,6 +69,7 @@ static __init void early_get_boot_cpu_id(void) early_init_lapic_mapping(); } +#ifdef CONFIG_NUMA_EMU int __init amd_get_nodes(struct bootnode *physnodes) { int i; @@ -81,6 +82,7 @@ int __init amd_get_nodes(struct bootnode *physnodes) } return ret; } +#endif /* CONFIG_NUMA_EMU */ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn) { diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index a35cb9d8b06..8241bf0f6eb 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -339,6 +339,7 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) void __init acpi_numa_arch_fixup(void) {} +#ifdef CONFIG_NUMA_EMU int __init acpi_get_nodes(struct bootnode *physnodes) { int i; @@ -351,6 +352,7 @@ int __init acpi_get_nodes(struct bootnode *physnodes) } return ret; } +#endif /* CONFIG_NUMA_EMU */ /* Use the information discovered above to actually set up the nodes. */ int __init acpi_scan_nodes(unsigned long start, unsigned long end) -- cgit v1.2.3-70-g09d2 From f51bf3073a145a5b3263fd882c52d6ec04b687da Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 22 Dec 2010 17:23:51 -0800 Subject: x86, numa: Fake apicid and pxm mappings for NUMA emulation This patch adds the equivalent of acpi_fake_nodes() for AMD Northbridge platforms. The goal is to fake the apicid-to-node mappings for NUMA emulation so the physical topology of the machine is correctly maintained within the kernel. This change also fakes proximity domains for both ACPI and k8 code so the physical distance between emulated nodes is maintained via node_distance(). This exports the correct distances via /sys/devices/system/node/.../distance based on the underlying topology. A new helper function, fake_physnodes(), is introduced to correctly invoke the correct NUMA code to fake these two mappings based on the system type. If there is no underlying NUMA configuration, all cpus are mapped to node 0 for local distance. Since acpi_fake_nodes() is no longer called with CONFIG_ACPI_NUMA, it's prototype can be removed from the header file for such a configuration. Signed-off-by: David Rientjes LKML-Reference: Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/acpi.h | 5 --- arch/x86/include/asm/amd_nb.h | 1 + arch/x86/mm/amdtopology_64.c | 91 +++++++++++++++++++++++++++++++++++-------- arch/x86/mm/numa_64.c | 20 +++++++++- arch/x86/mm/srat_64.c | 2 - 5 files changed, 95 insertions(+), 24 deletions(-) (limited to 'arch/x86/mm/amdtopology_64.c') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index b326fa99db5..8288daf72dc 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -193,11 +193,6 @@ extern int acpi_scan_nodes(unsigned long start, unsigned long end); extern void acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes); #endif -#else -static inline void acpi_fake_nodes(const struct bootnode *fake_nodes, - int num_nodes) -{ -} #endif /* CONFIG_ACPI_NUMA */ #define acpi_unlazy_tlb(x) leave_mm(x) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 9c16cde63f0..8f6192c1592 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -13,6 +13,7 @@ extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn); extern int amd_scan_nodes(void); #ifdef CONFIG_NUMA_EMU +extern void amd_fake_nodes(const struct bootnode *nodes, int nr_nodes); extern int amd_get_nodes(struct bootnode *nodes); #endif diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c index fe050af614e..eb5cbb97b68 100644 --- a/arch/x86/mm/amdtopology_64.c +++ b/arch/x86/mm/amdtopology_64.c @@ -27,6 +27,7 @@ #include static struct bootnode __initdata nodes[8]; +static unsigned char __initdata nodeids[8]; static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE; static __init int find_northbridge(void) @@ -69,21 +70,6 @@ static __init void early_get_boot_cpu_id(void) early_init_lapic_mapping(); } -#ifdef CONFIG_NUMA_EMU -int __init amd_get_nodes(struct bootnode *physnodes) -{ - int i; - int ret = 0; - - for_each_node_mask(i, nodes_parsed) { - physnodes[ret].start = nodes[i].start; - physnodes[ret].end = nodes[i].end; - ret++; - } - return ret; -} -#endif /* CONFIG_NUMA_EMU */ - int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn) { unsigned long start = PFN_PHYS(start_pfn); @@ -116,7 +102,7 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn) base = read_pci_config(0, nb, 1, 0x40 + i*8); limit = read_pci_config(0, nb, 1, 0x44 + i*8); - nodeid = limit & 7; + nodeids[i] = nodeid = limit & 7; if ((base & 3) == 0) { if (i < numnodes) pr_info("Skipping disabled node %d\n", i); @@ -196,6 +182,79 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn) return 0; } +#ifdef CONFIG_NUMA_EMU +static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = { + [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE +}; + +int __init amd_get_nodes(struct bootnode *physnodes) +{ + int i; + int ret = 0; + + for_each_node_mask(i, nodes_parsed) { + physnodes[ret].start = nodes[i].start; + physnodes[ret].end = nodes[i].end; + ret++; + } + return ret; +} + +static int __init find_node_by_addr(unsigned long addr) +{ + int ret = NUMA_NO_NODE; + int i; + + for (i = 0; i < 8; i++) + if (addr >= nodes[i].start && addr < nodes[i].end) { + ret = i; + break; + } + return ret; +} + +/* + * For NUMA emulation, fake proximity domain (_PXM) to node id mappings must be + * setup to represent the physical topology but reflect the emulated + * environment. For each emulated node, the real node which it appears on is + * found and a fake pxm to nid mapping is created which mirrors the actual + * locality. node_distance() then represents the correct distances between + * emulated nodes by using the fake acpi mappings to pxms. + */ +void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes) +{ + unsigned int bits; + unsigned int cores; + unsigned int apicid_base = 0; + int i; + + bits = boot_cpu_data.x86_coreid_bits; + cores = 1 << bits; + early_get_boot_cpu_id(); + if (boot_cpu_physical_apicid > 0) + apicid_base = boot_cpu_physical_apicid; + + for (i = 0; i < nr_nodes; i++) { + int index; + int nid; + int j; + + nid = find_node_by_addr(nodes[i].start); + if (nid == NUMA_NO_NODE) + continue; + + index = nodeids[nid] << bits; + if (fake_apicid_to_node[index + apicid_base] == NUMA_NO_NODE) + for (j = apicid_base; j < cores + apicid_base; j++) + fake_apicid_to_node[index + j] = i; +#ifdef CONFIG_ACPI_NUMA + __acpi_map_pxm_to_node(nid, i); +#endif + } + memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node)); +} +#endif /* CONFIG_NUMA_EMU */ + int __init amd_scan_nodes(void) { unsigned int bits; diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 7762a517d69..cc390f3a1bd 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -324,6 +324,24 @@ static int __init setup_physnodes(unsigned long start, unsigned long end, return ret; } +static void __init fake_physnodes(int acpi, int amd, int nr_nodes) +{ + int i; + + BUG_ON(acpi && amd); +#ifdef CONFIG_ACPI_NUMA + if (acpi) + acpi_fake_nodes(nodes, nr_nodes); +#endif +#ifdef CONFIG_AMD_NUMA + if (amd) + amd_fake_nodes(nodes, nr_nodes); +#endif + if (!acpi && !amd) + for (i = 0; i < nr_cpu_ids; i++) + numa_set_node(i, 0); +} + /* * Setups up nid to range from addr to addr + size. If the end * boundary is greater than max_addr, then max_addr is used instead. @@ -595,7 +613,7 @@ static int __init numa_emulation(unsigned long start_pfn, nodes[i].end >> PAGE_SHIFT); setup_node_bootmem(i, nodes[i].start, nodes[i].end); } - acpi_fake_nodes(nodes, num_nodes); + fake_physnodes(acpi, amd, num_nodes); numa_init_array(); return 0; } diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 8241bf0f6eb..c48b443706c 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -497,8 +497,6 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) { int i, j; - printk(KERN_INFO "Faking PXM affinity for fake nodes on real " - "topology.\n"); for (i = 0; i < num_nodes; i++) { int nid, pxm; -- cgit v1.2.3-70-g09d2 From a387e95a49743cf9835c5299ca549232618d8249 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 22 Dec 2010 17:23:56 -0800 Subject: x86, numa: Fix cpu to node mapping for sparse node ids NUMA boot code assumes that physical node ids start at 0, but the DIMMs that the apic id represents may not be reachable. If this is the case, node 0 is never online and cpus never end up getting appropriately assigned to a node. This causes the cpumask of all online nodes to be empty and machines crash with kernel code assuming online nodes have valid cpus. The fix is to appropriately map all the address ranges for physical nodes and ensure the cpu to node mapping function checks all possible nodes (up to MAX_NUMNODES) instead of simply checking nodes 0-N, where N is the number of physical nodes, for valid address ranges. This requires no longer "compressing" the address ranges of nodes in the physical node map from 0-N, but rather leave indices in physnodes[] to represent the actual node id of the physical node. Accordingly, the topology exported by both amd_get_nodes() and acpi_get_nodes() no longer must return the number of nodes to iterate through; all such iterations will now be to MAX_NUMNODES. This change also passes the end address of system RAM (which may be different from normal operation if mem= is specified on the command line) before the physnodes[] array is populated. ACPI parsed nodes are truncated to fit within the address range that respect the mem= boundaries and even some physical nodes may become unreachable in such cases. When NUMA emulation does succeed, any apicid to node mapping that exists for unreachable nodes are given default values so that proximity domains can still be assigned. This is important for node_distance() to function as desired. Signed-off-by: David Rientjes LKML-Reference: Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/acpi.h | 3 ++- arch/x86/include/asm/amd_nb.h | 2 +- arch/x86/mm/amdtopology_64.c | 9 +++------ arch/x86/mm/numa_64.c | 18 +++--------------- arch/x86/mm/srat_64.c | 22 ++++++++++++++++------ 5 files changed, 25 insertions(+), 29 deletions(-) (limited to 'arch/x86/mm/amdtopology_64.c') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 8288daf72dc..211ca3f7fd1 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -185,7 +185,8 @@ struct bootnode; #ifdef CONFIG_ACPI_NUMA extern int acpi_numa; -extern int acpi_get_nodes(struct bootnode *physnodes); +extern void acpi_get_nodes(struct bootnode *physnodes, unsigned long start, + unsigned long end); extern int acpi_scan_nodes(unsigned long start, unsigned long end); #define NR_NODE_MEMBLKS (MAX_NUMNODES*2) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 8f6192c1592..980f2256763 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -14,7 +14,7 @@ extern int amd_scan_nodes(void); #ifdef CONFIG_NUMA_EMU extern void amd_fake_nodes(const struct bootnode *nodes, int nr_nodes); -extern int amd_get_nodes(struct bootnode *nodes); +extern void amd_get_nodes(struct bootnode *nodes); #endif struct amd_northbridge { diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c index eb5cbb97b68..0df2623d103 100644 --- a/arch/x86/mm/amdtopology_64.c +++ b/arch/x86/mm/amdtopology_64.c @@ -187,17 +187,14 @@ static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = { [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE }; -int __init amd_get_nodes(struct bootnode *physnodes) +void __init amd_get_nodes(struct bootnode *physnodes) { int i; - int ret = 0; for_each_node_mask(i, nodes_parsed) { - physnodes[ret].start = nodes[i].start; - physnodes[ret].end = nodes[i].end; - ret++; + physnodes[i].start = nodes[i].start; + physnodes[i].end = nodes[i].end; } - return ret; } static int __init find_node_by_addr(unsigned long addr) diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index dd300c491f1..3d73201ba34 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -266,25 +266,24 @@ static char *cmdline __initdata; static int __init setup_physnodes(unsigned long start, unsigned long end, int acpi, int amd) { - int nr_nodes = 0; int ret = 0; int i; memset(physnodes, 0, sizeof(physnodes)); #ifdef CONFIG_ACPI_NUMA if (acpi) - nr_nodes = acpi_get_nodes(physnodes); + acpi_get_nodes(physnodes, start, end); #endif #ifdef CONFIG_AMD_NUMA if (amd) - nr_nodes = amd_get_nodes(physnodes); + amd_get_nodes(physnodes); #endif /* * Basic sanity checking on the physical node map: there may be errors * if the SRAT or AMD code incorrectly reported the topology or the mem= * kernel parameter is used. */ - for (i = 0; i < nr_nodes; i++) { + for (i = 0; i < MAX_NUMNODES; i++) { if (physnodes[i].start == physnodes[i].end) continue; if (physnodes[i].start > end) { @@ -299,17 +298,6 @@ static int __init setup_physnodes(unsigned long start, unsigned long end, physnodes[i].start = start; if (physnodes[i].end > end) physnodes[i].end = end; - } - - /* - * Remove all nodes that have no memory or were truncated because of the - * limited address range. - */ - for (i = 0; i < nr_nodes; i++) { - if (physnodes[i].start == physnodes[i].end) - continue; - physnodes[ret].start = physnodes[i].start; - physnodes[ret].end = physnodes[i].end; ret++; } diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index c48b443706c..a756bcf3fa4 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -340,17 +340,16 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) void __init acpi_numa_arch_fixup(void) {} #ifdef CONFIG_NUMA_EMU -int __init acpi_get_nodes(struct bootnode *physnodes) +void __init acpi_get_nodes(struct bootnode *physnodes, unsigned long start, + unsigned long end) { int i; - int ret = 0; for_each_node_mask(i, nodes_parsed) { - physnodes[ret].start = nodes[i].start; - physnodes[ret].end = nodes[i].end; - ret++; + cutoff_node(i, start, end); + physnodes[i].start = nodes[i].start; + physnodes[i].end = nodes[i].end; } - return ret; } #endif /* CONFIG_NUMA_EMU */ @@ -516,6 +515,17 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) fake_apicid_to_node[j] == NUMA_NO_NODE) fake_apicid_to_node[j] = i; } + + /* + * If there are apicid-to-node mappings for physical nodes that do not + * have a corresponding emulated node, it should default to a guaranteed + * value. + */ + for (i = 0; i < MAX_LOCAL_APIC; i++) + if (apicid_to_node[i] != NUMA_NO_NODE && + fake_apicid_to_node[i] == NUMA_NO_NODE) + fake_apicid_to_node[i] = 0; + for (i = 0; i < num_nodes; i++) __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i); memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node)); -- cgit v1.2.3-70-g09d2