From 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 16 Apr 2005 15:20:36 -0700 Subject: Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! --- arch/x86_64/mm/numa.c | 294 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 294 insertions(+) create mode 100644 arch/x86_64/mm/numa.c (limited to 'arch/x86_64/mm/numa.c') diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c new file mode 100644 index 00000000000..fd9f25d7a6c --- /dev/null +++ b/arch/x86_64/mm/numa.c @@ -0,0 +1,294 @@ +/* + * Generic VM initialization for x86-64 NUMA setups. + * Copyright 2002,2003 Andi Kleen, SuSE Labs. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#ifndef Dprintk +#define Dprintk(x...) +#endif + +struct pglist_data *node_data[MAX_NUMNODES]; +bootmem_data_t plat_node_bdata[MAX_NUMNODES]; + +int memnode_shift; +u8 memnodemap[NODEMAPSIZE]; + +unsigned char cpu_to_node[NR_CPUS] = { [0 ... NR_CPUS-1] = NUMA_NO_NODE }; +cpumask_t node_to_cpumask[MAX_NUMNODES]; + +int numa_off __initdata; + +int __init compute_hash_shift(struct node *nodes, int numnodes) +{ + int i; + int shift = 24; + u64 addr; + + /* When in doubt use brute force. */ + while (shift < 48) { + memset(memnodemap,0xff,sizeof(*memnodemap) * NODEMAPSIZE); + for (i = 0; i < numnodes; i++) { + if (nodes[i].start == nodes[i].end) + continue; + for (addr = nodes[i].start; + addr < nodes[i].end; + addr += (1UL << shift)) { + if (memnodemap[addr >> shift] != 0xff && + memnodemap[addr >> shift] != i) { + printk(KERN_INFO + "node %d shift %d addr %Lx conflict %d\n", + i, shift, addr, memnodemap[addr>>shift]); + goto next; + } + memnodemap[addr >> shift] = i; + } + } + return shift; + next: + shift++; + } + memset(memnodemap,0,sizeof(*memnodemap) * NODEMAPSIZE); + return -1; +} + +/* Initialize bootmem allocator for a node */ +void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) +{ + unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size, bootmap_start; + unsigned long nodedata_phys; + const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); + + start = round_up(start, ZONE_ALIGN); + + printk("Bootmem setup node %d %016lx-%016lx\n", nodeid, start, end); + + start_pfn = start >> PAGE_SHIFT; + end_pfn = end >> PAGE_SHIFT; + + nodedata_phys = find_e820_area(start, end, pgdat_size); + if (nodedata_phys == -1L) + panic("Cannot find memory pgdat in node %d\n", nodeid); + + Dprintk("nodedata_phys %lx\n", nodedata_phys); + + node_data[nodeid] = phys_to_virt(nodedata_phys); + memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); + NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid]; + NODE_DATA(nodeid)->node_start_pfn = start_pfn; + NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; + + /* Find a place for the bootmem map */ + bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); + bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); + bootmap_start = find_e820_area(bootmap_start, end, bootmap_pages<> PAGE_SHIFT, + start_pfn, end_pfn); + + e820_bootmem_free(NODE_DATA(nodeid), start, end); + + reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size); + reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages< 0 have a zero length zone DMA */ + dma_end_pfn = __pa(MAX_DMA_ADDRESS) >> PAGE_SHIFT; + if (start_pfn < dma_end_pfn) { + zones[ZONE_DMA] = dma_end_pfn - start_pfn; + zones[ZONE_NORMAL] = end_pfn - dma_end_pfn; + } else { + zones[ZONE_NORMAL] = end_pfn - start_pfn; + } + + free_area_init_node(nodeid, NODE_DATA(nodeid), zones, + start_pfn, NULL); +} + +void __init numa_init_array(void) +{ + int rr, i; + /* There are unfortunately some poorly designed mainboards around + that only connect memory to a single CPU. This breaks the 1:1 cpu->node + mapping. To avoid this fill in the mapping for all possible + CPUs, as the number of CPUs is not known yet. + We round robin the existing nodes. */ + rr = 0; + for (i = 0; i < NR_CPUS; i++) { + if (cpu_to_node[i] != NUMA_NO_NODE) + continue; + rr = next_node(rr, node_online_map); + if (rr == MAX_NUMNODES) + rr = first_node(node_online_map); + cpu_to_node[i] = rr; + rr++; + } + + set_bit(0, &node_to_cpumask[cpu_to_node(0)]); +} + +#ifdef CONFIG_NUMA_EMU +int numa_fake __initdata = 0; + +/* Numa emulation */ +static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn) +{ + int i; + struct node nodes[MAX_NUMNODES]; + unsigned long sz = ((end_pfn - start_pfn)< 1) { + unsigned long x = 1; + while ((x << 1) < sz) + x <<= 1; + if (x < sz/2) + printk("Numa emulation unbalanced. Complain to maintainer\n"); + sz = x; + } + + memset(&nodes,0,sizeof(nodes)); + for (i = 0; i < numa_fake; i++) { + nodes[i].start = (start_pfn<> 20); + node_set_online(i); + } + memnode_shift = compute_hash_shift(nodes, numa_fake); + if (memnode_shift < 0) { + memnode_shift = 0; + printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n"); + return -1; + } + for_each_online_node(i) + setup_node_bootmem(i, nodes[i].start, nodes[i].end); + numa_init_array(); + return 0; +} +#endif + +void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) +{ + int i; + +#ifdef CONFIG_NUMA_EMU + if (numa_fake && !numa_emulation(start_pfn, end_pfn)) + return; +#endif + +#ifdef CONFIG_ACPI_NUMA + if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, + end_pfn << PAGE_SHIFT)) + return; +#endif + +#ifdef CONFIG_K8_NUMA + if (!numa_off && !k8_scan_nodes(start_pfn<