Diffstat (limited to 'arch/x86/mm')
-rw-r--r--   arch/x86/mm/highmem_32.c      1
-rw-r--r--   arch/x86/mm/init_32.c        36
-rw-r--r--   arch/x86/mm/init_64.c        47
-rw-r--r--   arch/x86/mm/ioremap.c        36
-rw-r--r--   arch/x86/mm/k8topology_64.c  38
-rw-r--r--   arch/x86/mm/numa_64.c        42
-rw-r--r--   arch/x86/mm/pageattr.c       10
-rw-r--r--   arch/x86/mm/pat.c            40
8 files changed, 178 insertions(+), 72 deletions(-)
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 9cf33d3ee5b..165c871ba9a 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -155,4 +155,3 @@ EXPORT_SYMBOL(kmap);
 EXPORT_SYMBOL(kunmap);
 EXPORT_SYMBOL(kmap_atomic);
 EXPORT_SYMBOL(kunmap_atomic);
-EXPORT_SYMBOL(kmap_atomic_to_page);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 4a476189295..de236e419cb 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -287,47 +287,17 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
         pkmap_page_table = pte;
 }
 
-static void __meminit free_new_highpage(struct page *page)
-{
-        init_page_count(page);
-        __free_page(page);
-        totalhigh_pages++;
-}
-
 void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
 {
         if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
                 ClearPageReserved(page);
-                free_new_highpage(page);
+                init_page_count(page);
+                __free_page(page);
+                totalhigh_pages++;
         } else
                 SetPageReserved(page);
 }
 
-static int __meminit
-add_one_highpage_hotplug(struct page *page, unsigned long pfn)
-{
-        free_new_highpage(page);
-        totalram_pages++;
-#ifdef CONFIG_FLATMEM
-        max_mapnr = max(pfn, max_mapnr);
-#endif
-        num_physpages++;
-
-        return 0;
-}
-
-/*
- * Not currently handling the NUMA case.
- * Assuming single node and all memory that
- * has been added dynamically that would be
- * onlined here is in HIGHMEM.
- */
-void __meminit online_page(struct page *page)
-{
-        ClearPageReserved(page);
-        add_one_highpage_hotplug(page, page_to_pfn(page));
-}
-
 #ifndef CONFIG_NUMA
 static void __init set_highmem_pages_init(int bad_ppro)
 {
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 0cca6266303..32ba13b0f81 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -620,15 +620,6 @@ void __init paging_init(void)
 /*
  * Memory hotplug specific functions
  */
-void online_page(struct page *page)
-{
-        ClearPageReserved(page);
-        init_page_count(page);
-        __free_page(page);
-        totalram_pages++;
-        num_physpages++;
-}
-
 #ifdef CONFIG_MEMORY_HOTPLUG
 /*
  * Memory is added always to NORMAL zone. This means you will never get
@@ -810,7 +801,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
 {
 #ifdef CONFIG_NUMA
-        int nid = phys_to_nid(phys);
+        int nid, next_nid;
 #endif
         unsigned long pfn = phys >> PAGE_SHIFT;
 
@@ -829,10 +820,16 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
 
         /* Should check here against the e820 map to avoid double free */
 #ifdef CONFIG_NUMA
-        reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
+        nid = phys_to_nid(phys);
+        next_nid = phys_to_nid(phys + len - 1);
+        if (nid == next_nid)
+                reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
+        else
+                reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
 #else
         reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
 #endif
+
         if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
                 dma_reserve += len / PAGE_SIZE;
                 set_dma_reserve(dma_reserve);
@@ -926,6 +923,10 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 /*
  * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
  */
+static long __meminitdata addr_start, addr_end;
+static void __meminitdata *p_start, *p_end;
+static int __meminitdata node_start;
+
 int __meminit
 vmemmap_populate(struct page *start_page, unsigned long size, int node)
 {
@@ -960,12 +961,32 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
                                                 PAGE_KERNEL_LARGE);
                         set_pmd(pmd, __pmd(pte_val(entry)));
 
-                        printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n",
-                                addr, addr + PMD_SIZE - 1, p, node);
+                        /* check to see if we have contiguous blocks */
+                        if (p_end != p || node_start != node) {
+                                if (p_start)
+                                        printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
+                                                addr_start, addr_end-1, p_start, p_end-1, node_start);
+                                addr_start = addr;
+                                node_start = node;
+                                p_start = p;
+                        }
+                        addr_end = addr + PMD_SIZE;
+                        p_end = p + PMD_SIZE;
                 } else {
                         vmemmap_verify((pte_t *)pmd, node, addr, next);
                 }
         }
         return 0;
 }
+
+void __meminit vmemmap_populate_print_last(void)
+{
+        if (p_start) {
+                printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
+                        addr_start, addr_end-1, p_start, p_end-1, node_start);
+                p_start = NULL;
+                p_end = NULL;
+                node_start = 0;
+        }
+}
 #endif
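A note on the vmemmap hunks above: instead of emitting one KERN_DEBUG line per 2 MiB PMD mapping, the new code remembers the current contiguous run (addr_start/addr_end, p_start/p_end, node_start), prints a single line only when continuity breaks, and vmemmap_populate_print_last() flushes the final run. Below is a minimal userspace sketch of the same coalescing pattern; the block size, the fake address stream, and the omission of node tracking are illustrative assumptions, not kernel code.

#include <stdio.h>

#define BLK 0x200000UL  /* stand-in for PMD_SIZE (2 MiB) */

static unsigned long addr_start, addr_end;      /* virtual run */
static unsigned long p_start, p_end;            /* backing-store run */

static void flush_run(void)
{
        if (p_start)
                printf(" [%lx-%lx] -> [%lx-%lx]\n",
                       addr_start, addr_end - 1, p_start, p_end - 1);
        p_start = p_end = 0;
}

static void map_block(unsigned long addr, unsigned long p)
{
        /* continuity broken: report the previous run, start a new one */
        if (p_end != p) {
                flush_run();
                addr_start = addr;
                p_start = p;
        }
        addr_end = addr + BLK;
        p_end = p + BLK;
}

int main(void)
{
        map_block(0xc0000000UL, 0x10000000UL);  /* starts a run */
        map_block(0xc0200000UL, 0x10200000UL);  /* extends it */
        map_block(0xc0400000UL, 0x20000000UL);  /* backing gap: prints run 1 */
        flush_run();    /* like vmemmap_populate_print_last(): prints run 2 */
        return 0;
}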
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index d176b23110c..71bb3159031 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -117,8 +117,8 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size,
  * have to convert them into an offset in a page-aligned mapping, but the
  * caller shouldn't need to know that small detail.
  */
-static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
-                               unsigned long prot_val)
+static void __iomem *__ioremap_caller(resource_size_t phys_addr,
+                unsigned long size, unsigned long prot_val, void *caller)
 {
         unsigned long pfn, offset, vaddr;
         resource_size_t last_addr;
@@ -149,7 +149,8 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
          * Don't allow anybody to remap normal RAM that we're using..
          */
         for (pfn = phys_addr >> PAGE_SHIFT;
-                                (pfn << PAGE_SHIFT) < last_addr; pfn++) {
+                                (pfn << PAGE_SHIFT) < (last_addr & PAGE_MASK);
+                                pfn++) {
 
                 int is_ram = page_is_ram(pfn);
 
@@ -176,11 +177,11 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
         /*
          * Do not fallback to certain memory types with certain
          * requested type:
-         * - request is uncached, return cannot be write-back
-         * - request is uncached, return cannot be write-combine
+         * - request is uc-, return cannot be write-back
+         * - request is uc-, return cannot be write-combine
          * - request is write-combine, return cannot be write-back
          */
-        if ((prot_val == _PAGE_CACHE_UC &&
+        if ((prot_val == _PAGE_CACHE_UC_MINUS &&
              (new_prot_val == _PAGE_CACHE_WB ||
               new_prot_val == _PAGE_CACHE_WC)) ||
             (prot_val == _PAGE_CACHE_WC &&
@@ -201,6 +202,9 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
         default:
                 prot = PAGE_KERNEL_NOCACHE;
                 break;
+        case _PAGE_CACHE_UC_MINUS:
+                prot = PAGE_KERNEL_UC_MINUS;
+                break;
         case _PAGE_CACHE_WC:
                 prot = PAGE_KERNEL_WC;
                 break;
@@ -212,7 +216,7 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
         /*
          * Ok, go for it..
          */
-        area = get_vm_area(size, VM_IOREMAP);
+        area = get_vm_area_caller(size, VM_IOREMAP, caller);
         if (!area)
                 return NULL;
         area->phys_addr = phys_addr;
@@ -255,7 +259,17 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
  */
 void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
 {
-        return __ioremap(phys_addr, size, _PAGE_CACHE_UC);
+        /*
+         * Ideally, this should be:
+         *      pat_wc_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS;
+         *
+         * Till we fix all X drivers to use ioremap_wc(), we will use
+         * UC MINUS.
+         */
+        unsigned long val = _PAGE_CACHE_UC_MINUS;
+
+        return __ioremap_caller(phys_addr, size, val,
+                                __builtin_return_address(0));
 }
 EXPORT_SYMBOL(ioremap_nocache);
 
@@ -272,7 +286,8 @@ EXPORT_SYMBOL(ioremap_nocache);
 void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size)
 {
         if (pat_wc_enabled)
-                return __ioremap(phys_addr, size, _PAGE_CACHE_WC);
+                return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
+                                        __builtin_return_address(0));
         else
                 return ioremap_nocache(phys_addr, size);
 }
@@ -280,7 +295,8 @@ EXPORT_SYMBOL(ioremap_wc);
 
 void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
 {
-        return __ioremap(phys_addr, size, _PAGE_CACHE_WB);
+        return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WB,
+                                __builtin_return_address(0));
 }
 EXPORT_SYMBOL(ioremap_cache);
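The ioremap.c rework threads a caller pointer through __ioremap_caller() so that get_vm_area_caller() can record which driver actually created each mapping. The sketch below shows the underlying GCC idiom under the same constraint the kernel wrappers honor: each public entry point captures __builtin_return_address(0) itself, and stays out-of-line, so the recorded address belongs to the external caller rather than an internal helper. record_mapping() and my_ioremap_nocache() are made-up stand-ins, not kernel interfaces.

#include <stdio.h>

/* stand-in for get_vm_area_caller(): remembers who asked for the mapping */
static void record_mapping(unsigned long phys, void *caller)
{
        printf("mapping %#lx requested from %p\n", phys, caller);
}

static void *remap_internal(unsigned long phys, void *caller)
{
        record_mapping(phys, caller);   /* attribute to the real caller */
        return NULL;                    /* actual mapping elided in this sketch */
}

/* noinline so the wrapper keeps a frame and its own return address */
__attribute__((noinline))
static void *my_ioremap_nocache(unsigned long phys)
{
        return remap_internal(phys, __builtin_return_address(0));
}

int main(void)
{
        my_ioremap_nocache(0xfee00000UL);       /* prints main()'s call site */
        return 0;
}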
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c
index 86808e666f9..1f476e47784 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/k8topology_64.c
@@ -13,12 +13,15 @@
 #include <linux/nodemask.h>
 #include <asm/io.h>
 #include <linux/pci_ids.h>
+#include <linux/acpi.h>
 #include <asm/types.h>
 #include <asm/mmzone.h>
 #include <asm/proto.h>
 #include <asm/e820.h>
 #include <asm/pci-direct.h>
 #include <asm/numa.h>
+#include <asm/mpspec.h>
+#include <asm/apic.h>
 
 static __init int find_northbridge(void)
 {
@@ -44,6 +47,30 @@ static __init int find_northbridge(void)
         return -1;
 }
 
+static __init void early_get_boot_cpu_id(void)
+{
+        /*
+         * need to get boot_cpu_id so we can use it to create apicid_to_node
+         * in k8_scan_nodes()
+         */
+        /*
+         * Find possible boot-time SMP configuration:
+         */
+        early_find_smp_config();
+#ifdef CONFIG_ACPI
+        /*
+         * Read APIC information from ACPI tables.
+         */
+        early_acpi_boot_init();
+#endif
+        /*
+         * get boot-time SMP configuration:
+         */
+        if (smp_found_config)
+                early_get_smp_config();
+        early_init_lapic_mapping();
+}
+
 int __init k8_scan_nodes(unsigned long start, unsigned long end)
 {
         unsigned long prevbase;
@@ -56,6 +83,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
         unsigned cores;
         unsigned bits;
         int j;
+        unsigned apicid_base;
 
         if (!early_pci_allowed())
                 return -1;
@@ -174,11 +202,19 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
         /* use the coreid bits from early_identify_cpu */
         bits = boot_cpu_data.x86_coreid_bits;
         cores = (1<<bits);
+        apicid_base = 0;
+        /* need to get boot_cpu_id early for system with apicid lifting */
+        early_get_boot_cpu_id();
+        if (boot_cpu_physical_apicid > 0) {
+                printk(KERN_INFO "BSP APIC ID: %02x\n",
+                                boot_cpu_physical_apicid);
+                apicid_base = boot_cpu_physical_apicid;
+        }
 
         for (i = 0; i < 8; i++) {
                 if (nodes[i].start != nodes[i].end) {
                         nodeid = nodeids[i];
-                        for (j = 0; j < cores; j++)
+                        for (j = apicid_base; j < cores + apicid_base; j++)
                                 apicid_to_node[(nodeid << bits) + j] = i;
                         setup_node_bootmem(i, nodes[i].start, nodes[i].end);
                 }
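The k8topology change handles "apicid lifting": firmware on some systems offsets all APIC IDs so the BSP's ID is nonzero, and filling apicid_to_node[] from 0 would then tag entries no CPU ever uses. The loop now starts at the boot CPU's APIC ID instead. A small self-contained sketch of that offset arithmetic, with invented sample values (2 cores per node, BSP APIC ID 4):

#include <stdio.h>

#define MAX_APICID 64

int main(void)
{
        int apicid_to_node[MAX_APICID];
        unsigned bits = 1;              /* coreid bits: 2 cores per node */
        unsigned cores = 1u << bits;
        unsigned apicid_base = 4;       /* pretend the BSP's APIC ID is 4 */
        int nodeid = 0;                 /* hardware node id */
        int node = 0;                   /* logical node */

        /* offset the table fill by the lifted base, as k8_scan_nodes() does */
        for (unsigned j = apicid_base; j < cores + apicid_base; j++)
                apicid_to_node[(nodeid << bits) + j] = node;

        for (unsigned j = apicid_base; j < cores + apicid_base; j++)
                printf("apicid %#x -> node %d\n", (nodeid << bits) + j,
                       apicid_to_node[(nodeid << bits) + j]);
        return 0;
}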
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 9a6892200b2..c5066d519e5 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -196,6 +196,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
         unsigned long bootmap_start, nodedata_phys;
         void *bootmap;
         const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
+        int nid;
 
         start = round_up(start, ZONE_ALIGN);
 
@@ -218,9 +219,19 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
         NODE_DATA(nodeid)->node_start_pfn = start_pfn;
         NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
 
-        /* Find a place for the bootmem map */
+        /*
+         * Find a place for the bootmem map.
+         * nodedata_phys could have been placed on another node by
+         * alloc_bootmem, so make sure bootmap_start is not too small;
+         * otherwise early_node_mem will grab the range with find_e820_area
+         * instead of alloc_bootmem, which could clash with the reserved
+         * range.
+         */
         bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
-        bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
+        nid = phys_to_nid(nodedata_phys);
+        if (nid == nodeid)
+                bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
+        else
+                bootmap_start = round_up(start, PAGE_SIZE);
         /*
          * SMP_CACHE_BYTES could be enough, but init_bootmem_node likes
          * to use that to align to PAGE_SIZE
@@ -245,10 +256,29 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 
         free_bootmem_with_active_regions(nodeid, end);
 
-        reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size,
-                        BOOTMEM_DEFAULT);
-        reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
-                        bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
+        /*
+         * Convert early reserves to bootmem reserves now; otherwise
+         * early_node_mem could use early reserved memory on a
+         * previous node.
+         */
+        early_res_to_bootmem(start, end);
+
+        /*
+         * In some cases early_node_mem could use alloc_bootmem to get
+         * a range on another node; don't reserve that again.
+         */
+        if (nid != nodeid)
+                printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);
+        else
+                reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys,
+                                        pgdat_size, BOOTMEM_DEFAULT);
+        nid = phys_to_nid(bootmap_start);
+        if (nid != nodeid)
+                printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid);
+        else
+                reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
+                                 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
+
 #ifdef CONFIG_ACPI_NUMA
         srat_reserve_add_area(nodeid);
 #endif
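The setup_node_bootmem() hunks guard against early allocations landing on a different node than the one being initialized: reserving such a range via reserve_bootmem_node() on this node's own bootmem map would corrupt the accounting, so the code now reserves locally only when phys_to_nid() confirms the range is local. A compact userspace sketch of that check, assuming an invented layout of one node per GiB:

#include <stdio.h>
#include <stdint.h>

#define NODE_SPAN (1ULL << 30)  /* assumption: each node covers 1 GiB */

static int phys_to_nid(uint64_t phys)
{
        return (int)(phys / NODE_SPAN);
}

static void reserve_on_node(int nodeid, uint64_t phys, uint64_t len)
{
        int nid = phys_to_nid(phys);

        if (nid != nodeid)      /* allocation fell on another node: skip */
                printf(" NODE_DATA(%d) on node %d\n", nodeid, nid);
        else
                printf(" reserving %#llx+%#llx on node %d\n",
                       (unsigned long long)phys,
                       (unsigned long long)len, nodeid);
}

int main(void)
{
        reserve_on_node(1, NODE_SPAN + 0x1000, 0x1000); /* local: reserve */
        reserve_on_node(1, 0x1000, 0x1000);             /* remote: skip */
        return 0;
}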
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index bd5e05c654d..60bcb5b6a37 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -777,14 +777,20 @@ static inline int change_page_attr_clear(unsigned long addr, int numpages,
 
 int _set_memory_uc(unsigned long addr, int numpages)
 {
+        /*
+         * for now UC MINUS. see comments in ioremap_nocache()
+         */
         return change_page_attr_set(addr, numpages,
-                                    __pgprot(_PAGE_CACHE_UC));
+                                    __pgprot(_PAGE_CACHE_UC_MINUS));
 }
 
 int set_memory_uc(unsigned long addr, int numpages)
 {
+        /*
+         * for now UC MINUS. see comments in ioremap_nocache()
+         */
         if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
-                            _PAGE_CACHE_UC, NULL))
+                            _PAGE_CACHE_UC_MINUS, NULL))
                 return -EINVAL;
 
         return _set_memory_uc(addr, numpages);
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 9851265e4d6..277446cd30b 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -16,6 +16,7 @@
 #include <asm/msr.h>
 #include <asm/tlbflush.h>
 #include <asm/processor.h>
+#include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/pat.h>
 #include <asm/e820.h>
@@ -386,8 +387,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
                         break;
                 }
 
-                printk(KERN_INFO "Overlap at 0x%Lx-0x%Lx\n",
-                        saved_ptr->start, saved_ptr->end);
+                pr_debug("Overlap at 0x%Lx-0x%Lx\n",
+                         saved_ptr->start, saved_ptr->end);
                 /* No conflict. Go ahead and add this new entry */
                 list_add(&new_entry->nd, &saved_ptr->nd);
                 new_entry = NULL;
@@ -477,14 +478,43 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
         return vma_prot;
 }
 
+#ifdef CONFIG_NONPROMISC_DEVMEM
+/* This check is done in drivers/char/mem.c in the case of NONPROMISC_DEVMEM */
+static inline int range_is_allowed(unsigned long pfn, unsigned long size)
+{
+        return 1;
+}
+#else
+static inline int range_is_allowed(unsigned long pfn, unsigned long size)
+{
+        u64 from = ((u64)pfn) << PAGE_SHIFT;
+        u64 to = from + size;
+        u64 cursor = from;
+
+        while (cursor < to) {
+                if (!devmem_is_allowed(pfn)) {
+                        printk(KERN_INFO
+                "Program %s tried to access /dev/mem between %Lx->%Lx.\n",
+                                current->comm, from, to);
+                        return 0;
+                }
+                cursor += PAGE_SIZE;
+                pfn++;
+        }
+        return 1;
+}
+#endif /* CONFIG_NONPROMISC_DEVMEM */
+
 int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
                                 unsigned long size, pgprot_t *vma_prot)
 {
         u64 offset = ((u64) pfn) << PAGE_SHIFT;
         unsigned long flags = _PAGE_CACHE_UC_MINUS;
-        unsigned long ret_flags;
         int retval;
 
+        if (!range_is_allowed(pfn, size))
+                return 0;
+
         if (file->f_flags & O_SYNC) {
                 flags = _PAGE_CACHE_UC;
         }
@@ -518,14 +548,12 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
         if (flags != _PAGE_CACHE_UC_MINUS) {
                 retval = reserve_memtype(offset, offset + size, flags, NULL);
         } else {
-                retval = reserve_memtype(offset, offset + size, -1, &ret_flags);
+                retval = reserve_memtype(offset, offset + size, -1, &flags);
         }
 
         if (retval < 0)
                 return 0;
 
-        flags = ret_flags;
-
         if (pfn <= max_pfn_mapped &&
             ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
                 free_memtype(offset, offset + size);
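Finally, the new range_is_allowed() in pat.c walks the requested /dev/mem window one page frame at a time and fails the whole mmap if any frame is refused. A standalone sketch of the same walk; my_page_is_allowed() is a made-up policy standing in for devmem_is_allowed(), and the 1 MiB cutoff is an illustrative assumption:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE (1UL << PAGE_SHIFT)

/* assumed policy: only the first 1 MiB (pfns 0-255) is open */
static int my_page_is_allowed(unsigned long pfn)
{
        return pfn < 256;
}

static int range_is_allowed(unsigned long pfn, unsigned long size)
{
        uint64_t from = (uint64_t)pfn << PAGE_SHIFT;
        uint64_t to = from + size;
        uint64_t cursor = from;

        while (cursor < to) {   /* one policy check per page frame */
                if (!my_page_is_allowed(pfn)) {
                        printf("access %llx->%llx denied at pfn %lx\n",
                               (unsigned long long)from,
                               (unsigned long long)to, pfn);
                        return 0;
                }
                cursor += PAGE_SIZE;
                pfn++;
        }
        return 1;
}

int main(void)
{
        printf("low range: %d\n", range_is_allowed(0x10, 2 * PAGE_SIZE));
        printf("high range: %d\n", range_is_allowed(0x200, PAGE_SIZE));
        return 0;
}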