Diffstat (limited to 'arch/x86/mm')
-rw-r--r--   arch/x86/mm/highmem_32.c       1
-rw-r--r--   arch/x86/mm/init_32.c         36
-rw-r--r--   arch/x86/mm/init_64.c         47
-rw-r--r--   arch/x86/mm/ioremap.c         36
-rw-r--r--   arch/x86/mm/k8topology_64.c   38
-rw-r--r--   arch/x86/mm/numa_64.c         42
-rw-r--r--   arch/x86/mm/pageattr.c        10
-rw-r--r--   arch/x86/mm/pat.c             40
8 files changed, 178 insertions, 72 deletions
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 9cf33d3ee5b..165c871ba9a 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -155,4 +155,3 @@ EXPORT_SYMBOL(kmap);
EXPORT_SYMBOL(kunmap);
EXPORT_SYMBOL(kmap_atomic);
EXPORT_SYMBOL(kunmap_atomic);
-EXPORT_SYMBOL(kmap_atomic_to_page);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 4a476189295..de236e419cb 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -287,47 +287,17 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
pkmap_page_table = pte;
}
-static void __meminit free_new_highpage(struct page *page)
-{
- init_page_count(page);
- __free_page(page);
- totalhigh_pages++;
-}
-
void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
{
if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
ClearPageReserved(page);
- free_new_highpage(page);
+ init_page_count(page);
+ __free_page(page);
+ totalhigh_pages++;
} else
SetPageReserved(page);
}
-static int __meminit
-add_one_highpage_hotplug(struct page *page, unsigned long pfn)
-{
- free_new_highpage(page);
- totalram_pages++;
-#ifdef CONFIG_FLATMEM
- max_mapnr = max(pfn, max_mapnr);
-#endif
- num_physpages++;
-
- return 0;
-}
-
-/*
- * Not currently handling the NUMA case.
- * Assuming single node and all memory that
- * has been added dynamically that would be
- * onlined here is in HIGHMEM.
- */
-void __meminit online_page(struct page *page)
-{
- ClearPageReserved(page);
- add_one_highpage_hotplug(page, page_to_pfn(page));
-}
-
#ifndef CONFIG_NUMA
static void __init set_highmem_pages_init(int bad_ppro)
{
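
Note on the init_32.c hunk above: the inlined sequence (ClearPageReserved, init_page_count, __free_page, bump the counter) is the standard way of handing a boot-time reserved page over to the buddy allocator. A minimal sketch of that pattern — the helper name is illustrative, not from the patch:

/*
 * Sketch: release one reserved highmem page to the page allocator.
 * The page arrives with PG_reserved set and an undefined refcount.
 */
static void release_boot_highpage(struct page *page)
{
	ClearPageReserved(page);	/* let the allocator touch it */
	init_page_count(page);		/* refcount = 1, as if freshly allocated */
	__free_page(page);		/* drop the ref; page enters the buddy lists */
	totalhigh_pages++;		/* account it as usable highmem */
}
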
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 0cca6266303..32ba13b0f81 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -620,15 +620,6 @@ void __init paging_init(void)
/*
* Memory hotplug specific functions
*/
-void online_page(struct page *page)
-{
- ClearPageReserved(page);
- init_page_count(page);
- __free_page(page);
- totalram_pages++;
- num_physpages++;
-}
-
#ifdef CONFIG_MEMORY_HOTPLUG
/*
* Memory is added always to NORMAL zone. This means you will never get
@@ -810,7 +801,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
{
#ifdef CONFIG_NUMA
- int nid = phys_to_nid(phys);
+ int nid, next_nid;
#endif
unsigned long pfn = phys >> PAGE_SHIFT;
@@ -829,10 +820,16 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
/* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_NUMA
- reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
+ nid = phys_to_nid(phys);
+ next_nid = phys_to_nid(phys + len - 1);
+ if (nid == next_nid)
+ reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
+ else
+ reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
#else
reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
#endif
+
if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
dma_reserve += len / PAGE_SIZE;
set_dma_reserve(dma_reserve);
@@ -926,6 +923,10 @@ const char *arch_vma_name(struct vm_area_struct *vma)
/*
* Initialise the sparsemem vmemmap using huge-pages at the PMD level.
*/
+static long __meminitdata addr_start, addr_end;
+static void __meminitdata *p_start, *p_end;
+static int __meminitdata node_start;
+
int __meminit
vmemmap_populate(struct page *start_page, unsigned long size, int node)
{
@@ -960,12 +961,32 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
PAGE_KERNEL_LARGE);
set_pmd(pmd, __pmd(pte_val(entry)));
- printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n",
- addr, addr + PMD_SIZE - 1, p, node);
+ /* check to see if we have contiguous blocks */
+ if (p_end != p || node_start != node) {
+ if (p_start)
+ printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
+ addr_start, addr_end-1, p_start, p_end-1, node_start);
+ addr_start = addr;
+ node_start = node;
+ p_start = p;
+ }
+ addr_end = addr + PMD_SIZE;
+ p_end = p + PMD_SIZE;
} else {
vmemmap_verify((pte_t *)pmd, node, addr, next);
}
}
return 0;
}
+
+void __meminit vmemmap_populate_print_last(void)
+{
+ if (p_start) {
+ printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
+ addr_start, addr_end-1, p_start, p_end-1, node_start);
+ p_start = NULL;
+ p_end = NULL;
+ node_start = 0;
+ }
+}
#endif
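
Note on the vmemmap hunks above: the change is a generic run-coalescing trick — remember the start of the current contiguous range and print only when the run breaks (the kernel version also breaks runs on a node change, omitted here). A self-contained userspace sketch of the same pattern, names illustrative:

#include <stdio.h>

#define STEP 0x200000UL			/* 2 MB, i.e. PMD_SIZE on x86-64 */

static unsigned long run_start, run_end;
static int have_run;

/* Record one 2 MB mapping; print the previous run once it breaks. */
static void note_mapping(unsigned long addr)
{
	if (have_run && addr == run_end) {
		run_end = addr + STEP;	/* contiguous: extend the run */
		return;
	}
	if (have_run)
		printf(" [%lx-%lx] PMD\n", run_start, run_end - 1);
	run_start = addr;		/* start a new run */
	run_end = addr + STEP;
	have_run = 1;
}

/* Analogous to vmemmap_populate_print_last(): flush the final run. */
static void flush_last_run(void)
{
	if (have_run)
		printf(" [%lx-%lx] PMD\n", run_start, run_end - 1);
	have_run = 0;
}

int main(void)
{
	note_mapping(0x0);
	note_mapping(STEP);		/* coalesced with the first */
	note_mapping(16 * STEP);	/* gap: previous run is printed */
	flush_last_run();
	return 0;
}
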
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index d176b23110c..71bb3159031 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -117,8 +117,8 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size,
* have to convert them into an offset in a page-aligned mapping, but the
* caller shouldn't need to know that small detail.
*/
-static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
- unsigned long prot_val)
+static void __iomem *__ioremap_caller(resource_size_t phys_addr,
+ unsigned long size, unsigned long prot_val, void *caller)
{
unsigned long pfn, offset, vaddr;
resource_size_t last_addr;
@@ -149,7 +149,8 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
* Don't allow anybody to remap normal RAM that we're using..
*/
for (pfn = phys_addr >> PAGE_SHIFT;
- (pfn << PAGE_SHIFT) < last_addr; pfn++) {
+ (pfn << PAGE_SHIFT) < (last_addr & PAGE_MASK);
+ pfn++) {
int is_ram = page_is_ram(pfn);
@@ -176,11 +177,11 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
/*
* Do not fallback to certain memory types with certain
* requested type:
- * - request is uncached, return cannot be write-back
- * - request is uncached, return cannot be write-combine
+ * - request is uc-, return cannot be write-back
+ * - request is uc-, return cannot be write-combine
* - request is write-combine, return cannot be write-back
*/
- if ((prot_val == _PAGE_CACHE_UC &&
+ if ((prot_val == _PAGE_CACHE_UC_MINUS &&
(new_prot_val == _PAGE_CACHE_WB ||
new_prot_val == _PAGE_CACHE_WC)) ||
(prot_val == _PAGE_CACHE_WC &&
@@ -201,6 +202,9 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
default:
prot = PAGE_KERNEL_NOCACHE;
break;
+ case _PAGE_CACHE_UC_MINUS:
+ prot = PAGE_KERNEL_UC_MINUS;
+ break;
case _PAGE_CACHE_WC:
prot = PAGE_KERNEL_WC;
break;
@@ -212,7 +216,7 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
/*
* Ok, go for it..
*/
- area = get_vm_area(size, VM_IOREMAP);
+ area = get_vm_area_caller(size, VM_IOREMAP, caller);
if (!area)
return NULL;
area->phys_addr = phys_addr;
@@ -255,7 +259,17 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
*/
void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
{
- return __ioremap(phys_addr, size, _PAGE_CACHE_UC);
+ /*
+ * Ideally, this should be:
+ * pat_wc_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS;
+ *
+	 * Until we fix all X drivers to use ioremap_wc(), we will use
+ * UC MINUS.
+ */
+ unsigned long val = _PAGE_CACHE_UC_MINUS;
+
+ return __ioremap_caller(phys_addr, size, val,
+ __builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_nocache);
@@ -272,7 +286,8 @@ EXPORT_SYMBOL(ioremap_nocache);
void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size)
{
if (pat_wc_enabled)
- return __ioremap(phys_addr, size, _PAGE_CACHE_WC);
+ return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
+ __builtin_return_address(0));
else
return ioremap_nocache(phys_addr, size);
}
@@ -280,7 +295,8 @@ EXPORT_SYMBOL(ioremap_wc);
void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
{
- return __ioremap(phys_addr, size, _PAGE_CACHE_WB);
+ return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WB,
+ __builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_cache);
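
Note on the ioremap.c hunks above: UC- ("uncached minus") differs from plain UC in that an MTRR can still strengthen a UC- mapping to write-combining, which is why it is the safer default until callers are converted to ioremap_wc(). A hedged sketch of the fallback rule the second hunk enforces — the constants are the kernel's, the helper is illustrative:

/*
 * Sketch: may the cache type returned by reserve_memtype() stand in
 * for the type the caller requested?  A weaker (more cacheable) type
 * must never replace a stronger one.
 */
static int fallback_allowed(unsigned long req, unsigned long ret)
{
	if (req == _PAGE_CACHE_UC_MINUS &&
	    (ret == _PAGE_CACHE_WB || ret == _PAGE_CACHE_WC))
		return 0;	/* uc- must not weaken to wb or wc */
	if (req == _PAGE_CACHE_WC && ret == _PAGE_CACHE_WB)
		return 0;	/* wc must not weaken to wb */
	return 1;
}
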
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c
index 86808e666f9..1f476e47784 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/k8topology_64.c
@@ -13,12 +13,15 @@
#include <linux/nodemask.h>
#include <asm/io.h>
#include <linux/pci_ids.h>
+#include <linux/acpi.h>
#include <asm/types.h>
#include <asm/mmzone.h>
#include <asm/proto.h>
#include <asm/e820.h>
#include <asm/pci-direct.h>
#include <asm/numa.h>
+#include <asm/mpspec.h>
+#include <asm/apic.h>
static __init int find_northbridge(void)
{
@@ -44,6 +47,30 @@ static __init int find_northbridge(void)
return -1;
}
+static __init void early_get_boot_cpu_id(void)
+{
+ /*
+ * need to get boot_cpu_id so can use that to create apicid_to_node
+ * in k8_scan_nodes()
+ */
+ /*
+ * Find possible boot-time SMP configuration:
+ */
+ early_find_smp_config();
+#ifdef CONFIG_ACPI
+ /*
+ * Read APIC information from ACPI tables.
+ */
+ early_acpi_boot_init();
+#endif
+ /*
+ * get boot-time SMP configuration:
+ */
+ if (smp_found_config)
+ early_get_smp_config();
+ early_init_lapic_mapping();
+}
+
int __init k8_scan_nodes(unsigned long start, unsigned long end)
{
unsigned long prevbase;
@@ -56,6 +83,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
unsigned cores;
unsigned bits;
int j;
+ unsigned apicid_base;
if (!early_pci_allowed())
return -1;
@@ -174,11 +202,19 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
/* use the coreid bits from early_identify_cpu */
bits = boot_cpu_data.x86_coreid_bits;
cores = (1<<bits);
+ apicid_base = 0;
+	/* need to get boot_cpu_id early for systems with apicid lifting */
+ early_get_boot_cpu_id();
+ if (boot_cpu_physical_apicid > 0) {
+ printk(KERN_INFO "BSP APIC ID: %02x\n",
+ boot_cpu_physical_apicid);
+ apicid_base = boot_cpu_physical_apicid;
+ }
for (i = 0; i < 8; i++) {
if (nodes[i].start != nodes[i].end) {
nodeid = nodeids[i];
- for (j = 0; j < cores; j++)
+ for (j = apicid_base; j < cores + apicid_base; j++)
apicid_to_node[(nodeid << bits) + j] = i;
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
}
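
Note on the k8topology_64.c hunk above: with APIC-ID lifting the BSP's APIC ID is non-zero, so the per-node core IDs are offset by that base before indexing apicid_to_node. A small sketch of the index computation — the values in the comment are assumed for illustration:

/*
 * Sketch: fill apicid_to_node[] when APIC IDs are lifted by a base.
 * Example with bits = 1 (2 cores per node) and apicid_base = 4:
 * node 0's cores land at APIC IDs 4 and 5, node 1's at 6 and 7.
 */
static void map_node_apicids(int *apicid_to_node, int node, unsigned nodeid,
			     unsigned bits, unsigned cores, unsigned apicid_base)
{
	unsigned j;

	for (j = apicid_base; j < cores + apicid_base; j++)
		apicid_to_node[(nodeid << bits) + j] = node;
}
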
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 9a6892200b2..c5066d519e5 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -196,6 +196,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
unsigned long bootmap_start, nodedata_phys;
void *bootmap;
const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
+ int nid;
start = round_up(start, ZONE_ALIGN);
@@ -218,9 +219,19 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
NODE_DATA(nodeid)->node_start_pfn = start_pfn;
NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
- /* Find a place for the bootmem map */
+ /*
+	 * Find a place for the bootmem map.
+	 * nodedata_phys could have been placed on another node by
+	 * alloc_bootmem, so make sure bootmap_start is not too low;
+	 * otherwise early_node_mem will grab it with find_e820_area
+	 * instead of alloc_bootmem, which could clash with the
+	 * reserved range.
+ */
bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
- bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
+ nid = phys_to_nid(nodedata_phys);
+ if (nid == nodeid)
+ bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
+ else
+ bootmap_start = round_up(start, PAGE_SIZE);
/*
* SMP_CACHE_BYTES could be enough, but init_bootmem_node likes
* to use that to align to PAGE_SIZE
@@ -245,10 +256,29 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
free_bootmem_with_active_regions(nodeid, end);
- reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size,
- BOOTMEM_DEFAULT);
- reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
- bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
+ /*
+	 * convert early reservations to bootmem reservations now,
+	 * otherwise early_node_mem could use early reserved memory
+	 * on a previous node
+ */
+ early_res_to_bootmem(start, end);
+
+ /*
+	 * in some cases early_node_mem could use alloc_bootmem
+	 * to get a range on another node; don't reserve that again
+ */
+ if (nid != nodeid)
+ printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);
+ else
+ reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys,
+ pgdat_size, BOOTMEM_DEFAULT);
+ nid = phys_to_nid(bootmap_start);
+ if (nid != nodeid)
+ printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid);
+ else
+ reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
+ bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
+
#ifdef CONFIG_ACPI_NUMA
srat_reserve_add_area(nodeid);
#endif
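
Note on the numa_64.c hunks above: both checks follow the same pattern — after a boot-time allocation, verify which node the memory actually landed on before reserving it through that node's own bootmem map. A sketch of the pattern with a hypothetical helper:

/*
 * Sketch: reserve a range via its node's bootmem map only when it is
 * physically on that node; a range that landed elsewhere was handed
 * out by a cross-node alloc_bootmem() and must not be reserved twice.
 */
static void __init reserve_on_node(int nodeid, unsigned long phys,
				   unsigned long len)
{
	int nid = phys_to_nid(phys);	/* node the range actually sits on */

	if (nid != nodeid)
		printk(KERN_INFO " range for node %d is on node %d\n",
		       nodeid, nid);
	else
		reserve_bootmem_node(NODE_DATA(nodeid), phys, len,
				     BOOTMEM_DEFAULT);
}
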
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index bd5e05c654d..60bcb5b6a37 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -777,14 +777,20 @@ static inline int change_page_attr_clear(unsigned long addr, int numpages,
int _set_memory_uc(unsigned long addr, int numpages)
{
+ /*
+	 * For now use UC MINUS; see comments in ioremap_nocache()
+ */
return change_page_attr_set(addr, numpages,
- __pgprot(_PAGE_CACHE_UC));
+ __pgprot(_PAGE_CACHE_UC_MINUS));
}
int set_memory_uc(unsigned long addr, int numpages)
{
+ /*
+	 * For now use UC MINUS; see comments in ioremap_nocache()
+ */
if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
- _PAGE_CACHE_UC, NULL))
+ _PAGE_CACHE_UC_MINUS, NULL))
return -EINVAL;
return _set_memory_uc(addr, numpages);
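
Note on the pageattr.c hunk above: set_memory_uc() follows PAT's two-step discipline — first record the range's cache type so conflicting requests are refused, then rewrite the page attributes. A condensed sketch of that flow, error handling elided:

/* Sketch: PAT two-step for switching a linear-mapping range to UC-. */
static int make_range_uc_minus(unsigned long addr, int numpages)
{
	/* step 1: claim the type; fails on a conflicting reservation */
	if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
			    _PAGE_CACHE_UC_MINUS, NULL))
		return -EINVAL;

	/* step 2: actually rewrite the PTEs */
	return change_page_attr_set(addr, numpages,
				    __pgprot(_PAGE_CACHE_UC_MINUS));
}
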
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 9851265e4d6..277446cd30b 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -16,6 +16,7 @@
#include <asm/msr.h>
#include <asm/tlbflush.h>
#include <asm/processor.h>
+#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/pat.h>
#include <asm/e820.h>
@@ -386,8 +387,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
break;
}
- printk(KERN_INFO "Overlap at 0x%Lx-0x%Lx\n",
- saved_ptr->start, saved_ptr->end);
+	pr_debug("Overlap at 0x%Lx-0x%Lx\n",
+		 saved_ptr->start, saved_ptr->end);
/* No conflict. Go ahead and add this new entry */
list_add(&new_entry->nd, &saved_ptr->nd);
new_entry = NULL;
@@ -477,14 +478,43 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
return vma_prot;
}
+#ifdef CONFIG_NONPROMISC_DEVMEM
+/* This check is done in drivers/char/mem.c in case of NONPROMISC_DEVMEM */
+static inline int range_is_allowed(unsigned long pfn, unsigned long size)
+{
+ return 1;
+}
+#else
+static inline int range_is_allowed(unsigned long pfn, unsigned long size)
+{
+ u64 from = ((u64)pfn) << PAGE_SHIFT;
+ u64 to = from + size;
+ u64 cursor = from;
+
+ while (cursor < to) {
+ if (!devmem_is_allowed(pfn)) {
+ printk(KERN_INFO
+ "Program %s tried to access /dev/mem between %Lx->%Lx.\n",
+ current->comm, from, to);
+ return 0;
+ }
+ cursor += PAGE_SIZE;
+ pfn++;
+ }
+ return 1;
+}
+#endif /* CONFIG_NONPROMISC_DEVMEM */
+
int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t *vma_prot)
{
u64 offset = ((u64) pfn) << PAGE_SHIFT;
unsigned long flags = _PAGE_CACHE_UC_MINUS;
- unsigned long ret_flags;
int retval;
+ if (!range_is_allowed(pfn, size))
+ return 0;
+
if (file->f_flags & O_SYNC) {
flags = _PAGE_CACHE_UC;
}
@@ -518,14 +548,12 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
if (flags != _PAGE_CACHE_UC_MINUS) {
retval = reserve_memtype(offset, offset + size, flags, NULL);
} else {
- retval = reserve_memtype(offset, offset + size, -1, &ret_flags);
+ retval = reserve_memtype(offset, offset + size, -1, &flags);
}
if (retval < 0)
return 0;
- flags = ret_flags;
-
if (pfn <= max_pfn_mapped &&
ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
free_memtype(offset, offset + size);
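
Note on the last pat.c hunk: the simplification relies on reserve_memtype()'s in/out convention — a requested type of -1 asks PAT to pick whatever is compatible with existing mappings of the range and write it back through the pointer, so the caller's flags variable serves as both default and result. A sketch of the calling pattern:

/*
 * Sketch: let PAT choose the effective cache type for a range.
 * With req_type == -1, reserve_memtype() returns the compatible
 * type via *flags instead of enforcing a specific one.
 */
static int pick_memtype(u64 offset, unsigned long size, unsigned long *flags)
{
	if (*flags != _PAGE_CACHE_UC_MINUS)
		return reserve_memtype(offset, offset + size, *flags, NULL);
	return reserve_memtype(offset, offset + size, -1, flags);
}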