summaryrefslogtreecommitdiffstats
path: root/arch/x86/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/init.c24
-rw-r--r--arch/x86/mm/init_64.c27
-rw-r--r--arch/x86/mm/numa.c31
-rw-r--r--arch/x86/mm/numa_64.c2
-rw-r--r--arch/x86/mm/numa_emulation.c20
-rw-r--r--arch/x86/mm/srat_32.c4
6 files changed, 59 insertions, 49 deletions
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 286d289b039..37b8b0fe832 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -81,6 +81,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT);
}
+void __init native_pagetable_reserve(u64 start, u64 end)
+{
+ memblock_x86_reserve_range(start, end, "PGTABLE");
+}
+
struct map_range {
unsigned long start;
unsigned long end;
@@ -272,9 +277,24 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
__flush_tlb_all();
+ /*
+ * Reserve the kernel pagetable pages we used (pgt_buf_start -
+ * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top)
+ * so that they can be reused for other purposes.
+ *
+ * On native it just means calling memblock_x86_reserve_range, on Xen it
+ * also means marking RW the pagetable pages that we allocated before
+ * but that haven't been used.
+ *
+ * In fact on xen we mark RO the whole range pgt_buf_start -
+ * pgt_buf_top, because we have to make sure that when
+ * init_memory_mapping reaches the pagetable pages area, it maps
+ * RO all the pagetable pages, including the ones that are beyond
+ * pgt_buf_end at that time.
+ */
if (!after_bootmem && pgt_buf_end > pgt_buf_start)
- memblock_x86_reserve_range(pgt_buf_start << PAGE_SHIFT,
- pgt_buf_end << PAGE_SHIFT, "PGTABLE");
+ x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start),
+ PFN_PHYS(pgt_buf_end));
if (!after_bootmem)
early_memtest(start, end);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 0aa34669ed3..79423358728 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -52,6 +52,7 @@
#include <asm/cacheflush.h>
#include <asm/init.h>
#include <asm/uv/uv.h>
+#include <asm/setup.h>
static int __init parse_direct_gbpages_off(char *arg)
{
@@ -294,18 +295,18 @@ void __init init_extra_mapping_uc(unsigned long phys, unsigned long size)
* to the compile time generated pmds. This results in invalid pmds up
* to the point where we hit the physaddr 0 mapping.
*
- * We limit the mappings to the region from _text to _end. _end is
- * rounded up to the 2MB boundary. This catches the invalid pmds as
+ * We limit the mappings to the region from _text to _brk_end. _brk_end
+ * is rounded up to the 2MB boundary. This catches the invalid pmds as
* well, as they are located before _text:
*/
void __init cleanup_highmap(void)
{
unsigned long vaddr = __START_KERNEL_map;
- unsigned long end = roundup((unsigned long)_end, PMD_SIZE) - 1;
+ unsigned long vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT);
+ unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
pmd_t *pmd = level2_kernel_pgt;
- pmd_t *last_pmd = pmd + PTRS_PER_PMD;
- for (; pmd < last_pmd; pmd++, vaddr += PMD_SIZE) {
+ for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) {
if (pmd_none(*pmd))
continue;
if (vaddr < (unsigned long) _text || vaddr > end)
@@ -861,18 +862,18 @@ static struct vm_area_struct gate_vma = {
.vm_flags = VM_READ | VM_EXEC
};
-struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{
#ifdef CONFIG_IA32_EMULATION
- if (test_tsk_thread_flag(tsk, TIF_IA32))
+ if (!mm || mm->context.ia32_compat)
return NULL;
#endif
return &gate_vma;
}
-int in_gate_area(struct task_struct *task, unsigned long addr)
+int in_gate_area(struct mm_struct *mm, unsigned long addr)
{
- struct vm_area_struct *vma = get_gate_vma(task);
+ struct vm_area_struct *vma = get_gate_vma(mm);
if (!vma)
return 0;
@@ -881,11 +882,11 @@ int in_gate_area(struct task_struct *task, unsigned long addr)
}
/*
- * Use this when you have no reliable task/vma, typically from interrupt
- * context. It is less reliable than using the task's vma and may give
- * false positives:
+ * Use this when you have no reliable mm, typically from interrupt
+ * context. It is less reliable than using a task's mm and may give
+ * false positives.
*/
-int in_gate_area_no_task(unsigned long addr)
+int in_gate_area_no_mm(unsigned long addr)
{
return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
}
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 9559d360fde..745258dfc4d 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -213,53 +213,48 @@ int early_cpu_to_node(int cpu)
return per_cpu(x86_cpu_to_node_map, cpu);
}
-struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable)
+void debug_cpumask_set_cpu(int cpu, int node, bool enable)
{
- int node = early_cpu_to_node(cpu);
struct cpumask *mask;
char buf[64];
if (node == NUMA_NO_NODE) {
/* early_cpu_to_node() already emits a warning and trace */
- return NULL;
+ return;
}
mask = node_to_cpumask_map[node];
if (!mask) {
pr_err("node_to_cpumask_map[%i] NULL\n", node);
dump_stack();
- return NULL;
+ return;
}
+ if (enable)
+ cpumask_set_cpu(cpu, mask);
+ else
+ cpumask_clear_cpu(cpu, mask);
+
cpulist_scnprintf(buf, sizeof(buf), mask);
printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
enable ? "numa_add_cpu" : "numa_remove_cpu",
cpu, node, buf);
- return mask;
+ return;
}
# ifndef CONFIG_NUMA_EMU
-static void __cpuinit numa_set_cpumask(int cpu, int enable)
+static void __cpuinit numa_set_cpumask(int cpu, bool enable)
{
- struct cpumask *mask;
-
- mask = debug_cpumask_set_cpu(cpu, enable);
- if (!mask)
- return;
-
- if (enable)
- cpumask_set_cpu(cpu, mask);
- else
- cpumask_clear_cpu(cpu, mask);
+ debug_cpumask_set_cpu(cpu, early_cpu_to_node(cpu), enable);
}
void __cpuinit numa_add_cpu(int cpu)
{
- numa_set_cpumask(cpu, 1);
+ numa_set_cpumask(cpu, true);
}
void __cpuinit numa_remove_cpu(int cpu)
{
- numa_set_cpumask(cpu, 0);
+ numa_set_cpumask(cpu, false);
}
# endif /* !CONFIG_NUMA_EMU */
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index e8c00cc7203..85b52fc0308 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -306,7 +306,7 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
bi->end = min(bi->end, high);
/* and there's no empty block */
- if (bi->start == bi->end) {
+ if (bi->start >= bi->end) {
numa_remove_memblk_from(i--, mi);
continue;
}
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index ad091e4cff1..de84cc14037 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -454,10 +454,9 @@ void __cpuinit numa_remove_cpu(int cpu)
cpumask_clear_cpu(cpu, node_to_cpumask_map[i]);
}
#else /* !CONFIG_DEBUG_PER_CPU_MAPS */
-static void __cpuinit numa_set_cpumask(int cpu, int enable)
+static void __cpuinit numa_set_cpumask(int cpu, bool enable)
{
- struct cpumask *mask;
- int nid, physnid, i;
+ int nid, physnid;
nid = early_cpu_to_node(cpu);
if (nid == NUMA_NO_NODE) {
@@ -467,28 +466,21 @@ static void __cpuinit numa_set_cpumask(int cpu, int enable)
physnid = emu_nid_to_phys[nid];
- for_each_online_node(i) {
+ for_each_online_node(nid) {
if (emu_nid_to_phys[nid] != physnid)
continue;
- mask = debug_cpumask_set_cpu(cpu, enable);
- if (!mask)
- return;
-
- if (enable)
- cpumask_set_cpu(cpu, mask);
- else
- cpumask_clear_cpu(cpu, mask);
+ debug_cpumask_set_cpu(cpu, nid, enable);
}
}
void __cpuinit numa_add_cpu(int cpu)
{
- numa_set_cpumask(cpu, 1);
+ numa_set_cpumask(cpu, true);
}
void __cpuinit numa_remove_cpu(int cpu)
{
- numa_set_cpumask(cpu, 0);
+ numa_set_cpumask(cpu, false);
}
#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index 48651c6f657..364f36bdfad 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -211,10 +211,12 @@ int __init get_memcfg_from_srat(void)
{
int i, j, nid;
-
if (srat_disabled())
goto out_fail;
+ if (acpi_numa_init() < 0)
+ goto out_fail;
+
if (num_memory_chunks == 0) {
printk(KERN_DEBUG
"could not find any ACPI SRAT memory areas.\n");