summaryrefslogtreecommitdiffstats
path: root/arch/x86/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/gup.c28
-rw-r--r--arch/x86/mm/numa.c22
-rw-r--r--arch/x86/mm/numa_64.c24
-rw-r--r--arch/x86/mm/pageattr.c8
-rw-r--r--arch/x86/mm/pgtable.c66
-rw-r--r--arch/x86/mm/srat_32.c1
6 files changed, 120 insertions, 29 deletions
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 738e6593799..dbe34b93137 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -8,6 +8,7 @@
#include <linux/mm.h>
#include <linux/vmstat.h>
#include <linux/highmem.h>
+#include <linux/swap.h>
#include <asm/pgtable.h>
@@ -89,6 +90,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
page = pte_page(pte);
get_page(page);
+ SetPageReferenced(page);
pages[*nr] = page;
(*nr)++;
@@ -103,6 +105,17 @@ static inline void get_head_page_multiple(struct page *page, int nr)
VM_BUG_ON(page != compound_head(page));
VM_BUG_ON(page_count(page) == 0);
atomic_add(nr, &page->_count);
+ SetPageReferenced(page);
+}
+
+static inline void get_huge_page_tail(struct page *page)
+{
+ /*
+ * __split_huge_page_refcount() cannot run
+ * from under us.
+ */
+ VM_BUG_ON(atomic_read(&page->_count) < 0);
+ atomic_inc(&page->_count);
}
static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
@@ -128,6 +141,8 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
do {
VM_BUG_ON(compound_head(page) != head);
pages[*nr] = page;
+ if (PageTail(page))
+ get_huge_page_tail(page);
(*nr)++;
page++;
refs++;
@@ -148,7 +163,18 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
pmd_t pmd = *pmdp;
next = pmd_addr_end(addr, end);
- if (pmd_none(pmd))
+ /*
+ * The pmd_trans_splitting() check below explains why
+ * pmdp_splitting_flush has to flush the tlb, to stop
+ * this gup-fast code from running while we set the
+ * splitting bit in the pmd. Returning zero will take
+ * the slow path that will call wait_split_huge_page()
+ * if the pmd is still in splitting state. gup-fast
+ * can't because it has irq disabled and
+ * wait_split_huge_page() would never return as the
+ * tlb flush IPI wouldn't run.
+ */
+ if (pmd_none(pmd) || pmd_trans_splitting(pmd))
return 0;
if (unlikely(pmd_large(pmd))) {
if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 787c52ca49c..ebf6d7887a3 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -2,6 +2,28 @@
#include <linux/topology.h>
#include <linux/module.h>
#include <linux/bootmem.h>
+#include <asm/numa.h>
+#include <asm/acpi.h>
+
+int __initdata numa_off;
+
+static __init int numa_setup(char *opt)
+{
+ if (!opt)
+ return -EINVAL;
+ if (!strncmp(opt, "off", 3))
+ numa_off = 1;
+#ifdef CONFIG_NUMA_EMU
+ if (!strncmp(opt, "fake=", 5))
+ numa_emu_cmdline(opt + 5);
+#endif
+#ifdef CONFIG_ACPI_NUMA
+ if (!strncmp(opt, "noacpi", 6))
+ acpi_numa = -1;
+#endif
+ return 0;
+}
+early_param("numa", numa_setup);
/*
* Which logical CPUs are on which nodes
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 1e72102e80c..95ea1551eeb 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -30,7 +30,6 @@ s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
};
-int numa_off __initdata;
static unsigned long __initdata nodemap_addr;
static unsigned long __initdata nodemap_size;
@@ -263,6 +262,11 @@ static struct bootnode nodes[MAX_NUMNODES] __initdata;
static struct bootnode physnodes[MAX_NUMNODES] __cpuinitdata;
static char *cmdline __initdata;
+void __init numa_emu_cmdline(char *str)
+{
+ cmdline = str;
+}
+
static int __init setup_physnodes(unsigned long start, unsigned long end,
int acpi, int amd)
{
@@ -670,24 +674,6 @@ unsigned long __init numa_free_all_bootmem(void)
return pages;
}
-static __init int numa_setup(char *opt)
-{
- if (!opt)
- return -EINVAL;
- if (!strncmp(opt, "off", 3))
- numa_off = 1;
-#ifdef CONFIG_NUMA_EMU
- if (!strncmp(opt, "fake=", 5))
- cmdline = opt + 5;
-#endif
-#ifdef CONFIG_ACPI_NUMA
- if (!strncmp(opt, "noacpi", 6))
- acpi_numa = -1;
-#endif
- return 0;
-}
-early_param("numa", numa_setup);
-
#ifdef CONFIG_NUMA
static __init int find_near_online_node(int node)
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 8b830ca14ac..d343b3c81f3 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -256,7 +256,6 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
unsigned long pfn)
{
pgprot_t forbidden = __pgprot(0);
- pgprot_t required = __pgprot(0);
/*
* The BIOS area between 640k and 1Mb needs to be executable for
@@ -282,12 +281,6 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT,
__pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
pgprot_val(forbidden) |= _PAGE_RW;
- /*
- * .data and .bss should always be writable.
- */
- if (within(address, (unsigned long)_sdata, (unsigned long)_edata) ||
- within(address, (unsigned long)__bss_start, (unsigned long)__bss_stop))
- pgprot_val(required) |= _PAGE_RW;
#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
/*
@@ -327,7 +320,6 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
#endif
prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
- prot = __pgprot(pgprot_val(prot) | pgprot_val(required));
return prot;
}
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 8be8c7d7bc8..500242d3c96 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -320,6 +320,25 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
return changed;
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+int pmdp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp,
+ pmd_t entry, int dirty)
+{
+ int changed = !pmd_same(*pmdp, entry);
+
+ VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+
+ if (changed && dirty) {
+ *pmdp = entry;
+ pmd_update_defer(vma->vm_mm, address, pmdp);
+ flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+ }
+
+ return changed;
+}
+#endif
+
int ptep_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
@@ -335,6 +354,23 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma,
return ret;
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp)
+{
+ int ret = 0;
+
+ if (pmd_young(*pmdp))
+ ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
+ (unsigned long *)pmdp);
+
+ if (ret)
+ pmd_update(vma->vm_mm, addr, pmdp);
+
+ return ret;
+}
+#endif
+
int ptep_clear_flush_young(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep)
{
@@ -347,6 +383,36 @@ int ptep_clear_flush_young(struct vm_area_struct *vma,
return young;
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+int pmdp_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ int young;
+
+ VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+
+ young = pmdp_test_and_clear_young(vma, address, pmdp);
+ if (young)
+ flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+
+ return young;
+}
+
+void pmdp_splitting_flush(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ int set;
+ VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+ set = !test_and_set_bit(_PAGE_BIT_SPLITTING,
+ (unsigned long *)pmdp);
+ if (set) {
+ pmd_update(vma->vm_mm, address, pmdp);
+ /* need tlb flush only to serialize against gup-fast */
+ flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+ }
+}
+#endif
+
/**
* reserve_top_address - reserves a hole in the top of kernel address space
* @reserve - size of hole to reserve
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index f16434568a5..ae96e7b8051 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -59,7 +59,6 @@ static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS];
static int __initdata num_memory_chunks; /* total number of memory chunks */
static u8 __initdata apicid_to_pxm[MAX_APICID];
-int numa_off __initdata;
int acpi_numa __initdata;
static __init void bad_srat(void)