summaryrefslogtreecommitdiffstats
path: root/arch/x86/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/fault.c19
-rw-r--r--arch/x86/mm/hugetlbpage.c3
-rw-r--r--arch/x86/mm/init.c58
-rw-r--r--arch/x86/mm/init_32.c2
-rw-r--r--arch/x86/mm/init_64.c7
-rw-r--r--arch/x86/mm/pat.c87
-rw-r--r--arch/x86/mm/pat_rbtree.c34
7 files changed, 134 insertions, 76 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 7dde46d68a2..8e13ecb41be 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -996,6 +996,17 @@ static int fault_in_kernel_space(unsigned long address)
return address >= TASK_SIZE_MAX;
}
+static inline bool smap_violation(int error_code, struct pt_regs *regs)
+{
+ if (error_code & PF_USER)
+ return false;
+
+ if (!user_mode_vm(regs) && (regs->flags & X86_EFLAGS_AC))
+ return false;
+
+ return true;
+}
+
/*
* This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate
@@ -1089,6 +1100,13 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
if (unlikely(error_code & PF_RSVD))
pgtable_bad(regs, error_code, address);
+ if (static_cpu_has(X86_FEATURE_SMAP)) {
+ if (unlikely(smap_violation(error_code, regs))) {
+ bad_area_nosemaphore(regs, error_code, address);
+ return;
+ }
+ }
+
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
/*
@@ -1202,6 +1220,7 @@ good_area:
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
* of starvation. */
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
goto retry;
}
}
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index b91e4851242..937bff5cdaa 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -71,7 +71,6 @@ huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
struct address_space *mapping = vma->vm_file->f_mapping;
pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
vma->vm_pgoff;
- struct prio_tree_iter iter;
struct vm_area_struct *svma;
unsigned long saddr;
pte_t *spte = NULL;
@@ -81,7 +80,7 @@ huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
return (pte_t *)pmd_alloc(mm, pud, addr);
mutex_lock(&mapping->i_mmap_mutex);
- vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) {
+ vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
if (svma == vma)
continue;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index ab1f6a93b52..d7aea41563b 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -35,40 +35,44 @@ struct map_range {
unsigned page_size_mask;
};
-static void __init find_early_table_space(struct map_range *mr, unsigned long end,
- int use_pse, int use_gbpages)
+/*
+ * First calculate space needed for kernel direct mapping page tables to cover
+ * mr[0].start to mr[nr_range - 1].end, while accounting for possible 2M and 1GB
+ * pages. Then find enough contiguous space for those page tables.
+ */
+static void __init find_early_table_space(struct map_range *mr, int nr_range)
{
- unsigned long puds, pmds, ptes, tables, start = 0, good_end = end;
+ int i;
+ unsigned long puds = 0, pmds = 0, ptes = 0, tables;
+ unsigned long start = 0, good_end;
phys_addr_t base;
- puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
- tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
-
- if (use_gbpages) {
- unsigned long extra;
-
- extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
- pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
- } else
- pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
+ for (i = 0; i < nr_range; i++) {
+ unsigned long range, extra;
- tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
+ range = mr[i].end - mr[i].start;
+ puds += (range + PUD_SIZE - 1) >> PUD_SHIFT;
- if (use_pse) {
- unsigned long extra;
+ if (mr[i].page_size_mask & (1 << PG_LEVEL_1G)) {
+ extra = range - ((range >> PUD_SHIFT) << PUD_SHIFT);
+ pmds += (extra + PMD_SIZE - 1) >> PMD_SHIFT;
+ } else {
+ pmds += (range + PMD_SIZE - 1) >> PMD_SHIFT;
+ }
- extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
+ if (mr[i].page_size_mask & (1 << PG_LEVEL_2M)) {
+ extra = range - ((range >> PMD_SHIFT) << PMD_SHIFT);
#ifdef CONFIG_X86_32
- extra += PMD_SIZE;
+ extra += PMD_SIZE;
#endif
- /* The first 2/4M doesn't use large pages. */
- if (mr->start < PMD_SIZE)
- extra += mr->end - mr->start;
-
- ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
- } else
- ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ ptes += (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ } else {
+ ptes += (range + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ }
+ }
+ tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
+ tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
#ifdef CONFIG_X86_32
@@ -86,7 +90,7 @@ static void __init find_early_table_space(struct map_range *mr, unsigned long en
pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT);
printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx]\n",
- end - 1, pgt_buf_start << PAGE_SHIFT,
+ mr[nr_range - 1].end - 1, pgt_buf_start << PAGE_SHIFT,
(pgt_buf_top << PAGE_SHIFT) - 1);
}
@@ -267,7 +271,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
* nodes are discovered.
*/
if (!after_bootmem)
- find_early_table_space(&mr[0], end, use_pse, use_gbpages);
+ find_early_table_space(mr, nr_range);
for (i = 0; i < nr_range; i++)
ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 4f04db15002..11a58001b4c 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -709,7 +709,7 @@ static void __init test_wp_bit(void)
"Checking if this processor honours the WP bit even in supervisor mode...");
/* Any page-aligned address will do, the test is non-destructive */
- __set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY);
+ __set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_KERNEL_RO);
boot_cpu_data.wp_works_ok = do_test_wp_bit();
clear_fixmap(FIX_WP_TEST);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 2b6b4a3c8be..3baff255ada 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -386,7 +386,8 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
* these mappings are more intelligent.
*/
if (pte_val(*pte)) {
- pages++;
+ if (!after_bootmem)
+ pages++;
continue;
}
@@ -451,6 +452,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
* attributes.
*/
if (page_size_mask & (1 << PG_LEVEL_2M)) {
+ if (!after_bootmem)
+ pages++;
last_map_addr = next;
continue;
}
@@ -526,6 +529,8 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
* attributes.
*/
if (page_size_mask & (1 << PG_LEVEL_1G)) {
+ if (!after_bootmem)
+ pages++;
last_map_addr = next;
continue;
}
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 3d68ef6d226..0eb572eda40 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -664,20 +664,20 @@ static void free_pfn_range(u64 paddr, unsigned long size)
}
/*
- * track_pfn_vma_copy is called when vma that is covering the pfnmap gets
+ * track_pfn_copy is called when vma that is covering the pfnmap gets
* copied through copy_page_range().
*
* If the vma has a linear pfn mapping for the entire range, we get the prot
* from pte and reserve the entire vma range with single reserve_pfn_range call.
*/
-int track_pfn_vma_copy(struct vm_area_struct *vma)
+int track_pfn_copy(struct vm_area_struct *vma)
{
resource_size_t paddr;
unsigned long prot;
unsigned long vma_size = vma->vm_end - vma->vm_start;
pgprot_t pgprot;
- if (is_linear_pfn_mapping(vma)) {
+ if (vma->vm_flags & VM_PAT) {
/*
* reserve the whole chunk covered by vma. We need the
* starting address and protection from pte.
@@ -694,31 +694,59 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
}
/*
- * track_pfn_vma_new is called when a _new_ pfn mapping is being established
- * for physical range indicated by pfn and size.
- *
* prot is passed in as a parameter for the new mapping. If the vma has a
* linear pfn mapping for the entire range reserve the entire vma range with
* single reserve_pfn_range call.
*/
-int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
- unsigned long pfn, unsigned long size)
+int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
+ unsigned long pfn, unsigned long addr, unsigned long size)
{
+ resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
unsigned long flags;
- resource_size_t paddr;
- unsigned long vma_size = vma->vm_end - vma->vm_start;
- if (is_linear_pfn_mapping(vma)) {
- /* reserve the whole chunk starting from vm_pgoff */
- paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
- return reserve_pfn_range(paddr, vma_size, prot, 0);
+ /* reserve the whole chunk starting from paddr */
+ if (addr == vma->vm_start && size == (vma->vm_end - vma->vm_start)) {
+ int ret;
+
+ ret = reserve_pfn_range(paddr, size, prot, 0);
+ if (!ret)
+ vma->vm_flags |= VM_PAT;
+ return ret;
}
if (!pat_enabled)
return 0;
- /* for vm_insert_pfn and friends, we set prot based on lookup */
- flags = lookup_memtype(pfn << PAGE_SHIFT);
+ /*
+ * For anything smaller than the vma size we set prot based on the
+ * lookup.
+ */
+ flags = lookup_memtype(paddr);
+
+ /* Check memtype for the remaining pages */
+ while (size > PAGE_SIZE) {
+ size -= PAGE_SIZE;
+ paddr += PAGE_SIZE;
+ if (flags != lookup_memtype(paddr))
+ return -EINVAL;
+ }
+
+ *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
+ flags);
+
+ return 0;
+}
+
+int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
+ unsigned long pfn)
+{
+ unsigned long flags;
+
+ if (!pat_enabled)
+ return 0;
+
+ /* Set prot based on lookup */
+ flags = lookup_memtype((resource_size_t)pfn << PAGE_SHIFT);
*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
flags);
@@ -726,22 +754,31 @@ int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
}
/*
- * untrack_pfn_vma is called while unmapping a pfnmap for a region.
+ * untrack_pfn is called while unmapping a pfnmap for a region.
* untrack can be called for a specific region indicated by pfn and size or
- * can be for the entire vma (in which case size can be zero).
+ * can be for the entire vma (in which case pfn, size are zero).
*/
-void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
- unsigned long size)
+void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
+ unsigned long size)
{
resource_size_t paddr;
- unsigned long vma_size = vma->vm_end - vma->vm_start;
+ unsigned long prot;
- if (is_linear_pfn_mapping(vma)) {
- /* free the whole chunk starting from vm_pgoff */
- paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
- free_pfn_range(paddr, vma_size);
+ if (!(vma->vm_flags & VM_PAT))
return;
+
+ /* free the chunk starting from pfn or the whole chunk */
+ paddr = (resource_size_t)pfn << PAGE_SHIFT;
+ if (!paddr && !size) {
+ if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ size = vma->vm_end - vma->vm_start;
}
+ free_pfn_range(paddr, size);
+ vma->vm_flags &= ~VM_PAT;
}
pgprot_t pgprot_writecombine(pgprot_t prot)
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index 8acaddd0fb2..415f6c4ced3 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -12,7 +12,7 @@
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/rbtree.h>
+#include <linux/rbtree_augmented.h>
#include <linux/sched.h>
#include <linux/gfp.h>
@@ -54,29 +54,24 @@ static u64 get_subtree_max_end(struct rb_node *node)
return ret;
}
-/* Update 'subtree_max_end' for a node, based on node and its children */
-static void memtype_rb_augment_cb(struct rb_node *node, void *__unused)
+static u64 compute_subtree_max_end(struct memtype *data)
{
- struct memtype *data;
- u64 max_end, child_max_end;
-
- if (!node)
- return;
+ u64 max_end = data->end, child_max_end;
- data = container_of(node, struct memtype, rb);
- max_end = data->end;
-
- child_max_end = get_subtree_max_end(node->rb_right);
+ child_max_end = get_subtree_max_end(data->rb.rb_right);
if (child_max_end > max_end)
max_end = child_max_end;
- child_max_end = get_subtree_max_end(node->rb_left);
+ child_max_end = get_subtree_max_end(data->rb.rb_left);
if (child_max_end > max_end)
max_end = child_max_end;
- data->subtree_max_end = max_end;
+ return max_end;
}
+RB_DECLARE_CALLBACKS(static, memtype_rb_augment_cb, struct memtype, rb,
+ u64, subtree_max_end, compute_subtree_max_end)
+
/* Find the first (lowest start addr) overlapping range from rb tree */
static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
u64 start, u64 end)
@@ -179,15 +174,17 @@ static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata)
struct memtype *data = container_of(*node, struct memtype, rb);
parent = *node;
+ if (data->subtree_max_end < newdata->end)
+ data->subtree_max_end = newdata->end;
if (newdata->start <= data->start)
node = &((*node)->rb_left);
else if (newdata->start > data->start)
node = &((*node)->rb_right);
}
+ newdata->subtree_max_end = newdata->end;
rb_link_node(&newdata->rb, parent, node);
- rb_insert_color(&newdata->rb, root);
- rb_augment_insert(&newdata->rb, memtype_rb_augment_cb, NULL);
+ rb_insert_augmented(&newdata->rb, root, &memtype_rb_augment_cb);
}
int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type)
@@ -209,16 +206,13 @@ int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type)
struct memtype *rbt_memtype_erase(u64 start, u64 end)
{
- struct rb_node *deepest;
struct memtype *data;
data = memtype_rb_exact_match(&memtype_rbroot, start, end);
if (!data)
goto out;
- deepest = rb_augment_erase_begin(&data->rb);
- rb_erase(&data->rb, &memtype_rbroot);
- rb_augment_erase_end(deepest, memtype_rb_augment_cb, NULL);
+ rb_erase_augmented(&data->rb, &memtype_rbroot, &memtype_rb_augment_cb);
out:
return data;
}