summary refs log tree commit diff stats
path: root/mm/memory.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- mm/memory.c 243
1 files changed, 154 insertions, 89 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 2302d228fe0..262e3eb6601 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -61,6 +61,8 @@
#include <linux/swapops.h>
#include <linux/elf.h>
+#include "internal.h"
+
#ifndef CONFIG_NEED_MULTIPLE_NODES
/* use the per-pgdat data instead for discontigmem - mbligh */
unsigned long max_mapnr;
@@ -211,7 +213,7 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
*
* Must be called with pagetable lock held.
*/
-void free_pgd_range(struct mmu_gather **tlb,
+void free_pgd_range(struct mmu_gather *tlb,
unsigned long addr, unsigned long end,
unsigned long floor, unsigned long ceiling)
{
@@ -262,16 +264,16 @@ void free_pgd_range(struct mmu_gather **tlb,
return;
start = addr;
- pgd = pgd_offset((*tlb)->mm, addr);
+ pgd = pgd_offset(tlb->mm, addr);
do {
next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd))
continue;
- free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
+ free_pud_range(tlb, pgd, addr, next, floor, ceiling);
} while (pgd++, addr = next, addr != end);
}
-void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
+void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
unsigned long floor, unsigned long ceiling)
{
while (vma) {
@@ -899,9 +901,23 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
}
if (unlikely(is_vm_hugetlb_page(vma))) {
- unmap_hugepage_range(vma, start, end);
- zap_work -= (end - start) /
- (HPAGE_SIZE / PAGE_SIZE);
+ /*
+ * It is undesirable to test vma->vm_file as it
+ * should be non-null for valid hugetlb area.
+ * However, vm_file will be NULL in the error
+ * cleanup path of do_mmap_pgoff. When
+ * hugetlbfs ->mmap method fails,
+ * do_mmap_pgoff() nullifies vma->vm_file
+ * before calling this function to clean up.
+ * Since no pte has actually been setup, it is
+ * safe to do nothing in this case.
+ */
+ if (vma->vm_file) {
+ unmap_hugepage_range(vma, start, end, NULL);
+ zap_work -= (end - start) /
+ pages_per_huge_page(hstate_vma(vma));
+ }
+
start = end;
} else
start = unmap_page_range(*tlbp, vma,
@@ -982,19 +998,24 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
goto no_page_table;
pud = pud_offset(pgd, address);
- if (pud_none(*pud) || unlikely(pud_bad(*pud)))
+ if (pud_none(*pud))
goto no_page_table;
-
+ if (pud_huge(*pud)) {
+ BUG_ON(flags & FOLL_GET);
+ page = follow_huge_pud(mm, address, pud, flags & FOLL_WRITE);
+ goto out;
+ }
+ if (unlikely(pud_bad(*pud)))
+ goto no_page_table;
+
pmd = pmd_offset(pud, address);
if (pmd_none(*pmd))
goto no_page_table;
-
if (pmd_huge(*pmd)) {
BUG_ON(flags & FOLL_GET);
page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
goto out;
}
-
if (unlikely(pmd_bad(*pmd)))
goto no_page_table;
@@ -1058,11 +1079,9 @@ static inline int use_zero_page(struct vm_area_struct *vma)
if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
return 0;
/*
- * And if we have a fault or a nopfn routine, it's not an
- * anonymous region.
+ * And if we have a fault routine, it's not an anonymous region.
*/
- return !vma->vm_ops ||
- (!vma->vm_ops->fault && !vma->vm_ops->nopfn);
+ return !vma->vm_ops || !vma->vm_ops->fault;
}
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
@@ -1338,6 +1357,11 @@ out:
*
* This function should only be called from a vm_ops->fault handler, and
* in that case the handler should return NULL.
+ *
+ * vma cannot be a COW mapping.
+ *
+ * As this is called only for pages that do not currently exist, we
+ * do not need to flush old virtual caches or the TLB.
*/
int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn)
@@ -1548,6 +1572,8 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
unsigned long next;
int err;
+ BUG_ON(pud_huge(*pud));
+
pmd = pmd_alloc(mm, pud, addr);
if (!pmd)
return -ENOMEM;
@@ -2501,59 +2527,6 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
}
-
-/*
- * do_no_pfn() tries to create a new page mapping for a page without
- * a struct_page backing it
- *
- * As this is called only for pages that do not currently exist, we
- * do not need to flush old virtual caches or the TLB.
- *
- * We enter with non-exclusive mmap_sem (to exclude vma changes,
- * but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
- *
- * It is expected that the ->nopfn handler always returns the same pfn
- * for a given virtual mapping.
- *
- * Mark this `noinline' to prevent it from bloating the main pagefault code.
- */
-static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long address, pte_t *page_table, pmd_t *pmd,
- int write_access)
-{
- spinlock_t *ptl;
- pte_t entry;
- unsigned long pfn;
-
- pte_unmap(page_table);
- BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)));
- BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags));
-
- pfn = vma->vm_ops->nopfn(vma, address & PAGE_MASK);
-
- BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn));
-
- if (unlikely(pfn == NOPFN_OOM))
- return VM_FAULT_OOM;
- else if (unlikely(pfn == NOPFN_SIGBUS))
- return VM_FAULT_SIGBUS;
- else if (unlikely(pfn == NOPFN_REFAULT))
- return 0;
-
- page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
-
- /* Only go through if we didn't race with anybody else... */
- if (pte_none(*page_table)) {
- entry = pfn_pte(pfn, vma->vm_page_prot);
- if (write_access)
- entry = maybe_mkwrite(pte_mkdirty(entry), vma);
- set_pte_at(mm, address, page_table, entry);
- }
- pte_unmap_unlock(page_table, ptl);
- return 0;
-}
-
/*
* Fault of a previously existing named mapping. Repopulate the pte
* from the encoded file_pte if possible. This enables swappable
@@ -2614,9 +2587,6 @@ static inline int handle_pte_fault(struct mm_struct *mm,
if (likely(vma->vm_ops->fault))
return do_linear_fault(mm, vma, address,
pte, pmd, write_access, entry);
- if (unlikely(vma->vm_ops->nopfn))
- return do_no_pfn(mm, vma, address, pte,
- pmd, write_access);
}
return do_anonymous_page(mm, vma, address,
pte, pmd, write_access);
@@ -2804,6 +2774,86 @@ int in_gate_area_no_task(unsigned long addr)
#endif /* __HAVE_ARCH_GATE_AREA */
+#ifdef CONFIG_HAVE_IOREMAP_PROT
+/*
+ * Walk the page tables of vma->vm_mm by hand and translate @address into
+ * the physical address of the page it maps.
+ *
+ * @vma:     area containing @address; VM_BUG_ON() fires unless it has
+ *           VM_IO or VM_PFNMAP set.
+ * @address: virtual address to translate.
+ * @flags:   FOLL_* flags; only FOLL_WRITE is examined, in which case a
+ *           pte that is not writable is treated as a failed lookup.
+ * @prot:    on success receives pgprot_val() of the pte's protection
+ *           bits; it is left untouched on failure.
+ *
+ * Returns the page-aligned physical address (pfn << PAGE_SHIFT), or 0
+ * when any page-table level is missing or bad, the pmd is huge, the pte
+ * is not present, or write access was requested on a read-only pte.
+ */
+static resource_size_t follow_phys(struct vm_area_struct *vma,
+		unsigned long address, unsigned int flags,
+		unsigned long *prot)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	resource_size_t phys = 0;
+	spinlock_t *lock;
+	pte_t *ptep;
+	pte_t entry;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	VM_BUG_ON(!(vma->vm_flags & (VM_IO | VM_PFNMAP)));
+
+	pgd = pgd_offset(mm, address);
+	if (pgd_none(*pgd))
+		return 0;
+	if (unlikely(pgd_bad(*pgd)))
+		return 0;
+
+	pud = pud_offset(pgd, address);
+	if (pud_none(*pud))
+		return 0;
+	if (unlikely(pud_bad(*pud)))
+		return 0;
+
+	pmd = pmd_offset(pud, address);
+	if (pmd_none(*pmd))
+		return 0;
+	if (unlikely(pmd_bad(*pmd)))
+		return 0;
+
+	/* Huge-page PFN mappings are not handled; none are expected here. */
+	if (pmd_huge(*pmd))
+		return 0;
+
+	ptep = pte_offset_map_lock(mm, pmd, address, &lock);
+	if (!ptep)
+		return phys;
+
+	entry = *ptep;
+	if (pte_present(entry) &&
+	    (!(flags & FOLL_WRITE) || pte_write(entry))) {
+		/*
+		 * Store the pfn in the wide type first and shift afterwards,
+		 * so the result cannot overflow on PAE.
+		 */
+		phys = pte_pfn(entry);
+		phys <<= PAGE_SHIFT;
+
+		*prot = pgprot_val(pte_pgprot(entry));
+	}
+
+	pte_unmap_unlock(ptep, lock);
+	return phys;
+}
+
+/*
+ * generic_access_phys - copy data to or from a VM_IO / VM_PFNMAP mapping
+ * @vma:   area containing @addr; must have VM_IO or VM_PFNMAP set
+ * @addr:  virtual address inside @vma to access
+ * @buf:   kernel buffer that is the source (write) or destination (read)
+ * @len:   number of bytes requested
+ * @write: non-zero to copy @buf into the mapping, zero to read from it
+ *
+ * Resolves @addr to a physical page with follow_phys(), maps that single
+ * page with ioremap_prot() using the protection bits taken from the pte,
+ * and copies through the temporary mapping.
+ *
+ * Only one page is mapped, so the transfer is limited to the bytes between
+ * @addr and the end of its page.  The return value is the number of bytes
+ * actually copied, which may therefore be smaller than @len; callers are
+ * expected to advance by that amount and call again for the remainder.
+ *
+ * Returns -EINVAL if @vma is not a VM_IO / VM_PFNMAP area or the address
+ * cannot be translated, and -ENOMEM if the temporary mapping cannot be
+ * established.
+ */
+int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
+			void *buf, int len, int write)
+{
+	resource_size_t phys_addr;
+	unsigned long prot = 0;
+	void __iomem *maddr;
+	int offset = addr & (PAGE_SIZE-1);
+	int room = PAGE_SIZE - offset;
+
+	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+		return -EINVAL;
+
+	/* follow_phys() takes FOLL_* flags, not a boolean. */
+	phys_addr = follow_phys(vma, addr, write ? FOLL_WRITE : 0, &prot);
+	if (!phys_addr)
+		return -EINVAL;
+
+	/* Exactly one page is mapped below: never copy past its end. */
+	if (len > room)
+		len = room;
+
+	maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot);
+	if (!maddr)
+		return -ENOMEM;
+
+	if (write)
+		memcpy_toio(maddr + offset, buf, len);
+	else
+		memcpy_fromio(buf, maddr + offset, len);
+	iounmap(maddr);
+
+	return len;
+}
+#endif
+
/*
* Access another process' address space.
* Source/target buffer must be kernel space,
@@ -2813,7 +2863,6 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
{
struct mm_struct *mm;
struct vm_area_struct *vma;
- struct page *page;
void *old_buf = buf;
mm = get_task_mm(tsk);
@@ -2825,28 +2874,44 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
while (len) {
int bytes, ret, offset;
void *maddr;
+ struct page *page = NULL;
ret = get_user_pages(tsk, mm, addr, 1,
write, 1, &page, &vma);
- if (ret <= 0)
- break;
-
- bytes = len;
- offset = addr & (PAGE_SIZE-1);
- if (bytes > PAGE_SIZE-offset)
- bytes = PAGE_SIZE-offset;
-
- maddr = kmap(page);
- if (write) {
- copy_to_user_page(vma, page, addr,
- maddr + offset, buf, bytes);
- set_page_dirty_lock(page);
+ if (ret <= 0) {
+ /*
+ * Check if this is a VM_IO | VM_PFNMAP VMA, which
+ * we can access using slightly different code.
+ */
+#ifdef CONFIG_HAVE_IOREMAP_PROT
+ vma = find_vma(mm, addr);
+ if (!vma)
+ break;
+ if (vma->vm_ops && vma->vm_ops->access)
+ ret = vma->vm_ops->access(vma, addr, buf,
+ len, write);
+ if (ret <= 0)
+#endif
+ break;
+ bytes = ret;
} else {
- copy_from_user_page(vma, page, addr,
- buf, maddr + offset, bytes);
+ bytes = len;
+ offset = addr & (PAGE_SIZE-1);
+ if (bytes > PAGE_SIZE-offset)
+ bytes = PAGE_SIZE-offset;
+
+ maddr = kmap(page);
+ if (write) {
+ copy_to_user_page(vma, page, addr,
+ maddr + offset, buf, bytes);
+ set_page_dirty_lock(page);
+ } else {
+ copy_from_user_page(vma, page, addr,
+ buf, maddr + offset, bytes);
+ }
+ kunmap(page);
+ page_cache_release(page);
}
- kunmap(page);
- page_cache_release(page);
len -= bytes;
buf += bytes;
addr += bytes;