diff options
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 107 |
1 files changed, 39 insertions, 68 deletions
diff --git a/mm/memory.c b/mm/memory.c index d209f745db7..2405289dfdf 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -58,7 +58,7 @@ #include <linux/swapops.h> #include <linux/elf.h> -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_NEED_MULTIPLE_NODES /* use the per-pgdat data instead for discontigmem - mbligh */ unsigned long max_mapnr; struct page *mem_map; @@ -776,8 +776,8 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, * Do a quick page-table lookup for a single page. * mm->page_table_lock must be held. */ -static struct page * -__follow_page(struct mm_struct *mm, unsigned long address, int read, int write) +static struct page *__follow_page(struct mm_struct *mm, unsigned long address, + int read, int write, int accessed) { pgd_t *pgd; pud_t *pud; @@ -811,16 +811,15 @@ __follow_page(struct mm_struct *mm, unsigned long address, int read, int write) pte = *ptep; pte_unmap(ptep); if (pte_present(pte)) { - if (write && !pte_write(pte)) + if (write && !pte_dirty(pte)) goto out; if (read && !pte_read(pte)) goto out; pfn = pte_pfn(pte); if (pfn_valid(pfn)) { page = pfn_to_page(pfn); - if (write && !pte_dirty(pte) && !PageDirty(page)) - set_page_dirty(page); - mark_page_accessed(page); + if (accessed) + mark_page_accessed(page); return page; } } @@ -829,33 +828,21 @@ out: return NULL; } -struct page * +inline struct page * follow_page(struct mm_struct *mm, unsigned long address, int write) { - return __follow_page(mm, address, /*read*/0, write); -} - -int -check_user_page_readable(struct mm_struct *mm, unsigned long address) -{ - return __follow_page(mm, address, /*read*/1, /*write*/0) != NULL; + return __follow_page(mm, address, 0, write, 1); } -EXPORT_SYMBOL(check_user_page_readable); - -/* - * Given a physical address, is there a useful struct page pointing to - * it? This may become more complex in the future if we start dealing - * with IO-aperture pages for direct-IO. +/* + * check_user_page_readable() can be called frm niterrupt context by oprofile, + * so we need to avoid taking any non-irq-safe locks */ - -static inline struct page *get_page_map(struct page *page) +int check_user_page_readable(struct mm_struct *mm, unsigned long address) { - if (!pfn_valid(page_to_pfn(page))) - return NULL; - return page; + return __follow_page(mm, address, 1, 0, 0) != NULL; } - +EXPORT_SYMBOL(check_user_page_readable); static inline int untouched_anonymous_page(struct mm_struct* mm, struct vm_area_struct *vma, @@ -887,7 +874,6 @@ untouched_anonymous_page(struct mm_struct* mm, struct vm_area_struct *vma, return 0; } - int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int len, int write, int force, struct page **pages, struct vm_area_struct **vmas) @@ -924,9 +910,13 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, pud = pud_offset(pgd, pg); BUG_ON(pud_none(*pud)); pmd = pmd_offset(pud, pg); - BUG_ON(pmd_none(*pmd)); + if (pmd_none(*pmd)) + return i ? : -EFAULT; pte = pte_offset_map(pmd, pg); - BUG_ON(pte_none(*pte)); + if (pte_none(*pte)) { + pte_unmap(pte); + return i ? : -EFAULT; + } if (pages) { pages[i] = pte_page(*pte); get_page(pages[i]); @@ -951,21 +941,19 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, } spin_lock(&mm->page_table_lock); do { - struct page *map; - int lookup_write = write; + struct page *page; cond_resched_lock(&mm->page_table_lock); - while (!(map = follow_page(mm, start, lookup_write))) { + while (!(page = follow_page(mm, start, write))) { /* * Shortcut for anonymous pages. We don't want * to force the creation of pages tables for - * insanly big anonymously mapped areas that + * insanely big anonymously mapped areas that * nobody touched so far. This is important * for doing a core dump for these mappings. */ - if (!lookup_write && - untouched_anonymous_page(mm,vma,start)) { - map = ZERO_PAGE(start); + if (!write && untouched_anonymous_page(mm,vma,start)) { + page = ZERO_PAGE(start); break; } spin_unlock(&mm->page_table_lock); @@ -983,41 +971,24 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, default: BUG(); } - /* - * Now that we have performed a write fault - * and surely no longer have a shared page we - * shouldn't write, we shouldn't ignore an - * unwritable page in the page table if - * we are forcing write access. - */ - lookup_write = write && !force; spin_lock(&mm->page_table_lock); } if (pages) { - pages[i] = get_page_map(map); - if (!pages[i]) { - spin_unlock(&mm->page_table_lock); - while (i--) - page_cache_release(pages[i]); - i = -EFAULT; - goto out; - } - flush_dcache_page(pages[i]); - if (!PageReserved(pages[i])) - page_cache_get(pages[i]); + pages[i] = page; + flush_dcache_page(page); + if (!PageReserved(page)) + page_cache_get(page); } if (vmas) vmas[i] = vma; i++; start += PAGE_SIZE; len--; - } while(len && start < vma->vm_end); + } while (len && start < vma->vm_end); spin_unlock(&mm->page_table_lock); - } while(len); -out: + } while (len); return i; } - EXPORT_SYMBOL(get_user_pages); static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd, @@ -1164,7 +1135,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, { pgd_t *pgd; unsigned long next; - unsigned long end = addr + size; + unsigned long end = addr + PAGE_ALIGN(size); struct mm_struct *mm = vma->vm_mm; int err; @@ -1264,7 +1235,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, } old_page = pfn_to_page(pfn); - if (!TestSetPageLocked(old_page)) { + if (PageAnon(old_page) && !TestSetPageLocked(old_page)) { int reuse = can_share_swap_page(old_page); unlock_page(old_page); if (reuse) { @@ -1483,7 +1454,7 @@ restart: * unmap_mapping_range - unmap the portion of all mmaps * in the specified address_space corresponding to the specified * page range in the underlying file. - * @address_space: the address space containing mmaps to be unmapped. + * @mapping: the address space containing mmaps to be unmapped. * @holebegin: byte in first page to unmap, relative to the start of * the underlying file. This will be rounded down to a PAGE_SIZE * boundary. Note that this is different from vmtruncate(), which @@ -1711,10 +1682,6 @@ static int do_swap_page(struct mm_struct * mm, } /* The page isn't present yet, go ahead with the fault. */ - - swap_free(entry); - if (vm_swap_full()) - remove_exclusive_swap_page(page); inc_mm_counter(mm, rss); pte = mk_pte(page, vma->vm_page_prot); @@ -1722,12 +1689,16 @@ static int do_swap_page(struct mm_struct * mm, pte = maybe_mkwrite(pte_mkdirty(pte), vma); write_access = 0; } - unlock_page(page); flush_icache_page(vma, page); set_pte_at(mm, address, page_table, pte); page_add_anon_rmap(page, vma, address); + swap_free(entry); + if (vm_swap_full()) + remove_exclusive_swap_page(page); + unlock_page(page); + if (write_access) { if (do_wp_page(mm, vma, address, page_table, pmd, pte) == VM_FAULT_OOM) |