From 63551ae0feaaa23807ebea60de1901564bbef32e Mon Sep 17 00:00:00 2001
From: David Gibson
Date: Tue, 21 Jun 2005 17:14:44 -0700
Subject: [PATCH] Hugepage consolidation

A lot of the code in arch/*/mm/hugetlbpage.c is quite similar.  This patch
attempts to consolidate a lot of the code across the arches, putting the
combined version in mm/hugetlb.c.  There are a couple of uglyish hacks in
order to convert all the hugepage arches, but the result is a very large
reduction in the total amount of code.  It also means things like hugepage
lazy allocation could be implemented in one place, instead of six.

Tested, at least a little, on ppc64, i386 and x86_64.

Notes:
 - this patch changes the meaning of set_huge_pte() to be more analogous
   to set_pte()
 - does SH4 need a special huge_ptep_get_and_clear()?

Acked-by: William Lee Irwin
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/ppc64/mm/hugetlbpage.c | 180 +-------------------------------------------
 1 file changed, 2 insertions(+), 178 deletions(-)

(limited to 'arch/ppc64/mm')

diff --git a/arch/ppc64/mm/hugetlbpage.c b/arch/ppc64/mm/hugetlbpage.c
index d3bf86a5c1a..b4ab766f598 100644
--- a/arch/ppc64/mm/hugetlbpage.c
+++ b/arch/ppc64/mm/hugetlbpage.c
@@ -121,7 +121,7 @@ static pte_t *hugepte_alloc(struct mm_struct *mm, pud_t *dir, unsigned long addr
 	return hugepte_offset(dir, addr);
 }
 
-static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
 	pud_t *pud;
 
@@ -134,7 +134,7 @@ static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	return hugepte_offset(pud, addr);
 }
 
-static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 {
 	pud_t *pud;
 
@@ -147,25 +147,6 @@ static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 	return hugepte_alloc(mm, pud, addr);
 }
 
-static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma,
-			 unsigned long addr, struct page *page,
-			 pte_t *ptep, int write_access)
-{
-	pte_t entry;
-
-	add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE);
-	if (write_access) {
-		entry =
-		    pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
-	} else {
-		entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
-	}
-	entry = pte_mkyoung(entry);
-	entry = pte_mkhuge(entry);
-
-	set_pte_at(mm, addr, ptep, entry);
-}
-
 /*
  * This function checks for proper alignment of input addr and len parameters.
  */
@@ -259,80 +240,6 @@ int prepare_hugepage_range(unsigned long addr, unsigned long len)
 	return -EINVAL;
 }
 
-int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
-			    struct vm_area_struct *vma)
-{
-	pte_t *src_pte, *dst_pte, entry;
-	struct page *ptepage;
-	unsigned long addr = vma->vm_start;
-	unsigned long end = vma->vm_end;
-	int err = -ENOMEM;
-
-	while (addr < end) {
-		dst_pte = huge_pte_alloc(dst, addr);
-		if (!dst_pte)
-			goto out;
-
-		src_pte = huge_pte_offset(src, addr);
-		entry = *src_pte;
-
-		ptepage = pte_page(entry);
-		get_page(ptepage);
-		add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
-		set_pte_at(dst, addr, dst_pte, entry);
-
-		addr += HPAGE_SIZE;
-	}
-
-	err = 0;
- out:
-	return err;
-}
-
-int
-follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
-		    struct page **pages, struct vm_area_struct **vmas,
-		    unsigned long *position, int *length, int i)
-{
-	unsigned long vpfn, vaddr = *position;
-	int remainder = *length;
-
-	WARN_ON(!is_vm_hugetlb_page(vma));
-
-	vpfn = vaddr/PAGE_SIZE;
-	while (vaddr < vma->vm_end && remainder) {
-		if (pages) {
-			pte_t *pte;
-			struct page *page;
-
-			pte = huge_pte_offset(mm, vaddr);
-
-			/* hugetlb should be locked, and hence, prefaulted */
-			WARN_ON(!pte || pte_none(*pte));
-
-			page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
-
-			WARN_ON(!PageCompound(page));
-
-			get_page(page);
-			pages[i] = page;
-		}
-
-		if (vmas)
-			vmas[i] = vma;
-
-		vaddr += PAGE_SIZE;
-		++vpfn;
-		--remainder;
-		++i;
-	}
-
-	*length = remainder;
-	*position = vaddr;
-
-	return i;
-}
-
 struct page *
 follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
 {
@@ -363,89 +270,6 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 	return NULL;
 }
 
-void unmap_hugepage_range(struct vm_area_struct *vma,
-			  unsigned long start, unsigned long end)
-{
-	struct mm_struct *mm = vma->vm_mm;
-	unsigned long addr;
-	pte_t *ptep;
-	struct page *page;
-
-	WARN_ON(!is_vm_hugetlb_page(vma));
-	BUG_ON((start % HPAGE_SIZE) != 0);
-	BUG_ON((end % HPAGE_SIZE) != 0);
-
-	for (addr = start; addr < end; addr += HPAGE_SIZE) {
-		pte_t pte;
-
-		ptep = huge_pte_offset(mm, addr);
-		if (!ptep || pte_none(*ptep))
-			continue;
-
-		pte = *ptep;
-		page = pte_page(pte);
-		pte_clear(mm, addr, ptep);
-
-		put_page(page);
-	}
-	add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT));
-	flush_tlb_pending();
-}
-
-int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
-{
-	struct mm_struct *mm = current->mm;
-	unsigned long addr;
-	int ret = 0;
-
-	WARN_ON(!is_vm_hugetlb_page(vma));
-	BUG_ON((vma->vm_start % HPAGE_SIZE) != 0);
-	BUG_ON((vma->vm_end % HPAGE_SIZE) != 0);
-
-	spin_lock(&mm->page_table_lock);
-	for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
-		unsigned long idx;
-		pte_t *pte = huge_pte_alloc(mm, addr);
-		struct page *page;
-
-		if (!pte) {
-			ret = -ENOMEM;
-			goto out;
-		}
-		if (! pte_none(*pte))
-			continue;
-
-		idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
-			+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
-		page = find_get_page(mapping, idx);
-		if (!page) {
-			/* charge the fs quota first */
-			if (hugetlb_get_quota(mapping)) {
-				ret = -ENOMEM;
-				goto out;
-			}
-			page = alloc_huge_page();
-			if (!page) {
-				hugetlb_put_quota(mapping);
-				ret = -ENOMEM;
-				goto out;
-			}
-			ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
-			if (! ret) {
-				unlock_page(page);
-			} else {
-				hugetlb_put_quota(mapping);
-				free_huge_page(page);
-				goto out;
-			}
-		}
-		set_huge_pte(mm, vma, addr, page, pte, vma->vm_flags & VM_WRITE);
-	}
-out:
-	spin_unlock(&mm->page_table_lock);
-	return ret;
-}
-
 /* Because we have an exclusive hugepage region which lies within the
  * normal user address space, we have to take special measures to make
  * non-huge mmap()s evade the hugepage reserved regions. */
--
cgit v1.2.3-70-g09d2

From 1363c3cd8603a913a27e2995dccbd70d5312d8e6 Mon Sep 17 00:00:00 2001
From: Wolfgang Wander
Date: Tue, 21 Jun 2005 17:14:49 -0700
Subject: [PATCH] Avoiding mmap fragmentation

Ingo recently introduced a great speedup for allocating new mmaps using the
free_area_cache pointer which boosts the specweb SSL benchmark by 4-5% and
causes huge performance increases in thread creation.

The downside of this patch is that it does lead to fragmentation in the
mmap-ed areas (visible via /proc/self/maps), such that some applications
that work fine under 2.4 kernels quickly run out of memory on any 2.6
kernel.

The problem is twofold:

1) the free_area_cache is used to continue a search for memory where
   the last search ended.  Before the change new areas were always
   searched from the base address on.

   So now new small areas are cluttering holes of all sizes
   throughout the whole mmap-able region whereas before small holes
   tended to close holes near the base leaving holes far from the base
   large and available for larger requests.

2) the free_area_cache also is set to the location of the last
   munmap-ed area so in scenarios where we allocate e.g. five regions of
   1K each, then free regions 4 2 3 in this order the next request for
   1K will be placed in the position of the old region 3, whereas before
   we appended it to the still active region 1, placing it at the
   location of the old region 2.  Before we had 1 free region of 2K, now
   we only get two free regions of 1K -> fragmentation.

The patch addresses these issues by introducing yet another cache descriptor
cached_hole_size that contains the largest known hole size below the current
free_area_cache.  If a new request comes in the size is compared against the
cached_hole_size and if the request can be filled with a hole below
free_area_cache the search is started from the base instead.

The results look promising: whereas 2.6.12-rc4 fragments quickly and my
(earlier posted) leakme.c test program terminates after 50000+ iterations
with 96 distinct and fragmented maps in /proc/self/maps, it performs nicely
(as expected) with thread creation: Ingo's test_str02 with 20000 threads
requires 0.7s system time.

Taking out Ingo's patch (un-patch available per request) by basically
deleting all mentions of free_area_cache from the kernel and starting the
search for new memory always at the respective bases we observe: leakme
terminates successfully with 11 distinct, hardly fragmented areas in
/proc/self/maps but thread creation is grindingly slow: 30+s(!) system time
for Ingo's test_str02 with 20000 threads.

Now - drumroll ;-) the appended patch works fine with leakme: it ends with
only 7 distinct areas in /proc/self/maps and also thread creation seems
sufficiently fast with 0.71s for 20000 threads.
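
In outline, the bottom-up search ends up looking like this.  This is a
condensed sketch of the arch_get_unmapped_area() change to mm/mmap.c in the
diff below, not the verbatim kernel code; declarations, alignment and the
-ENOMEM fallback are omitted:

	/* Decide where to start scanning for a free range of len bytes. */
	if (len > mm->cached_hole_size) {
		/* No hole below free_area_cache can fit this request, so
		 * resume the previous search instead of rescanning
		 * known-full address space. */
		start_addr = addr = mm->free_area_cache;
	} else {
		/* A hole below free_area_cache may fit: restart from the
		 * base so small requests fill holes near the base first. */
		start_addr = addr = TASK_UNMAPPED_BASE;
		mm->cached_hole_size = 0;
	}

	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
		if (!vma || addr + len <= vma->vm_start) {
			/* Found a fit: cache where the next search starts. */
			mm->free_area_cache = addr + len;
			return addr;
		}
		/* Remember the largest hole skipped over during this scan. */
		if (addr + mm->cached_hole_size < vma->vm_start)
			mm->cached_hole_size = vma->vm_start - addr;
		addr = vma->vm_end;
	}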
Signed-off-by: Wolfgang Wander
Credit-to: "Richard Purdie"
Signed-off-by: Ken Chen
Acked-by: Ingo Molnar (partly)
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/arm/mm/mmap.c              | 10 +++++++-
 arch/i386/mm/hugetlbpage.c      | 34 ++++++++++++++++++++++----
 arch/ppc64/mm/hugetlbpage.c     | 34 ++++++++++++++++++++++----
 arch/sh/kernel/sys_sh.c         |  8 +++++++
 arch/sparc64/kernel/sys_sparc.c |  8 +++++++
 arch/x86_64/ia32/ia32_aout.c    |  1 +
 arch/x86_64/kernel/sys_x86_64.c |  9 +++++++
 fs/binfmt_aout.c                |  1 +
 fs/binfmt_elf.c                 |  1 +
 fs/hugetlbfs/inode.c            |  3 +++
 include/linux/sched.h           | 11 +++++----
 kernel/fork.c                   |  2 ++
 mm/mmap.c                       | 53 +++++++++++++++++++++++++++++++----------
 mm/nommu.c                      |  2 +-
 14 files changed, 147 insertions(+), 30 deletions(-)

(limited to 'arch/ppc64/mm')

diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index 32c4b0e35b3..3de7f84b53c 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -73,7 +73,12 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	    (!vma || addr + len <= vma->vm_start))
 		return addr;
 	}
-	start_addr = addr = mm->free_area_cache;
+	if (len > mm->cached_hole_size) {
+		start_addr = addr = mm->free_area_cache;
+	} else {
+		start_addr = addr = TASK_UNMAPPED_BASE;
+		mm->cached_hole_size = 0;
+	}
 
 full_search:
 	if (do_align)
@@ -90,6 +95,7 @@ full_search:
 		 */
 		if (start_addr != TASK_UNMAPPED_BASE) {
 			start_addr = addr = TASK_UNMAPPED_BASE;
+			mm->cached_hole_size = 0;
 			goto full_search;
 		}
 		return -ENOMEM;
@@ -101,6 +107,8 @@ full_search:
 			mm->free_area_cache = addr + len;
 			return addr;
 		}
+		if (addr + mm->cached_hole_size < vma->vm_start)
+			mm->cached_hole_size = vma->vm_start - addr;
 		addr = vma->vm_end;
 		if (do_align)
 			addr = COLOUR_ALIGN(addr, pgoff);
diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c
index 5aa06001a4b..3b099f32b94 100644
--- a/arch/i386/mm/hugetlbpage.c
+++ b/arch/i386/mm/hugetlbpage.c
@@ -140,7 +140,12 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
 	struct vm_area_struct *vma;
 	unsigned long start_addr;
 
-	start_addr = mm->free_area_cache;
+	if (len > mm->cached_hole_size) {
+		start_addr = mm->free_area_cache;
+	} else {
+		start_addr = TASK_UNMAPPED_BASE;
+		mm->cached_hole_size = 0;
+	}
 
 full_search:
 	addr = ALIGN(start_addr, HPAGE_SIZE);
@@ -154,6 +159,7 @@ full_search:
 		 */
 		if (start_addr != TASK_UNMAPPED_BASE) {
 			start_addr = TASK_UNMAPPED_BASE;
+			mm->cached_hole_size = 0;
 			goto full_search;
 		}
 		return -ENOMEM;
@@ -162,6 +168,8 @@ full_search:
 			mm->free_area_cache = addr + len;
 			return addr;
 		}
+		if (addr + mm->cached_hole_size < vma->vm_start)
+			mm->cached_hole_size = vma->vm_start - addr;
 		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
 	}
 }
@@ -173,12 +181,17 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma, *prev_vma;
 	unsigned long base = mm->mmap_base, addr = addr0;
+	unsigned long largest_hole = mm->cached_hole_size;
 	int first_time = 1;
 
 	/* don't allow allocations above current base */
 	if (mm->free_area_cache > base)
 		mm->free_area_cache = base;
 
+	if (len <= largest_hole) {
+		largest_hole = 0;
+		mm->free_area_cache = base;
+	}
 try_again:
 	/* make sure it can fit in the remaining address space */
 	if (mm->free_area_cache < len)
@@ -199,13 +212,21 @@ try_again:
 	 * vma->vm_start, use it:
 	 */
 	if (addr + len <= vma->vm_start &&
-			(!prev_vma || (addr >= prev_vma->vm_end)))
+			(!prev_vma || (addr >= prev_vma->vm_end))) {
 		/* remember the address as a hint for next time */
-		return (mm->free_area_cache = addr);
-	else
+		mm->cached_hole_size = largest_hole;
+		return (mm->free_area_cache = addr);
+	} else {
 		/* pull free_area_cache down to the first hole */
-		if (mm->free_area_cache == vma->vm_end)
+		if (mm->free_area_cache == vma->vm_end) {
 			mm->free_area_cache = vma->vm_start;
+			mm->cached_hole_size = largest_hole;
+		}
+	}
+
+	/* remember the largest hole we saw so far */
+	if (addr + largest_hole < vma->vm_start)
+		largest_hole = vma->vm_start - addr;
 
 	/* try just below the current vma->vm_start */
 	addr = (vma->vm_start - len) & HPAGE_MASK;
@@ -218,6 +239,7 @@ fail:
 	 */
 	if (first_time) {
 		mm->free_area_cache = base;
+		largest_hole = 0;
 		first_time = 0;
 		goto try_again;
 	}
@@ -228,6 +250,7 @@ fail:
 	 * allocations.
 	 */
 	mm->free_area_cache = TASK_UNMAPPED_BASE;
+	mm->cached_hole_size = ~0UL;
 	addr = hugetlb_get_unmapped_area_bottomup(file, addr0,
 			len, pgoff, flags);
 
 	/*
 	 * Restore the topdown base:
 	 */
 	mm->free_area_cache = base;
+	mm->cached_hole_size = ~0UL;
 
 	return addr;
 }
diff --git a/arch/ppc64/mm/hugetlbpage.c b/arch/ppc64/mm/hugetlbpage.c
index b4ab766f598..fdcfe97c75c 100644
--- a/arch/ppc64/mm/hugetlbpage.c
+++ b/arch/ppc64/mm/hugetlbpage.c
@@ -292,7 +292,12 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		    && !is_hugepage_only_range(mm, addr,len))
 			return addr;
 	}
-	start_addr = addr = mm->free_area_cache;
+	if (len > mm->cached_hole_size) {
+		start_addr = addr = mm->free_area_cache;
+	} else {
+		start_addr = addr = TASK_UNMAPPED_BASE;
+		mm->cached_hole_size = 0;
+	}
 
 full_search:
 	vma = find_vma(mm, addr);
@@ -316,6 +321,8 @@ full_search:
 			mm->free_area_cache = addr + len;
 			return addr;
 		}
+		if (addr + mm->cached_hole_size < vma->vm_start)
+			mm->cached_hole_size = vma->vm_start - addr;
 		addr = vma->vm_end;
 		vma = vma->vm_next;
 	}
@@ -323,6 +330,7 @@ full_search:
 	/* Make sure we didn't miss any holes */
 	if (start_addr != TASK_UNMAPPED_BASE) {
 		start_addr = addr = TASK_UNMAPPED_BASE;
+		mm->cached_hole_size = 0;
 		goto full_search;
 	}
 	return -ENOMEM;
@@ -344,6 +352,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	struct vm_area_struct *vma, *prev_vma;
 	struct mm_struct *mm = current->mm;
 	unsigned long base = mm->mmap_base, addr = addr0;
+	unsigned long largest_hole = mm->cached_hole_size;
 	int first_time = 1;
 
 	/* requested length too big for entire address space */
@@ -364,6 +373,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 		return addr;
 	}
 
+	if (len <= largest_hole) {
+		largest_hole = 0;
+		mm->free_area_cache = base;
+	}
 try_again:
 	/* make sure it can fit in the remaining address space */
 	if (mm->free_area_cache < len)
@@ -392,13 +405,21 @@ hugepage_recheck:
 	 * vma->vm_start, use it:
 	 */
 	if (addr+len <= vma->vm_start &&
-		(!prev_vma || (addr >= prev_vma->vm_end)))
+		(!prev_vma || (addr >= prev_vma->vm_end))) {
 		/* remember the address as a hint for next time */
-		return (mm->free_area_cache = addr);
-	else
+		mm->cached_hole_size = largest_hole;
+		return (mm->free_area_cache = addr);
+	} else {
 		/* pull free_area_cache down to the first hole */
-		if (mm->free_area_cache == vma->vm_end)
+		if (mm->free_area_cache == vma->vm_end) {
 			mm->free_area_cache = vma->vm_start;
+			mm->cached_hole_size = largest_hole;
+		}
+	}
+
+	/* remember the largest hole we saw so far */
+	if (addr + largest_hole < vma->vm_start)
+		largest_hole = vma->vm_start - addr;
 
 	/* try just below the current vma->vm_start */
 	addr = vma->vm_start-len;
@@ -411,6 +432,7 @@ fail:
 	 */
 	if (first_time) {
 		mm->free_area_cache = base;
+		largest_hole = 0;
 		first_time = 0;
 		goto try_again;
 	}
@@ -421,11 +443,13 @@ fail:
 	 * allocations.
 	 */
 	mm->free_area_cache = TASK_UNMAPPED_BASE;
+	mm->cached_hole_size = ~0UL;
 	addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
 
 	/*
 	 * Restore the topdown base:
 	 */
 	mm->free_area_cache = base;
+	mm->cached_hole_size = ~0UL;
 
 	return addr;
 }
diff --git a/arch/sh/kernel/sys_sh.c b/arch/sh/kernel/sys_sh.c
index df5ac294c37..917b2f32f26 100644
--- a/arch/sh/kernel/sys_sh.c
+++ b/arch/sh/kernel/sys_sh.c
@@ -79,6 +79,10 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	    (!vma || addr + len <= vma->vm_start))
 		return addr;
 	}
+	if (len <= mm->cached_hole_size) {
+		mm->cached_hole_size = 0;
+		mm->free_area_cache = TASK_UNMAPPED_BASE;
+	}
 	if (flags & MAP_PRIVATE)
 		addr = PAGE_ALIGN(mm->free_area_cache);
 	else
@@ -95,6 +99,7 @@ full_search:
 		 */
 		if (start_addr != TASK_UNMAPPED_BASE) {
 			start_addr = addr = TASK_UNMAPPED_BASE;
+			mm->cached_hole_size = 0;
 			goto full_search;
 		}
 		return -ENOMEM;
@@ -106,6 +111,9 @@ full_search:
 			mm->free_area_cache = addr + len;
 			return addr;
 		}
+		if (addr + mm->cached_hole_size < vma->vm_start)
+			mm->cached_hole_size = vma->vm_start - addr;
+
 		addr = vma->vm_end;
 		if (!(flags & MAP_PRIVATE))
 			addr = COLOUR_ALIGN(addr);
diff --git a/arch/sparc64/kernel/sys_sparc.c b/arch/sparc64/kernel/sys_sparc.c
index 0077f02f4b3..5f8c822a2b4 100644
--- a/arch/sparc64/kernel/sys_sparc.c
+++ b/arch/sparc64/kernel/sys_sparc.c
@@ -84,6 +84,10 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
 		return addr;
 	}
 
+	if (len <= mm->cached_hole_size) {
+		mm->cached_hole_size = 0;
+		mm->free_area_cache = TASK_UNMAPPED_BASE;
+	}
 	start_addr = addr = mm->free_area_cache;
 
 	task_size -= len;
@@ -103,6 +107,7 @@ full_search:
 		if (task_size < addr) {
 			if (start_addr != TASK_UNMAPPED_BASE) {
 				start_addr = addr = TASK_UNMAPPED_BASE;
+				mm->cached_hole_size = 0;
 				goto full_search;
 			}
 			return -ENOMEM;
@@ -114,6 +119,9 @@ full_search:
 			mm->free_area_cache = addr + len;
 			return addr;
 		}
+		if (addr + mm->cached_hole_size < vma->vm_start)
+			mm->cached_hole_size = vma->vm_start - addr;
+
 		addr = vma->vm_end;
 		if (do_color_align)
 			addr = COLOUR_ALIGN(addr, pgoff);
diff --git a/arch/x86_64/ia32/ia32_aout.c b/arch/x86_64/ia32/ia32_aout.c
index 1965efc974d..c12edf5d97f 100644
--- a/arch/x86_64/ia32/ia32_aout.c
+++ b/arch/x86_64/ia32/ia32_aout.c
@@ -312,6 +312,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	current->mm->brk = ex.a_bss +
 		(current->mm->start_brk = N_BSSADDR(ex));
 	current->mm->free_area_cache = TASK_UNMAPPED_BASE;
+	current->mm->cached_hole_size = 0;
 
 	set_mm_counter(current->mm, rss, 0);
 	current->mm->mmap = NULL;
diff --git a/arch/x86_64/kernel/sys_x86_64.c b/arch/x86_64/kernel/sys_x86_64.c
index d9798dd433f..cc7821c6885 100644
--- a/arch/x86_64/kernel/sys_x86_64.c
+++ b/arch/x86_64/kernel/sys_x86_64.c
@@ -105,6 +105,11 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	    (!vma || addr + len <= vma->vm_start))
 		return addr;
 	}
+	if (((flags & MAP_32BIT) || test_thread_flag(TIF_IA32))
+	    && len <= mm->cached_hole_size) {
+		mm->cached_hole_size = 0;
+		mm->free_area_cache = begin;
+	}
 	addr = mm->free_area_cache;
 	if (addr < begin)
 		addr = begin;
@@ -120,6 +125,7 @@ full_search:
 		 */
 		if (start_addr != begin) {
 			start_addr = addr = begin;
+			mm->cached_hole_size = 0;
 			goto full_search;
 		}
 		return -ENOMEM;
@@ -131,6 +137,9 @@ full_search:
 			mm->free_area_cache = addr + len;
 			return addr;
 		}
+		if (addr + mm->cached_hole_size < vma->vm_start)
+			mm->cached_hole_size = vma->vm_start - addr;
+
 		addr = vma->vm_end;
 	}
 }
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 009b8920c1f..dd9baabaf01 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -316,6 +316,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	current->mm->brk = ex.a_bss +
 		(current->mm->start_brk = N_BSSADDR(ex));
 	current->mm->free_area_cache = current->mm->mmap_base;
+	current->mm->cached_hole_size = 0;
 
 	set_mm_counter(current->mm, rss, 0);
 	current->mm->mmap = NULL;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index f8f6b6b7617..7976a238f0a 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -775,6 +775,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	   change some of these later */
 	set_mm_counter(current->mm, rss, 0);
 	current->mm->free_area_cache = current->mm->mmap_base;
+	current->mm->cached_hole_size = 0;
 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
 				 executable_stack);
 	if (retval < 0) {
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 2af3338f891..3a9b6d179cb 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -122,6 +122,9 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 
 	start_addr = mm->free_area_cache;
 
+	if (len <= mm->cached_hole_size)
+		start_addr = TASK_UNMAPPED_BASE;
+
 full_search:
 	addr = ALIGN(start_addr, HPAGE_SIZE);
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4dbb109022f..b58afd97a18 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -201,8 +201,8 @@ extern unsigned long
 arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
 			  unsigned long len, unsigned long pgoff,
 			  unsigned long flags);
-extern void arch_unmap_area(struct vm_area_struct *area);
-extern void arch_unmap_area_topdown(struct vm_area_struct *area);
+extern void arch_unmap_area(struct mm_struct *, unsigned long);
+extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
 
 #define set_mm_counter(mm, member, value) (mm)->_##member = (value)
 #define get_mm_counter(mm, member) ((mm)->_##member)
@@ -218,9 +218,10 @@ struct mm_struct {
 	unsigned long (*get_unmapped_area) (struct file *filp,
 				unsigned long addr, unsigned long len,
 				unsigned long pgoff, unsigned long flags);
-	void (*unmap_area) (struct vm_area_struct *area);
-	unsigned long mmap_base;	/* base of mmap area */
-	unsigned long free_area_cache;	/* first hole */
+	void (*unmap_area) (struct mm_struct *mm, unsigned long addr);
+	unsigned long mmap_base;	/* base of mmap area */
+	unsigned long cached_hole_size;	/* if non-zero, the largest hole below free_area_cache */
+	unsigned long free_area_cache;	/* first hole of size cached_hole_size or larger */
 	pgd_t * pgd;
 	atomic_t mm_users;	/* How many users with user space? */
 	atomic_t mm_count;	/* How many references to "struct mm_struct" (users count as 1) */
diff --git a/kernel/fork.c b/kernel/fork.c
index f42a17f8869..876b31cd822 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -194,6 +194,7 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
 	mm->mmap = NULL;
 	mm->mmap_cache = NULL;
 	mm->free_area_cache = oldmm->mmap_base;
+	mm->cached_hole_size = ~0UL;
 	mm->map_count = 0;
 	set_mm_counter(mm, rss, 0);
 	set_mm_counter(mm, anon_rss, 0);
@@ -322,6 +323,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm)
 	mm->ioctx_list = NULL;
 	mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm);
 	mm->free_area_cache = TASK_UNMAPPED_BASE;
+	mm->cached_hole_size = ~0UL;
 
 	if (likely(!mm_alloc_pgd(mm))) {
 		mm->def_flags = 0;
diff --git a/mm/mmap.c b/mm/mmap.c
index de54acd9942..9da23c1ef9d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1175,7 +1175,12 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	    (!vma || addr + len <= vma->vm_start))
 		return addr;
 	}
-	start_addr = addr = mm->free_area_cache;
+	if (len > mm->cached_hole_size) {
+		start_addr = addr = mm->free_area_cache;
+	} else {
+		start_addr = addr = TASK_UNMAPPED_BASE;
+		mm->cached_hole_size = 0;
+	}
 
 full_search:
 	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
@@ -1186,7 +1191,9 @@ full_search:
 			 * some holes.
 			 */
 			if (start_addr != TASK_UNMAPPED_BASE) {
-				start_addr = addr = TASK_UNMAPPED_BASE;
+				addr = TASK_UNMAPPED_BASE;
+				start_addr = addr;
+				mm->cached_hole_size = 0;
 				goto full_search;
 			}
 			return -ENOMEM;
@@ -1198,19 +1205,22 @@ full_search:
 			mm->free_area_cache = addr + len;
 			return addr;
 		}
+		if (addr + mm->cached_hole_size < vma->vm_start)
+			mm->cached_hole_size = vma->vm_start - addr;
 		addr = vma->vm_end;
 	}
 }
 #endif
 
-void arch_unmap_area(struct vm_area_struct *area)
+void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
 {
 	/*
 	 * Is this a new hole at the lowest possible address?
 	 */
-	if (area->vm_start >= TASK_UNMAPPED_BASE &&
-			area->vm_start < area->vm_mm->free_area_cache)
-		area->vm_mm->free_area_cache = area->vm_start;
+	if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache) {
+		mm->free_area_cache = addr;
+		mm->cached_hole_size = ~0UL;
+	}
 }
 
 /*
@@ -1240,6 +1250,12 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 		return addr;
 	}
 
+	/* check if free_area_cache is useful for us */
+	if (len <= mm->cached_hole_size) {
+		mm->cached_hole_size = 0;
+		mm->free_area_cache = mm->mmap_base;
+	}
+
 	/* either no address requested or can't fit in requested address hole */
 	addr = mm->free_area_cache;
 
@@ -1264,6 +1280,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 			/* remember the address as a hint for next time */
 			return (mm->free_area_cache = addr);
 
+		/* remember the largest hole we saw so far */
+		if (addr + mm->cached_hole_size < vma->vm_start)
+			mm->cached_hole_size = vma->vm_start - addr;
+
 		/* try just below the current vma->vm_start */
 		addr = vma->vm_start-len;
 	} while (len < vma->vm_start);
@@ -1274,28 +1294,30 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	 * can happen with large stack limits and large mmap()
 	 * allocations.
 	 */
-	mm->free_area_cache = TASK_UNMAPPED_BASE;
+	mm->cached_hole_size = ~0UL;
+	mm->free_area_cache = TASK_UNMAPPED_BASE;
 	addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
 
 	/*
 	 * Restore the topdown base:
 	 */
 	mm->free_area_cache = mm->mmap_base;
+	mm->cached_hole_size = ~0UL;
 
 	return addr;
 }
 #endif
 
-void arch_unmap_area_topdown(struct vm_area_struct *area)
+void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr)
 {
 	/*
 	 * Is this a new hole at the highest possible address?
 	 */
-	if (area->vm_end > area->vm_mm->free_area_cache)
-		area->vm_mm->free_area_cache = area->vm_end;
+	if (addr > mm->free_area_cache)
+		mm->free_area_cache = addr;
 
 	/* dont allow allocations above current base */
-	if (area->vm_mm->free_area_cache > area->vm_mm->mmap_base)
-		area->vm_mm->free_area_cache = area->vm_mm->mmap_base;
+	if (mm->free_area_cache > mm->mmap_base)
+		mm->free_area_cache = mm->mmap_base;
 }
 
 unsigned long
@@ -1595,7 +1617,6 @@ static void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area)
 	if (area->vm_flags & VM_LOCKED)
 		area->vm_mm->locked_vm -= len >> PAGE_SHIFT;
 	vm_stat_unaccount(area);
-	area->vm_mm->unmap_area(area);
 	remove_vm_struct(area);
 }
 
@@ -1649,6 +1670,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	struct vm_area_struct **insertion_point;
 	struct vm_area_struct *tail_vma = NULL;
+	unsigned long addr;
 
 	insertion_point = (prev ? &prev->vm_next : &mm->mmap);
 	do {
@@ -1659,6 +1681,11 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 	} while (vma && vma->vm_start < end);
 	*insertion_point = vma;
 	tail_vma->vm_next = NULL;
+	if (mm->unmap_area == arch_unmap_area)
+		addr = prev ? prev->vm_end : mm->mmap_base;
+	else
+		addr = vma ? vma->vm_start : mm->mmap_base;
+	mm->unmap_area(mm, addr);
 	mm->mmap_cache = NULL;		/* Kill the cache. */
 }
 
diff --git a/mm/nommu.c b/mm/nommu.c
index c53e9c8f6b4..ce74452c02d 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1067,7 +1067,7 @@ unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr,
 	return -ENOMEM;
 }
 
-void arch_unmap_area(struct vm_area_struct *area)
+void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
 {
 }
--
cgit v1.2.3-70-g09d2

From 20cee16ced631f70a62c97bdebae08a1c9470448 Mon Sep 17 00:00:00 2001
From: David Gibson
Date: Tue, 21 Jun 2005 17:15:31 -0700
Subject: [PATCH] ppc64: Abolish ioremap_mm

Currently ppc64 has two mm_structs for the kernel, init_mm and also
ioremap_mm.  The latter really isn't necessary: this patch abolishes it,
instead restricting vmallocs to the lower 1TB of the init_mm's range and
placing io mappings in the upper 1TB.  This simplifies the code in a
number of places and eliminates an unnecessary set of pagetables.  It
also tweaks the unmap/free path a little, allowing us to remove the
unmap_im_area() set of page table walkers, replacing them with
unmap_vm_area().
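
The resulting split of the 0xD region is worth spelling out.  This is a
sketch assembled from the header changes in the diff below, not new code
(EADDR_MASK is the pre-existing region-size mask):

	/* All of region 0xD is now owned by init_mm. */
	#define VMALLOC_START	0xD000000000000000ul
	#define VMALLOC_SIZE	0x10000000000UL		/* lower 1TB: vmalloc */
	#define VMALLOC_END	(VMALLOC_START + VMALLOC_SIZE)

	/* io mappings live above the 1TB mark, in the rest of the region. */
	#define PHBS_IO_BASE	VMALLOC_END
	#define IMALLOC_BASE	(PHBS_IO_BASE + 0x80000000ul)	/* 2GB for PHBs */
	#define IMALLOC_END	(VMALLOC_START + EADDR_MASK)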
Signed-off-by: David Gibson
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/ppc64/kernel/eeh.c       |  2 +-
 arch/ppc64/kernel/head.S      |  4 --
 arch/ppc64/kernel/process.c   |  8 ----
 arch/ppc64/mm/hash_utils.c    |  4 --
 arch/ppc64/mm/imalloc.c       | 20 ++++++----
 arch/ppc64/mm/init.c          | 93 +++++--------------------------------------
 include/asm-ppc64/imalloc.h   | 12 +++---
 include/asm-ppc64/page.h      |  2 -
 include/asm-ppc64/pgtable.h   |  9 +----
 include/asm-ppc64/processor.h | 10 -----
 10 files changed, 31 insertions(+), 133 deletions(-)

(limited to 'arch/ppc64/mm')

diff --git a/arch/ppc64/kernel/eeh.c b/arch/ppc64/kernel/eeh.c
index d63d41f3eec..af5272fedad 100644
--- a/arch/ppc64/kernel/eeh.c
+++ b/arch/ppc64/kernel/eeh.c
@@ -505,7 +505,7 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
 	pte_t *ptep;
 	unsigned long pa;
 
-	ptep = find_linux_pte(ioremap_mm.pgd, token);
+	ptep = find_linux_pte(init_mm.pgd, token);
 	if (!ptep)
 		return token;
 	pa = pte_pfn(*ptep) << PAGE_SHIFT;
diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S
index 346dbf606b5..02c8f4e3e4b 100644
--- a/arch/ppc64/kernel/head.S
+++ b/arch/ppc64/kernel/head.S
@@ -2121,10 +2121,6 @@ empty_zero_page:
 swapper_pg_dir:
 	.space	4096
 
-	.globl	ioremap_dir
-ioremap_dir:
-	.space	4096
-
 #ifdef CONFIG_SMP
 /* 1 page segment table per cpu (max 48, cpu0 allocated at STAB0_PHYS_ADDR) */
 	.globl	stab_array
diff --git a/arch/ppc64/kernel/process.c b/arch/ppc64/kernel/process.c
index cdfecbeb331..aba89554d89 100644
--- a/arch/ppc64/kernel/process.c
+++ b/arch/ppc64/kernel/process.c
@@ -58,14 +58,6 @@ struct task_struct *last_task_used_math = NULL;
 struct task_struct *last_task_used_altivec = NULL;
 #endif
 
-struct mm_struct ioremap_mm = {
-	.pgd		= ioremap_dir,
-	.mm_users	= ATOMIC_INIT(2),
-	.mm_count	= ATOMIC_INIT(1),
-	.cpu_vm_mask	= CPU_MASK_ALL,
-	.page_table_lock = SPIN_LOCK_UNLOCKED,
-};
-
 /*
  * Make sure the floating-point register state in the
  * the thread_struct is up to date for task tsk.
diff --git a/arch/ppc64/mm/hash_utils.c b/arch/ppc64/mm/hash_utils.c
index 0a0f97008d0..87d0525f56f 100644
--- a/arch/ppc64/mm/hash_utils.c
+++ b/arch/ppc64/mm/hash_utils.c
@@ -310,10 +310,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 		vsid = get_vsid(mm->context.id, ea);
 		break;
 
-	case IO_REGION_ID:
-		mm = &ioremap_mm;
-		vsid = get_kernel_vsid(ea);
-		break;
 	case VMALLOC_REGION_ID:
 		mm = &init_mm;
 		vsid = get_kernel_vsid(ea);
diff --git a/arch/ppc64/mm/imalloc.c b/arch/ppc64/mm/imalloc.c
index cb8727f3267..b6e75b891ac 100644
--- a/arch/ppc64/mm/imalloc.c
+++ b/arch/ppc64/mm/imalloc.c
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static DECLARE_MUTEX(imlist_sem);
 struct vm_struct * imlist = NULL;
@@ -285,29 +286,32 @@ struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size,
 	return area;
 }
 
-unsigned long im_free(void * addr)
+void im_free(void * addr)
 {
 	struct vm_struct **p, *tmp;
-	unsigned long ret_size = 0;
 
 	if (!addr)
-		return ret_size;
-	if ((PAGE_SIZE-1) & (unsigned long) addr) {
+		return;
+	if ((unsigned long) addr & ~PAGE_MASK) {
 		printk(KERN_ERR "Trying to %s bad address (%p)\n", __FUNCTION__, addr);
-		return ret_size;
+		return;
 	}
 	down(&imlist_sem);
 	for (p = &imlist ; (tmp = *p) ; p = &tmp->next) {
 		if (tmp->addr == addr) {
-			ret_size = tmp->size;
 			*p = tmp->next;
+
+			/* XXX: do we need the lock? */
+			spin_lock(&init_mm.page_table_lock);
+			unmap_vm_area(tmp);
+			spin_unlock(&init_mm.page_table_lock);
+
 			kfree(tmp);
 			up(&imlist_sem);
-			return ret_size;
+			return;
 		}
 	}
 	up(&imlist_sem);
 	printk(KERN_ERR "Trying to %s nonexistent area (%p)\n", __FUNCTION__,
 			addr);
-	return ret_size;
 }
diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c
index 4b42aff74d7..6fa1e6490b5 100644
--- a/arch/ppc64/mm/init.c
+++ b/arch/ppc64/mm/init.c
@@ -73,9 +73,6 @@ static unsigned long phbs_io_bot = PHBS_IO_BASE;
 extern pgd_t swapper_pg_dir[];
 extern struct task_struct *current_set[NR_CPUS];
 
-extern pgd_t ioremap_dir[];
-pgd_t * ioremap_pgd = (pgd_t *)&ioremap_dir;
-
 unsigned long klimit = (unsigned long)_end;
 
 unsigned long _SDR1=0;
@@ -137,69 +134,6 @@ void iounmap(volatile void __iomem *addr)
 
 #else
 
-static void unmap_im_area_pte(pmd_t *pmd, unsigned long addr,
-			      unsigned long end)
-{
-	pte_t *pte;
-
-	pte = pte_offset_kernel(pmd, addr);
-	do {
-		pte_t ptent = ptep_get_and_clear(&ioremap_mm, addr, pte);
-		WARN_ON(!pte_none(ptent) && !pte_present(ptent));
-	} while (pte++, addr += PAGE_SIZE, addr != end);
-}
-
-static inline void unmap_im_area_pmd(pud_t *pud, unsigned long addr,
-				     unsigned long end)
-{
-	pmd_t *pmd;
-	unsigned long next;
-
-	pmd = pmd_offset(pud, addr);
-	do {
-		next = pmd_addr_end(addr, end);
-		if (pmd_none_or_clear_bad(pmd))
-			continue;
-		unmap_im_area_pte(pmd, addr, next);
-	} while (pmd++, addr = next, addr != end);
-}
-
-static inline void unmap_im_area_pud(pgd_t *pgd, unsigned long addr,
-				     unsigned long end)
-{
-	pud_t *pud;
-	unsigned long next;
-
-	pud = pud_offset(pgd, addr);
-	do {
-		next = pud_addr_end(addr, end);
-		if (pud_none_or_clear_bad(pud))
-			continue;
-		unmap_im_area_pmd(pud, addr, next);
-	} while (pud++, addr = next, addr != end);
-}
-
-static void unmap_im_area(unsigned long addr, unsigned long end)
-{
-	struct mm_struct *mm = &ioremap_mm;
-	unsigned long next;
-	pgd_t *pgd;
-
-	spin_lock(&mm->page_table_lock);
-
-	pgd = pgd_offset_i(addr);
-	flush_cache_vunmap(addr, end);
-	do {
-		next = pgd_addr_end(addr, end);
-		if (pgd_none_or_clear_bad(pgd))
-			continue;
-		unmap_im_area_pud(pgd, addr, next);
-	} while (pgd++, addr = next, addr != end);
-	flush_tlb_kernel_range(start, end);
-
-	spin_unlock(&mm->page_table_lock);
-}
-
 /*
  * map_io_page currently only called by __ioremap
  * map_io_page adds an entry to the ioremap page table
@@ -214,21 +148,21 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
 	unsigned long vsid;
 
 	if (mem_init_done) {
-		spin_lock(&ioremap_mm.page_table_lock);
-		pgdp = pgd_offset_i(ea);
-		pudp = pud_alloc(&ioremap_mm, pgdp, ea);
+		spin_lock(&init_mm.page_table_lock);
+		pgdp = pgd_offset_k(ea);
+		pudp = pud_alloc(&init_mm, pgdp, ea);
 		if (!pudp)
 			return -ENOMEM;
-		pmdp = pmd_alloc(&ioremap_mm, pudp, ea);
+		pmdp = pmd_alloc(&init_mm, pudp, ea);
 		if (!pmdp)
 			return -ENOMEM;
-		ptep = pte_alloc_kernel(&ioremap_mm, pmdp, ea);
+		ptep = pte_alloc_kernel(&init_mm, pmdp, ea);
 		if (!ptep)
 			return -ENOMEM;
 
 		pa = abs_to_phys(pa);
-		set_pte_at(&ioremap_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
+		set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
 							  __pgprot(flags)));
-		spin_unlock(&ioremap_mm.page_table_lock);
+		spin_unlock(&init_mm.page_table_lock);
 	} else {
 		unsigned long va, vpn, hash, hpteg;
 
@@ -267,13 +201,9 @@ static void __iomem * __ioremap_com(unsigned long addr, unsigned long pa,
 
 	for (i = 0; i < size; i += PAGE_SIZE)
 		if (map_io_page(ea+i, pa+i, flags))
-			goto failure;
+			return NULL;
 
 	return (void __iomem *) (ea + (addr & ~PAGE_MASK));
- failure:
-	if (mem_init_done)
-		unmap_im_area(ea, ea + size);
-	return NULL;
 }
 
 
@@ -381,19 +311,14 @@ int __ioremap_explicit(unsigned long pa, unsigned long ea,
  */
 void iounmap(volatile void __iomem *token)
 {
-	unsigned long address, size;
 	void *addr;
 
 	if (!mem_init_done)
 		return;
 
 	addr = (void *) ((unsigned long __force) token & PAGE_MASK);
-
-	if ((size = im_free(addr)) == 0)
-		return;
 
-	address = (unsigned long)addr;
-	unmap_im_area(address, address + size);
+	im_free(addr);
 }
 
 static int iounmap_subset_regions(unsigned long addr, unsigned long size)
diff --git a/include/asm-ppc64/imalloc.h b/include/asm-ppc64/imalloc.h
index 3a45e918bf1..e46ff68a6e4 100644
--- a/include/asm-ppc64/imalloc.h
+++ b/include/asm-ppc64/imalloc.h
@@ -4,9 +4,9 @@
 /*
  * Define the address range of the imalloc VM area.
  */
-#define PHBS_IO_BASE	IOREGIONBASE
-#define IMALLOC_BASE	(IOREGIONBASE + 0x80000000ul)	/* Reserve 2 gigs for PHBs */
-#define IMALLOC_END	(IOREGIONBASE + EADDR_MASK)
+#define PHBS_IO_BASE	VMALLOC_END
+#define IMALLOC_BASE	(PHBS_IO_BASE + 0x80000000ul)	/* Reserve 2 gigs for PHBs */
+#define IMALLOC_END	(VMALLOC_START + EADDR_MASK)
 
 
 /* imalloc region types */
@@ -18,7 +18,9 @@ extern struct vm_struct * im_get_free_area(unsigned long size);
 extern struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size,
-		int region_type);
-unsigned long im_free(void *addr);
+			int region_type);
+extern void im_free(void *addr);
+
+extern unsigned long ioremap_bot;
 
 #endif /* _PPC64_IMALLOC_H */
diff --git a/include/asm-ppc64/page.h b/include/asm-ppc64/page.h
index bcd21789d3b..257d87eb7c3 100644
--- a/include/asm-ppc64/page.h
+++ b/include/asm-ppc64/page.h
@@ -202,9 +202,7 @@ extern u64 ppc64_pft_size;	/* Log 2 of page table size */
 #define PAGE_OFFSET     ASM_CONST(0xC000000000000000)
 #define KERNELBASE      PAGE_OFFSET
 #define VMALLOCBASE     ASM_CONST(0xD000000000000000)
-#define IOREGIONBASE    ASM_CONST(0xE000000000000000)
 
-#define IO_REGION_ID       (IOREGIONBASE >> REGION_SHIFT)
 #define VMALLOC_REGION_ID  (VMALLOCBASE >> REGION_SHIFT)
 #define KERNEL_REGION_ID   (KERNELBASE >> REGION_SHIFT)
 #define USER_REGION_ID     (0UL)
diff --git a/include/asm-ppc64/pgtable.h b/include/asm-ppc64/pgtable.h
index 264c4f7993b..46cf61c2ff6 100644
--- a/include/asm-ppc64/pgtable.h
+++ b/include/asm-ppc64/pgtable.h
@@ -53,7 +53,8 @@
  * Define the address range of the vmalloc VM area.
  */
 #define VMALLOC_START (0xD000000000000000ul)
-#define VMALLOC_END   (VMALLOC_START + EADDR_MASK)
+#define VMALLOC_SIZE  (0x10000000000UL)
+#define VMALLOC_END   (VMALLOC_START + VMALLOC_SIZE)
 
 /*
  * Bits in a linux-style PTE.  These match the bits in the
@@ -239,9 +240,6 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
 /* This now only contains the vmalloc pages */
 #define pgd_offset_k(address) pgd_offset(&init_mm, address)
 
-/* to find an entry in the ioremap page-table-directory */
-#define pgd_offset_i(address) (ioremap_pgd + pgd_index(address))
-
 /*
  * The following only work if pte_present() is true.
  * Undefined behaviour if not..
@@ -459,15 +457,12 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
 #define __HAVE_ARCH_PTE_SAME
 #define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
 
-extern unsigned long ioremap_bot, ioremap_base;
-
 #define pmd_ERROR(e) \
	printk("%s:%d: bad pmd %08x.\n", __FILE__, __LINE__, pmd_val(e))
 #define pgd_ERROR(e) \
	printk("%s:%d: bad pgd %08x.\n", __FILE__, __LINE__, pgd_val(e))
 
 extern pgd_t swapper_pg_dir[];
-extern pgd_t ioremap_dir[];
 
 extern void paging_init(void);
diff --git a/include/asm-ppc64/processor.h b/include/asm-ppc64/processor.h
index 809c634ba1d..3084099086a 100644
--- a/include/asm-ppc64/processor.h
+++ b/include/asm-ppc64/processor.h
@@ -429,16 +429,6 @@ struct thread_struct {
 	.fpexc_mode	= MSR_FE0|MSR_FE1, \
 }
 
-/*
- * Note: the vm_start and vm_end fields here should *not*
- * be in kernel space.  (Could vm_end == vm_start perhaps?)
- */
-#define IOREMAP_MMAP { &ioremap_mm, 0, 0x1000, NULL, \
-		       PAGE_SHARED, VM_READ | VM_WRITE | VM_EXEC, \
-		       1, NULL, NULL }
-
-extern struct mm_struct ioremap_mm;
-
 /*
  * Return saved PC of a blocked thread. For now, this is the "user" PC
 */
--
cgit v1.2.3-70-g09d2

From 515bae9cdc6a78eda0879e1f158056d73ec808b7 Mon Sep 17 00:00:00 2001
From: Anton Blanchard
Date: Tue, 21 Jun 2005 17:15:55 -0700
Subject: [PATCH] ppc64: Mark kernel hptes dirty

We don't use the hardware referenced and changed bits, and setting them
early avoids a store to memory.  We already do this for userspace hptes
but not kernel ones.  Do it.

Signed-off-by: Anton Blanchard
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/ppc64/mm/hash_utils.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/ppc64/mm')

diff --git a/arch/ppc64/mm/hash_utils.c b/arch/ppc64/mm/hash_utils.c
index 87d0525f56f..1647b1c6f28 100644
--- a/arch/ppc64/mm/hash_utils.c
+++ b/arch/ppc64/mm/hash_utils.c
@@ -195,7 +195,7 @@ void __init htab_initialize(void)
 		memset((void *)table, 0, htab_size_bytes);
 	}
 
-	mode_rw = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RWXX;
+	mode_rw = _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX;
 
 	/* On U3 based machines, we need to reserve the DART area and
 	 * _NOT_ map it to avoid cache paradoxes as it's remapped non
--
cgit v1.2.3-70-g09d2
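
A note on why the one-liner above helps: on the ppc64 hash MMU, a hash PTE
created without the referenced (R) and changed (C) bits forces the hardware
to update the HPTE in memory the first time the mapping is read or written.
The kernel never inspects those bits for its own mappings, so presetting
them is free.  Restated from the hunk above:

	/* before: hardware must set R on first access, and C on first store */
	mode_rw = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RWXX;

	/* after: R and C preset, so no hardware HPTE update is needed */
	mode_rw = _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX;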