From b50516fc20f756cf4d18a89f6f9977d60151ccba Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Feb 2008 16:47:55 +0100 Subject: x86: CPA remove bogus NX clear In split_large_page we clear the NX bit for the new split ptes, but we need to preserve the original setting of it for the split ptes. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/mm/pageattr.c') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index e297bd65e51..877b5cca2cb 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -225,7 +225,6 @@ static int split_large_page(pte_t *kpte, unsigned long address) paravirt_alloc_pt(&init_mm, page_to_pfn(base)); #endif - pgprot_val(ref_prot) &= ~_PAGE_NX; for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot)); -- cgit v1.2.3-70-g09d2 From cc0f21bbc12dc9f05b2e7f2469128f8717b2f4d3 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 4 Feb 2008 16:48:05 +0100 Subject: x86: teach the static_protection function about high mappings Right now, enforcing that the high mapping of the kernel text doesn't get the NX bit is done deep in the guts of CPA, rather than in the static_protection() function that enforces all other per-arch sanity checks. This patch moves this sanity check into the central static_protection() function instead, and makes it apply ONLY to the kernel text, not to all other areas in the high mapping. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) (limited to 'arch/x86/mm/pageattr.c') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 877b5cca2cb..bf5e33f6a32 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -106,6 +106,22 @@ static void cpa_flush_range(unsigned long start, int numpages) } } +#define HIGH_MAP_START __START_KERNEL_map +#define HIGH_MAP_END (__START_KERNEL_map + KERNEL_TEXT_SIZE) + + +/* + * Converts a virtual address to a X86-64 highmap address + */ +static unsigned long virt_to_highmap(void *address) +{ +#ifdef CONFIG_X86_64 + return __pa((unsigned long)address) + HIGH_MAP_START - phys_base; +#else + return (unsigned long)address; +#endif +} + /* * Certain areas of memory on x86 require very specific protection flags, * for example the BIOS area or kernel text. Callers don't always get this @@ -129,12 +145,24 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address) */ if (within(address, (unsigned long)_text, (unsigned long)_etext)) pgprot_val(forbidden) |= _PAGE_NX; + /* + * Do the same for the x86-64 high kernel mapping + */ + if (within(address, virt_to_highmap(_text), virt_to_highmap(_etext))) + pgprot_val(forbidden) |= _PAGE_NX; + #ifdef CONFIG_DEBUG_RODATA /* The .rodata section needs to be read-only */ if (within(address, (unsigned long)__start_rodata, (unsigned long)__end_rodata)) pgprot_val(forbidden) |= _PAGE_RW; + /* + * Do the same for the x86-64 high kernel mapping + */ + if (within(address, virt_to_highmap(__start_rodata), + virt_to_highmap(__end_rodata))) + pgprot_val(forbidden) |= _PAGE_RW; #endif prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); @@ -304,8 +332,6 @@ repeat: * Modules and drivers should use the set_memory_* APIs instead. 
*/ -#define HIGH_MAP_START __START_KERNEL_map -#define HIGH_MAP_END (__START_KERNEL_map + KERNEL_TEXT_SIZE) static int change_page_attr_addr(unsigned long address, pgprot_t mask_set, @@ -338,10 +364,11 @@ change_page_attr_addr(unsigned long address, pgprot_t mask_set, /* * Calc the high mapping address. See __phys_addr() * for the non obvious details. + * + * Note that NX and other required permissions are + * checked in static_protections(). */ address = phys_addr + HIGH_MAP_START - phys_base; - /* Make sure the kernel mappings stay executable */ - pgprot_val(mask_clr) |= _PAGE_NX; /* * Our high aliases are imprecise, because we check -- cgit v1.2.3-70-g09d2
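The range check driving static_protections() is compact enough to model on its own. What follows is a self-contained userspace C sketch of the idea: accumulate "forbidden" bits for protected ranges and strip them from the requested protection. The MOCK_NX bit and the fake _text/_etext addresses are invented stand-ins for illustration, not the kernel's values.

#include <stdio.h>

#define MOCK_NX (1UL << 63)	/* stand-in for _PAGE_NX; illustrative value only */

/* Same semantics as the kernel's within(): start inclusive, end exclusive. */
static int within(unsigned long addr, unsigned long start, unsigned long end)
{
	return addr >= start && addr < end;
}

/*
 * Sketch of the static_protections() idea: collect forbidden bits for
 * special ranges (here: a fake kernel text range) and mask them out.
 */
static unsigned long mock_static_protections(unsigned long prot,
					     unsigned long address,
					     unsigned long text,
					     unsigned long etext)
{
	unsigned long forbidden = 0;

	if (within(address, text, etext))
		forbidden |= MOCK_NX;	/* kernel text must stay executable */

	return prot & ~forbidden;
}

int main(void)
{
	unsigned long text = 0x100000, etext = 0x400000; /* fake _text/_etext */

	printf("inside text:  %#lx\n",
	       mock_static_protections(MOCK_NX, 0x200000, text, etext));
	printf("outside text: %#lx\n",
	       mock_static_protections(MOCK_NX, 0x500000, text, etext));
	return 0;
}

The same pattern extends naturally to additional ranges (the high mapping, .rodata), which is exactly what the patch above does by calling within() once per protected region.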
From 626c2c9d065da0cbd9997e112501487958fde690 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 4 Feb 2008 16:48:05 +0100 Subject: x86: use the pfn from the page when changing its attributes When changing the attributes of a pte, we should use the PFN from the existing PTE rather than going through hoops calculating what we think it might have been; this is both fragile and totally unneeded. It also makes it more hairy to call any of these functions on non-direct maps for no good reason whatsoever. With this change, __change_page_attr() no longer takes a pfn as argument, which simplifies all the callers. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) (limited to 'arch/x86/mm/pageattr.c') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index bf5e33f6a32..6c55fbdbd7e 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -277,17 +277,12 @@ out_unlock: } static int -__change_page_attr(unsigned long address, unsigned long pfn, - pgprot_t mask_set, pgprot_t mask_clr) +__change_page_attr(unsigned long address, pgprot_t mask_set, pgprot_t mask_clr) { struct page *kpte_page; int level, err = 0; pte_t *kpte; -#ifdef CONFIG_X86_32 - BUG_ON(pfn > max_low_pfn); -#endif - repeat: kpte = lookup_address(address, &level); if (!kpte) @@ -298,17 +293,25 @@ repeat: BUG_ON(PageCompound(kpte_page)); if (level == PG_LEVEL_4K) { - pgprot_t new_prot = pte_pgprot(*kpte); pte_t new_pte, old_pte = *kpte; + pgprot_t new_prot = pte_pgprot(old_pte); + + if(!pte_val(old_pte)) { + WARN_ON_ONCE(1); + return -EINVAL; + } pgprot_val(new_prot) &= ~pgprot_val(mask_clr); pgprot_val(new_prot) |= pgprot_val(mask_set); new_prot = static_protections(new_prot, address); - new_pte = pfn_pte(pfn, canon_pgprot(new_prot)); - BUG_ON(pte_pfn(new_pte) != pte_pfn(old_pte)); - + /* + * We need to keep the pfn from the existing PTE, + * after all we're only going to change it's attributes + * not the memory it points to + */ + new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot)); set_pte_atomic(kpte, new_pte); } else { err = split_large_page(kpte, address); @@ -337,11 +340,11 @@ static int change_page_attr_addr(unsigned long address, pgprot_t mask_set, pgprot_t mask_clr) { - unsigned long phys_addr = __pa(address); - unsigned long pfn = phys_addr >> PAGE_SHIFT; int err; #ifdef CONFIG_X86_64 + unsigned long phys_addr = __pa(address); + /* * If we are inside the high mapped kernel range, then we * fixup the low mapping first. __va() returns the virtual @@ -351,7 +354,7 @@ change_page_attr_addr(unsigned long address, pgprot_t mask_set, address = (unsigned long) __va(phys_addr); #endif - err = __change_page_attr(address, pfn, mask_set, mask_clr); + err = __change_page_attr(address, mask_set, mask_clr); if (err) return err; @@ -375,7 +378,7 @@ change_page_attr_addr(unsigned long address, pgprot_t mask_set, * everything between 0 and KERNEL_TEXT_SIZE, so do * not propagate lookup failures back to users: */ - __change_page_attr(address, pfn, mask_set, mask_clr); + __change_page_attr(address, mask_set, mask_clr); } #endif return err; -- cgit v1.2.3-70-g09d2 From 63c1dcf4bc9a26b1d8baa9a8c7cc1b2e1e694011 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Feb 2008 16:48:05 +0100 Subject: x86: CPA use the existing pfn in split as well When splitting large pages, we get the pfn from the existing entry instead of calculating it ourselves. This removes the last remaining range restriction of the cpa code. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/x86/mm/pageattr.c') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 6c55fbdbd7e..a629cea5e46 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -221,8 +221,7 @@ static int split_large_page(pte_t *kpte, unsigned long address) { pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte)); gfp_t gfp_flags = GFP_KERNEL; - unsigned long flags; - unsigned long addr; + unsigned long flags, addr, pfn; pte_t *pbase, *tmp; struct page *base; unsigned int i, level; @@ -253,8 +252,12 @@ static int split_large_page(pte_t *kpte, unsigned long address) paravirt_alloc_pt(&init_mm, page_to_pfn(base)); #endif - for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) - set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot)); + /* + * Get the target pfn from the original entry: + */ + pfn = pte_pfn(*kpte); + for (i = 0; i < PTRS_PER_PTE; i++, pfn++) + set_pte(&pbase[i], pfn_pte(pfn, ref_prot)); /* * Install the new, split up pagetable. Important detail here: -- cgit v1.2.3-70-g09d2
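The arithmetic behind this split is simple enough to check in isolation. Below is a userspace C sketch under made-up values: a fake 2M entry is decomposed into 512 consecutive-pfn entries that share one ref_prot, mirroring the loop in the patch above; mock_pfn_pte() and the bit layout are illustrative stand-ins, not the kernel's pte encoding.

#include <stdio.h>

#define MOCK_PAGE_SHIFT 12
#define MOCK_PTRS_PER_PTE 512	/* 4K ptes covering one 2M mapping on x86-64 */

/* Stand-in for pfn_pte(): the pfn sits above the low attribute bits. */
static unsigned long mock_pfn_pte(unsigned long pfn, unsigned long prot)
{
	return (pfn << MOCK_PAGE_SHIFT) | prot;
}

int main(void)
{
	unsigned long pbase[MOCK_PTRS_PER_PTE];		  /* fake pte page */
	unsigned long kpte = mock_pfn_pte(0x40000, 0x63); /* fake 2M entry */
	unsigned long ref_prot = kpte & ((1UL << MOCK_PAGE_SHIFT) - 1);
	unsigned long pfn = kpte >> MOCK_PAGE_SHIFT;	  /* like pte_pfn(*kpte) */
	unsigned int i;

	/* The split loop: consecutive pfns, one shared ref_prot: */
	for (i = 0; i < MOCK_PTRS_PER_PTE; i++, pfn++)
		pbase[i] = mock_pfn_pte(pfn, ref_prot);

	printf("pbase[0]   = %#lx\n", pbase[0]);
	printf("pbase[511] = %#lx\n", pbase[511]);
	return 0;
}

Because the pfn comes straight from the existing entry, nothing in the loop depends on the virtual address being part of the direct mapping, which is what lifts the range restriction the commit message mentions.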
From 331e406588dc90331753e6562e5e3757bb907eb8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Feb 2008 16:48:06 +0100 Subject: x86: CPA return early when requested feature is not available Mask out the unsupported bits (e.g. NX). If the clr/set masks are empty after the masking, return without changing anything. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'arch/x86/mm/pageattr.c') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index a629cea5e46..f60b93dc2e5 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -405,8 +405,18 @@ static int __change_page_attr_set_clr(unsigned long addr, int numpages, static int change_page_attr_set_clr(unsigned long addr, int numpages, pgprot_t mask_set, pgprot_t mask_clr) { - int ret = __change_page_attr_set_clr(addr, numpages, mask_set, - mask_clr); + int ret; + + /* + * Check, if we are requested to change a not supported + * feature: + */ + mask_set = canon_pgprot(mask_set); + mask_clr = canon_pgprot(mask_clr); + if (!pgprot_val(mask_set) && !pgprot_val(mask_clr)) + return 0; + + ret = __change_page_attr_set_clr(addr, numpages, mask_set, mask_clr); /* * On success we use clflush, when the CPU supports it to -- cgit v1.2.3-70-g09d2 From 6bb8383bebc02dae08a17f561401f58005f75c03 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 4 Feb 2008 16:48:06 +0100 Subject: x86: cpa, only flush the cache if the caching attributes have changed We only need to flush the caches in cpa() if the caching attributes have changed. Otherwise only flush the TLBs. This checks the PAT bits too, although they are currently not used by the kernel. Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) (limited to 'arch/x86/mm/pageattr.c') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index f60b93dc2e5..456ad0ab9c7 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -52,21 +52,23 @@ void clflush_cache_range(void *vaddr, unsigned int size) static void __cpa_flush_all(void *arg) { + unsigned long cache = (unsigned long)arg; + /* * Flush all to work around Errata in early athlons regarding * large page flushing.
*/ __flush_tlb_all(); - if (boot_cpu_data.x86_model >= 4) + if (cache && boot_cpu_data.x86_model >= 4) wbinvd(); } -static void cpa_flush_all(void) +static void cpa_flush_all(unsigned long cache) { BUG_ON(irqs_disabled()); - on_each_cpu(__cpa_flush_all, NULL, 1, 1); + on_each_cpu(__cpa_flush_all, (void *) cache, 1, 1); } static void __cpa_flush_range(void *arg) @@ -79,7 +81,7 @@ static void __cpa_flush_range(void *arg) __flush_tlb_all(); } -static void cpa_flush_range(unsigned long start, int numpages) +static void cpa_flush_range(unsigned long start, int numpages, int cache) { unsigned int i, level; unsigned long addr; @@ -89,6 +91,9 @@ static void cpa_flush_range(unsigned long start, int numpages) on_each_cpu(__cpa_flush_range, NULL, 1, 1); + if (!cache) + return; + /* * We only need to flush on one CPU, * clflush is a MESI-coherent instruction that @@ -402,10 +407,16 @@ static int __change_page_attr_set_clr(unsigned long addr, int numpages, return 0; } +static inline int cache_attr(pgprot_t attr) +{ + return pgprot_val(attr) & + (_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD); +} + static int change_page_attr_set_clr(unsigned long addr, int numpages, pgprot_t mask_set, pgprot_t mask_clr) { - int ret; + int ret, cache; /* * Check, if we are requested to change a not supported @@ -418,6 +429,12 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages, ret = __change_page_attr_set_clr(addr, numpages, mask_set, mask_clr); + /* + * No need to flush, when we did not set any of the caching + * attributes: + */ + cache = cache_attr(mask_set); + /* * On success we use clflush, when the CPU supports it to * avoid the wbindv. If the CPU does not support it and in the @@ -425,9 +442,9 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages, * wbindv): */ if (!ret && cpu_has_clflush) - cpa_flush_range(addr, numpages); + cpa_flush_range(addr, numpages, cache); else - cpa_flush_all(); + cpa_flush_all(cache); return ret; } -- cgit v1.2.3-70-g09d2 From 72e458dfa63b3db7a46f66b0eb19e9ff4e17fc0e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Feb 2008 16:48:07 +0100 Subject: x86: introduce struct cpa_data The number of arguments which need to be transported is increasing and we want to add flush optimizations and large page preserving. Create struct cpa data and pass a pointer instead of increasing the number of arguments further. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 75 +++++++++++++++++++++++++------------------------- 1 file changed, 38 insertions(+), 37 deletions(-) (limited to 'arch/x86/mm/pageattr.c') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 456ad0ab9c7..d1c08308ecb 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -16,6 +16,13 @@ #include #include +struct cpa_data { + unsigned long vaddr; + int numpages; + pgprot_t mask_set; + pgprot_t mask_clr; +}; + static inline int within(unsigned long addr, unsigned long start, unsigned long end) { @@ -284,8 +291,7 @@ out_unlock: return 0; } -static int -__change_page_attr(unsigned long address, pgprot_t mask_set, pgprot_t mask_clr) +static int __change_page_attr(unsigned long address, struct cpa_data *cpa) { struct page *kpte_page; int level, err = 0; @@ -305,12 +311,15 @@ repeat: pgprot_t new_prot = pte_pgprot(old_pte); if(!pte_val(old_pte)) { - WARN_ON_ONCE(1); + printk(KERN_WARNING "CPA: called for zero pte. 
" + "vaddr = %lx cpa->vaddr = %lx\n", address, + cpa->vaddr); + WARN_ON(1); return -EINVAL; } - pgprot_val(new_prot) &= ~pgprot_val(mask_clr); - pgprot_val(new_prot) |= pgprot_val(mask_set); + pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr); + pgprot_val(new_prot) |= pgprot_val(cpa->mask_set); new_prot = static_protections(new_prot, address); @@ -343,12 +352,10 @@ repeat: * Modules and drivers should use the set_memory_* APIs instead. */ - -static int -change_page_attr_addr(unsigned long address, pgprot_t mask_set, - pgprot_t mask_clr) +static int change_page_attr_addr(struct cpa_data *cpa) { int err; + unsigned long address = cpa->vaddr; #ifdef CONFIG_X86_64 unsigned long phys_addr = __pa(address); @@ -362,7 +369,7 @@ change_page_attr_addr(unsigned long address, pgprot_t mask_set, address = (unsigned long) __va(phys_addr); #endif - err = __change_page_attr(address, mask_set, mask_clr); + err = __change_page_attr(address, cpa); if (err) return err; @@ -386,20 +393,19 @@ change_page_attr_addr(unsigned long address, pgprot_t mask_set, * everything between 0 and KERNEL_TEXT_SIZE, so do * not propagate lookup failures back to users: */ - __change_page_attr(address, mask_set, mask_clr); + __change_page_attr(address, cpa); } #endif return err; } -static int __change_page_attr_set_clr(unsigned long addr, int numpages, - pgprot_t mask_set, pgprot_t mask_clr) +static int __change_page_attr_set_clr(struct cpa_data *cpa) { unsigned int i; int ret; - for (i = 0; i < numpages ; i++, addr += PAGE_SIZE) { - ret = change_page_attr_addr(addr, mask_set, mask_clr); + for (i = 0; i < cpa->numpages ; i++, cpa->vaddr += PAGE_SIZE) { + ret = change_page_attr_addr(cpa); if (ret) return ret; } @@ -416,6 +422,7 @@ static inline int cache_attr(pgprot_t attr) static int change_page_attr_set_clr(unsigned long addr, int numpages, pgprot_t mask_set, pgprot_t mask_clr) { + struct cpa_data cpa; int ret, cache; /* @@ -427,7 +434,12 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages, if (!pgprot_val(mask_set) && !pgprot_val(mask_clr)) return 0; - ret = __change_page_attr_set_clr(addr, numpages, mask_set, mask_clr); + cpa.vaddr = addr; + cpa.numpages = numpages; + cpa.mask_set = mask_set; + cpa.mask_clr = mask_clr; + + ret = __change_page_attr_set_clr(&cpa); /* * No need to flush, when we did not set any of the caching @@ -548,37 +560,26 @@ int set_pages_rw(struct page *page, int numpages) return set_memory_rw(addr, numpages); } - -#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_CPA_DEBUG) -static inline int __change_page_attr_set(unsigned long addr, int numpages, - pgprot_t mask) -{ - return __change_page_attr_set_clr(addr, numpages, mask, __pgprot(0)); -} - -static inline int __change_page_attr_clear(unsigned long addr, int numpages, - pgprot_t mask) -{ - return __change_page_attr_set_clr(addr, numpages, __pgprot(0), mask); -} -#endif - #ifdef CONFIG_DEBUG_PAGEALLOC static int __set_pages_p(struct page *page, int numpages) { - unsigned long addr = (unsigned long)page_address(page); + struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page), + .numpages = numpages, + .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW), + .mask_clr = __pgprot(0)}; - return __change_page_attr_set(addr, numpages, - __pgprot(_PAGE_PRESENT | _PAGE_RW)); + return __change_page_attr_set_clr(&cpa); } static int __set_pages_np(struct page *page, int numpages) { - unsigned long addr = (unsigned long)page_address(page); + struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page), + .numpages = numpages, + 
.mask_set = __pgprot(0), + .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)}; - return __change_page_attr_clear(addr, numpages, - __pgprot(_PAGE_PRESENT)); + return __change_page_attr_set_clr(&cpa); } void kernel_map_pages(struct page *page, int numpages, int enable) -- cgit v1.2.3-70-g09d2 From f4ae5da0e8e92caa168e7c2a7c4a6c4064b082c2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Feb 2008 16:48:07 +0100 Subject: x86: cpa, check if we changed anything and tlb flushing is necessary Flush tlbs only when there was a real change. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/x86/mm/pageattr.c') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index d1c08308ecb..79a9f1b42dd 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -21,6 +21,7 @@ struct cpa_data { int numpages; pgprot_t mask_set; pgprot_t mask_clr; + int flushtlb; }; static inline int @@ -329,11 +330,19 @@ repeat: * not the memory it points to */ new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot)); - set_pte_atomic(kpte, new_pte); + + /* + * Do we really change anything ? + */ + if (pte_val(old_pte) != pte_val(new_pte)) { + set_pte_atomic(kpte, new_pte); + cpa->flushtlb = 1; + } } else { err = split_large_page(kpte, address); if (!err) goto repeat; + cpa->flushtlb = 1; } return err; } @@ -438,9 +447,16 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages, cpa.numpages = numpages; cpa.mask_set = mask_set; cpa.mask_clr = mask_clr; + cpa.flushtlb = 0; ret = __change_page_attr_set_clr(&cpa); + /* + * Check whether we really changed something: + */ + if (!cpa.flushtlb) + return ret; + /* * No need to flush, when we did not set any of the caching * attributes: -- cgit v1.2.3-70-g09d2 From 65e074dffa198978ab0c9976a19b954fbe1183e2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Feb 2008 16:48:07 +0100 Subject: x86: cpa, preserve large pages if possible When CPA is called on a range which fits into a large page mapping, avoid splitting the page when: 1) There is no change of attributes 2) The range to change is a complete large mapping Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 142 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 130 insertions(+), 12 deletions(-) (limited to 'arch/x86/mm/pageattr.c') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 79a9f1b42dd..40b7ac58e67 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -18,12 +18,17 @@ struct cpa_data { unsigned long vaddr; - int numpages; pgprot_t mask_set; pgprot_t mask_clr; + int numpages; int flushtlb; }; +enum { + CPA_NO_SPLIT = 0, + CPA_SPLIT, +}; + static inline int within(unsigned long addr, unsigned long start, unsigned long end) { @@ -230,6 +235,86 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) #endif } +static int try_preserve_large_page(pte_t *kpte, unsigned long address, + struct cpa_data *cpa) +{ + unsigned long nextpage_addr, numpages, pmask, psize, flags; + pte_t new_pte, old_pte, *tmp; + pgprot_t old_prot, new_prot; + int level, res = CPA_SPLIT; + + spin_lock_irqsave(&pgd_lock, flags); + /* + * Check for races, another CPU might have split this page + * up already: + */ + tmp = lookup_address(address, &level); + if (tmp != kpte) + goto out_unlock; + + switch (level) { + case PG_LEVEL_2M: + psize = LARGE_PAGE_SIZE; + pmask = LARGE_PAGE_MASK; +
break; + case PG_LEVEL_1G: + default: + res = -EINVAL; + goto out_unlock; + } + + /* + * Calculate the number of pages, which fit into this large + * page starting at address: + */ + nextpage_addr = (address + psize) & pmask; + numpages = (nextpage_addr - address) >> PAGE_SHIFT; + if (numpages < cpa->numpages) + cpa->numpages = numpages; + + /* + * We are safe now. Check whether the new pgprot is the same: + */ + old_pte = *kpte; + old_prot = new_prot = pte_pgprot(old_pte); + + pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr); + pgprot_val(new_prot) |= pgprot_val(cpa->mask_set); + new_prot = static_protections(new_prot, address); + + /* + * If there are no changes, return. maxpages has been updated + * above: + */ + if (pgprot_val(new_prot) == pgprot_val(old_prot)) { + res = CPA_NO_SPLIT; + goto out_unlock; + } + + /* + * We need to change the attributes. Check, whether we can + * change the large page in one go. We request a split, when + * the address is not aligned and the number of pages is + * smaller than the number of pages in the large page. Note + * that we limited the number of possible pages already to + * the number of pages in the large page. + */ + if (address == (nextpage_addr - psize) && cpa->numpages == numpages) { + /* + * The address is aligned and the number of pages + * covers the full page. + */ + new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot)); + __set_pmd_pte(kpte, address, new_pte); + cpa->flushtlb = 1; + res = CPA_NO_SPLIT; + } + +out_unlock: + spin_unlock_irqrestore(&pgd_lock, flags); + return res; +} + static int split_large_page(pte_t *kpte, unsigned long address) { pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte)); @@ -295,7 +380,7 @@ out_unlock: static int __change_page_attr(unsigned long address, struct cpa_data *cpa) { struct page *kpte_page; - int level, err = 0; + int level, res; pte_t *kpte; repeat: @@ -338,13 +423,34 @@ repeat: set_pte_atomic(kpte, new_pte); cpa->flushtlb = 1; } - } else { - err = split_large_page(kpte, address); - if (!err) - goto repeat; - cpa->flushtlb = 1; + cpa->numpages = 1; + return 0; } - return err; + + /* + * Check, whether we can keep the large page intact + * and just change the pte: + */ + res = try_preserve_large_page(kpte, address, cpa); + if (res < 0) + return res; + + /* + * When the range fits into the existing large page, + * return. cp->numpages and cpa->tlbflush have been updated in + * try_large_page: + */ + if (res == CPA_NO_SPLIT) + return 0; + + /* + * We have to split the large page: + */ + res = split_large_page(kpte, address); + if (res) + return res; + cpa->flushtlb = 1; + goto repeat; } /** @@ -410,15 +516,27 @@ static int change_page_attr_addr(struct cpa_data *cpa) static int __change_page_attr_set_clr(struct cpa_data *cpa) { - unsigned int i; - int ret; + int ret, numpages = cpa->numpages; - for (i = 0; i < cpa->numpages ; i++, cpa->vaddr += PAGE_SIZE) { + while (numpages) { + /* + * Store the remaining nr of pages for the large page + * preservation check. + */ + cpa->numpages = numpages; ret = change_page_attr_addr(cpa); if (ret) return ret; - } + /* + * Adjust the number of pages with the result of the + * CPA operation. Either a large page has been + * preserved or a single page update happened. 
+ */ + BUG_ON(cpa->numpages > numpages); + numpages -= cpa->numpages; + cpa->vaddr += cpa->numpages * PAGE_SIZE; + } return 0; } -- cgit v1.2.3-70-g09d2 From 34508f66b69ff1708192654f631eb8f1d4c52005 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 4 Feb 2008 16:48:07 +0100 Subject: x86: AMD Athlon X2 hard hang fix An Athlon 64 X2 test system showed hard hangs shortly after marking the kernel text read-only, if we tried to preserve largepages and changed the PSE entry from RW to RO. The pagetable code itself is correct, it's the CPU that locked up hard (and not even the NMI watchdog could punch through that hard hang). So be conservative and always do splitups - like we did in the past. Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/x86/mm/pageattr.c') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 40b7ac58e67..3810f7a83b1 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -243,6 +243,17 @@ static int try_preserve_large_page(pte_t *kpte, unsigned long address, pgprot_t old_prot, new_prot; int level, res = CPA_SPLIT; + /* + * An Athlon 64 X2 showed hard hangs if we tried to preserve + * largepages and changed the PSE entry from RW to RO. + * + * As AMD CPUs have a long series of erratas in this area, + * (and none of the known ones seem to explain this hang), + * disable this code until the hang can be debugged: + */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + return res; + spin_lock_irqsave(&pgd_lock, flags); /* * Check for races, another CPU might have split this page -- cgit v1.2.3-70-g09d2 From 9a14aefc1d28c6037122965ee8c10d92a970ade0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Feb 2008 16:48:07 +0100 Subject: x86: cpa, fix lookup_address lookup_address() returns a wrong level and a wrong pointer to a non-existing pte when pmd or pud entries are marked !present. This happens, for example, due to the boot-time mapping of the GART into the low memory space. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/x86/mm/pageattr.c') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 3810f7a83b1..7d21cd658ed 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -188,6 +188,14 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address) return prot; } +/* + * Lookup the page table entry for a virtual address. Return a pointer + * to the entry and the level of the mapping. + * + * Note: We return pud and pmd either when the entry is marked large + * or when the present bit is not set. Otherwise we would return a + * pointer to a nonexisting mapping. + */ pte_t *lookup_address(unsigned long address, int *level) { pgd_t *pgd = pgd_offset_k(address); @@ -206,7 +214,7 @@ pte_t *lookup_address(unsigned long address, int *level) return NULL; *level = PG_LEVEL_2M; - if (pmd_large(*pmd)) + if (pmd_large(*pmd) || !pmd_present(*pmd)) return (pte_t *)pmd; *level = PG_LEVEL_4K; -- cgit v1.2.3-70-g09d2 From 31422c51e0dc72532d82e80895932d430c3ed307 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 4 Feb 2008 16:48:08 +0100 Subject: x86: rename LARGE_PAGE_SIZE to PMD_PAGE_SIZE Fix up all users.
Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/boot/compressed/head_64.S | 8 ++++---- arch/x86/kernel/head_64.S | 4 ++-- arch/x86/kernel/pci-gart_64.c | 2 +- arch/x86/mm/init_64.c | 6 +++--- arch/x86/mm/pageattr.c | 6 +++--- include/asm-x86/page.h | 4 ++-- 6 files changed, 15 insertions(+), 15 deletions(-) (limited to 'arch/x86/mm/pageattr.c') diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 1ccb38a7f0d..e8657b98c90 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -80,8 +80,8 @@ startup_32: #ifdef CONFIG_RELOCATABLE movl %ebp, %ebx - addl $(LARGE_PAGE_SIZE -1), %ebx - andl $LARGE_PAGE_MASK, %ebx + addl $(PMD_PAGE_SIZE -1), %ebx + andl $PMD_PAGE_MASK, %ebx #else movl $CONFIG_PHYSICAL_START, %ebx #endif @@ -220,8 +220,8 @@ ENTRY(startup_64) /* Start with the delta to where the kernel will run at. */ #ifdef CONFIG_RELOCATABLE leaq startup_32(%rip) /* - $startup_32 */, %rbp - addq $(LARGE_PAGE_SIZE - 1), %rbp - andq $LARGE_PAGE_MASK, %rbp + addq $(PMD_PAGE_SIZE - 1), %rbp + andq $PMD_PAGE_MASK, %rbp movq %rbp, %rbx #else movq $CONFIG_PHYSICAL_START, %rbp diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 1d5a7a36120..4f283ad215e 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -63,7 +63,7 @@ startup_64: /* Is the address not 2M aligned? */ movq %rbp, %rax - andl $~LARGE_PAGE_MASK, %eax + andl $~PMD_PAGE_MASK, %eax testl %eax, %eax jnz bad_address @@ -88,7 +88,7 @@ startup_64: /* Add an Identity mapping if I am above 1G */ leaq _text(%rip), %rdi - andq $LARGE_PAGE_MASK, %rdi + andq $PMD_PAGE_MASK, %rdi movq %rdi, %rax shrq $PUD_SHIFT, %rax diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 4d5cc718198..ae1d3d8b384 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -501,7 +501,7 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) } a = aper + iommu_size; - iommu_size -= round_up(a, LARGE_PAGE_SIZE) - a; + iommu_size -= round_up(a, PMD_PAGE_SIZE) - a; if (iommu_size < 64*1024*1024) { printk(KERN_WARNING diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index eabcaed76c2..b7a7992c28b 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -444,10 +444,10 @@ void __init clear_kernel_mapping(unsigned long address, unsigned long size) { unsigned long end = address + size; - BUG_ON(address & ~LARGE_PAGE_MASK); - BUG_ON(size & ~LARGE_PAGE_MASK); + BUG_ON(address & ~PMD_PAGE_MASK); + BUG_ON(size & ~PMD_PAGE_MASK); - for (; address < end; address += LARGE_PAGE_SIZE) { + for (; address < end; address += PMD_PAGE_SIZE) { pgd_t *pgd = pgd_offset_k(address); pud_t *pud; pmd_t *pmd; diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 7d21cd658ed..74446ea23ff 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -273,8 +273,8 @@ static int try_preserve_large_page(pte_t *kpte, unsigned long address, switch (level) { case PG_LEVEL_2M: - psize = LARGE_PAGE_SIZE; - pmask = LARGE_PAGE_MASK; + psize = PMD_PAGE_SIZE; + pmask = PMD_PAGE_MASK; break; case PG_LEVEL_1G: default: @@ -363,7 +363,7 @@ static int split_large_page(pte_t *kpte, unsigned long address) } address = __pa(address); - addr = address & LARGE_PAGE_MASK; + addr = address & PMD_PAGE_MASK; pbase = (pte_t *)page_address(base); #ifdef CONFIG_X86_32 paravirt_alloc_pt(&init_mm, page_to_pfn(base)); diff --git a/include/asm-x86/page.h 
b/include/asm-x86/page.h index c8b30efeed8..1cb7c51bc29 100644 --- a/include/asm-x86/page.h +++ b/include/asm-x86/page.h @@ -13,8 +13,8 @@ #define PHYSICAL_PAGE_MASK (PAGE_MASK & __PHYSICAL_MASK) #define PTE_MASK (_AT(long, PHYSICAL_PAGE_MASK)) -#define LARGE_PAGE_SIZE (_AC(1,UL) << PMD_SHIFT) -#define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1)) +#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) +#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1)) #define HPAGE_SHIFT PMD_SHIFT #define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) -- cgit v1.2.3-70-g09d2 From 07cf89c05f2bbafa002401ac4e09ac31678513e4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Feb 2008 16:48:08 +0100 Subject: x86: CPA fix pagetable split Move the readout of the large entry into the spinlock section to prevent an unlikely but possible race. Mark the pmd/pud entry present after the split. We preserved the non present bit in the new split mapping. Remove the stale gfp_flags double initialization. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/x86/mm/pageattr.c') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 74446ea23ff..72880993af8 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -336,7 +336,7 @@ out_unlock: static int split_large_page(pte_t *kpte, unsigned long address) { - pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte)); + pgprot_t ref_prot; gfp_t gfp_flags = GFP_KERNEL; unsigned long flags, addr, pfn; pte_t *pbase, *tmp; @@ -344,7 +344,6 @@ static int split_large_page(pte_t *kpte, unsigned long address) unsigned int i, level; #ifdef CONFIG_DEBUG_PAGEALLOC - gfp_flags = __GFP_HIGH | __GFP_NOFAIL | __GFP_NOWARN; gfp_flags = GFP_ATOMIC | __GFP_NOWARN; #endif base = alloc_pages(gfp_flags, 0); @@ -368,6 +367,7 @@ static int split_large_page(pte_t *kpte, unsigned long address) #ifdef CONFIG_X86_32 paravirt_alloc_pt(&init_mm, page_to_pfn(base)); #endif + ref_prot = pte_pgprot(pte_clrhuge(*kpte)); /* * Get the target pfn from the original entry: @@ -377,13 +377,17 @@ static int split_large_page(pte_t *kpte, unsigned long address) set_pte(&pbase[i], pfn_pte(pfn, ref_prot)); /* - * Install the new, split up pagetable. Important detail here: + * Install the new, split up pagetable. Important details here: * * On Intel the NX bit of all levels must be cleared to make a * page executable. See section 4.13.2 of Intel 64 and IA-32 * Architectures Software Developer's Manual). + * + * Mark the entry present. The current mapping might be + * set to not present, which we preserved above. */ ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte))); + pgprot_val(ref_prot) |= _PAGE_PRESENT; __set_pmd_pte(kpte, address, mk_pte(base, ref_prot)); base = NULL; -- cgit v1.2.3-70-g09d2 From 6ce9fc17d913ae51f8434d2826f306347820b07d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 4 Feb 2008 16:48:08 +0100 Subject: x86: remove cpa warning this race is legit and can happen on SMP systems. 
Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86/mm/pageattr.c') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 72880993af8..0b029c97174 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -356,10 +356,8 @@ static int split_large_page(pte_t *kpte, unsigned long address) * up for us already: */ tmp = lookup_address(address, &level); - if (tmp != kpte) { - WARN_ON_ONCE(1); + if (tmp != kpte) goto out_unlock; - } address = __pa(address); addr = address & PMD_PAGE_MASK; -- cgit v1.2.3-70-g09d2 From 7bfb72e847c201fe32271fb13f75d060671d8890 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Feb 2008 16:48:08 +0100 Subject: x86: fix page-present check in cpa_flush_range pte_present() might return true for PROT_NONE mappings. Explicitly check the present bit. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/mm/pageattr.c') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 0b029c97174..9be684e61dc 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -119,7 +119,7 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache) /* * Only flush present addresses: */ - if (pte && pte_present(*pte)) + if (pte && (pte_val(*pte) & _PAGE_PRESENT)) clflush_cache_range((void *) addr, PAGE_SIZE); } } -- cgit v1.2.3-70-g09d2 From c2f71ee2140b2a506735ff9fcb7e3b1dfaab8f2b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 4 Feb 2008 16:48:09 +0100 Subject: x86: add gbpages support to lookup_address [ tglx@linutronix.de: fix bootup crash on sparse mappings. ] Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86/mm/pageattr.c') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 9be684e61dc..143fbafc948 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -209,6 +209,11 @@ pte_t *lookup_address(unsigned long address, int *level) pud = pud_offset(pgd, address); if (pud_none(*pud)) return NULL; + + *level = PG_LEVEL_1G; + if (pud_large(*pud) || !pud_present(*pud)) + return (pte_t *)pud; + pmd = pmd_offset(pud, address); if (pmd_none(*pmd)) return NULL; -- cgit v1.2.3-70-g09d2 From f07333fd149eb6826da26a89c3aff90324f270b0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 4 Feb 2008 16:48:09 +0100 Subject: x86: implement gbpages support in change_page_attr() Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'arch/x86/mm/pageattr.c') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 143fbafc948..42ca3d8effa 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -281,7 +281,12 @@ static int try_preserve_large_page(pte_t *kpte, unsigned long address, psize = PMD_PAGE_SIZE; pmask = PMD_PAGE_MASK; break; +#ifdef CONFIG_X86_64 case PG_LEVEL_1G: + psize = PMD_PAGE_SIZE; + pmask = PMD_PAGE_MASK; + break; +#endif default: res = -EINVAL; goto out_unlock; @@ -343,7 +348,7 @@ static int split_large_page(pte_t *kpte, unsigned long address) { pgprot_t ref_prot; gfp_t gfp_flags = GFP_KERNEL; - unsigned long flags, addr, pfn; + unsigned long flags, addr, pfn, pfninc = 1; pte_t *pbase, *tmp; struct
page *base; unsigned int i, level; @@ -372,11 +377,19 @@ static int split_large_page(pte_t *kpte, unsigned long address) #endif ref_prot = pte_pgprot(pte_clrhuge(*kpte)); +#ifdef CONFIG_X86_64 + if (level == PG_LEVEL_1G) { + pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT; + pgprot_val(ref_prot) |= _PAGE_PSE; + addr &= PUD_PAGE_MASK; + } +#endif + /* * Get the target pfn from the original entry: */ pfn = pte_pfn(*kpte); - for (i = 0; i < PTRS_PER_PTE; i++, pfn++) + for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc) set_pte(&pbase[i], pfn_pte(pfn, ref_prot)); /* -- cgit v1.2.3-70-g09d2 From 9df84993cb3d71669894654ab257f01f6e4ed48e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 4 Feb 2008 16:48:09 +0100 Subject: x86: cpa, cleanups Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'arch/x86/mm/pageattr.c') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 42ca3d8effa..029fb07b3f0 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -16,6 +16,9 @@ #include #include +/* + * The current flushing context - we pass it instead of 5 arguments: + */ struct cpa_data { unsigned long vaddr; pgprot_t mask_set; @@ -206,6 +209,7 @@ pte_t *lookup_address(unsigned long address, int *level) if (pgd_none(*pgd)) return NULL; + pud = pud_offset(pgd, address); if (pud_none(*pud)) return NULL; @@ -223,9 +227,13 @@ pte_t *lookup_address(unsigned long address, int *level) return (pte_t *)pmd; *level = PG_LEVEL_4K; + return pte_offset_kernel(pmd, address); } +/* + * Set the new pmd in all the pgds we know about: + */ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) { /* change init_mm */ @@ -248,8 +256,9 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) #endif } -static int try_preserve_large_page(pte_t *kpte, unsigned long address, - struct cpa_data *cpa) +static int +try_preserve_large_page(pte_t *kpte, unsigned long address, + struct cpa_data *cpa) { unsigned long nextpage_addr, numpages, pmask, psize, flags; pte_t new_pte, old_pte, *tmp; @@ -341,17 +350,18 @@ static int try_preserve_large_page(pte_t *kpte, unsigned long address, out_unlock: spin_unlock_irqrestore(&pgd_lock, flags); + return res; } static int split_large_page(pte_t *kpte, unsigned long address) { - pgprot_t ref_prot; - gfp_t gfp_flags = GFP_KERNEL; unsigned long flags, addr, pfn, pfninc = 1; + gfp_t gfp_flags = GFP_KERNEL; + unsigned int i, level; pte_t *pbase, *tmp; + pgprot_t ref_prot; struct page *base; - unsigned int i, level; #ifdef CONFIG_DEBUG_PAGEALLOC gfp_flags = GFP_ATOMIC | __GFP_NOWARN; @@ -505,7 +515,6 @@ repeat: * * Modules and drivers should use the set_memory_* APIs instead. 
*/ - static int change_page_attr_addr(struct cpa_data *cpa) { int err; -- cgit v1.2.3-70-g09d2 From beaff6333b4a21e8f3b7f9a7c3c8f8716b2334bc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 4 Feb 2008 16:48:09 +0100 Subject: x86: cpa, eliminate CPA_ enum Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) (limited to 'arch/x86/mm/pageattr.c') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 029fb07b3f0..fb2eedba76a 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -27,11 +27,6 @@ struct cpa_data { int flushtlb; }; -enum { - CPA_NO_SPLIT = 0, - CPA_SPLIT, -}; - static inline int within(unsigned long addr, unsigned long start, unsigned long end) { @@ -263,7 +258,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, unsigned long nextpage_addr, numpages, pmask, psize, flags; pte_t new_pte, old_pte, *tmp; pgprot_t old_prot, new_prot; - int level, res = CPA_SPLIT; + int level, do_split = 1; /* * An Athlon 64 X2 showed hard hangs if we tried to preserve @@ -274,7 +269,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * disable this code until the hang can be debugged: */ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - return res; + return 1; spin_lock_irqsave(&pgd_lock, flags); /* @@ -297,7 +292,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, break; #endif default: - res = -EINVAL; + do_split = -EINVAL; goto out_unlock; } @@ -325,7 +320,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * above: */ if (pgprot_val(new_prot) == pgprot_val(old_prot)) { - res = CPA_NO_SPLIT; + do_split = 0; goto out_unlock; } @@ -345,13 +340,13 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot)); __set_pmd_pte(kpte, address, new_pte); cpa->flushtlb = 1; - res = CPA_NO_SPLIT; + do_split = 0; } out_unlock: spin_unlock_irqrestore(&pgd_lock, flags); - return res; + return do_split; } static int split_large_page(pte_t *kpte, unsigned long address) @@ -429,7 +424,7 @@ out_unlock: static int __change_page_attr(unsigned long address, struct cpa_data *cpa) { struct page *kpte_page; - int level, res; + int level, do_split; pte_t *kpte; repeat: @@ -480,25 +475,26 @@ repeat: * Check, whether we can keep the large page intact * and just change the pte: */ - res = try_preserve_large_page(kpte, address, cpa); - if (res < 0) - return res; + do_split = try_preserve_large_page(kpte, address, cpa); + if (do_split < 0) + return do_split; /* * When the range fits into the existing large page, * return. 
cp->numpages and cpa->tlbflush have been updated in * try_large_page: */ - if (res == CPA_NO_SPLIT) + if (do_split == 0) return 0; /* * We have to split the large page: */ - res = split_large_page(kpte, address); - if (res) - return res; + do_split = split_large_page(kpte, address); + if (do_split) + return do_split; cpa->flushtlb = 1; + goto repeat; } -- cgit v1.2.3-70-g09d2 From 87f7f8fe328388a1430a4c27cbe684f3925fd8a5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 4 Feb 2008 16:48:10 +0100 Subject: x86: cpa, clean up code flow Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'arch/x86/mm/pageattr.c') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index fb2eedba76a..4f033505127 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -423,8 +423,8 @@ out_unlock: static int __change_page_attr(unsigned long address, struct cpa_data *cpa) { + int level, do_split, err; struct page *kpte_page; - int level, do_split; pte_t *kpte; repeat: @@ -476,26 +476,24 @@ repeat: * and just change the pte: */ do_split = try_preserve_large_page(kpte, address, cpa); - if (do_split < 0) - return do_split; - /* * When the range fits into the existing large page, * return. cp->numpages and cpa->tlbflush have been updated in * try_large_page: */ - if (do_split == 0) - return 0; + if (do_split <= 0) + return do_split; /* * We have to split the large page: */ - do_split = split_large_page(kpte, address); - if (do_split) - return do_split; - cpa->flushtlb = 1; + err = split_large_page(kpte, address); + if (!err) { + cpa->flushtlb = 1; + goto repeat; + } - goto repeat; + return err; } /** -- cgit v1.2.3-70-g09d2 From 7b610eec7a06ede64f71459e7f412dfd96f4cc5e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Feb 2008 16:48:10 +0100 Subject: x86: cpa, micro-optimization Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86/mm/pageattr.c') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 4f033505127..bb55a78dcd6 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -237,6 +237,7 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) if (!SHARED_KERNEL_PMD) { struct page *page; + address = __pa(address); list_for_each_entry(page, &pgd_list, lru) { pgd_t *pgd; pud_t *pud; @@ -351,7 +352,7 @@ out_unlock: static int split_large_page(pte_t *kpte, unsigned long address) { - unsigned long flags, addr, pfn, pfninc = 1; + unsigned long flags, pfn, pfninc = 1; gfp_t gfp_flags = GFP_KERNEL; unsigned int i, level; pte_t *pbase, *tmp; @@ -374,8 +375,6 @@ static int split_large_page(pte_t *kpte, unsigned long address) if (tmp != kpte) goto out_unlock; - address = __pa(address); - addr = address & PMD_PAGE_MASK; pbase = (pte_t *)page_address(base); #ifdef CONFIG_X86_32 paravirt_alloc_pt(&init_mm, page_to_pfn(base)); @@ -386,7 +385,6 @@ static int split_large_page(pte_t *kpte, unsigned long address) if (level == PG_LEVEL_1G) { pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT; pgprot_val(ref_prot) |= _PAGE_PSE; - addr &= PUD_PAGE_MASK; } #endif -- cgit v1.2.3-70-g09d2
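Taken together, the series settles on a small decision tree for flushing: return early when the canonicalized masks are empty (331e4065), track in cpa_data whether any pte really changed (f4ae5da0), and push the change through the caches only when caching attributes such as PAT/PWT/PCD were touched (6bb8383b). The sketch below models that flow in plain userspace C on a single fake pte; the MOCK_* bits, mock_cpa_data structure, and printed messages are invented for illustration and do not mirror real kernel values or behavior.

#include <stdio.h>

/* Illustrative stand-ins for the _PAGE_* attribute bits: */
#define MOCK_PAGE_RW  (1UL << 1)
#define MOCK_PAGE_PWT (1UL << 3)
#define MOCK_PAGE_PCD (1UL << 4)
#define MOCK_PAGE_PAT (1UL << 7)

struct mock_cpa_data {		/* shaped like struct cpa_data */
	unsigned long mask_set;
	unsigned long mask_clr;
	int flushtlb;
};

/* Analogous to cache_attr(): does the mask touch caching attributes? */
static int mock_cache_attr(unsigned long mask)
{
	return (mask & (MOCK_PAGE_PAT | MOCK_PAGE_PWT | MOCK_PAGE_PCD)) != 0;
}

/* Model of change_page_attr_set_clr()'s flush decision on one fake pte. */
static void mock_cpa(unsigned long *pte, unsigned long set, unsigned long clr)
{
	struct mock_cpa_data cpa = { set, clr, 0 };
	unsigned long new_pte;

	if (!cpa.mask_set && !cpa.mask_clr) {	/* nothing to do at all */
		printf("empty masks -> no change, no flush\n");
		return;
	}

	new_pte = (*pte & ~cpa.mask_clr) | cpa.mask_set;
	if (new_pte != *pte) {			/* did we really change it? */
		*pte = new_pte;
		cpa.flushtlb = 1;
	}

	if (!cpa.flushtlb)
		printf("no pte changed -> no flush\n");
	else if (mock_cache_attr(cpa.mask_set))
		printf("caching bits set -> flush TLB and cache\n");
	else
		printf("attributes changed -> flush TLB only\n");
}

int main(void)
{
	unsigned long pte = MOCK_PAGE_RW;

	mock_cpa(&pte, 0, 0);			/* empty request         */
	mock_cpa(&pte, MOCK_PAGE_RW, 0);	/* already set: no-op    */
	mock_cpa(&pte, 0, MOCK_PAGE_RW);	/* clear RW: TLB flush   */
	mock_cpa(&pte, MOCK_PAGE_PCD, 0);	/* uncached: cache flush */
	return 0;
}

The ordering of the checks matters and matches the patches above: the empty-mask test avoids all work, the flushtlb test avoids both flushes when every pte was already in the requested state, and only then does the cache-attribute test decide between a TLB-only flush and the expensive clflush/wbinvd path.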