From 11124411aa95827404d6bfdfc14c908e1b54513c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Feb 2009 16:29:09 +0900 Subject: x86: convert to the new dynamic percpu allocator Impact: use new dynamic allocator, unified access to static/dynamic percpu memory Convert to the new dynamic percpu allocator. * implement populate_extra_pte() for both 32 and 64 * update setup_per_cpu_areas() to use pcpu_setup_static() * define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() * define config HAVE_DYNAMIC_PER_CPU_AREA Signed-off-by: Tejun Heo --- arch/x86/mm/init_32.c | 10 ++++++++++ arch/x86/mm/init_64.c | 19 +++++++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 00263bf07a8..8b1a0ef7f87 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -137,6 +137,16 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) return pte_offset_kernel(pmd, 0); } +void __init populate_extra_pte(unsigned long vaddr) +{ + int pgd_idx = pgd_index(vaddr); + int pmd_idx = pmd_index(vaddr); + pmd_t *pmd; + + pmd = one_md_table_init(swapper_pg_dir + pgd_idx); + one_page_table_init(pmd + pmd_idx); +} + static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, unsigned long vaddr, pte_t *lastpte) { diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index e6d36b49025..7f91e2cdc4c 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -223,6 +223,25 @@ set_pte_vaddr(unsigned long vaddr, pte_t pteval) set_pte_vaddr_pud(pud_page, vaddr, pteval); } +void __init populate_extra_pte(unsigned long vaddr) +{ + pgd_t *pgd; + pud_t *pud; + + pgd = pgd_offset_k(vaddr); + if (pgd_none(*pgd)) { + pud = (pud_t *)spp_getpage(); + pgd_populate(&init_mm, pgd, pud); + if (pud != pud_offset(pgd, 0)) { + printk(KERN_ERR "PAGETABLE BUG #00! %p <-> %p\n", + pud, pud_offset(pgd, 0)); + return; + } + } + + set_pte_vaddr_pud((pud_t *)pgd_page_vaddr(*pgd), vaddr, __pte(0)); +} + /* * Create large page table mappings for a range of physical addresses. */ -- cgit v1.2.3-70-g09d2 From 458a3e644c3327be529393982e24277eda8f1ac7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Feb 2009 11:57:21 +0900 Subject: x86: update populate_extra_pte() and add populate_extra_pmd() Impact: minor change to populate_extra_pte() and addition of pmd flavor Update populate_extra_pte() to return pointer to the pte_t for the specified address and add populate_extra_pmd() which only populates till the pmd and returns pointer to the pmd entry for the address. For 64bit, pud/pmd/pte fill functions are separated out from set_pte_vaddr[_pud]() and used for set_pte_vaddr[_pud]() and populate_extra_{pte|pmd}(). Signed-off-by: Tejun Heo --- arch/x86/include/asm/pgtable.h | 3 +- arch/x86/kernel/setup_percpu.c | 7 +++- arch/x86/mm/init_32.c | 13 ++++++-- arch/x86/mm/init_64.c | 75 +++++++++++++++++++++++++----------------- 4 files changed, 63 insertions(+), 35 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index dd91c2515c6..46312eb0d68 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -402,7 +402,8 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, /* Install a pte for a particular vaddr in kernel space. */ void set_pte_vaddr(unsigned long vaddr, pte_t pte); -void populate_extra_pte(unsigned long vaddr); +pmd_t *populate_extra_pmd(unsigned long vaddr); +pte_t *populate_extra_pte(unsigned long vaddr); #ifdef CONFIG_X86_32 extern void native_pagetable_setup_start(pgd_t *base); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 2dce4355821..671e6528a82 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -41,6 +41,11 @@ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { }; EXPORT_SYMBOL(__per_cpu_offset); +static void __init pcpu4k_populate_pte(unsigned long addr) +{ + populate_extra_pte(addr); +} + static inline void setup_percpu_segment(int cpu) { #ifdef CONFIG_X86_32 @@ -104,7 +109,7 @@ void __init setup_per_cpu_areas(void) } } - pcpu_unit_size = pcpu_setup_static(populate_extra_pte, pages, size); + pcpu_unit_size = pcpu_setup_static(pcpu4k_populate_pte, pages, size); free_bootmem(__pa(pages), pages_size); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 8b1a0ef7f87..84a26883ab4 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -137,14 +137,21 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) return pte_offset_kernel(pmd, 0); } -void __init populate_extra_pte(unsigned long vaddr) +pmd_t * __init populate_extra_pmd(unsigned long vaddr) { int pgd_idx = pgd_index(vaddr); int pmd_idx = pmd_index(vaddr); + + return one_md_table_init(swapper_pg_dir + pgd_idx) + pmd_idx; +} + +pte_t * __init populate_extra_pte(unsigned long vaddr) +{ + int pte_idx = pte_index(vaddr); pmd_t *pmd; - pmd = one_md_table_init(swapper_pg_dir + pgd_idx); - one_page_table_init(pmd + pmd_idx); + pmd = populate_extra_pmd(vaddr); + return one_page_table_init(pmd) + pte_idx; } static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 7f91e2cdc4c..7d4e76da336 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -168,34 +168,51 @@ static __ref void *spp_getpage(void) return ptr; } -void -set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte) +static pud_t * __init fill_pud(pgd_t *pgd, unsigned long vaddr) { - pud_t *pud; - pmd_t *pmd; - pte_t *pte; + if (pgd_none(*pgd)) { + pud_t *pud = (pud_t *)spp_getpage(); + pgd_populate(&init_mm, pgd, pud); + if (pud != pud_offset(pgd, 0)) + printk(KERN_ERR "PAGETABLE BUG #00! %p <-> %p\n", + pud, pud_offset(pgd, 0)); + } + return pud_offset(pgd, vaddr); +} - pud = pud_page + pud_index(vaddr); +static pmd_t * __init fill_pmd(pud_t *pud, unsigned long vaddr) +{ if (pud_none(*pud)) { - pmd = (pmd_t *) spp_getpage(); + pmd_t *pmd = (pmd_t *) spp_getpage(); pud_populate(&init_mm, pud, pmd); - if (pmd != pmd_offset(pud, 0)) { + if (pmd != pmd_offset(pud, 0)) printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n", - pmd, pmd_offset(pud, 0)); - return; - } + pmd, pmd_offset(pud, 0)); } - pmd = pmd_offset(pud, vaddr); + return pmd_offset(pud, vaddr); +} + +static pte_t * __init fill_pte(pmd_t *pmd, unsigned long vaddr) +{ if (pmd_none(*pmd)) { - pte = (pte_t *) spp_getpage(); + pte_t *pte = (pte_t *) spp_getpage(); pmd_populate_kernel(&init_mm, pmd, pte); - if (pte != pte_offset_kernel(pmd, 0)) { + if (pte != pte_offset_kernel(pmd, 0)) printk(KERN_ERR "PAGETABLE BUG #02!\n"); - return; - } } + return pte_offset_kernel(pmd, vaddr); +} + +void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte) +{ + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + pud = pud_page + pud_index(vaddr); + pmd = fill_pmd(pud, vaddr); + pte = fill_pte(pmd, vaddr); - pte = pte_offset_kernel(pmd, vaddr); set_pte(pte, new_pte); /* @@ -205,8 +222,7 @@ set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte) __flush_tlb_one(vaddr); } -void -set_pte_vaddr(unsigned long vaddr, pte_t pteval) +void set_pte_vaddr(unsigned long vaddr, pte_t pteval) { pgd_t *pgd; pud_t *pud_page; @@ -223,23 +239,22 @@ set_pte_vaddr(unsigned long vaddr, pte_t pteval) set_pte_vaddr_pud(pud_page, vaddr, pteval); } -void __init populate_extra_pte(unsigned long vaddr) +pmd_t * __init populate_extra_pmd(unsigned long vaddr) { pgd_t *pgd; pud_t *pud; pgd = pgd_offset_k(vaddr); - if (pgd_none(*pgd)) { - pud = (pud_t *)spp_getpage(); - pgd_populate(&init_mm, pgd, pud); - if (pud != pud_offset(pgd, 0)) { - printk(KERN_ERR "PAGETABLE BUG #00! %p <-> %p\n", - pud, pud_offset(pgd, 0)); - return; - } - } + pud = fill_pud(pgd, vaddr); + return fill_pmd(pud, vaddr); +} + +pte_t * __init populate_extra_pte(unsigned long vaddr) +{ + pmd_t *pmd; - set_pte_vaddr_pud((pud_t *)pgd_page_vaddr(*pgd), vaddr, __pte(0)); + pmd = populate_extra_pmd(vaddr); + return fill_pte(pmd, vaddr); } /* -- cgit v1.2.3-70-g09d2 From fab852aaf761a00cfe16330429b7cac15cceaeb9 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Sun, 1 Mar 2009 16:09:14 +0200 Subject: x86: count errors in testmmiotrace.ko Check the read values against the written values in the MMIO read/write test. This test shows if the given MMIO test area really works as memory, which is a prerequisite for a successful mmiotrace test. Signed-off-by: Pekka Paalanen Cc: Stuart Bennett Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/mm/testmmiotrace.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index ab50a8d7402..4b29f3b0ee0 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c @@ -1,5 +1,5 @@ /* - * Written by Pekka Paalanen, 2008 + * Written by Pekka Paalanen, 2008-2009 */ #include #include @@ -11,28 +11,51 @@ static unsigned long mmio_address; module_param(mmio_address, ulong, 0); MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB."); +static unsigned v16(unsigned i) +{ + return i * 12 + 7; +} + +static unsigned v32(unsigned i) +{ + return i * 212371 + 13; +} + static void do_write_test(void __iomem *p) { unsigned int i; mmiotrace_printk("Write test.\n"); + for (i = 0; i < 256; i++) iowrite8(i, p + i); + for (i = 1024; i < (5 * 1024); i += 2) - iowrite16(i * 12 + 7, p + i); + iowrite16(v16(i), p + i); + for (i = (5 * 1024); i < (16 * 1024); i += 4) - iowrite32(i * 212371 + 13, p + i); + iowrite32(v32(i), p + i); } static void do_read_test(void __iomem *p) { unsigned int i; + unsigned errs[3] = { 0 }; mmiotrace_printk("Read test.\n"); + for (i = 0; i < 256; i++) - ioread8(p + i); + if (ioread8(p + i) != i) + ++errs[0]; + for (i = 1024; i < (5 * 1024); i += 2) - ioread16(p + i); + if (ioread16(p + i) != v16(i)) + ++errs[1]; + for (i = (5 * 1024); i < (16 * 1024); i += 4) - ioread32(p + i); + if (ioread32(p + i) != v32(i)) + ++errs[2]; + + mmiotrace_printk("Read errors: 8-bit %d, 16-bit %d, 32-bit %d.\n", + errs[0], errs[1], errs[2]); } static void do_test(void) -- cgit v1.2.3-70-g09d2 From 5ff93697fcfe1e2b9e61db82961d8f50d1ad5d57 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Sun, 1 Mar 2009 16:10:08 +0200 Subject: x86: add far read test to testmmiotrace Apparently pages far into an ioremapped region might not actually be mapped during ioremap(). Add an optional read test to try to trigger a multiply faulting MMIO access. Also add more messages to the kernel log to help debugging. This patch is based on a patch suggested by Stuart Bennett who discovered bugs in mmiotrace related to normal kernel space faults. Signed-off-by: Pekka Paalanen Cc: Stuart Bennett Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/mm/testmmiotrace.c | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index 4b29f3b0ee0..427fd1b56df 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c @@ -9,7 +9,13 @@ static unsigned long mmio_address; module_param(mmio_address, ulong, 0); -MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB."); +MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB " + "(or 8 MB if read_far is non-zero)."); + +static unsigned long read_far = 0x400100; +module_param(read_far, ulong, 0); +MODULE_PARM_DESC(read_far, " Offset of a 32-bit read within 8 MB " + "(default: 0x400100)."); static unsigned v16(unsigned i) { @@ -24,6 +30,7 @@ static unsigned v32(unsigned i) static void do_write_test(void __iomem *p) { unsigned int i; + pr_info(MODULE_NAME ": write test.\n"); mmiotrace_printk("Write test.\n"); for (i = 0; i < 256; i++) @@ -40,6 +47,7 @@ static void do_read_test(void __iomem *p) { unsigned int i; unsigned errs[3] = { 0 }; + pr_info(MODULE_NAME ": read test.\n"); mmiotrace_printk("Read test.\n"); for (i = 0; i < 256; i++) @@ -58,9 +66,17 @@ static void do_read_test(void __iomem *p) errs[0], errs[1], errs[2]); } -static void do_test(void) +static void do_read_far_test(void __iomem *p) +{ + pr_info(MODULE_NAME ": read far test.\n"); + mmiotrace_printk("Read far test.\n"); + + ioread32(p + read_far); +} + +static void do_test(unsigned long size) { - void __iomem *p = ioremap_nocache(mmio_address, 0x4000); + void __iomem *p = ioremap_nocache(mmio_address, size); if (!p) { pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); return; @@ -68,11 +84,15 @@ static void do_test(void) mmiotrace_printk("ioremap returned %p.\n", p); do_write_test(p); do_read_test(p); + if (read_far && read_far < size - 4) + do_read_far_test(p); iounmap(p); } static int __init init(void) { + unsigned long size = (read_far) ? (8 << 20) : (16 << 10); + if (mmio_address == 0) { pr_err(MODULE_NAME ": you have to use the module argument " "mmio_address.\n"); @@ -81,10 +101,11 @@ static int __init init(void) return -ENXIO; } - pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx " - "in PCI address space, and writing " - "rubbish in there.\n", mmio_address); - do_test(); + pr_warning(MODULE_NAME ": WARNING: mapping %lu kB @ 0x%08lx in PCI " + "address space, and writing 16 kB of rubbish in there.\n", + size >> 10, mmio_address); + do_test(size); + pr_info(MODULE_NAME ": All done.\n"); return 0; } -- cgit v1.2.3-70-g09d2 From e9d54cae8f03e7f963a12f44bd50d68f49b9ea36 Mon Sep 17 00:00:00 2001 From: Stuart Bennett Date: Fri, 30 Jan 2009 17:38:59 +0000 Subject: x86 mmiotrace: WARN_ONCE if dis/arming a page fails Print a full warning once, if arming or disarming a page fails. Also, if initial arming fails, do not handle the page further. This avoids the possibility of a page failing to arm and then later claiming to have handled any fault on that page. WARN_ONCE added by Pekka Paalanen. Signed-off-by: Stuart Bennett Signed-off-by: Pekka Paalanen Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 93d82038af4..fb1f11546fc 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -105,7 +105,7 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) return NULL; } -static void set_page_present(unsigned long addr, bool present, +static int set_page_present(unsigned long addr, bool present, unsigned int *pglevel) { pteval_t pteval; @@ -116,7 +116,7 @@ static void set_page_present(unsigned long addr, bool present, if (!pte) { pr_err("kmmio: no pte for page 0x%08lx\n", addr); - return; + return -1; } if (pglevel) @@ -140,22 +140,27 @@ static void set_page_present(unsigned long addr, bool present, default: pr_err("kmmio: unexpected page level 0x%x.\n", level); - return; + return -1; } __flush_tlb_one(addr); + + return 0; } /** Mark the given page as not present. Access to it will trigger a fault. */ -static void arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) +static int arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) { - set_page_present(page & PAGE_MASK, false, pglevel); + int ret = set_page_present(page & PAGE_MASK, false, pglevel); + WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", page); + return ret; } /** Mark the given page as present. */ static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) { - set_page_present(page & PAGE_MASK, true, pglevel); + int ret = set_page_present(page & PAGE_MASK, true, pglevel); + WARN_ONCE(ret < 0, KERN_ERR "kmmio disarming 0x%08lx failed.\n", page); } /* @@ -326,9 +331,13 @@ static int add_kmmio_fault_page(unsigned long page) f->count = 1; f->page = page; - list_add_rcu(&f->list, kmmio_page_list(f->page)); - arm_kmmio_fault_page(f->page, NULL); + if (arm_kmmio_fault_page(f->page, NULL)) { + kfree(f); + return -1; + } + + list_add_rcu(&f->list, kmmio_page_list(f->page)); return 0; } -- cgit v1.2.3-70-g09d2 From 5359b585fb5edb3db34d6cd491e1475b098c61d3 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Sun, 1 Mar 2009 16:11:58 +0200 Subject: x86 mmiotrace: fix save/restore page table state From baa99e2b32449ec7bf147c234adfa444caecac8a Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Sun, 22 Feb 2009 20:02:43 +0200 Blindly setting _PAGE_PRESENT in disarm_kmmio_fault_page() overlooks the possibility, that the page was not present when it was armed. Make arm_kmmio_fault_page() store the previous page presence in struct kmmio_fault_page and use it on disarm. This patch was originally written by Stuart Bennett, but Pekka Paalanen rewrote it a little different. Signed-off-by: Pekka Paalanen Cc: Stuart Bennett Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 66 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 22 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index fb1f11546fc..be361eb828c 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -32,6 +32,8 @@ struct kmmio_fault_page { struct list_head list; struct kmmio_fault_page *release_next; unsigned long page; /* location of the fault page */ + bool old_presence; /* page presence prior to arming */ + bool armed; /* * Number of times this page has been registered as a part @@ -105,8 +107,7 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) return NULL; } -static int set_page_present(unsigned long addr, bool present, - unsigned int *pglevel) +static int set_page_presence(unsigned long addr, bool present, bool *old) { pteval_t pteval; pmdval_t pmdval; @@ -119,20 +120,21 @@ static int set_page_present(unsigned long addr, bool present, return -1; } - if (pglevel) - *pglevel = level; - switch (level) { case PG_LEVEL_2M: pmd = (pmd_t *)pte; - pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT; + pmdval = pmd_val(*pmd); + *old = !!(pmdval & _PAGE_PRESENT); + pmdval &= ~_PAGE_PRESENT; if (present) pmdval |= _PAGE_PRESENT; set_pmd(pmd, __pmd(pmdval)); break; case PG_LEVEL_4K: - pteval = pte_val(*pte) & ~_PAGE_PRESENT; + pteval = pte_val(*pte); + *old = !!(pteval & _PAGE_PRESENT); + pteval &= ~_PAGE_PRESENT; if (present) pteval |= _PAGE_PRESENT; set_pte_atomic(pte, __pte(pteval)); @@ -148,19 +150,39 @@ static int set_page_present(unsigned long addr, bool present, return 0; } -/** Mark the given page as not present. Access to it will trigger a fault. */ -static int arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) +/* + * Mark the given page as not present. Access to it will trigger a fault. + * + * Struct kmmio_fault_page is protected by RCU and kmmio_lock, but the + * protection is ignored here. RCU read lock is assumed held, so the struct + * will not disappear unexpectedly. Furthermore, the caller must guarantee, + * that double arming the same virtual address (page) cannot occur. + * + * Double disarming on the other hand is allowed, and may occur when a fault + * and mmiotrace shutdown happen simultaneously. + */ +static int arm_kmmio_fault_page(struct kmmio_fault_page *f) { - int ret = set_page_present(page & PAGE_MASK, false, pglevel); - WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", page); + int ret; + WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n"); + if (f->armed) { + pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n", + f->page, f->count, f->old_presence); + } + ret = set_page_presence(f->page, false, &f->old_presence); + WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page); + f->armed = true; return ret; } -/** Mark the given page as present. */ -static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) +/** Restore the given page to saved presence state. */ +static void disarm_kmmio_fault_page(struct kmmio_fault_page *f) { - int ret = set_page_present(page & PAGE_MASK, true, pglevel); - WARN_ONCE(ret < 0, KERN_ERR "kmmio disarming 0x%08lx failed.\n", page); + bool tmp; + int ret = set_page_presence(f->page, f->old_presence, &tmp); + WARN_ONCE(ret < 0, + KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page); + f->armed = false; } /* @@ -207,7 +229,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) ctx = &get_cpu_var(kmmio_ctx); if (ctx->active) { - disarm_kmmio_fault_page(faultpage->page, NULL); + disarm_kmmio_fault_page(faultpage); if (addr == ctx->addr) { /* * On SMP we sometimes get recursive probe hits on the @@ -249,7 +271,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) regs->flags &= ~X86_EFLAGS_IF; /* Now we set present bit in PTE and single step. */ - disarm_kmmio_fault_page(ctx->fpage->page, NULL); + disarm_kmmio_fault_page(ctx->fpage); /* * If another cpu accesses the same page while we are stepping, @@ -288,7 +310,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) if (ctx->probe && ctx->probe->post_handler) ctx->probe->post_handler(ctx->probe, condition, regs); - arm_kmmio_fault_page(ctx->fpage->page, NULL); + arm_kmmio_fault_page(ctx->fpage); regs->flags &= ~X86_EFLAGS_TF; regs->flags |= ctx->saved_flags; @@ -320,19 +342,19 @@ static int add_kmmio_fault_page(unsigned long page) f = get_kmmio_fault_page(page); if (f) { if (!f->count) - arm_kmmio_fault_page(f->page, NULL); + arm_kmmio_fault_page(f); f->count++; return 0; } - f = kmalloc(sizeof(*f), GFP_ATOMIC); + f = kzalloc(sizeof(*f), GFP_ATOMIC); if (!f) return -1; f->count = 1; f->page = page; - if (arm_kmmio_fault_page(f->page, NULL)) { + if (arm_kmmio_fault_page(f)) { kfree(f); return -1; } @@ -356,7 +378,7 @@ static void release_kmmio_fault_page(unsigned long page, f->count--; BUG_ON(f->count < 0); if (!f->count) { - disarm_kmmio_fault_page(f->page, NULL); + disarm_kmmio_fault_page(f); f->release_next = *release_list; *release_list = f; } -- cgit v1.2.3-70-g09d2 From 0b700a6a253b6a3b3059bb9a9247a73490ee33fb Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Sun, 1 Mar 2009 16:12:48 +0200 Subject: x86 mmiotrace: split set_page_presence() From 36772dcb6ffbbb68254cbfc379a103acd2fbfefc Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Sat, 28 Feb 2009 21:34:59 +0200 Split set_page_presence() in kmmio.c into two more functions set_pmd_presence() and set_pte_presence(). Purely code reorganization, no functional changes. Signed-off-by: Pekka Paalanen Cc: Stuart Bennett Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index be361eb828c..d29777520af 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -107,12 +107,29 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) return NULL; } +static void set_pmd_presence(pmd_t *pmd, bool present, bool *old) +{ + pmdval_t v = pmd_val(*pmd); + *old = !!(v & _PAGE_PRESENT); + v &= ~_PAGE_PRESENT; + if (present) + v |= _PAGE_PRESENT; + set_pmd(pmd, __pmd(v)); +} + +static void set_pte_presence(pte_t *pte, bool present, bool *old) +{ + pteval_t v = pte_val(*pte); + *old = !!(v & _PAGE_PRESENT); + v &= ~_PAGE_PRESENT; + if (present) + v |= _PAGE_PRESENT; + set_pte_atomic(pte, __pte(v)); +} + static int set_page_presence(unsigned long addr, bool present, bool *old) { - pteval_t pteval; - pmdval_t pmdval; unsigned int level; - pmd_t *pmd; pte_t *pte = lookup_address(addr, &level); if (!pte) { @@ -122,31 +139,17 @@ static int set_page_presence(unsigned long addr, bool present, bool *old) switch (level) { case PG_LEVEL_2M: - pmd = (pmd_t *)pte; - pmdval = pmd_val(*pmd); - *old = !!(pmdval & _PAGE_PRESENT); - pmdval &= ~_PAGE_PRESENT; - if (present) - pmdval |= _PAGE_PRESENT; - set_pmd(pmd, __pmd(pmdval)); + set_pmd_presence((pmd_t *)pte, present, old); break; - case PG_LEVEL_4K: - pteval = pte_val(*pte); - *old = !!(pteval & _PAGE_PRESENT); - pteval &= ~_PAGE_PRESENT; - if (present) - pteval |= _PAGE_PRESENT; - set_pte_atomic(pte, __pte(pteval)); + set_pte_presence(pte, present, old); break; - default: pr_err("kmmio: unexpected page level 0x%x.\n", level); return -1; } __flush_tlb_one(addr); - return 0; } -- cgit v1.2.3-70-g09d2 From 3e39aa156a24ce386da378784edd0f748c770087 Mon Sep 17 00:00:00 2001 From: Stuart Bennett Date: Thu, 5 Feb 2009 11:02:02 +0000 Subject: x86 mmiotrace: improve handling of secondary faults Upgrade some kmmio.c debug messages to warnings. Allow secondary faults on probed pages to fall through, and only log secondary faults that are not due to non-present pages. Patch edited by Pekka Paalanen. Signed-off-by: Stuart Bennett Signed-off-by: Pekka Paalanen Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index d29777520af..4c66bd3a240 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -232,28 +232,32 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) ctx = &get_cpu_var(kmmio_ctx); if (ctx->active) { - disarm_kmmio_fault_page(faultpage); if (addr == ctx->addr) { /* - * On SMP we sometimes get recursive probe hits on the - * same address. Context is already saved, fall out. + * A second fault on the same page means some other + * condition needs handling by do_page_fault(), the + * page really not being present is the most common. */ - pr_debug("kmmio: duplicate probe hit on CPU %d, for " - "address 0x%08lx.\n", - smp_processor_id(), addr); - ret = 1; - goto no_kmmio_ctx; - } - /* - * Prevent overwriting already in-flight context. - * This should not happen, let's hope disarming at least - * prevents a panic. - */ - pr_emerg("kmmio: recursive probe hit on CPU %d, " + pr_debug("kmmio: secondary hit for 0x%08lx CPU %d.\n", + addr, smp_processor_id()); + + if (!faultpage->old_presence) + pr_info("kmmio: unexpected secondary hit for " + "address 0x%08lx on CPU %d.\n", addr, + smp_processor_id()); + } else { + /* + * Prevent overwriting already in-flight context. + * This should not happen, let's hope disarming at + * least prevents a panic. + */ + pr_emerg("kmmio: recursive probe hit on CPU %d, " "for address 0x%08lx. Ignoring.\n", smp_processor_id(), addr); - pr_emerg("kmmio: previous hit was at 0x%08lx.\n", - ctx->addr); + pr_emerg("kmmio: previous hit was at 0x%08lx.\n", + ctx->addr); + disarm_kmmio_fault_page(faultpage); + } goto no_kmmio_ctx; } ctx->active++; @@ -305,7 +309,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); if (!ctx->active) { - pr_debug("kmmio: spurious debug trap on CPU %d.\n", + pr_warning("kmmio: spurious debug trap on CPU %d.\n", smp_processor_id()); goto out; } -- cgit v1.2.3-70-g09d2 From 340430c572f7b2b275d39965e88bafa71693cb23 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Tue, 24 Feb 2009 21:44:15 +0200 Subject: x86 mmiotrace: fix race with release_kmmio_fault_page() There was a theoretical possibility to a race between arming a page in post_kmmio_handler() and disarming the page in release_kmmio_fault_page(): cpu0 cpu1 ------------------------------------------------------------------ mmiotrace shutdown enter release_kmmio_fault_page fault on the page disarm the page disarm the page handle the MMIO access re-arm the page put the page on release list remove_kmmio_fault_pages() fault on the page page not known to mmiotrace fall back to do_page_fault() *KABOOM* (This scenario also shows the double disarm case which is allowed.) Fixed by acquiring kmmio_lock in post_kmmio_handler() and checking if the page is being released from mmiotrace. Signed-off-by: Pekka Paalanen Cc: Stuart Bennett Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 4c66bd3a240..9f205030d9a 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -38,7 +38,8 @@ struct kmmio_fault_page { /* * Number of times this page has been registered as a part * of a probe. If zero, page is disarmed and this may be freed. - * Used only by writers (RCU). + * Used only by writers (RCU) and post_kmmio_handler(). + * Protected by kmmio_lock, when linked into kmmio_page_table. */ int count; }; @@ -317,7 +318,11 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) if (ctx->probe && ctx->probe->post_handler) ctx->probe->post_handler(ctx->probe, condition, regs); - arm_kmmio_fault_page(ctx->fpage); + /* Prevent racing against release_kmmio_fault_page(). */ + spin_lock(&kmmio_lock); + if (ctx->fpage->count) + arm_kmmio_fault_page(ctx->fpage); + spin_unlock(&kmmio_lock); regs->flags &= ~X86_EFLAGS_TF; regs->flags |= ctx->saved_flags; -- cgit v1.2.3-70-g09d2 From f254f3909efaf59ca2d0f408de2d044dace60706 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 3 Mar 2009 12:02:57 -0800 Subject: x86: un-__init fill_pud/pmd/pte They are used by __set_fixmap->set_pte_vaddr_pud, which can be used by arch_setup_additional_pages(), and so is used after init. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 11981fc8570..07f44d491df 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -168,7 +168,7 @@ static __ref void *spp_getpage(void) return ptr; } -static pud_t * __init fill_pud(pgd_t *pgd, unsigned long vaddr) +static pud_t *fill_pud(pgd_t *pgd, unsigned long vaddr) { if (pgd_none(*pgd)) { pud_t *pud = (pud_t *)spp_getpage(); @@ -180,7 +180,7 @@ static pud_t * __init fill_pud(pgd_t *pgd, unsigned long vaddr) return pud_offset(pgd, vaddr); } -static pmd_t * __init fill_pmd(pud_t *pud, unsigned long vaddr) +static pmd_t *fill_pmd(pud_t *pud, unsigned long vaddr) { if (pud_none(*pud)) { pmd_t *pmd = (pmd_t *) spp_getpage(); @@ -192,7 +192,7 @@ static pmd_t * __init fill_pmd(pud_t *pud, unsigned long vaddr) return pmd_offset(pud, vaddr); } -static pte_t * __init fill_pte(pmd_t *pmd, unsigned long vaddr) +static pte_t *fill_pte(pmd_t *pmd, unsigned long vaddr) { if (pmd_none(*pmd)) { pte_t *pte = (pte_t *) spp_getpage(); -- cgit v1.2.3-70-g09d2 From 540aca06b737cc38965b52eeceefba3d24376461 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 4 Mar 2009 11:46:40 +0200 Subject: x86: move devmem_is_allowed() to common mm/init.c Impact: cleanup The function is identical on 32-bit and 64-bit configurations so move it to the common mm/init.c file. Signed-off-by: Pekka Enberg LKML-Reference: <1236160001.29024.29.camel@penberg-laptop> Signed-off-by: Ingo Molnar --- arch/x86/mm/init.c | 24 ++++++++++++++++++++++++ arch/x86/mm/init_32.c | 21 --------------------- arch/x86/mm/init_64.c | 22 ---------------------- 3 files changed, 24 insertions(+), 43 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index ce6a722587d..f89df52683c 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -1,9 +1,33 @@ +#include #include + #include #include +#include #include #include +/* + * devmem_is_allowed() checks to see if /dev/mem access to a certain address + * is valid. The argument is a physical page number. + * + * + * On x86, access has to be given to the first megabyte of ram because that area + * contains bios code and data regions used by X and dosemu and similar apps. + * Access has to be given to non-kernel-ram areas as well, these contain the PCI + * mmio resources as well as potential bios/acpi data regions. + */ +int devmem_is_allowed(unsigned long pagenr) +{ + if (pagenr <= 256) + return 1; + if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) + return 0; + if (!page_is_ram(pagenr)) + return 1; + return 0; +} + void free_init_pages(char *what, unsigned long begin, unsigned long end) { unsigned long addr = begin; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 0b087dcd2c1..917c4e60c76 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -354,27 +354,6 @@ repeat: } } -/* - * devmem_is_allowed() checks to see if /dev/mem access to a certain address - * is valid. The argument is a physical page number. - * - * - * On x86, access has to be given to the first megabyte of ram because that area - * contains bios code and data regions used by X and dosemu and similar apps. - * Access has to be given to non-kernel-ram areas as well, these contain the PCI - * mmio resources as well as potential bios/acpi data regions. - */ -int devmem_is_allowed(unsigned long pagenr) -{ - if (pagenr <= 256) - return 1; - if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) - return 0; - if (!page_is_ram(pagenr)) - return 1; - return 0; -} - pte_t *kmap_pte; pgprot_t kmap_prot; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 724e537432e..074435e7982 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -876,28 +876,6 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif /* CONFIG_MEMORY_HOTPLUG */ -/* - * devmem_is_allowed() checks to see if /dev/mem access to a certain address - * is valid. The argument is a physical page number. - * - * - * On x86, access has to be given to the first megabyte of ram because that area - * contains bios code and data regions used by X and dosemu and similar apps. - * Access has to be given to non-kernel-ram areas as well, these contain the PCI - * mmio resources as well as potential bios/acpi data regions. - */ -int devmem_is_allowed(unsigned long pagenr) -{ - if (pagenr <= 256) - return 1; - if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) - return 0; - if (!page_is_ram(pagenr)) - return 1; - return 0; -} - - static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, kcore_vsyscall; -- cgit v1.2.3-70-g09d2 From 6298e719cf388f43b674f43799af467d3e4e5aa7 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 4 Mar 2009 10:16:07 +0200 Subject: x86: set_highmem_pages_init() cleanup, #2 Impact: cleanup The zones are set up at this stage so there's a highmem zone available even for the UMA case. The only difference there is that for machines that have CONFIG_HIGHMEM enabled but don't have any highmem available, ->zone_start_pfn is zero whereas highstart_pfn is non-zero). The field is left zeroed because of the !size test in free_area_init_core() but shouldn't be a problem because add_highpages_with_active_regions() handles empty ranges just fine. Signed-off-by: Pekka Enberg Cc: Mel Gorman LKML-Reference: <1236154567.29024.23.camel@penberg-laptop> Signed-off-by: Ingo Molnar --- arch/x86/mm/highmem_32.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 00f127c80b0..d11745334a6 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -158,7 +158,6 @@ EXPORT_SYMBOL(kunmap); EXPORT_SYMBOL(kmap_atomic); EXPORT_SYMBOL(kunmap_atomic); -#ifdef CONFIG_NUMA void __init set_highmem_pages_init(void) { struct zone *zone; @@ -182,11 +181,3 @@ void __init set_highmem_pages_init(void) } totalram_pages += totalhigh_pages; } -#else -void __init set_highmem_pages_init(void) -{ - add_highpages_with_active_regions(0, highstart_pfn, highend_pfn); - - totalram_pages += totalhigh_pages; -} -#endif /* CONFIG_NUMA */ -- cgit v1.2.3-70-g09d2 From a71edd1f46c8a599509bda478fb4eea27fb0da63 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 4 Mar 2009 01:22:35 -0800 Subject: x86: fix bootmem cross node for 32bit numa Impact: fix panic on system 2g x4 sockets Found one system with 4 sockets and every sockets has 2g can not boot with numa32 because boot mem is crossing nodes. So try to have numa version of setup_bootmem_allocator(). Signed-off-by: Yinghai Lu Cc: Andrew Morton LKML-Reference: <49AE485B.8000902@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 46 ++++++++++++++++++++++++++++++++++++++++------ arch/x86/mm/numa_32.c | 5 +++-- 2 files changed, 43 insertions(+), 8 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 917c4e60c76..67bdb59d4e1 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -776,9 +776,37 @@ static void __init zone_sizes_init(void) free_area_init_nodes(max_zone_pfns); } +#ifdef CONFIG_NEED_MULTIPLE_NODES +static unsigned long __init setup_node_bootmem(int nodeid, + unsigned long start_pfn, + unsigned long end_pfn, + unsigned long bootmap) +{ + unsigned long bootmap_size; + + if (start_pfn > max_low_pfn) + return bootmap; + if (end_pfn > max_low_pfn) + end_pfn = max_low_pfn; + + /* don't touch min_low_pfn */ + bootmap_size = init_bootmem_node(NODE_DATA(nodeid), + bootmap >> PAGE_SHIFT, + start_pfn, end_pfn); + printk(KERN_INFO " node %d low ram: %08lx - %08lx\n", + nodeid, start_pfn<> PAGE_SHIFT, - min_low_pfn, max_low_pfn); printk(KERN_INFO " mapped low ram: 0 - %08lx\n", max_pfn_mapped<> PAGE_SHIFT, + min_low_pfn, max_low_pfn); printk(KERN_INFO " bootmap %08lx - %08lx\n", bootmap, bootmap + bootmap_size); - for_each_online_node(i) - free_bootmem_with_active_regions(i, max_low_pfn); + free_bootmem_with_active_regions(0, max_low_pfn); early_res_to_bootmem(0, max_low_pfn<bdata = &bootmem_node_data[nid]; + } - NODE_DATA(0)->bdata = &bootmem_node_data[0]; setup_bootmem_allocator(); } -- cgit v1.2.3-70-g09d2 From b68adb16f29c8ea02f21f5ebf65bcabffe217e9f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 4 Mar 2009 01:24:04 -0800 Subject: x86: make 32-bit init_memory_mapping range change more like 64-bit Impact: cleanup make code more readable and more like 64-bit Signed-off-by: Yinghai Lu Cc: Andrew Morton LKML-Reference: <49AE48B4.8010907@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 126 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 94 insertions(+), 32 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 67bdb59d4e1..37aeaf366d5 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -885,29 +885,55 @@ static void __init find_early_table_space(unsigned long end, int use_pse) (table_start << PAGE_SHIFT) + tables); } +struct map_range { + unsigned long start; + unsigned long end; + unsigned page_size_mask; +}; + +#define NR_RANGE_MR 3 + +static int save_mr(struct map_range *mr, int nr_range, + unsigned long start_pfn, unsigned long end_pfn, + unsigned long page_size_mask) +{ + if (start_pfn < end_pfn) { + if (nr_range >= NR_RANGE_MR) + panic("run out of range for init_memory_mapping\n"); + mr[nr_range].start = start_pfn<> PAGE_SHIFT; - end_pfn = min(big_page_start>>PAGE_SHIFT, end>>PAGE_SHIFT); - } else { - /* head is not big page alignment ? */ - start_pfn = start >> PAGE_SHIFT; - end_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) + /* head could not be big page alignment ? */ + start_pfn = start >> PAGE_SHIFT; + pos = start_pfn << PAGE_SHIFT; + if (pos == 0) + end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT); + else + end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); + if (end_pfn > (end>>PAGE_SHIFT)) + end_pfn = end>>PAGE_SHIFT; + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); + pos = end_pfn << PAGE_SHIFT; } - if (start_pfn < end_pfn) - kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, 0); /* big page range */ - start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) + start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); - if (start_pfn < (big_page_start >> PAGE_SHIFT)) - start_pfn = big_page_start >> PAGE_SHIFT; end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); - if (start_pfn < end_pfn) - kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, - use_pse); + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, + page_size_mask & (1< (big_page_start>>PAGE_SHIFT)) { - end_pfn = end >> PAGE_SHIFT; - if (start_pfn < end_pfn) - kernel_physical_mapping_init(pgd_base, start_pfn, - end_pfn, 0); + start_pfn = pos>>PAGE_SHIFT; + end_pfn = end>>PAGE_SHIFT; + if (start_pfn < end_pfn) + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); + + /* try to merge same page size and continuous */ + for (i = 0; nr_range > 1 && i < nr_range - 1; i++) { + unsigned long old_start; + if (mr[i].end != mr[i+1].start || + mr[i].page_size_mask != mr[i+1].page_size_mask) + continue; + /* move it */ + old_start = mr[i].start; + memmove(&mr[i], &mr[i+1], + (nr_range - 1 - i) * sizeof(struct map_range)); + mr[i--].start = old_start; + nr_range--; } + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG " %08lx - %08lx page %s\n", + mr[i].start, mr[i].end, + (mr[i].page_size_mask & (1<> PAGE_SHIFT, + mr[i].end >> PAGE_SHIFT, + mr[i].page_size_mask == (1< Date: Wed, 4 Mar 2009 11:13:40 +0200 Subject: x86: move free_initrd_mem() to common mm/init.c Impact: cleanup The function is identical on 32-bit and 64-bit configurations so move it to the common mm/init.c file. Signed-off-by: Pekka Enberg LKML-Reference: <1236158020.29024.28.camel@penberg-laptop> Signed-off-by: Ingo Molnar --- arch/x86/mm/init.c | 7 +++++++ arch/x86/mm/init_32.c | 7 ------- arch/x86/mm/init_64.c | 7 ------- 3 files changed, 7 insertions(+), 14 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index f89df52683c..cc7fe660f33 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -71,3 +71,10 @@ void free_initmem(void) (unsigned long)(&__init_begin), (unsigned long)(&__init_end)); } + +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + free_init_pages("initrd memory", start, end); +} +#endif diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 37aeaf366d5..c69c6b1f5e5 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -1275,13 +1275,6 @@ void mark_rodata_ro(void) } #endif -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - free_init_pages("initrd memory", start, end); -} -#endif - int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, int flags) { diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 074435e7982..d325186dd32 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -963,13 +963,6 @@ void mark_rodata_ro(void) #endif -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - free_init_pages("initrd memory", start, end); -} -#endif - int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, int flags) { -- cgit v1.2.3-70-g09d2 From fc5efe3941c47c0278fe1bbcf8cc02a03a74fcda Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 4 Mar 2009 12:21:24 -0800 Subject: x86: fix bootmem cross node for 32bit numa, cleanup Impact: clean up Simplify the code, reuse some lines. Remove min_low_pfn reference, it is always 0 Signed-off-by: Yinghai Lu Cc: Andrew Morton LKML-Reference: <49AEE2C4.2030602@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index c69c6b1f5e5..c351456d06d 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -776,7 +776,6 @@ static void __init zone_sizes_init(void) free_area_init_nodes(max_zone_pfns); } -#ifdef CONFIG_NEED_MULTIPLE_NODES static unsigned long __init setup_node_bootmem(int nodeid, unsigned long start_pfn, unsigned long end_pfn, @@ -802,7 +801,6 @@ static unsigned long __init setup_node_bootmem(int nodeid, return bootmap + bootmap_size; } -#endif void __init setup_bootmem_allocator(void) { @@ -812,8 +810,7 @@ void __init setup_bootmem_allocator(void) * Initialize the boot-time allocator (with low memory only): */ bootmap_size = bootmem_bootmap_pages(max_low_pfn)<> PAGE_SHIFT, - min_low_pfn, max_low_pfn); - printk(KERN_INFO " bootmap %08lx - %08lx\n", - bootmap, bootmap + bootmap_size); - free_bootmem_with_active_regions(0, max_low_pfn); - early_res_to_bootmem(0, max_low_pfn< Date: Thu, 5 Mar 2009 14:54:52 +0200 Subject: x86: init_memory_mapping() trivial cleanups Impact: cleanup To reduce the diff between the 32-bit and 64-bit versions of init_memory_mapping(), fix up all trivial issues. Signed-off-by: Pekka Enberg Cc: Yinghai Lu LKML-Reference: <1236257708-27269-1-git-send-email-penberg@cs.helsinki.fi> Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 42 +++++++++++++++++++++++++----------------- arch/x86/mm/init_64.c | 26 +++++++++++++++----------- 2 files changed, 40 insertions(+), 28 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index c351456d06d..ad4e03c2d4d 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -868,11 +868,10 @@ static void __init find_early_table_space(unsigned long end, int use_pse) table_start >>= PAGE_SHIFT; table_end = table_start; - table_top = table_start + (tables>>PAGE_SHIFT); + table_top = table_start + (tables >> PAGE_SHIFT); printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", - end, table_start << PAGE_SHIFT, - (table_start << PAGE_SHIFT) + tables); + end, table_start << PAGE_SHIFT, table_top << PAGE_SHIFT); } struct map_range { @@ -899,8 +898,13 @@ static int save_mr(struct map_range *mr, int nr_range, return nr_range; } +/* + * Setup the direct mapping of the physical memory at PAGE_OFFSET. + * This runs before bootmem is initialized and gets pages directly from + * the physical memory. To access them they are temporarily mapped. + */ unsigned long __init_refok init_memory_mapping(unsigned long start, - unsigned long end) + unsigned long end) { pgd_t *pgd_base = swapper_pg_dir; unsigned long page_size_mask = 0; @@ -911,7 +915,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, int nr_range, i; int use_pse; - printk(KERN_INFO "init_memory_mapping: %08lx-%08lx\n", start, end); + printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end); #ifdef CONFIG_DEBUG_PAGEALLOC /* @@ -940,19 +944,19 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, __supported_pte_mask |= _PAGE_GLOBAL; } - memset(mr, 0, sizeof(mr)); - nr_range = 0; - if (use_pse) page_size_mask |= 1 << PG_LEVEL_2M; + memset(mr, 0, sizeof(mr)); + nr_range = 0; + /* * Don't use a large page for the first 2/4MB of memory * because there are often fixed size MTRRs in there * and overlapping MTRRs into large pages can cause * slowdowns. */ - /* head could not be big page alignment ? */ + /* head if not big page alignment ? */ start_pfn = start >> PAGE_SHIFT; pos = start_pfn << PAGE_SHIFT; if (pos == 0) @@ -960,14 +964,14 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, else end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); - if (end_pfn > (end>>PAGE_SHIFT)) - end_pfn = end>>PAGE_SHIFT; + if (end_pfn > (end >> PAGE_SHIFT)) + end_pfn = end >> PAGE_SHIFT; if (start_pfn < end_pfn) { nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); pos = end_pfn << PAGE_SHIFT; } - /* big page range */ + /* big page (2M) range */ start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); @@ -977,7 +981,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, pos = end_pfn << PAGE_SHIFT; } - /* tail is not big page alignment ? */ + /* tail is not big page (2M) alignment */ start_pfn = pos>>PAGE_SHIFT; end_pfn = end>>PAGE_SHIFT; if (start_pfn < end_pfn) @@ -998,13 +1002,17 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, } for (i = 0; i < nr_range; i++) - printk(KERN_DEBUG " %08lx - %08lx page %s\n", - mr[i].start, mr[i].end, - (mr[i].page_size_mask & (1<> PUD_SHIFT; tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); + if (use_gbpages) { unsigned long extra; + extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; } else pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; + tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); if (use_pse) { unsigned long extra; + extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; } else ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; + tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); /* @@ -647,7 +652,6 @@ static int save_mr(struct map_range *mr, int nr_range, unsigned long start_pfn, unsigned long end_pfn, unsigned long page_size_mask) { - if (start_pfn < end_pfn) { if (nr_range >= NR_RANGE_MR) panic("run out of range for init_memory_mapping\n"); @@ -679,13 +683,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end); - /* - * Find space for the kernel direct mapping tables. - * - * Later we should allocate these tables in the local node of the - * memory mapped. Unfortunately this is done currently before the - * nodes are discovered. - */ if (!after_bootmem) init_gbpages(); @@ -709,7 +706,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, memset(mr, 0, sizeof(mr)); nr_range = 0; - /* head if not big page alignment ?*/ + /* head if not big page alignment ? */ start_pfn = start >> PAGE_SHIFT; pos = start_pfn << PAGE_SHIFT; end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) @@ -721,7 +718,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, pos = end_pfn << PAGE_SHIFT; } - /* big page (2M) range*/ + /* big page (2M) range */ start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) @@ -769,7 +766,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, /* move it */ old_start = mr[i].start; memmove(&mr[i], &mr[i+1], - (nr_range - 1 - i) * sizeof (struct map_range)); + (nr_range - 1 - i) * sizeof(struct map_range)); mr[i--].start = old_start; nr_range--; } @@ -780,6 +777,13 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, (mr[i].page_size_mask & (1< Date: Thu, 5 Mar 2009 14:54:53 +0200 Subject: x86: add gbpages support to 32-bit init_memory_mapping() Impact: cleanup To reduce the diff between the 32-bit and 64-bit versions of init_memory_mapping(), add gbpages support to the 32-bit version. Signed-off-by: Pekka Enberg Cc: Yinghai Lu LKML-Reference: <1236257708-27269-2-git-send-email-penberg@cs.helsinki.fi> Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index ad4e03c2d4d..5fad0f95d5a 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -65,6 +65,8 @@ static unsigned long __meminitdata table_top; static int __initdata after_init_bootmem; +int direct_gbpages; + static __init void *alloc_low_page(void) { unsigned long pfn = table_end++; @@ -831,14 +833,22 @@ void __init setup_bootmem_allocator(void) after_init_bootmem = 1; } -static void __init find_early_table_space(unsigned long end, int use_pse) +static void __init find_early_table_space(unsigned long end, int use_pse, + int use_gbpages) { unsigned long puds, pmds, ptes, tables, start; puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); - pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; + if (use_gbpages) { + unsigned long extra; + + extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); + pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; + } else + pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; + tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); if (use_pse) { @@ -913,7 +923,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, struct map_range mr[NR_RANGE_MR]; int nr_range, i; - int use_pse; + int use_pse, use_gbpages; printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end); @@ -923,9 +933,10 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, * This will simplify cpa(), which otherwise needs to support splitting * large pages into small in interrupt context, etc. */ - use_pse = 0; + use_pse = use_gbpages = 0; #else use_pse = cpu_has_pse; + use_gbpages = direct_gbpages; #endif #ifdef CONFIG_X86_PAE @@ -944,6 +955,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, __supported_pte_mask |= _PAGE_GLOBAL; } + if (use_gbpages) + page_size_mask |= 1 << PG_LEVEL_1G; if (use_pse) page_size_mask |= 1 << PG_LEVEL_2M; @@ -1015,7 +1028,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, * nodes are discovered. */ if (!after_init_bootmem) - find_early_table_space(end, use_pse); + find_early_table_space(end, use_pse, use_gbpages); for (i = 0; i < nr_range; i++) kernel_physical_mapping_init(pgd_base, -- cgit v1.2.3-70-g09d2 From 49a2bf7303b0dc5fccbb3ff7cf2e7751f0e3953d Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 5 Mar 2009 14:54:54 +0200 Subject: x86: find_early_table_space() unification Impact: cleanup There are some minor differences between the 32-bit and 64-bit find_early_table_space() functions. This patch wraps those differences under CONFIG_X86_32 to make the function identical on both configurations. Signed-off-by: Pekka Enberg Cc: Yinghai Lu LKML-Reference: <1236257708-27269-3-git-send-email-penberg@cs.helsinki.fi> Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 9 +++++++++ arch/x86/mm/init_64.c | 14 ++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 5fad0f95d5a..86a99947455 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -855,24 +855,33 @@ static void __init find_early_table_space(unsigned long end, int use_pse, unsigned long extra; extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); +#ifdef CONFIG_X86_32 extra += PMD_SIZE; +#endif ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; } else ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); +#ifdef CONFIG_X86_32 /* for fixmap */ tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); +#endif /* * RED-PEN putting page tables only on node 0 could * cause a hotspot and fill up ZONE_DMA. The page tables * need roughly 0.5KB per GB. */ +#ifdef CONFIG_X86_32 start = 0x7000; table_start = find_e820_area(start, max_pfn_mapped<>PMD_SHIFT) << PMD_SHIFT); +#ifdef CONFIG_X86_32 + extra += PMD_SIZE; +#endif ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; } else ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); +#ifdef CONFIG_X86_32 + /* for fixmap */ + tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); +#endif + /* * RED-PEN putting page tables only on node 0 could * cause a hotspot and fill up ZONE_DMA. The page tables * need roughly 0.5KB per GB. */ +#ifdef CONFIG_X86_32 + start = 0x7000; + table_start = find_e820_area(start, max_pfn_mapped< Date: Thu, 5 Mar 2009 14:54:55 +0200 Subject: x86: move pgd_base out of init_memory_mapping() Impact: cleanup This patch moves pgd_base out of init_memory_mapping() to reduce the diff between the 32-bit version and the 64-bit version of the function. Signed-off-by: Pekka Enberg Cc: Yinghai Lu LKML-Reference: <1236257708-27269-4-git-send-email-penberg@cs.helsinki.fi> Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 86a99947455..cfc68d60138 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -227,11 +227,11 @@ static inline int is_kernel_text(unsigned long addr) * of max_low_pfn pages, by creating page tables starting from address * PAGE_OFFSET: */ -static void __init kernel_physical_mapping_init(pgd_t *pgd_base, - unsigned long start_pfn, +static void __init kernel_physical_mapping_init(unsigned long start_pfn, unsigned long end_pfn, int use_pse) { + pgd_t *pgd_base = swapper_pg_dir; int pgd_idx, pmd_idx, pte_ofs; unsigned long pfn; pgd_t *pgd; @@ -509,8 +509,9 @@ void __init native_pagetable_setup_done(pgd_t *base) * be partially populated, and so it avoids stomping on any existing * mappings. */ -static void __init early_ioremap_page_table_range_init(pgd_t *pgd_base) +static void __init early_ioremap_page_table_range_init(void) { + pgd_t *pgd_base = swapper_pg_dir; unsigned long vaddr, end; /* @@ -925,7 +926,6 @@ static int save_mr(struct map_range *mr, int nr_range, unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned long end) { - pgd_t *pgd_base = swapper_pg_dir; unsigned long page_size_mask = 0; unsigned long start_pfn, end_pfn; unsigned long pos; @@ -1040,12 +1040,12 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, find_early_table_space(end, use_pse, use_gbpages); for (i = 0; i < nr_range; i++) - kernel_physical_mapping_init(pgd_base, + kernel_physical_mapping_init( mr[i].start >> PAGE_SHIFT, mr[i].end >> PAGE_SHIFT, mr[i].page_size_mask == (1< Date: Thu, 5 Mar 2009 14:54:56 +0200 Subject: x86: ifdef 32-bit specific setup in init_memory_mapping() Impact: cleanup Enabling NX, PSE, and PGE are only required on 32-bit so ifdef them in both versions of the function. Signed-off-by: Pekka Enberg Cc: Yinghai Lu LKML-Reference: <1236257708-27269-5-git-send-email-penberg@cs.helsinki.fi> Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 2 ++ arch/x86/mm/init_64.c | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index cfc68d60138..eb98cb90cb3 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -948,6 +948,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, use_gbpages = direct_gbpages; #endif +#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_PAE set_nx(); if (nx_enabled) @@ -963,6 +964,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, set_in_cr4(X86_CR4_PGE); __supported_pte_mask |= _PAGE_GLOBAL; } +#endif if (use_gbpages) page_size_mask |= 1 << PG_LEVEL_1G; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 151e5ba3441..c3c0be5b637 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -712,6 +712,24 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, use_gbpages = direct_gbpages; #endif +#ifdef CONFIG_X86_32 +#ifdef CONFIG_X86_PAE + set_nx(); + if (nx_enabled) + printk(KERN_INFO "NX (Execute Disable) protection: active\n"); +#endif + + /* Enable PSE if available */ + if (cpu_has_pse) + set_in_cr4(X86_CR4_PSE); + + /* Enable PGE if available */ + if (cpu_has_pge) { + set_in_cr4(X86_CR4_PGE); + __supported_pte_mask |= _PAGE_GLOBAL; + } +#endif + if (use_gbpages) page_size_mask |= 1 << PG_LEVEL_1G; if (use_pse) -- cgit v1.2.3-70-g09d2 From 96083ca11bc85265c7ef9e791a57e3514d8f605a Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 5 Mar 2009 14:54:57 +0200 Subject: x86: remove unnecessary save_mr() sanity check Impact: cleanup The save_mr() function already checks that start_pfn is less than end_pfn so we can remove the unnecessary check which reduces the diff between the 32-bit and the 64-bit versions of init_memory_mapping(). Signed-off-by: Pekka Enberg Cc: Yinghai Lu LKML-Reference: <1236257708-27269-6-git-send-email-penberg@cs.helsinki.fi> Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index eb98cb90cb3..559715b488b 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -1008,8 +1008,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, /* tail is not big page (2M) alignment */ start_pfn = pos>>PAGE_SHIFT; end_pfn = end>>PAGE_SHIFT; - if (start_pfn < end_pfn) - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); /* try to merge same page size and continuous */ for (i = 0; nr_range > 1 && i < nr_range - 1; i++) { -- cgit v1.2.3-70-g09d2 From c464573cb3d3bdd45eed8f5f59596f84ede95a0c Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 5 Mar 2009 14:54:58 +0200 Subject: x86: rename after_init_bootmem to after_bootmem in mm/init_32.c Impact: cleanup This patch renames after_init_bootmem to after_bootmem in mm/init_32.c to reduce the diff to the 64-bit version of of init_memory_mapping(). Signed-off-by: Pekka Enberg Cc: Yinghai Lu LKML-Reference: <1236257708-27269-7-git-send-email-penberg@cs.helsinki.fi> Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 559715b488b..cc5c3992385 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -63,7 +63,7 @@ static unsigned long __initdata table_start; static unsigned long __meminitdata table_end; static unsigned long __meminitdata table_top; -static int __initdata after_init_bootmem; +int after_bootmem; int direct_gbpages; @@ -92,7 +92,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) #ifdef CONFIG_X86_PAE if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { - if (after_init_bootmem) + if (after_bootmem) pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); else pmd_table = (pmd_t *)alloc_low_page(); @@ -119,7 +119,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { pte_t *page_table = NULL; - if (after_init_bootmem) { + if (after_bootmem) { #ifdef CONFIG_DEBUG_PAGEALLOC page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); #endif @@ -158,7 +158,7 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, pte_t *newpte; int i; - BUG_ON(after_init_bootmem); + BUG_ON(after_bootmem); newpte = alloc_low_page(); for (i = 0; i < PTRS_PER_PTE; i++) set_pte(newpte + i, pte[i]); @@ -831,7 +831,7 @@ void __init setup_bootmem_allocator(void) bootmap = setup_node_bootmem(0, 0, max_low_pfn, bootmap); #endif - after_init_bootmem = 1; + after_bootmem = 1; } static void __init find_early_table_space(unsigned long end, int use_pse, @@ -1037,7 +1037,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, * memory mapped. Unfortunately this is done currently before the * nodes are discovered. */ - if (!after_init_bootmem) + if (!after_bootmem) find_early_table_space(end, use_pse, use_gbpages); for (i = 0; i < nr_range; i++) @@ -1052,11 +1052,11 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, __flush_tlb_all(); - if (!after_init_bootmem) + if (!after_bootmem) reserve_early(table_start << PAGE_SHIFT, table_end << PAGE_SHIFT, "PGTABLE"); - if (!after_init_bootmem) + if (!after_bootmem) early_memtest(start, end); return end >> PAGE_SHIFT; -- cgit v1.2.3-70-g09d2 From cbba65796df99f3ca9bf70d14e5a19384c54b6a1 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 5 Mar 2009 14:54:59 +0200 Subject: x86: unify kernel_physical_mapping_init() call in init_memory_mapping() Impact: cleanup The 64-bit version of init_memory_mapping() uses the last mapped address returned from kernel_physical_mapping_init() whereas the 32-bit version doesn't. This patch adds relevant ifdefs to both versions of the function to reduce the diff between them. Signed-off-by: Pekka Enberg Cc: Yinghai Lu LKML-Reference: <1236257708-27269-8-git-send-email-penberg@cs.helsinki.fi> Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 10 +++++++++- arch/x86/mm/init_64.c | 21 +++++++++++++-------- 2 files changed, 22 insertions(+), 9 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index cc5c3992385..00c1d850825 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -929,6 +929,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned long page_size_mask = 0; unsigned long start_pfn, end_pfn; unsigned long pos; + unsigned long ret; struct map_range mr[NR_RANGE_MR]; int nr_range, i; @@ -1040,11 +1041,18 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, if (!after_bootmem) find_early_table_space(end, use_pse, use_gbpages); +#ifdef CONFIG_X86_32 for (i = 0; i < nr_range; i++) kernel_physical_mapping_init( mr[i].start >> PAGE_SHIFT, mr[i].end >> PAGE_SHIFT, mr[i].page_size_mask == (1<> PAGE_SHIFT; + return ret >> PAGE_SHIFT; } diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index c3c0be5b637..e4fadea2e52 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -686,10 +686,10 @@ static int save_mr(struct map_range *mr, int nr_range, unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned long end) { - unsigned long last_map_addr = 0; unsigned long page_size_mask = 0; unsigned long start_pfn, end_pfn; unsigned long pos; + unsigned long ret; struct map_range mr[NR_RANGE_MR]; int nr_range, i; @@ -819,10 +819,18 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, if (!after_bootmem) find_early_table_space(end, use_pse, use_gbpages); +#ifdef CONFIG_X86_32 + for (i = 0; i < nr_range; i++) + kernel_physical_mapping_init( + mr[i].start >> PAGE_SHIFT, + mr[i].end >> PAGE_SHIFT, + mr[i].page_size_mask == (1<> PAGE_SHIFT; + return ret >> PAGE_SHIFT; } #ifndef CONFIG_NUMA -- cgit v1.2.3-70-g09d2 From d58e854e36ddf241ebc243e4122c5ab087bf38df Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 5 Mar 2009 14:55:00 +0200 Subject: x86: add table start and end sanity checks to 32-bit init_memory_mapping() Impact: cleanup This patch adds a sanity check to the 32-bit version of init_memory_mapping() to reduce the diff to the 64-bit version. Signed-off-by: Pekka Enberg Cc: Yinghai Lu LKML-Reference: <1236257708-27269-9-git-send-email-penberg@cs.helsinki.fi> Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 00c1d850825..0a3707fb973 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -1060,7 +1060,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, __flush_tlb_all(); - if (!after_bootmem) + if (!after_bootmem && table_end > table_start) reserve_early(table_start << PAGE_SHIFT, table_end << PAGE_SHIFT, "PGTABLE"); -- cgit v1.2.3-70-g09d2 From 01ced9ec14ad1b4f8a533c2f2b5a4fe4c92c1099 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 5 Mar 2009 14:55:01 +0200 Subject: x86: ifdef 32-bit and 64-bit setup in init_memory_mapping() Impact: cleanup To reduce the diff between the 32-bit and 64-bit versions of init_memory_mapping(), ifdef configuration specific setup code in the function. Signed-off-by: Pekka Enberg Cc: Yinghai Lu LKML-Reference: <1236257708-27269-10-git-send-email-penberg@cs.helsinki.fi> Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 6 ++++++ arch/x86/mm/init_64.c | 8 ++++++++ 2 files changed, 14 insertions(+) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 0a3707fb973..3f91bdc2097 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -1054,10 +1054,16 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, mr[i].page_size_mask); #endif +#ifdef CONFIG_X86_32 early_ioremap_page_table_range_init(); load_cr3(swapper_pg_dir); +#endif +#ifdef CONFIG_X86_64 + if (!after_bootmem) + mmu_cr4_features = read_cr4(); +#endif __flush_tlb_all(); if (!after_bootmem && table_end > table_start) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index e4fadea2e52..5ecb23a57d2 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -832,8 +832,16 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, mr[i].page_size_mask); #endif +#ifdef CONFIG_X86_32 + early_ioremap_page_table_range_init(); + + load_cr3(swapper_pg_dir); +#endif + +#ifdef CONFIG_X86_64 if (!after_bootmem) mmu_cr4_features = read_cr4(); +#endif __flush_tlb_all(); if (!after_bootmem && table_end > table_start) -- cgit v1.2.3-70-g09d2 From c338d6f60fc29dfc74bd82b91526ef43ba992bab Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 5 Mar 2009 14:55:02 +0200 Subject: x86: ifdef 32-bit and 64-bit pfn setup in init_memory_mapping() Impact: cleanup To reduce the diff between the 32-bit and 64-bit versions of init_memory_mapping(), ifdef configuration specific pfn setup code in the function. Signed-off-by: Pekka Enberg Cc: Yinghai Lu LKML-Reference: <1236257708-27269-11-git-send-email-penberg@cs.helsinki.fi> Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 42 +++++++++++++++++++++++++++++++++++++++--- arch/x86/mm/init_64.c | 21 +++++++++++++++++++++ 2 files changed, 60 insertions(+), 3 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 3f91bdc2097..34760e48397 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -975,20 +975,25 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, memset(mr, 0, sizeof(mr)); nr_range = 0; + /* head if not big page alignment ? */ + start_pfn = start >> PAGE_SHIFT; + pos = start_pfn << PAGE_SHIFT; +#ifdef CONFIG_X86_32 /* * Don't use a large page for the first 2/4MB of memory * because there are often fixed size MTRRs in there * and overlapping MTRRs into large pages can cause * slowdowns. */ - /* head if not big page alignment ? */ - start_pfn = start >> PAGE_SHIFT; - pos = start_pfn << PAGE_SHIFT; if (pos == 0) end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT); else end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); +#else /* CONFIG_X86_64 */ + end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); +#endif if (end_pfn > (end >> PAGE_SHIFT)) end_pfn = end >> PAGE_SHIFT; if (start_pfn < end_pfn) { @@ -999,12 +1004,43 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, /* big page (2M) range */ start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); +#ifdef CONFIG_X86_32 end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); +#else /* CONFIG_X86_64 */ + end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) + << (PUD_SHIFT - PAGE_SHIFT); + if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) + end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)); +#endif + + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, + page_size_mask & (1<>PUD_SHIFT) + << (PUD_SHIFT - PAGE_SHIFT); + end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, + page_size_mask & + ((1<>PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); + end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); if (start_pfn < end_pfn) { nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, page_size_mask & (1<>PAGE_SHIFT; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 5ecb23a57d2..d99bc6ac488 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -741,8 +741,22 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, /* head if not big page alignment ? */ start_pfn = start >> PAGE_SHIFT; pos = start_pfn << PAGE_SHIFT; +#ifdef CONFIG_X86_32 + /* + * Don't use a large page for the first 2/4MB of memory + * because there are often fixed size MTRRs in there + * and overlapping MTRRs into large pages can cause + * slowdowns. + */ + if (pos == 0) + end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT); + else + end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); +#else /* CONFIG_X86_64 */ end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); +#endif if (end_pfn > (end >> PAGE_SHIFT)) end_pfn = end >> PAGE_SHIFT; if (start_pfn < end_pfn) { @@ -753,16 +767,22 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, /* big page (2M) range */ start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); +#ifdef CONFIG_X86_32 + end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); +#else /* CONFIG_X86_64 */ end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)); +#endif + if (start_pfn < end_pfn) { nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, page_size_mask & (1<>PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); @@ -783,6 +803,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, page_size_mask & (1<>PAGE_SHIFT; -- cgit v1.2.3-70-g09d2 From b47e3418c52b26f6143fc696326ae52a21324551 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 5 Mar 2009 14:55:03 +0200 Subject: x86: ifdef 32-bit and 64-bit NR_RANGE_MR for save_mr() unification Impact: cleanup As a trivial preparation for moving common code to arc/x86/mm/init.c, ifdef the 32-bit and 64-bit versions of NR_RANGE_MR. Signed-off-by: Pekka Enberg Cc: Yinghai Lu LKML-Reference: <1236257708-27269-12-git-send-email-penberg@cs.helsinki.fi> Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 4 ++++ arch/x86/mm/init_64.c | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 34760e48397..f59e9b85163 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -900,7 +900,11 @@ struct map_range { unsigned page_size_mask; }; +#ifdef CONFIG_X86_32 #define NR_RANGE_MR 3 +#else /* CONFIG_X86_64 */ +#define NR_RANGE_MR 5 +#endif static int save_mr(struct map_range *mr, int nr_range, unsigned long start_pfn, unsigned long end_pfn, diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index d99bc6ac488..d101990e463 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -660,7 +660,11 @@ struct map_range { unsigned page_size_mask; }; +#ifdef CONFIG_X86_32 +#define NR_RANGE_MR 3 +#else /* CONFIG_X86_64 */ #define NR_RANGE_MR 5 +#endif static int save_mr(struct map_range *mr, int nr_range, unsigned long start_pfn, unsigned long end_pfn, -- cgit v1.2.3-70-g09d2 From 0c0f756fd679d9747d52dad51fce3a5bb362eec3 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 5 Mar 2009 14:55:04 +0200 Subject: x86: add stub init_gbpages() for 32-bit init_memory_mapping() Impact: cleanup This patch adds an empty static inline init_gbpages() for the 32-bit version of init_memory_mapping() making both versions identical. Signed-off-by: Pekka Enberg Cc: Yinghai Lu LKML-Reference: <1236257708-27269-13-git-send-email-penberg@cs.helsinki.fi> Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index f59e9b85163..cd3c24b490a 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -922,6 +922,10 @@ static int save_mr(struct map_range *mr, int nr_range, return nr_range; } +static inline void init_gbpages(void) +{ +} + /* * Setup the direct mapping of the physical memory at PAGE_OFFSET. * This runs before bootmem is initialized and gets pages directly from @@ -941,6 +945,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end); + if (!after_bootmem) + init_gbpages(); + #ifdef CONFIG_DEBUG_PAGEALLOC /* * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. -- cgit v1.2.3-70-g09d2 From f765090a2617b8d9cb73b71e0aa850c29460d8be Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 5 Mar 2009 14:55:05 +0200 Subject: x86: move init_memory_mapping() to common mm/init.c Impact: cleanup This patch moves the init_memory_mapping() function to common mm/init.c. Signed-off-by: Pekka Enberg Cc: Yinghai Lu LKML-Reference: <1236257708-27269-14-git-send-email-penberg@cs.helsinki.fi> Signed-off-by: Ingo Molnar --- arch/x86/mm/init.c | 328 ++++++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/mm/init_32.c | 308 ++--------------------------------------------- arch/x86/mm/init_64.c | 314 ++--------------------------------------------- 3 files changed, 342 insertions(+), 608 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index cc7fe660f33..3a21b136da2 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -2,10 +2,338 @@ #include #include +#include #include #include #include #include +#include + +#ifdef CONFIG_X86_32 +extern void __init early_ioremap_page_table_range_init(void); +extern void __init kernel_physical_mapping_init(unsigned long start_pfn, + unsigned long end_pfn, + int use_pse); +#endif + +#ifdef CONFIG_X86_64 +extern unsigned long __meminit +kernel_physical_mapping_init(unsigned long start, + unsigned long end, + unsigned long page_size_mask); +#endif + +unsigned long __initdata table_start; +unsigned long __meminitdata table_end; +unsigned long __meminitdata table_top; + +int after_bootmem; + +int direct_gbpages +#ifdef CONFIG_DIRECT_GBPAGES + = 1 +#endif +; + +static void __init find_early_table_space(unsigned long end, int use_pse, + int use_gbpages) +{ + unsigned long puds, pmds, ptes, tables, start; + + puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; + tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); + + if (use_gbpages) { + unsigned long extra; + + extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); + pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; + } else + pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; + + tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); + + if (use_pse) { + unsigned long extra; + + extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); +#ifdef CONFIG_X86_32 + extra += PMD_SIZE; +#endif + ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; + } else + ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; + + tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); + +#ifdef CONFIG_X86_32 + /* for fixmap */ + tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); +#endif + + /* + * RED-PEN putting page tables only on node 0 could + * cause a hotspot and fill up ZONE_DMA. The page tables + * need roughly 0.5KB per GB. + */ +#ifdef CONFIG_X86_32 + start = 0x7000; + table_start = find_e820_area(start, max_pfn_mapped<>= PAGE_SHIFT; + table_end = table_start; + table_top = table_start + (tables >> PAGE_SHIFT); + + printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", + end, table_start << PAGE_SHIFT, table_top << PAGE_SHIFT); +} + +struct map_range { + unsigned long start; + unsigned long end; + unsigned page_size_mask; +}; + +#ifdef CONFIG_X86_32 +#define NR_RANGE_MR 3 +#else /* CONFIG_X86_64 */ +#define NR_RANGE_MR 5 +#endif + +static int save_mr(struct map_range *mr, int nr_range, + unsigned long start_pfn, unsigned long end_pfn, + unsigned long page_size_mask) +{ + if (start_pfn < end_pfn) { + if (nr_range >= NR_RANGE_MR) + panic("run out of range for init_memory_mapping\n"); + mr[nr_range].start = start_pfn<> PAGE_SHIFT; + pos = start_pfn << PAGE_SHIFT; +#ifdef CONFIG_X86_32 + /* + * Don't use a large page for the first 2/4MB of memory + * because there are often fixed size MTRRs in there + * and overlapping MTRRs into large pages can cause + * slowdowns. + */ + if (pos == 0) + end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT); + else + end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); +#else /* CONFIG_X86_64 */ + end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); +#endif + if (end_pfn > (end >> PAGE_SHIFT)) + end_pfn = end >> PAGE_SHIFT; + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); + pos = end_pfn << PAGE_SHIFT; + } + + /* big page (2M) range */ + start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); +#ifdef CONFIG_X86_32 + end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); +#else /* CONFIG_X86_64 */ + end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) + << (PUD_SHIFT - PAGE_SHIFT); + if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) + end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)); +#endif + + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, + page_size_mask & (1<>PUD_SHIFT) + << (PUD_SHIFT - PAGE_SHIFT); + end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, + page_size_mask & + ((1<>PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); + end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, + page_size_mask & (1<>PAGE_SHIFT; + end_pfn = end>>PAGE_SHIFT; + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); + + /* try to merge same page size and continuous */ + for (i = 0; nr_range > 1 && i < nr_range - 1; i++) { + unsigned long old_start; + if (mr[i].end != mr[i+1].start || + mr[i].page_size_mask != mr[i+1].page_size_mask) + continue; + /* move it */ + old_start = mr[i].start; + memmove(&mr[i], &mr[i+1], + (nr_range - 1 - i) * sizeof(struct map_range)); + mr[i--].start = old_start; + nr_range--; + } + + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG " %010lx - %010lx page %s\n", + mr[i].start, mr[i].end, + (mr[i].page_size_mask & (1<> PAGE_SHIFT, + mr[i].end >> PAGE_SHIFT, + mr[i].page_size_mask == (1< table_start) + reserve_early(table_start << PAGE_SHIFT, + table_end << PAGE_SHIFT, "PGTABLE"); + + if (!after_bootmem) + early_memtest(start, end); + + return ret >> PAGE_SHIFT; +} + /* * devmem_is_allowed() checks to see if /dev/mem access to a certain address diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index cd3c24b490a..187522a0c66 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -59,13 +59,9 @@ unsigned long highstart_pfn, highend_pfn; static noinline int do_test_wp_bit(void); -static unsigned long __initdata table_start; -static unsigned long __meminitdata table_end; -static unsigned long __meminitdata table_top; - -int after_bootmem; - -int direct_gbpages; +extern unsigned long __initdata table_start; +extern unsigned long __meminitdata table_end; +extern unsigned long __meminitdata table_top; static __init void *alloc_low_page(void) { @@ -227,9 +223,9 @@ static inline int is_kernel_text(unsigned long addr) * of max_low_pfn pages, by creating page tables starting from address * PAGE_OFFSET: */ -static void __init kernel_physical_mapping_init(unsigned long start_pfn, - unsigned long end_pfn, - int use_pse) +void __init kernel_physical_mapping_init(unsigned long start_pfn, + unsigned long end_pfn, + int use_pse) { pgd_t *pgd_base = swapper_pg_dir; int pgd_idx, pmd_idx, pte_ofs; @@ -509,7 +505,7 @@ void __init native_pagetable_setup_done(pgd_t *base) * be partially populated, and so it avoids stomping on any existing * mappings. */ -static void __init early_ioremap_page_table_range_init(void) +void __init early_ioremap_page_table_range_init(void) { pgd_t *pgd_base = swapper_pg_dir; unsigned long vaddr, end; @@ -834,296 +830,6 @@ void __init setup_bootmem_allocator(void) after_bootmem = 1; } -static void __init find_early_table_space(unsigned long end, int use_pse, - int use_gbpages) -{ - unsigned long puds, pmds, ptes, tables, start; - - puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; - tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); - - if (use_gbpages) { - unsigned long extra; - - extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); - pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; - } else - pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; - - tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); - - if (use_pse) { - unsigned long extra; - - extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); -#ifdef CONFIG_X86_32 - extra += PMD_SIZE; -#endif - ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; - } else - ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; - - tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); - -#ifdef CONFIG_X86_32 - /* for fixmap */ - tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); -#endif - - /* - * RED-PEN putting page tables only on node 0 could - * cause a hotspot and fill up ZONE_DMA. The page tables - * need roughly 0.5KB per GB. - */ -#ifdef CONFIG_X86_32 - start = 0x7000; - table_start = find_e820_area(start, max_pfn_mapped<>= PAGE_SHIFT; - table_end = table_start; - table_top = table_start + (tables >> PAGE_SHIFT); - - printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", - end, table_start << PAGE_SHIFT, table_top << PAGE_SHIFT); -} - -struct map_range { - unsigned long start; - unsigned long end; - unsigned page_size_mask; -}; - -#ifdef CONFIG_X86_32 -#define NR_RANGE_MR 3 -#else /* CONFIG_X86_64 */ -#define NR_RANGE_MR 5 -#endif - -static int save_mr(struct map_range *mr, int nr_range, - unsigned long start_pfn, unsigned long end_pfn, - unsigned long page_size_mask) -{ - if (start_pfn < end_pfn) { - if (nr_range >= NR_RANGE_MR) - panic("run out of range for init_memory_mapping\n"); - mr[nr_range].start = start_pfn<> PAGE_SHIFT; - pos = start_pfn << PAGE_SHIFT; -#ifdef CONFIG_X86_32 - /* - * Don't use a large page for the first 2/4MB of memory - * because there are often fixed size MTRRs in there - * and overlapping MTRRs into large pages can cause - * slowdowns. - */ - if (pos == 0) - end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT); - else - end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); -#else /* CONFIG_X86_64 */ - end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); -#endif - if (end_pfn > (end >> PAGE_SHIFT)) - end_pfn = end >> PAGE_SHIFT; - if (start_pfn < end_pfn) { - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); - pos = end_pfn << PAGE_SHIFT; - } - - /* big page (2M) range */ - start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); -#ifdef CONFIG_X86_32 - end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); -#else /* CONFIG_X86_64 */ - end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) - << (PUD_SHIFT - PAGE_SHIFT); - if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) - end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)); -#endif - - if (start_pfn < end_pfn) { - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, - page_size_mask & (1<>PUD_SHIFT) - << (PUD_SHIFT - PAGE_SHIFT); - end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); - if (start_pfn < end_pfn) { - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, - page_size_mask & - ((1<>PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); - end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); - if (start_pfn < end_pfn) { - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, - page_size_mask & (1<>PAGE_SHIFT; - end_pfn = end>>PAGE_SHIFT; - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); - - /* try to merge same page size and continuous */ - for (i = 0; nr_range > 1 && i < nr_range - 1; i++) { - unsigned long old_start; - if (mr[i].end != mr[i+1].start || - mr[i].page_size_mask != mr[i+1].page_size_mask) - continue; - /* move it */ - old_start = mr[i].start; - memmove(&mr[i], &mr[i+1], - (nr_range - 1 - i) * sizeof(struct map_range)); - mr[i--].start = old_start; - nr_range--; - } - - for (i = 0; i < nr_range; i++) - printk(KERN_DEBUG " %010lx - %010lx page %s\n", - mr[i].start, mr[i].end, - (mr[i].page_size_mask & (1<> PAGE_SHIFT, - mr[i].end >> PAGE_SHIFT, - mr[i].page_size_mask == (1< table_start) - reserve_early(table_start << PAGE_SHIFT, - table_end << PAGE_SHIFT, "PGTABLE"); - - if (!after_bootmem) - early_memtest(start, end); - - return ret >> PAGE_SHIFT; -} - - /* * paging_init() sets up the page tables - note that the first 8MB are * already mapped by head.S. diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index d101990e463..a32fe075608 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -61,12 +61,6 @@ static unsigned long dma_reserve __initdata; DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); -int direct_gbpages -#ifdef CONFIG_DIRECT_GBPAGES - = 1 -#endif -; - static int __init parse_direct_gbpages_off(char *arg) { direct_gbpages = 0; @@ -87,8 +81,6 @@ early_param("gbpages", parse_direct_gbpages_on); * around without checking the pgd every time. */ -int after_bootmem; - pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; EXPORT_SYMBOL_GPL(__supported_pte_mask); @@ -291,9 +283,9 @@ void __init cleanup_highmap(void) } } -static unsigned long __initdata table_start; -static unsigned long __meminitdata table_end; -static unsigned long __meminitdata table_top; +extern unsigned long __initdata table_start; +extern unsigned long __meminitdata table_end; +extern unsigned long __meminitdata table_top; static __ref void *alloc_low_page(unsigned long *phys) { @@ -547,77 +539,10 @@ phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end, return phys_pud_init(pud, addr, end, page_size_mask); } -static void __init find_early_table_space(unsigned long end, int use_pse, - int use_gbpages) -{ - unsigned long puds, pmds, ptes, tables, start; - - puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; - tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); - - if (use_gbpages) { - unsigned long extra; - - extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); - pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; - } else - pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; - - tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); - - if (use_pse) { - unsigned long extra; - - extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); -#ifdef CONFIG_X86_32 - extra += PMD_SIZE; -#endif - ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; - } else - ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; - - tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); - -#ifdef CONFIG_X86_32 - /* for fixmap */ - tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); -#endif - - /* - * RED-PEN putting page tables only on node 0 could - * cause a hotspot and fill up ZONE_DMA. The page tables - * need roughly 0.5KB per GB. - */ -#ifdef CONFIG_X86_32 - start = 0x7000; - table_start = find_e820_area(start, max_pfn_mapped<>= PAGE_SHIFT; - table_end = table_start; - table_top = table_start + (tables >> PAGE_SHIFT); - - printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", - end, table_start << PAGE_SHIFT, table_top << PAGE_SHIFT); -} - -static void __init init_gbpages(void) -{ - if (direct_gbpages && cpu_has_gbpages) - printk(KERN_INFO "Using GB pages for direct mapping\n"); - else - direct_gbpages = 0; -} - -static unsigned long __meminit kernel_physical_mapping_init(unsigned long start, - unsigned long end, - unsigned long page_size_mask) +unsigned long __meminit +kernel_physical_mapping_init(unsigned long start, + unsigned long end, + unsigned long page_size_mask) { unsigned long next, last_map_addr = end; @@ -654,231 +579,6 @@ static unsigned long __meminit kernel_physical_mapping_init(unsigned long start, return last_map_addr; } -struct map_range { - unsigned long start; - unsigned long end; - unsigned page_size_mask; -}; - -#ifdef CONFIG_X86_32 -#define NR_RANGE_MR 3 -#else /* CONFIG_X86_64 */ -#define NR_RANGE_MR 5 -#endif - -static int save_mr(struct map_range *mr, int nr_range, - unsigned long start_pfn, unsigned long end_pfn, - unsigned long page_size_mask) -{ - if (start_pfn < end_pfn) { - if (nr_range >= NR_RANGE_MR) - panic("run out of range for init_memory_mapping\n"); - mr[nr_range].start = start_pfn<> PAGE_SHIFT; - pos = start_pfn << PAGE_SHIFT; -#ifdef CONFIG_X86_32 - /* - * Don't use a large page for the first 2/4MB of memory - * because there are often fixed size MTRRs in there - * and overlapping MTRRs into large pages can cause - * slowdowns. - */ - if (pos == 0) - end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT); - else - end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); -#else /* CONFIG_X86_64 */ - end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); -#endif - if (end_pfn > (end >> PAGE_SHIFT)) - end_pfn = end >> PAGE_SHIFT; - if (start_pfn < end_pfn) { - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); - pos = end_pfn << PAGE_SHIFT; - } - - /* big page (2M) range */ - start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); -#ifdef CONFIG_X86_32 - end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); -#else /* CONFIG_X86_64 */ - end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) - << (PUD_SHIFT - PAGE_SHIFT); - if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) - end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)); -#endif - - if (start_pfn < end_pfn) { - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, - page_size_mask & (1<>PUD_SHIFT) - << (PUD_SHIFT - PAGE_SHIFT); - end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); - if (start_pfn < end_pfn) { - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, - page_size_mask & - ((1<>PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); - end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); - if (start_pfn < end_pfn) { - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, - page_size_mask & (1<>PAGE_SHIFT; - end_pfn = end>>PAGE_SHIFT; - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); - - /* try to merge same page size and continuous */ - for (i = 0; nr_range > 1 && i < nr_range - 1; i++) { - unsigned long old_start; - if (mr[i].end != mr[i+1].start || - mr[i].page_size_mask != mr[i+1].page_size_mask) - continue; - /* move it */ - old_start = mr[i].start; - memmove(&mr[i], &mr[i+1], - (nr_range - 1 - i) * sizeof(struct map_range)); - mr[i--].start = old_start; - nr_range--; - } - - for (i = 0; i < nr_range; i++) - printk(KERN_DEBUG " %010lx - %010lx page %s\n", - mr[i].start, mr[i].end, - (mr[i].page_size_mask & (1<> PAGE_SHIFT, - mr[i].end >> PAGE_SHIFT, - mr[i].page_size_mask == (1< table_start) - reserve_early(table_start << PAGE_SHIFT, - table_end << PAGE_SHIFT, "PGTABLE"); - - if (!after_bootmem) - early_memtest(start, end); - - return ret >> PAGE_SHIFT; -} - #ifndef CONFIG_NUMA void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) { -- cgit v1.2.3-70-g09d2 From 298af9d89f3f5292e81a0a00f729c415adc4d8fb Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 5 Mar 2009 14:55:06 +0200 Subject: x86: fix up some bad global variable names in mm/init.c Impact: cleanup The table_start, table_end, and table_top are too generic for global namespace so rename them to be more specific. Signed-off-by: Pekka Enberg Cc: Yinghai Lu LKML-Reference: <1236257708-27269-15-git-send-email-penberg@cs.helsinki.fi> Signed-off-by: Ingo Molnar --- arch/x86/mm/init.c | 26 +++++++++++++------------- arch/x86/mm/init_32.c | 14 +++++++------- arch/x86/mm/init_64.c | 10 +++++----- 3 files changed, 25 insertions(+), 25 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 3a21b136da2..5bbdfe7459d 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -23,9 +23,9 @@ kernel_physical_mapping_init(unsigned long start, unsigned long page_size_mask); #endif -unsigned long __initdata table_start; -unsigned long __meminitdata table_end; -unsigned long __meminitdata table_top; +unsigned long __initdata e820_table_start; +unsigned long __meminitdata e820_table_end; +unsigned long __meminitdata e820_table_top; int after_bootmem; @@ -78,21 +78,21 @@ static void __init find_early_table_space(unsigned long end, int use_pse, */ #ifdef CONFIG_X86_32 start = 0x7000; - table_start = find_e820_area(start, max_pfn_mapped<>= PAGE_SHIFT; - table_end = table_start; - table_top = table_start + (tables >> PAGE_SHIFT); + e820_table_start >>= PAGE_SHIFT; + e820_table_end = e820_table_start; + e820_table_top = e820_table_start + (tables >> PAGE_SHIFT); printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", - end, table_start << PAGE_SHIFT, table_top << PAGE_SHIFT); + end, e820_table_start << PAGE_SHIFT, e820_table_top << PAGE_SHIFT); } struct map_range { @@ -324,9 +324,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, #endif __flush_tlb_all(); - if (!after_bootmem && table_end > table_start) - reserve_early(table_start << PAGE_SHIFT, - table_end << PAGE_SHIFT, "PGTABLE"); + if (!after_bootmem && e820_table_end > e820_table_start) + reserve_early(e820_table_start << PAGE_SHIFT, + e820_table_end << PAGE_SHIFT, "PGTABLE"); if (!after_bootmem) early_memtest(start, end); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 187522a0c66..e9df0d9cdeb 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -59,16 +59,16 @@ unsigned long highstart_pfn, highend_pfn; static noinline int do_test_wp_bit(void); -extern unsigned long __initdata table_start; -extern unsigned long __meminitdata table_end; -extern unsigned long __meminitdata table_top; +extern unsigned long __initdata e820_table_start; +extern unsigned long __meminitdata e820_table_end; +extern unsigned long __meminitdata e820_table_top; static __init void *alloc_low_page(void) { - unsigned long pfn = table_end++; + unsigned long pfn = e820_table_end++; void *adr; - if (pfn >= table_top) + if (pfn >= e820_table_top) panic("alloc_low_page: ran out of memory"); adr = __va(pfn * PAGE_SIZE); @@ -149,8 +149,8 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, if (pmd_idx_kmap_begin != pmd_idx_kmap_end && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end - && ((__pa(pte) >> PAGE_SHIFT) < table_start - || (__pa(pte) >> PAGE_SHIFT) >= table_end)) { + && ((__pa(pte) >> PAGE_SHIFT) < e820_table_start + || (__pa(pte) >> PAGE_SHIFT) >= e820_table_end)) { pte_t *newpte; int i; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index a32fe075608..a1d33c58b49 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -283,13 +283,13 @@ void __init cleanup_highmap(void) } } -extern unsigned long __initdata table_start; -extern unsigned long __meminitdata table_end; -extern unsigned long __meminitdata table_top; +extern unsigned long __initdata e820_table_start; +extern unsigned long __meminitdata e820_table_end; +extern unsigned long __meminitdata e820_table_top; static __ref void *alloc_low_page(unsigned long *phys) { - unsigned long pfn = table_end++; + unsigned long pfn = e820_table_end++; void *adr; if (after_bootmem) { @@ -299,7 +299,7 @@ static __ref void *alloc_low_page(unsigned long *phys) return adr; } - if (pfn >= table_top) + if (pfn >= e820_table_top) panic("alloc_low_page: ran out of memory"); adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); -- cgit v1.2.3-70-g09d2 From e53fb04fce6d246ebed755b904ed1b0b814a754c Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 5 Mar 2009 14:55:07 +0200 Subject: x86: unify kernel_physical_mapping_init() function signatures Impact: cleanup In preparation for moving the function declaration to a header file, unify 32-bit and 64-bit signatures. Signed-off-by: Pekka Enberg Cc: Yinghai Lu LKML-Reference: <1236257708-27269-16-git-send-email-penberg@cs.helsinki.fi> Signed-off-by: Ingo Molnar --- arch/x86/mm/init.c | 13 +++---------- arch/x86/mm/init_32.c | 13 ++++++++++--- arch/x86/mm/init_64.c | 2 +- 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 5bbdfe7459d..6475693a81a 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -11,17 +11,12 @@ #ifdef CONFIG_X86_32 extern void __init early_ioremap_page_table_range_init(void); -extern void __init kernel_physical_mapping_init(unsigned long start_pfn, - unsigned long end_pfn, - int use_pse); #endif -#ifdef CONFIG_X86_64 -extern unsigned long __meminit +extern unsigned long __init kernel_physical_mapping_init(unsigned long start, unsigned long end, unsigned long page_size_mask); -#endif unsigned long __initdata e820_table_start; unsigned long __meminitdata e820_table_end; @@ -301,10 +296,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, #ifdef CONFIG_X86_32 for (i = 0; i < nr_range; i++) - kernel_physical_mapping_init( - mr[i].start >> PAGE_SHIFT, - mr[i].end >> PAGE_SHIFT, - mr[i].page_size_mask == (1<> PAGE_SHIFT; + end_pfn = end >> PAGE_SHIFT; + /* * First iteration will setup identity mapping using large/small pages * based on use_pse, with other attributes same as set by @@ -350,6 +356,7 @@ repeat: mapping_iter = 2; goto repeat; } + return 0; } pte_t *kmap_pte; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index a1d33c58b49..f441ae31631 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -539,7 +539,7 @@ phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end, return phys_pud_init(pud, addr, end, page_size_mask); } -unsigned long __meminit +unsigned long __init kernel_physical_mapping_init(unsigned long start, unsigned long end, unsigned long page_size_mask) -- cgit v1.2.3-70-g09d2 From 4fcb208391be5cf82c6fe2779c5eb9245ac97e91 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 5 Mar 2009 14:55:08 +0200 Subject: x86: move function and variable declarations to asm/init.h Impact: cleanup Signed-off-by: Pekka Enberg Cc: Yinghai Lu LKML-Reference: <1236257708-27269-17-git-send-email-penberg@cs.helsinki.fi> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/init.h | 18 ++++++++++++++++++ arch/x86/mm/init.c | 10 +--------- arch/x86/mm/init_32.c | 6 +----- arch/x86/mm/init_64.c | 5 +---- 4 files changed, 21 insertions(+), 18 deletions(-) create mode 100644 arch/x86/include/asm/init.h (limited to 'arch/x86/mm') diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h new file mode 100644 index 00000000000..36fb1a6a510 --- /dev/null +++ b/arch/x86/include/asm/init.h @@ -0,0 +1,18 @@ +#ifndef _ASM_X86_INIT_32_H +#define _ASM_X86_INIT_32_H + +#ifdef CONFIG_X86_32 +extern void __init early_ioremap_page_table_range_init(void); +#endif + +extern unsigned long __init +kernel_physical_mapping_init(unsigned long start, + unsigned long end, + unsigned long page_size_mask); + + +extern unsigned long __initdata e820_table_start; +extern unsigned long __meminitdata e820_table_end; +extern unsigned long __meminitdata e820_table_top; + +#endif /* _ASM_X86_INIT_32_H */ diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 6475693a81a..6d63e3d1253 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -3,21 +3,13 @@ #include #include +#include #include #include #include #include #include -#ifdef CONFIG_X86_32 -extern void __init early_ioremap_page_table_range_init(void); -#endif - -extern unsigned long __init -kernel_physical_mapping_init(unsigned long start, - unsigned long end, - unsigned long page_size_mask); - unsigned long __initdata e820_table_start; unsigned long __meminitdata e820_table_end; unsigned long __meminitdata e820_table_top; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 5ca9c6c3439..1669693e97d 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -49,6 +49,7 @@ #include #include #include +#include unsigned long max_low_pfn_mapped; unsigned long max_pfn_mapped; @@ -58,11 +59,6 @@ unsigned long highstart_pfn, highend_pfn; static noinline int do_test_wp_bit(void); - -extern unsigned long __initdata e820_table_start; -extern unsigned long __meminitdata e820_table_end; -extern unsigned long __meminitdata e820_table_top; - static __init void *alloc_low_page(void) { unsigned long pfn = e820_table_end++; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index f441ae31631..7dd7ce49d69 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -48,6 +48,7 @@ #include #include #include +#include /* * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. @@ -283,10 +284,6 @@ void __init cleanup_highmap(void) } } -extern unsigned long __initdata e820_table_start; -extern unsigned long __meminitdata e820_table_end; -extern unsigned long __meminitdata e820_table_top; - static __ref void *alloc_low_page(unsigned long *phys) { unsigned long pfn = e820_table_end++; -- cgit v1.2.3-70-g09d2 From 62436fe9ee10f5e0dd087b106d69d93c9549935a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 5 Mar 2009 14:39:03 +0100 Subject: x86: move init_memory_mapping() to common mm/init.c, build fix on 32-bit PAE Impact: build fix Cc: Pekka Enberg Cc: Yinghai Lu LKML-Reference: <1236257708-27269-14-git-send-email-penberg@cs.helsinki.fi> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_types.h | 1 + arch/x86/mm/init_32.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 4d258ad76a0..b8238dc8786 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -273,6 +273,7 @@ typedef struct page *pgtable_t; extern pteval_t __supported_pte_mask; extern int nx_enabled; +extern void set_nx(void); #define pgprot_writecombine pgprot_writecombine extern pgprot_t pgprot_writecombine(pgprot_t prot); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 1669693e97d..5e5126e0d54 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -605,7 +605,7 @@ static int __init noexec_setup(char *str) } early_param("noexec", noexec_setup); -static void __init set_nx(void) +void __init set_nx(void) { unsigned int v[4], l, h; -- cgit v1.2.3-70-g09d2 From dc16ecf7fd1fad7436832121435d4926a81d469e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 4 Mar 2009 16:10:44 -0800 Subject: x86-32: use specific __vmalloc_start_set flag in __virt_addr_valid Rather than relying on the ever-unreliable system_state, add a specific __vmalloc_start_set flag to indicate whether the vmalloc area has meaningful boundaries yet, and use that in x86-32's __phys_addr and __virt_addr_valid. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_32_types.h | 5 +++++ arch/x86/mm/init_32.c | 4 ++++ arch/x86/mm/ioremap.c | 7 +++---- 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index bd8df3b2fe0..2733fad45f9 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -25,6 +25,11 @@ * area for the same reason. ;) */ #define VMALLOC_OFFSET (8 * 1024 * 1024) + +#ifndef __ASSEMBLER__ +extern bool __vmalloc_start_set; /* set once high_memory is set */ +#endif + #define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET) #ifdef CONFIG_X86_PAE #define LAST_PKMAP 512 diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 5e5126e0d54..d57dfffb021 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -59,6 +59,8 @@ unsigned long highstart_pfn, highend_pfn; static noinline int do_test_wp_bit(void); +bool __read_mostly __vmalloc_start_set = false; + static __init void *alloc_low_page(void) { unsigned long pfn = e820_table_end++; @@ -757,6 +759,8 @@ void __init initmem_init(unsigned long start_pfn, #ifdef CONFIG_FLATMEM max_mapnr = num_physpages; #endif + __vmalloc_start_set = true; + printk(KERN_NOTICE "%ldMB LOWMEM available.\n", pages_to_mb(max_low_pfn)); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 433f7bd4648..a23ca5b5bf2 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -76,10 +76,9 @@ static inline int phys_addr_valid(unsigned long addr) #ifdef CONFIG_DEBUG_VIRTUAL unsigned long __phys_addr(unsigned long x) { - /* VMALLOC_* aren't constants; not available at the boot time */ + /* VMALLOC_* aren't constants */ VIRTUAL_BUG_ON(x < PAGE_OFFSET); - VIRTUAL_BUG_ON(system_state != SYSTEM_BOOTING && - is_vmalloc_addr((void *) x)); + VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x)); return x - PAGE_OFFSET; } EXPORT_SYMBOL(__phys_addr); @@ -89,7 +88,7 @@ bool __virt_addr_valid(unsigned long x) { if (x < PAGE_OFFSET) return false; - if (system_state != SYSTEM_BOOTING && is_vmalloc_addr((void *) x)) + if (__vmalloc_start_set && is_vmalloc_addr((void *) x)) return false; return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); } -- cgit v1.2.3-70-g09d2 From ed26dbe5ae045e5bf95c6dc27497397a3fde52e1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 4 Mar 2009 16:16:51 -0800 Subject: x86: pre-initialize boot_cpu_data.x86_phys_bits to avoid system_state tests Impact: cleanup, micro-optimization Pre-initialize boot_cpu_data.x86_phys_bits to a reasonable default to remove the use of system_state tests in __virt_addr_valid() and __phys_addr(). Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 4 +++- arch/x86/mm/ioremap.c | 7 ++----- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b746deb9ebc..f28c56e6bf9 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -202,7 +202,9 @@ struct ist_info ist_info; #endif #else -struct cpuinfo_x86 boot_cpu_data __read_mostly; +struct cpuinfo_x86 boot_cpu_data __read_mostly = { + .x86_phys_bits = MAX_PHYSMEM_BITS, +}; EXPORT_SYMBOL(boot_cpu_data); #endif diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index a23ca5b5bf2..62773abdf08 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -38,8 +38,7 @@ unsigned long __phys_addr(unsigned long x) } else { VIRTUAL_BUG_ON(x < PAGE_OFFSET); x -= PAGE_OFFSET; - VIRTUAL_BUG_ON(system_state == SYSTEM_BOOTING ? x > MAXMEM : - !phys_addr_valid(x)); + VIRTUAL_BUG_ON(!phys_addr_valid(x)); } return x; } @@ -56,10 +55,8 @@ bool __virt_addr_valid(unsigned long x) if (x < PAGE_OFFSET) return false; x -= PAGE_OFFSET; - if (system_state == SYSTEM_BOOTING ? - x > MAXMEM : !phys_addr_valid(x)) { + if (!phys_addr_valid(x)) return false; - } } return pfn_valid(x >> PAGE_SHIFT); -- cgit v1.2.3-70-g09d2 From d1a8e7792047f7dca7eb5759250e2c12800bf262 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 6 Mar 2009 03:12:50 -0800 Subject: x86: make "memtest" like "memtest=17" Impact: make boot command line "memtest" do one loop by default So don't need to guess many patterns in one loop. Signed-off-by: Yinghai Lu LKML-Reference: <49B10532.3020105@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/mm/memtest.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index 0bcd7883d03..605c8be0621 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c @@ -100,6 +100,9 @@ static int __init parse_memtest(char *arg) { if (arg) memtest_pattern = simple_strtoul(arg, NULL, 0); + else + memtest_pattern = ARRAY_SIZE(patterns); + return 0; } -- cgit v1.2.3-70-g09d2 From c77a3b59c624454c501cbfa1a3611d5a00bf9532 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 5 Mar 2009 17:04:26 +0200 Subject: x86: fix uninitialized variable in init_memory_mapping() Signed-off-by: Pekka Enberg LKML-Reference: <1236265466.31324.9.camel@penberg-laptop> Signed-off-by: Ingo Molnar --- arch/x86/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 6d63e3d1253..15219e0d124 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -134,8 +134,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, { unsigned long page_size_mask = 0; unsigned long start_pfn, end_pfn; + unsigned long ret = 0; unsigned long pos; - unsigned long ret; struct map_range mr[NR_RANGE_MR]; int nr_range, i; -- cgit v1.2.3-70-g09d2 From 5dd61dfabcaa5bfb67afb8a2d255bd1e156562e3 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 5 Mar 2009 17:04:57 +0200 Subject: x86: rename do_not_nx to disable_nx in mm/init_64.c As a preparational step for unifying noexec handling on 32-bit and 64-bit, rename the do_not_nx variable to disable_nx on 64-bit. Signed-off-by: Pekka Enberg LKML-Reference: <1236265497.31324.11.camel@penberg-laptop> Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 8a853bc3b28..54efa57d1c0 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -85,7 +85,7 @@ early_param("gbpages", parse_direct_gbpages_on); pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; EXPORT_SYMBOL_GPL(__supported_pte_mask); -static int do_not_nx __cpuinitdata; +static int disable_nx __cpuinitdata; /* * noexec=on|off @@ -100,9 +100,9 @@ static int __init nonx_setup(char *str) return -EINVAL; if (!strncmp(str, "on", 2)) { __supported_pte_mask |= _PAGE_NX; - do_not_nx = 0; + disable_nx = 0; } else if (!strncmp(str, "off", 3)) { - do_not_nx = 1; + disable_nx = 1; __supported_pte_mask &= ~_PAGE_NX; } return 0; @@ -114,7 +114,7 @@ void __cpuinit check_efer(void) unsigned long efer; rdmsrl(MSR_EFER, efer); - if (!(efer & EFER_NX) || do_not_nx) + if (!(efer & EFER_NX) || disable_nx) __supported_pte_mask &= ~_PAGE_NX; } -- cgit v1.2.3-70-g09d2 From 8827247ffcc9e880cbe4705655065cf011265157 Mon Sep 17 00:00:00 2001 From: Wang Chen Date: Sat, 7 Mar 2009 13:34:19 +0800 Subject: x86: don't define __this_fixmap_does_not_exist() Impact: improve out-of-range fixmap index debugging Commit "1b42f51630c7eebce6fb780b480731eb81afd325" defined the __this_fixmap_does_not_exist() function with a WARN_ON(1) in it. This causes the linker to not report an error when __this_fixmap_does_not_exist() is called with a non-constant parameter. Ingo defined __this_fixmap_does_not_exist() because he wanted to get virt addresses of fix memory of nest level by non-constant index. But we can fix this and still keep the link-time check: We can get the four slot virt addresses on link time and store them to array slot_virt[]. Then we can then refer the slot_virt with non-constant index, in the ioremap-leak detection code. Signed-off-by: Wang Chen LKML-Reference: <49B2075B.4070509@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 62773abdf08..96786ef2c9a 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -504,13 +504,19 @@ static inline pte_t * __init early_ioremap_pte(unsigned long addr) return &bm_pte[pte_index(addr)]; } +static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata; + void __init early_ioremap_init(void) { pmd_t *pmd; + int i; if (early_ioremap_debug) printk(KERN_INFO "early_ioremap_init()\n"); + for (i = 0; i < FIX_BTMAPS_SLOTS; i++) + slot_virt[i] = fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); + pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); memset(bm_pte, 0, sizeof(bm_pte)); pmd_populate_kernel(&init_mm, pmd, bm_pte); @@ -577,6 +583,7 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx) static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; + static int __init check_early_ioremap_leak(void) { int count = 0; @@ -598,7 +605,8 @@ static int __init check_early_ioremap_leak(void) } late_initcall(check_early_ioremap_leak); -static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) +static void __init __iomem * +__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) { unsigned long offset, last_addr; unsigned int nrpages; @@ -664,9 +672,9 @@ static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned lo --nrpages; } if (early_ioremap_debug) - printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0)); + printk(KERN_CONT "%08lx + %08lx\n", offset, slot_virt[slot]); - prev_map[slot] = (void __iomem *)(offset + fix_to_virt(idx0)); + prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]); return prev_map[slot]; } @@ -734,8 +742,3 @@ void __init early_iounmap(void __iomem *addr, unsigned long size) } prev_map[slot] = NULL; } - -void __this_fixmap_does_not_exist(void) -{ - WARN_ON(1); -} -- cgit v1.2.3-70-g09d2 From e954ef20c29b7af07a8cb5452f14fb69e3d9d2b2 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 5 Mar 2009 12:04:57 -0800 Subject: x86: fix warning about nodeid Impact: cleanup Ingo found there warning about nodeid with some configs. try to use for_each_online_node for non numa too. in that case nodeid will be 0. also move out boundary checking from setup_node_bootmem(), so non-numa config will not check it. Signed-off-by: Yinghai Lu Cc: Andrew Morton LKML-Reference: <49B03069.80001@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 2966c6b8d30..db81e9a8556 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -806,11 +806,6 @@ static unsigned long __init setup_node_bootmem(int nodeid, { unsigned long bootmap_size; - if (start_pfn > max_low_pfn) - return bootmap; - if (end_pfn > max_low_pfn) - end_pfn = max_low_pfn; - /* don't touch min_low_pfn */ bootmap_size = init_bootmem_node(NODE_DATA(nodeid), bootmap >> PAGE_SHIFT, @@ -843,13 +838,23 @@ void __init setup_bootmem_allocator(void) max_pfn_mapped< max_low_pfn) + continue; + if (end_pfn > max_low_pfn) + end_pfn = max_low_pfn; #else - bootmap = setup_node_bootmem(0, 0, max_low_pfn, bootmap); + start_pfn = 0; + end_pfn = max_low_pfn; #endif + bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn, + bootmap); + } after_bootmem = 1; } -- cgit v1.2.3-70-g09d2 From d0fc63f7bd07cb779a06dc1cdd0c5a14e7f5d562 Mon Sep 17 00:00:00 2001 From: Stuart Bennett Date: Sun, 8 Mar 2009 20:21:35 +0200 Subject: x86 mmiotrace: fix remove_kmmio_fault_pages() Impact: fix race+crash in mmiotrace The list manipulation in remove_kmmio_fault_pages() was broken. If more than one consecutive kmmio_fault_page was re-added during the grace period between unregister_kmmio_probe() and remove_kmmio_fault_pages(), the list manipulation failed to remove pages from the release list. After a second grace period the pages get into rcu_free_kmmio_fault_pages() and raise a BUG_ON() kernel crash. The list manipulation is fixed to properly remove pages from the release list. This bug has been present from the very beginning of mmiotrace in the mainline kernel. It was introduced in 0fd0e3da ("x86: mmiotrace full patch, preview 1"); An urgent fix for Linus. Tested by Stuart (on 32-bit) and Pekka (on amd and intel 64-bit systems, nouveau and nvidia proprietary). Signed-off-by: Stuart Bennett Signed-off-by: Pekka Paalanen LKML-Reference: <20090308202135.34933feb@daedalus.pq.iki.fi> Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 9f205030d9a..6a518dd08a3 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -451,23 +451,24 @@ static void rcu_free_kmmio_fault_pages(struct rcu_head *head) static void remove_kmmio_fault_pages(struct rcu_head *head) { - struct kmmio_delayed_release *dr = container_of( - head, - struct kmmio_delayed_release, - rcu); + struct kmmio_delayed_release *dr = + container_of(head, struct kmmio_delayed_release, rcu); struct kmmio_fault_page *p = dr->release_list; struct kmmio_fault_page **prevp = &dr->release_list; unsigned long flags; + spin_lock_irqsave(&kmmio_lock, flags); while (p) { - if (!p->count) + if (!p->count) { list_del_rcu(&p->list); - else + prevp = &p->release_next; + } else { *prevp = p->release_next; - prevp = &p->release_next; + } p = p->release_next; } spin_unlock_irqrestore(&kmmio_lock, flags); + /* This is the real RCU destroy call. */ call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages); } -- cgit v1.2.3-70-g09d2 From 0feca851c1b3cb4ebfa3149144b3d5de0879ebaa Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Mar 2009 10:09:26 -0800 Subject: x86-32: make sure virt_addr_valid() returns false for fixmap addresses I found that virt_addr_valid() was returning true for fixmap addresses. I'm not sure whether pfn_valid() is supposed to include this test, but there's no harm in being explicit. Signed-off-by: Jeremy Fitzhardinge Cc: Jiri Slaby Cc: Yinghai Lu LKML-Reference: <49B166D6.2080505@goop.org> Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 62773abdf08..62def579573 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -87,6 +87,8 @@ bool __virt_addr_valid(unsigned long x) return false; if (__vmalloc_start_set && is_vmalloc_addr((void *) x)) return false; + if (x >= FIXADDR_START) + return false; return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); } EXPORT_SYMBOL(__virt_addr_valid); -- cgit v1.2.3-70-g09d2 From bb6d59ca927d855ffac567b35c0a790c67016103 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 11 Mar 2009 23:33:18 +0900 Subject: x86: unify kmap_atomic_pfn() and iomap_atomic_prot_pfn() kmap_atomic_pfn() and iomap_atomic_prot_pfn() are almost same except pgprot. This patch removes the code duplication for these two functions. Signed-off-by: Akinobu Mita LKML-Reference: <20090311143317.GA22244@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/highmem.h | 1 + arch/x86/mm/highmem_32.c | 17 +++++++++++------ arch/x86/mm/iomap_32.c | 13 ++----------- 3 files changed, 14 insertions(+), 17 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h index bf9276bea66..014c2b85ae4 100644 --- a/arch/x86/include/asm/highmem.h +++ b/arch/x86/include/asm/highmem.h @@ -63,6 +63,7 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot); void *kmap_atomic(struct page *page, enum km_type type); void kunmap_atomic(void *kvaddr, enum km_type type); void *kmap_atomic_pfn(unsigned long pfn, enum km_type type); +void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); struct page *kmap_atomic_to_page(void *ptr); #ifndef CONFIG_PARAVIRT diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index d11745334a6..ae4c8dae266 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -121,23 +121,28 @@ void kunmap_atomic(void *kvaddr, enum km_type type) pagefault_enable(); } -/* This is the same as kmap_atomic() but can map memory that doesn't - * have a struct page associated with it. - */ -void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) +void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) { enum fixed_addresses idx; unsigned long vaddr; pagefault_disable(); - idx = type + KM_TYPE_NR*smp_processor_id(); + idx = type + KM_TYPE_NR * smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot)); + set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); arch_flush_lazy_mmu_mode(); return (void*) vaddr; } + +/* This is the same as kmap_atomic() but can map memory that doesn't + * have a struct page associated with it. + */ +void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) +{ + return kmap_atomic_prot_pfn(pfn, type, kmap_prot); +} EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */ struct page *kmap_atomic_to_page(void *ptr) diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 04102d42ff4..592984e5496 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -18,6 +18,7 @@ #include #include +#include #include int is_io_mapping_possible(resource_size_t base, unsigned long size) @@ -36,11 +37,6 @@ EXPORT_SYMBOL_GPL(is_io_mapping_possible); void * iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) { - enum fixed_addresses idx; - unsigned long vaddr; - - pagefault_disable(); - /* * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS. * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the @@ -50,12 +46,7 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC)) prot = PAGE_KERNEL_UC_MINUS; - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - set_pte(kmap_pte-idx, pfn_pte(pfn, prot)); - arch_flush_lazy_mmu_mode(); - - return (void*) vaddr; + return kmap_atomic_prot_pfn(pfn, type, prot); } EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn); -- cgit v1.2.3-70-g09d2 From 12074fa1073013dd11f1cff41db018d5cff4ecd9 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 11 Mar 2009 23:34:50 +0900 Subject: x86: debug check for kmap_atomic_pfn and iomap_atomic_prot_pfn() It may be useful for kmap_atomic_pfn() and iomap_atomic_prot_pfn() to check invalid kmap usage as well as kmap_atomic. Signed-off-by: Akinobu Mita LKML-Reference: <20090311143449.GB22244@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/mm/highmem_32.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index ae4c8dae266..f256e73542d 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -128,6 +128,8 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) pagefault_disable(); + debug_kmap_atomic_prot(type); + idx = type + KM_TYPE_NR * smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); -- cgit v1.2.3-70-g09d2 From 211b3d03c7400f48a781977a50104c9d12f4e229 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 10 Mar 2009 22:31:03 +0100 Subject: x86: work around Fedora-11 x86-32 kernel failures on Intel Atom CPUs Impact: work around boot crash Work around Intel Atom erratum AAH41 (probabilistically) - it's triggering in the field. Reported-by: Linus Torvalds Tested-by: Kyle McMartin Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 7be47d1a97e..7233bd7e357 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -515,6 +515,17 @@ static int split_large_page(pte_t *kpte, unsigned long address) * primary protection behavior: */ __set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE))); + + /* + * Intel Atom errata AAH41 workaround. + * + * The real fix should be in hw or in a microcode update, but + * we also probabilistically try to reduce the window of having + * a large TLB mixed with 4K TLBs while instruction fetches are + * going on. + */ + __flush_tlb_all(); + base = NULL; out_unlock: -- cgit v1.2.3-70-g09d2 From afcfe024aebd74b0984a41af9a34e009cf5badaf Mon Sep 17 00:00:00 2001 From: Stuart Bennett Date: Wed, 11 Mar 2009 20:29:45 +0000 Subject: x86: mmiotrace: quieten spurious warning message This message was being incorrectly emitted when using gdb, so compile it out by default for now; there will be a better fix in v2.6.30. Reported-by: Ingo Molnar Signed-off-by: Stuart Bennett Acked-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 6a518dd08a3..4f115e00486 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -310,7 +310,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); if (!ctx->active) { - pr_warning("kmmio: spurious debug trap on CPU %d.\n", + pr_debug("kmmio: spurious debug trap on CPU %d.\n", smp_processor_id()); goto out; } -- cgit v1.2.3-70-g09d2 From dd63fdcc63f0f853b116b52e56200a0e0227cf5f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 13 Mar 2009 03:20:49 +0100 Subject: x86: unify kmap_atomic_pfn() and iomap_atomic_prot_pfn(), fix Impact: build fix Move kmap_atomic_prot_pfn() to iomap_32.c. It is used on all 32-bit kernels, while highmem_32.c is only built on highmem kernels. ( Note: the debug_kmap_atomic_prot() check is removed for now, that problem is handled via another patch. ) Reported-by: Thomas Gleixner Cc: Akinobu Mita LKML-Reference: <20090311143317.GA22244@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/mm/highmem_32.c | 20 ++------------------ arch/x86/mm/iomap_32.c | 18 +++++++++++++++++- 2 files changed, 19 insertions(+), 19 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index f256e73542d..522db5e3d0b 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -121,24 +121,8 @@ void kunmap_atomic(void *kvaddr, enum km_type type) pagefault_enable(); } -void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) -{ - enum fixed_addresses idx; - unsigned long vaddr; - - pagefault_disable(); - - debug_kmap_atomic_prot(type); - - idx = type + KM_TYPE_NR * smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); - arch_flush_lazy_mmu_mode(); - - return (void*) vaddr; -} - -/* This is the same as kmap_atomic() but can map memory that doesn't +/* + * This is the same as kmap_atomic() but can map memory that doesn't * have a struct page associated with it. */ void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 592984e5496..6e60ba698ce 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -32,7 +32,23 @@ int is_io_mapping_possible(resource_size_t base, unsigned long size) } EXPORT_SYMBOL_GPL(is_io_mapping_possible); -/* Map 'pfn' using fixed map 'type' and protections 'prot' +void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) +{ + enum fixed_addresses idx; + unsigned long vaddr; + + pagefault_disable(); + + idx = type + KM_TYPE_NR * smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); + arch_flush_lazy_mmu_mode(); + + return (void *)vaddr; +} + +/* + * Map 'pfn' using fixed map 'type' and protections 'prot' */ void * iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) -- cgit v1.2.3-70-g09d2 From 13c6c53282d99c82e79b02477efd2c1e30a991ef Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 12 Mar 2009 12:37:34 +0000 Subject: x86, 32-bit: also use cpuinfo_x86's x86_{phys,virt}_bits members Impact: 32/64-bit consolidation In a first step, this allows fixing phys_addr_valid() for PAE (which until now reported all addresses to be valid). Subsequently, this will also allow simplifying some MTRR handling code. Signed-off-by: Jan Beulich LKML-Reference: <49B9101E.76E4.0078.0@novell.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/cpu/common.c | 12 +++++++++++- arch/x86/kernel/cpu/intel.c | 5 +++++ arch/x86/mm/ioremap.c | 17 ++++++++--------- 4 files changed, 25 insertions(+), 11 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 76139506c3e..bd3406db1d6 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -75,9 +75,9 @@ struct cpuinfo_x86 { #else /* Number of 4K pages in DTLB/ITLB combined(in pages): */ int x86_tlbsize; +#endif __u8 x86_virt_bits; __u8 x86_phys_bits; -#endif /* CPUID returned core id bits: */ __u8 x86_coreid_bits; /* Max extended CPUID function supported: */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 826d5c87627..a95e9480bb9 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -549,13 +549,15 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) } } -#ifdef CONFIG_X86_64 if (c->extended_cpuid_level >= 0x80000008) { u32 eax = cpuid_eax(0x80000008); c->x86_virt_bits = (eax >> 8) & 0xff; c->x86_phys_bits = eax & 0xff; } +#ifdef CONFIG_X86_32 + else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36)) + c->x86_phys_bits = 36; #endif if (c->extended_cpuid_level >= 0x80000007) @@ -602,8 +604,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_64 c->x86_clflush_size = 64; + c->x86_phys_bits = 36; + c->x86_virt_bits = 48; #else c->x86_clflush_size = 32; + c->x86_phys_bits = 32; + c->x86_virt_bits = 32; #endif c->x86_cache_alignment = c->x86_clflush_size; @@ -726,9 +732,13 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_coreid_bits = 0; #ifdef CONFIG_X86_64 c->x86_clflush_size = 64; + c->x86_phys_bits = 36; + c->x86_virt_bits = 48; #else c->cpuid_level = -1; /* CPUID not detected */ c->x86_clflush_size = 32; + c->x86_phys_bits = 32; + c->x86_virt_bits = 32; #endif c->x86_cache_alignment = c->x86_clflush_size; memset(&c->x86_capability, 0, sizeof c->x86_capability); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 191117f1ad5..ae769471042 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -54,6 +54,11 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) c->x86_cache_alignment = 128; #endif + /* CPUID workaround for 0F33/0F34 CPU */ + if (c->x86 == 0xF && c->x86_model == 0x3 + && (c->x86_mask == 0x3 || c->x86_mask == 0x4)) + c->x86_phys_bits = 36; + /* * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate * with P/T states and does not stop in deep C-states diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index aca924a30ee..83ed74affba 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -22,13 +22,17 @@ #include #include -#ifdef CONFIG_X86_64 - -static inline int phys_addr_valid(unsigned long addr) +static inline int phys_addr_valid(resource_size_t addr) { - return addr < (1UL << boot_cpu_data.x86_phys_bits); +#ifdef CONFIG_PHYS_ADDR_T_64BIT + return !(addr >> boot_cpu_data.x86_phys_bits); +#else + return 1; +#endif } +#ifdef CONFIG_X86_64 + unsigned long __phys_addr(unsigned long x) { if (x >= __START_KERNEL_map) { @@ -65,11 +69,6 @@ EXPORT_SYMBOL(__virt_addr_valid); #else -static inline int phys_addr_valid(unsigned long addr) -{ - return 1; -} - #ifdef CONFIG_DEBUG_VIRTUAL unsigned long __phys_addr(unsigned long x) { -- cgit v1.2.3-70-g09d2 From dc9dd5cc854cde110d2421f3a11fec7597e059c1 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 12 Mar 2009 12:40:06 +0000 Subject: x86: move save_mr() into .meminit.text Impact: cleanup, save memory The function is only being called from boot or memory hotplug paths. Signed-off-by: Jan Beulich LKML-Reference: <49B910B6.76E4.0078.0@novell.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/init.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 15219e0d124..fd3da1dda1c 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -94,9 +94,9 @@ struct map_range { #define NR_RANGE_MR 5 #endif -static int save_mr(struct map_range *mr, int nr_range, - unsigned long start_pfn, unsigned long end_pfn, - unsigned long page_size_mask) +static int __meminit save_mr(struct map_range *mr, int nr_range, + unsigned long start_pfn, unsigned long end_pfn, + unsigned long page_size_mask) { if (start_pfn < end_pfn) { if (nr_range >= NR_RANGE_MR) -- cgit v1.2.3-70-g09d2 From 698609bdcd35d0641f4c6622c83680ab1a6d67cb Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 12 Mar 2009 13:11:50 +0000 Subject: x86: create a non-zero sized bm_pte only when needed Impact: kernel image size reduction Since in most configurations the pmd page needed maps the same range of virtual addresses which is also mapped by the earlier inserted one for covering FIX_DBGP_BASE, that page (and its insertion in the page tables) can be avoided altogether by detecting the condition at compile time. Signed-off-by: Jan Beulich LKML-Reference: <49B91826.76E4.0078.0@novell.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 83ed74affba..55e127f71ed 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -487,7 +487,12 @@ static int __init early_ioremap_debug_setup(char *str) early_param("early_ioremap_debug", early_ioremap_debug_setup); static __initdata int after_paging_init; -static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss; +#define __FIXADDR_TOP (-PAGE_SIZE) +static pte_t bm_pte[(__fix_to_virt(FIX_DBGP_BASE) + ^ __fix_to_virt(FIX_BTMAP_BEGIN)) >> PMD_SHIFT + ? PAGE_SIZE / sizeof(pte_t) : 0] __page_aligned_bss; +#undef __FIXADDR_TOP +static __initdata pte_t *bm_ptep; static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) { @@ -502,6 +507,8 @@ static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) static inline pte_t * __init early_ioremap_pte(unsigned long addr) { + if (!sizeof(bm_pte)) + return &bm_ptep[pte_index(addr)]; return &bm_pte[pte_index(addr)]; } @@ -519,8 +526,14 @@ void __init early_ioremap_init(void) slot_virt[i] = fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); - memset(bm_pte, 0, sizeof(bm_pte)); - pmd_populate_kernel(&init_mm, pmd, bm_pte); + if (sizeof(bm_pte)) { + memset(bm_pte, 0, sizeof(bm_pte)); + pmd_populate_kernel(&init_mm, pmd, bm_pte); + } else { + bm_ptep = pte_offset_kernel(pmd, 0); + if (early_ioremap_debug) + printk(KERN_INFO "bm_ptep=%p\n", bm_ptep); + } /* * The boot-ioremap range spans multiple pmds, for which -- cgit v1.2.3-70-g09d2 From 4bb9c5c02153dfc89a6c73a6f32091413805ad7d Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Thu, 12 Mar 2009 17:45:27 -0700 Subject: VM, x86, PAT: Change is_linear_pfn_mapping to not use vm_pgoff Impact: fix false positive PAT warnings - also fix VirtalBox hang Use of vma->vm_pgoff to identify the pfnmaps that are fully mapped at mmap time is broken. vm_pgoff is set by generic mmap code even for cases where drivers are setting up the mappings at the fault time. The problem was originally reported here: http://marc.info/?l=linux-kernel&m=123383810628583&w=2 Change is_linear_pfn_mapping logic to overload VM_INSERTPAGE flag along with VM_PFNMAP to mean full PFNMAP setup at mmap time. Problem also tracked at: http://bugzilla.kernel.org/show_bug.cgi?id=12800 Reported-by: Thomas Hellstrom Tested-by: Frans Pop Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha @intel.com> Cc: Nick Piggin Cc: "ebiederm@xmission.com" Cc: # only for 2.6.29.1, not .28 LKML-Reference: <20090313004527.GA7176@linux-os.sc.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 5 +++-- include/linux/mm.h | 15 +++++++++++++-- mm/memory.c | 6 ++++-- 3 files changed, 20 insertions(+), 6 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index e0ab173b697..21bc1f787ae 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -641,10 +641,11 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, is_ram = pat_pagerange_is_ram(paddr, paddr + size); /* - * reserve_pfn_range() doesn't support RAM pages. + * reserve_pfn_range() doesn't support RAM pages. Maintain the current + * behavior with RAM pages by returning success. */ if (is_ram != 0) - return -EINVAL; + return 0; ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); if (ret) diff --git a/include/linux/mm.h b/include/linux/mm.h index 065cdf8c09f..3daa05feed9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -98,7 +98,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ #define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */ -#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ +#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it. Refer note in VM_PFNMAP_AT_MMAP below */ #define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */ #define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */ @@ -126,6 +126,17 @@ extern unsigned int kobjsize(const void *objp); */ #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) +/* + * pfnmap vmas that are fully mapped at mmap time (not mapped on fault). + * Used by x86 PAT to identify such PFNMAP mappings and optimize their handling. + * Note VM_INSERTPAGE flag is overloaded here. i.e, + * VM_INSERTPAGE && !VM_PFNMAP implies + * The vma has had "vm_insert_page()" done on it + * VM_INSERTPAGE && VM_PFNMAP implies + * The vma is PFNMAP with full mapping at mmap time + */ +#define VM_PFNMAP_AT_MMAP (VM_INSERTPAGE | VM_PFNMAP) + /* * mapping from the currently active vm_flags protection bits (the * low four bits) to a page protection mask.. @@ -145,7 +156,7 @@ extern pgprot_t protection_map[16]; */ static inline int is_linear_pfn_mapping(struct vm_area_struct *vma) { - return ((vma->vm_flags & VM_PFNMAP) && vma->vm_pgoff); + return ((vma->vm_flags & VM_PFNMAP_AT_MMAP) == VM_PFNMAP_AT_MMAP); } static inline int is_pfn_mapping(struct vm_area_struct *vma) diff --git a/mm/memory.c b/mm/memory.c index baa999e87cd..d7df5babcba 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1665,9 +1665,10 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, * behaviour that some programs depend on. We mark the "original" * un-COW'ed pages by matching them up with "vma->vm_pgoff". */ - if (addr == vma->vm_start && end == vma->vm_end) + if (addr == vma->vm_start && end == vma->vm_end) { vma->vm_pgoff = pfn; - else if (is_cow_mapping(vma->vm_flags)) + vma->vm_flags |= VM_PFNMAP_AT_MMAP; + } else if (is_cow_mapping(vma->vm_flags)) return -EINVAL; vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; @@ -1679,6 +1680,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, * needed from higher level routine calling unmap_vmas */ vma->vm_flags &= ~(VM_IO | VM_RESERVED | VM_PFNMAP); + vma->vm_flags &= ~VM_PFNMAP_AT_MMAP; return -EINVAL; } -- cgit v1.2.3-70-g09d2 From 93dbda7cbcd70a0bd1a99f39f44a9ccde8ab9040 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 26 Feb 2009 17:35:44 -0800 Subject: x86: add brk allocation for very, very early allocations Impact: new interface Add a brk()-like allocator which effectively extends the bss in order to allow very early code to do dynamic allocations. This is better than using statically allocated arrays for data in subsystems which may never get used. The space for brk allocations is in the bss ELF segment, so that the space is mapped properly by the code which maps the kernel, and so that bootloaders keep the space free rather than putting a ramdisk or something into it. The bss itself, delimited by __bss_stop, ends before the brk area (__brk_base to __brk_limit). The kernel text, data and bss is reserved up to __bss_stop. Any brk-allocated data is reserved separately just before the kernel pagetable is built, as that code allocates from unreserved spaces in the e820 map, potentially allocating from any unused brk memory. Ultimately any unused memory in the brk area is used in the general kernel memory pool. Initially the brk space is set to 1MB, which is probably much larger than any user needs (the largest current user is i386 head_32.S's code to build the pagetables to map the kernel, which can get fairly large with a big kernel image and no PSE support). So long as the system has sufficient memory for the bootloader to reserve the kernel+1MB brk, there are no bad effects resulting from an over-large brk. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/sections.h | 7 +++++++ arch/x86/include/asm/setup.h | 4 ++++ arch/x86/kernel/head32.c | 2 +- arch/x86/kernel/head64.c | 2 +- arch/x86/kernel/setup.c | 39 ++++++++++++++++++++++++++++++++++----- arch/x86/kernel/vmlinux_32.lds.S | 7 +++++++ arch/x86/kernel/vmlinux_64.lds.S | 5 +++++ arch/x86/mm/pageattr.c | 5 +++-- arch/x86/xen/mmu.c | 6 +++--- 9 files changed, 65 insertions(+), 12 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index 2b8c5160388..1b7ee5d673c 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -1 +1,8 @@ +#ifndef _ASM_X86_SECTIONS_H +#define _ASM_X86_SECTIONS_H + #include + +extern char __brk_base[], __brk_limit[]; + +#endif /* _ASM_X86_SECTIONS_H */ diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 05c6f6b11fd..45454f3fa12 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -100,6 +100,10 @@ extern struct boot_params boot_params; */ #define LOWMEMSIZE() (0x9f000) +/* exceedingly early brk-like allocator */ +extern unsigned long _brk_end; +void *extend_brk(size_t size, size_t align); + #ifdef __i386__ void __init i386_start_kernel(void); diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index ac108d1fe18..29f1095b084 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -18,7 +18,7 @@ void __init i386_start_kernel(void) { reserve_trampoline_memory(); - reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); + reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); #ifdef CONFIG_BLK_DEV_INITRD /* Reserve INITRD */ diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index f5b27224769..70eaa852c73 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -100,7 +100,7 @@ void __init x86_64_start_reservations(char *real_mode_data) reserve_trampoline_memory(); - reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); + reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); #ifdef CONFIG_BLK_DEV_INITRD /* Reserve INITRD */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f28c56e6bf9..e6b742d2a3f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -114,6 +114,9 @@ unsigned int boot_cpu_id __read_mostly; +static __initdata unsigned long _brk_start = (unsigned long)__brk_base; +unsigned long _brk_end = (unsigned long)__brk_base; + #ifdef CONFIG_X86_64 int default_cpu_present_to_apicid(int mps_cpu) { @@ -337,6 +340,34 @@ static void __init relocate_initrd(void) } #endif +void * __init extend_brk(size_t size, size_t align) +{ + size_t mask = align - 1; + void *ret; + + BUG_ON(_brk_start == 0); + BUG_ON(align & mask); + + _brk_end = (_brk_end + mask) & ~mask; + BUG_ON((char *)(_brk_end + size) > __brk_limit); + + ret = (void *)_brk_end; + _brk_end += size; + + memset(ret, 0, size); + + return ret; +} + +static void __init reserve_brk(void) +{ + if (_brk_end > _brk_start) + reserve_early(__pa(_brk_start), __pa(_brk_end), "BRK"); + + /* Mark brk area as locked down and no longer taking any new allocations */ + _brk_start = 0; +} + static void __init reserve_initrd(void) { u64 ramdisk_image = boot_params.hdr.ramdisk_image; @@ -717,11 +748,7 @@ void __init setup_arch(char **cmdline_p) init_mm.start_code = (unsigned long) _text; init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; -#ifdef CONFIG_X86_32 - init_mm.brk = init_pg_tables_end + PAGE_OFFSET; -#else - init_mm.brk = (unsigned long) &_end; -#endif + init_mm.brk = _brk_end; code_resource.start = virt_to_phys(_text); code_resource.end = virt_to_phys(_etext)-1; @@ -842,6 +869,8 @@ void __init setup_arch(char **cmdline_p) setup_bios_corruption_check(); #endif + reserve_brk(); + /* max_pfn_mapped is updated here */ max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn< #include #include +#include #include #include #include @@ -95,7 +96,7 @@ static inline unsigned long highmap_start_pfn(void) static inline unsigned long highmap_end_pfn(void) { - return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT; + return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT; } #endif @@ -711,7 +712,7 @@ static int cpa_process_alias(struct cpa_data *cpa) * No need to redo, when the primary call touched the high * mapping already: */ - if (within(vaddr, (unsigned long) _text, (unsigned long) _end)) + if (within(vaddr, (unsigned long) _text, _brk_end)) return 0; /* diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index cb6afa4ec95..72f6a76dbfb 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1723,9 +1723,9 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, { pmd_t *kernel_pmd; - init_pg_tables_start = __pa(pgd); - init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; - max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024); + max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + + xen_start_info->nr_pt_frames * PAGE_SIZE + + 512*1024); kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); -- cgit v1.2.3-70-g09d2 From 0920dce7d5889634faa336f65833ee44f3b7651e Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 16 Mar 2009 00:15:18 +0900 Subject: x86, mm: remove unnecessary include file from iomap_32.c asm/highmem.h inclusion is added to use kmap_atomic_prot_pfn() by commit bb6d59ca927d855ffac567b35c0a790c67016103 Now kmap_atomic_prot_pfn is moved to iomap_32.c by commit dd63fdcc63f0f853b116b52e56200a0e0227cf5f So the asm/highmem.h inclusion in iomap_32.c is unnecessary now. Signed-off-by: Akinobu Mita LKML-Reference: <20090315151517.GA29074@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/mm/iomap_32.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 6e60ba698ce..699c9b2895a 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -18,7 +18,6 @@ #include #include -#include #include int is_io_mapping_possible(resource_size_t base, unsigned long size) -- cgit v1.2.3-70-g09d2 From ce4e240c279a31096f74afa6584a62d64a1ba8c8 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 17 Mar 2009 10:16:54 -0800 Subject: x86: add x2apic_wrmsr_fence() to x2apic flush tlb paths Impact: optimize APIC IPI related barriers Uncached MMIO accesses for xapic are inherently serializing and hence we don't need explicit barriers for xapic IPI paths. x2apic MSR writes/reads don't have serializing semantics and hence need a serializing instruction or mfence, to make all the previous memory stores globally visisble before the x2apic msr write for IPI. Add x2apic_wrmsr_fence() in flush tlb path to x2apic specific paths. Signed-off-by: Suresh Siddha Cc: Peter Zijlstra Cc: Oleg Nesterov Cc: Jens Axboe Cc: Linus Torvalds Cc: "Paul E. McKenney" Cc: Rusty Russell Cc: Steven Rostedt Cc: "steiner@sgi.com" Cc: Nick Piggin LKML-Reference: <1237313814.27006.203.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 10 ++++++++++ arch/x86/kernel/apic/x2apic_cluster.c | 6 ++++++ arch/x86/kernel/apic/x2apic_phys.c | 6 ++++++ arch/x86/mm/tlb.c | 5 ----- 4 files changed, 22 insertions(+), 5 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 6d5b6f0900e..00f5962d82d 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -108,6 +108,16 @@ extern void native_apic_icr_write(u32 low, u32 id); extern u64 native_apic_icr_read(void); #ifdef CONFIG_X86_X2APIC +/* + * Make previous memory operations globally visible before + * sending the IPI through x2apic wrmsr. We need a serializing instruction or + * mfence for this. + */ +static inline void x2apic_wrmsr_fence(void) +{ + asm volatile("mfence" : : : "memory"); +} + static inline void native_apic_msr_write(u32 reg, u32 v) { if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR || diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 8fb87b6dd63..4a903e2f0d1 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -57,6 +57,8 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) unsigned long query_cpu; unsigned long flags; + x2apic_wrmsr_fence(); + local_irq_save(flags); for_each_cpu(query_cpu, mask) { __x2apic_send_IPI_dest( @@ -73,6 +75,8 @@ static void unsigned long query_cpu; unsigned long flags; + x2apic_wrmsr_fence(); + local_irq_save(flags); for_each_cpu(query_cpu, mask) { if (query_cpu == this_cpu) @@ -90,6 +94,8 @@ static void x2apic_send_IPI_allbutself(int vector) unsigned long query_cpu; unsigned long flags; + x2apic_wrmsr_fence(); + local_irq_save(flags); for_each_online_cpu(query_cpu) { if (query_cpu == this_cpu) diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 23625b9f98b..a284359627e 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -58,6 +58,8 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) unsigned long query_cpu; unsigned long flags; + x2apic_wrmsr_fence(); + local_irq_save(flags); for_each_cpu(query_cpu, mask) { __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), @@ -73,6 +75,8 @@ static void unsigned long query_cpu; unsigned long flags; + x2apic_wrmsr_fence(); + local_irq_save(flags); for_each_cpu(query_cpu, mask) { if (query_cpu != this_cpu) @@ -89,6 +93,8 @@ static void x2apic_send_IPI_allbutself(int vector) unsigned long query_cpu; unsigned long flags; + x2apic_wrmsr_fence(); + local_irq_save(flags); for_each_online_cpu(query_cpu) { if (query_cpu == this_cpu) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index a654d59e448..821e97017e9 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -186,11 +186,6 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, cpumask_andnot(to_cpumask(f->flush_cpumask), cpumask, cpumask_of(smp_processor_id())); - /* - * Make the above memory operations globally visible before - * sending the IPI. - */ - smp_mb(); /* * We have to send the IPI only to * CPUs affected. -- cgit v1.2.3-70-g09d2