From d61fc44853f46fb002228b18aa5f30db21fcd4ac Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: x86: mmiotrace, preview 2 Kconfig.debug, Makefile and testmmiotrace.c style fixes. Use real mutex instead of mutex. Fix failure path in register probe func. kmmio: RCU read-locked over single stepping. Generate mapping id's. Make mmio-mod.c built-in and rewrite its locking. Add debugfs file to enable/disable mmiotracing. kmmio: use irqsave spinlocks. Lots of cleanups in mmio-mod.c Marker file moved from /proc into debugfs. Call mmiotrace entrypoints directly from ioremap.c. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/ioremap.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/x86/mm/ioremap.c') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 71bb3159031..8927c878544 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -126,6 +127,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, unsigned long new_prot_val; pgprot_t prot; int retval; + void __iomem *ret_addr; /* Don't allow wraparound or zero size */ last_addr = phys_addr + size - 1; @@ -233,7 +235,10 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, return NULL; } - return (void __iomem *) (vaddr + offset); + ret_addr = (void __iomem *) (vaddr + offset); + mmiotrace_ioremap(phys_addr, size, ret_addr); + + return ret_addr; } /** @@ -325,6 +330,8 @@ void iounmap(volatile void __iomem *addr) addr = (volatile void __iomem *) (PAGE_MASK & (unsigned long __force)addr); + mmiotrace_iounmap(addr); + /* Use the vm area unlocked, assuming the caller ensures there isn't another iounmap for the same address in parallel. Reuse of the virtual address is prevented by -- cgit v1.2.3-70-g09d2 From 87e547fe41a8b57d6d80afc67a0031fbe477eb0d Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:03 +0200 Subject: x86 mmiotrace: fix page-unaligned ioremaps mmiotrace_ioremap() expects to receive the original unaligned map phys address and size. Also fix {un,}register_kmmio_probe() to deal properly with unaligned size. 
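The page-span arithmetic behind this fix is easy to get wrong, so here is a minimal user-space sketch (PAGE_SIZE, PAGE_MASK and the probe struct are stand-ins, not the kernel definitions) of how an unaligned address/length pair turns into the number of pages that must be armed:

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_MASK	(~(PAGE_SIZE - 1))

/* Hypothetical stand-in for struct kmmio_probe: only addr and len matter here. */
struct probe {
	unsigned long addr;	/* page-unaligned virtual address */
	unsigned long len;	/* page-unaligned length */
};

/* Number of whole pages that must be armed to cover [addr, addr + len). */
static unsigned long pages_to_arm(const struct probe *p)
{
	/* Same idea as size_lim in the register_kmmio_probe() hunk below:
	 * extend the length by the in-page offset, then walk it in
	 * PAGE_SIZE steps. */
	unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
	unsigned long size, n = 0;

	for (size = 0; size < size_lim; size += PAGE_SIZE)
		n++;
	return n;
}

int main(void)
{
	struct probe p = { .addr = 0xd0001f00UL, .len = 0x300UL };

	/* 0xf00 of in-page offset plus 0x300 of length crosses a page
	 * boundary: expect 2 pages, not 1. */
	printf("pages to arm: %lu\n", pages_to_arm(&p));
	return 0;
}

With the old loop condition (size < p->len) the example above would arm only the first page, and accesses to the second page of the mapping would go untraced.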
Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 4 +++- arch/x86/mm/kmmio.c | 13 +++++++++++-- arch/x86/mm/mmio-mod.c | 1 + 3 files changed, 15 insertions(+), 3 deletions(-) (limited to 'arch/x86/mm/ioremap.c') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 8927c878544..a7c80a6e862 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -123,6 +123,8 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, { unsigned long pfn, offset, vaddr; resource_size_t last_addr; + const resource_size_t unaligned_phys_addr = phys_addr; + const unsigned long unaligned_size = size; struct vm_struct *area; unsigned long new_prot_val; pgprot_t prot; @@ -236,7 +238,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, } ret_addr = (void __iomem *) (vaddr + offset); - mmiotrace_ioremap(phys_addr, size, ret_addr); + mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr); return ret_addr; } diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 6a92d9111b6..93b1797666c 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -351,11 +351,19 @@ static void release_kmmio_fault_page(unsigned long page, } } +/* + * With page-unaligned ioremaps, one or two armed pages may contain + * addresses from outside the intended mapping. Events for these addresses + * are currently silently dropped. The events may result only from programming + * mistakes by accessing addresses before the beginning or past the end of a + * mapping. + */ int register_kmmio_probe(struct kmmio_probe *p) { unsigned long flags; int ret = 0; unsigned long size = 0; + const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); spin_lock_irqsave(&kmmio_lock, flags); if (get_kmmio_probe(p->addr)) { @@ -364,7 +372,7 @@ int register_kmmio_probe(struct kmmio_probe *p) } kmmio_count++; list_add_rcu(&p->list, &kmmio_probes); - while (size < p->len) { + while (size < size_lim) { if (add_kmmio_fault_page(p->addr + size)) pr_err("kmmio: Unable to set page fault.\n"); size += PAGE_SIZE; @@ -436,11 +444,12 @@ void unregister_kmmio_probe(struct kmmio_probe *p) { unsigned long flags; unsigned long size = 0; + const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); struct kmmio_fault_page *release_list = NULL; struct kmmio_delayed_release *drelease; spin_lock_irqsave(&kmmio_lock, flags); - while (size < p->len) { + while (size < size_lim) { release_kmmio_fault_page(p->addr + size, &release_list); size += PAGE_SIZE; } diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index a8d2a0019da..278998c1998 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -280,6 +280,7 @@ static void ioremap_trace_core(unsigned long offset, unsigned long size, { static atomic_t next_id; struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL); + /* These are page-unaligned. */ struct mmiotrace_map map = { .phys = offset, .virt = (unsigned long)addr, -- cgit v1.2.3-70-g09d2 From 499f8f84b8324ba27d756e03f373fa16eeed9ccc Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Tue, 10 Jun 2008 16:06:21 +0200 Subject: x86: rename pat_wc_enabled to pat_enabled BTW, what does pat_wc_enabled stand for? Does it mean "write-combining"? Currently it is used to globally switch on or off PAT support. Thus I renamed it to pat_enabled. I think this increases readability (and hope that I didn't miss something). 
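After the rename the flag reads naturally at its call sites. For reference, this is essentially the patched ioremap_wc() from the hunk below, reproduced with a stand-in name as a reading aid (it still needs the kernel's internal helpers, so it is a sketch, not a drop-in function):

/* Write-combining mappings are only available with PAT; without it the
 * caller silently gets the safe uncached-minus mapping instead. */
static void __iomem *wc_ioremap_sketch(resource_size_t phys_addr,
				       unsigned long size)
{
	if (pat_enabled)	/* formerly pat_wc_enabled */
		return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
					__builtin_return_address(0));
	return ioremap_nocache(phys_addr, size);
}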
Signed-off-by: Andreas Herrmann Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 4 ++-- arch/x86/mm/pageattr.c | 2 +- arch/x86/mm/pat.c | 16 ++++++++-------- arch/x86/pci/i386.c | 4 ++-- include/asm-x86/pat.h | 4 ++-- 5 files changed, 15 insertions(+), 15 deletions(-) (limited to 'arch/x86/mm/ioremap.c') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 71bb3159031..ddeafed1171 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -261,7 +261,7 @@ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) { /* * Ideally, this should be: - * pat_wc_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS; + * pat_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS; * * Till we fix all X drivers to use ioremap_wc(), we will use * UC MINUS. @@ -285,7 +285,7 @@ EXPORT_SYMBOL(ioremap_nocache); */ void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size) { - if (pat_wc_enabled) + if (pat_enabled) return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC, __builtin_return_address(0)); else diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 60bcb5b6a37..6916fe4bf0c 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -805,7 +805,7 @@ int _set_memory_wc(unsigned long addr, int numpages) int set_memory_wc(unsigned long addr, int numpages) { - if (!pat_wc_enabled) + if (!pat_enabled) return set_memory_uc(addr, numpages); if (reserve_memtype(addr, addr + numpages * PAGE_SIZE, diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index a8b69bb2697..4beccea0897 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -26,11 +26,11 @@ #include #ifdef CONFIG_X86_PAT -int __read_mostly pat_wc_enabled = 1; +int __read_mostly pat_enabled = 1; void __cpuinit pat_disable(char *reason) { - pat_wc_enabled = 0; + pat_enabled = 0; printk(KERN_INFO "%s\n", reason); } @@ -72,7 +72,7 @@ void pat_init(void) { u64 pat; - if (!pat_wc_enabled) + if (!pat_enabled) return; /* Paranoia check. */ @@ -225,8 +225,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, unsigned long actual_type; int err = 0; - /* Only track when pat_wc_enabled */ - if (!pat_wc_enabled) { + /* Only track when pat_enabled */ + if (!pat_enabled) { /* This is identical to page table setting without PAT */ if (ret_type) { if (req_type == -1) { @@ -440,8 +440,8 @@ int free_memtype(u64 start, u64 end) struct memtype *ml; int err = -EINVAL; - /* Only track when pat_wc_enabled */ - if (!pat_wc_enabled) { + /* Only track when pat_enabled */ + if (!pat_enabled) { return 0; } @@ -535,7 +535,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, * caching for the high addresses through the KEN pin, but * we maintain the tradition of paranoia in this code. */ - if (!pat_wc_enabled && + if (!pat_enabled && !(boot_cpu_has(X86_FEATURE_MTRR) || boot_cpu_has(X86_FEATURE_K6_MTRR) || boot_cpu_has(X86_FEATURE_CYRIX_ARR) || diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 10fb308fded..6ccd7a108cd 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -299,9 +299,9 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, return -EINVAL; prot = pgprot_val(vma->vm_page_prot); - if (pat_wc_enabled && write_combine) + if (pat_enabled && write_combine) prot |= _PAGE_CACHE_WC; - else if (pat_wc_enabled || boot_cpu_data.x86 > 3) + else if (pat_enabled || boot_cpu_data.x86 > 3) /* * ioremap() and ioremap_nocache() defaults to UC MINUS for now. 
* To avoid attribute conflicts, request UC MINUS here diff --git a/include/asm-x86/pat.h b/include/asm-x86/pat.h index 6fa9710ae09..7edc4730721 100644 --- a/include/asm-x86/pat.h +++ b/include/asm-x86/pat.h @@ -4,10 +4,10 @@ #include #ifdef CONFIG_X86_PAT -extern int pat_wc_enabled; +extern int pat_enabled; extern void validate_pat_support(struct cpuinfo_x86 *c); #else -static const int pat_wc_enabled; +static const int pat_enabled; static inline void validate_pat_support(struct cpuinfo_x86 *c) { } #endif -- cgit v1.2.3-70-g09d2 From bcc643dc287cb732e96a1685ac130c3ae8b1c960 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Fri, 20 Jun 2008 21:58:46 +0200 Subject: x86: introduce macro to check whether an address range is in the ISA range Signed-off-by: Andreas Herrmann Cc: Venkatesh Pallipadi Cc: Suresh B Siddha Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 2 +- arch/x86/mm/pat.c | 5 ++--- include/asm-x86/e820.h | 1 + 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/mm/ioremap.c') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 7452eb31ed1..6d9960dd6f3 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -142,7 +142,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, /* * Don't remap the low PCI/ISA area, it's always mapped.. */ - if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS) + if (is_ISA_range(phys_addr, last_addr)) return (__force void __iomem *)phys_to_virt(phys_addr); /* diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 227df3ca9bf..5bbc22efe4e 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -215,7 +215,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, } /* Low ISA region is always mapped WB in page table. No need to track */ - if (start >= ISA_START_ADDRESS && (end - 1) <= ISA_END_ADDRESS) { + if (is_ISA_range(start, end - 1)) { if (ret_type) *ret_type = _PAGE_CACHE_WB; @@ -415,9 +415,8 @@ int free_memtype(u64 start, u64 end) } /* Low ISA region is always mapped WB. No need to track */ - if (start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS) { + if (is_ISA_range(start, end - 1)) return 0; - } spin_lock(&memtype_lock); list_for_each_entry(ml, &memtype_list, nd) { diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h index 7004251fc66..5103d0b2c46 100644 --- a/include/asm-x86/e820.h +++ b/include/asm-x86/e820.h @@ -24,6 +24,7 @@ struct e820map { #define ISA_START_ADDRESS 0xa0000 #define ISA_END_ADDRESS 0x100000 +#define is_ISA_range(s, e) ((s) >= ISA_START_ADDRESS && (e) < ISA_END_ADDRESS) #define BIOS_BEGIN 0x000a0000 #define BIOS_END 0x00100000 -- cgit v1.2.3-70-g09d2 From 6e92a5a6151f4a467b8c1bde9123c0a9d1a63339 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 12 May 2008 15:43:35 +0200 Subject: x86: add sparse annotations to ioremap arch/x86/mm/ioremap.c:308:11: error: incompatible types in comparison expression (different address spaces) Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/mm/ioremap.c') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 2b2bb3f9b68..01d76426971 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -318,8 +318,8 @@ void iounmap(volatile void __iomem *addr) * vm_area and by simply returning an address into the kernel mapping * of ISA space. So handle that here. 
*/ - if (addr >= phys_to_virt(ISA_START_ADDRESS) && - addr < phys_to_virt(ISA_END_ADDRESS)) + if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) && + (void __force *)addr < phys_to_virt(ISA_END_ADDRESS)) return; addr = (volatile void __iomem *) @@ -332,7 +332,7 @@ void iounmap(volatile void __iomem *addr) cpa takes care of the direct mappings. */ read_lock(&vmlist_lock); for (p = vmlist; p; p = p->next) { - if (p->addr == addr) + if (p->addr == (void __force *)addr) break; } read_unlock(&vmlist_lock); @@ -346,7 +346,7 @@ void iounmap(volatile void __iomem *addr) free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p)); /* Finally remove it */ - o = remove_vm_area((void *)addr); + o = remove_vm_area((void __force *)addr); BUG_ON(p != o || o == NULL); kfree(p); } @@ -365,7 +365,7 @@ void *xlate_dev_mem_ptr(unsigned long phys) if (page_is_ram(start >> PAGE_SHIFT)) return __va(phys); - addr = (void *)ioremap(start, PAGE_SIZE); + addr = (void __force *)ioremap(start, PAGE_SIZE); if (addr) addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); -- cgit v1.2.3-70-g09d2 From a7bf0bd5e6af7fe69342dabf2a3b721f0163469a Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 28 May 2008 15:02:14 +0100 Subject: build: add __page_aligned_data and __page_aligned_bss Making a variable page-aligned by using __attribute__((section(".data.page_aligned"))) is fragile because if sizeof(variable) is not also a multiple of page size, it leaves variables in the remainder of the section unaligned. This patch introduces two new qualifiers, __page_aligned_data and __page_aligned_bss to set the section *and* the alignment of variables. This makes page-aligned variables more robust because the linker will make sure they're aligned properly. Unfortunately it requires *all* page-aligned data to use these macros... 
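A short before/after illustration of the failure mode described above. The variable names and sizes are invented, 4096 stands in for PAGE_SIZE, and the macro body mirrors the linux/linkage.h addition in the diff below:

/* Fragile: the section itself starts page aligned, but this variable only
 * has char alignment; because its size (1000) is not a multiple of the page
 * size, whatever the linker places after it in .bss.page_aligned is no
 * longer on a page boundary. */
static char fw_scratch[1000]
	__attribute__((section(".bss.page_aligned")));

/* Robust: the new qualifier sets the section *and* forces page alignment,
 * so the linker pads as needed and every such variable lands on its own
 * page boundary. */
#define __page_aligned_bss \
	__attribute__((section(".bss.page_aligned"), aligned(4096)))

static char boot_stack[4096 * 4] __page_aligned_bss;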
Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup64.c | 2 +- arch/x86/mm/ioremap.c | 3 +-- include/linux/linkage.h | 4 ++++ 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/x86/mm/ioremap.c') diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c index 631ea6cc01d..fc1a56da824 100644 --- a/arch/x86/kernel/setup64.c +++ b/arch/x86/kernel/setup64.c @@ -40,7 +40,7 @@ EXPORT_SYMBOL(_cpu_pda); struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; -char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); +char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; unsigned long __supported_pte_mask __read_mostly = ~0UL; EXPORT_SYMBOL_GPL(__supported_pte_mask); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 416ea415f5c..0561fde56a5 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -394,8 +394,7 @@ static int __init early_ioremap_debug_setup(char *str) early_param("early_ioremap_debug", early_ioremap_debug_setup); static __initdata int after_paging_init; -static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] - __section(.bss.page_aligned); +static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss; static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) { diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 2119610b24f..9fd1f859021 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -1,6 +1,7 @@ #ifndef _LINUX_LINKAGE_H #define _LINUX_LINKAGE_H +#include #include #ifdef __cplusplus @@ -17,6 +18,9 @@ # define asmregparm #endif +#define __page_aligned_data __section(.data.page_aligned) __aligned(PAGE_SIZE) +#define __page_aligned_bss __section(.bss.page_aligned) __aligned(PAGE_SIZE) + /* * This is used by architectures to keep arguments on the stack * untouched by the compiler by keeping them live until the end. -- cgit v1.2.3-70-g09d2 From 4583ed514ea9ac844a6eb02d33120beaedf6837f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 25 Jun 2008 00:19:03 -0400 Subject: x86, 64-bit: unify early_ioremap The 32-bit early_ioremap will work equally well for 64-bit, so just use it. 
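Once unified, 32-bit and 64-bit share the same small API. A hedged sketch of the usual boot-time pattern it supports; the caller, its name and the firmware-table framing are invented for illustration, while the prototypes are the ones declared in asm-x86/io.h further down:

/* Early in boot the real ioremap() is not usable yet, so peek at a physical
 * location through a temporary fixmap-backed mapping and drop it again. */
static u32 __init read_boot_dword(unsigned long phys)
{
	void *va = early_ioremap(phys, sizeof(u32));
	u32 val;

	if (!va)
		return 0;
	val = *(volatile u32 *)va;	/* read through the temporary mapping */
	early_iounmap(va, sizeof(u32));	/* boot-time slots are scarce, release promptly */
	return val;
}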
Signed-off-by: Jeremy Fitzhardinge Cc: xen-devel Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 52 --------------------------------------------- arch/x86/mm/ioremap.c | 5 +---- include/asm-x86/fixmap_64.h | 13 ++++++++++++ include/asm-x86/io.h | 13 ++++++++++++ include/asm-x86/io_32.h | 12 ----------- 5 files changed, 27 insertions(+), 68 deletions(-) (limited to 'arch/x86/mm/ioremap.c') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index bfea7605eaa..9b6049133a8 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -257,58 +257,6 @@ static __meminit void unmap_low_page(void *adr) early_iounmap(adr, PAGE_SIZE); } -/* Must run before zap_low_mappings */ -__meminit void *early_ioremap(unsigned long addr, unsigned long size) -{ - pmd_t *pmd, *last_pmd; - unsigned long vaddr; - int i, pmds; - - pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; - vaddr = __START_KERNEL_map; - pmd = level2_kernel_pgt; - last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1; - - for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) { - for (i = 0; i < pmds; i++) { - if (pmd_present(pmd[i])) - goto continue_outer_loop; - } - vaddr += addr & ~PMD_MASK; - addr &= PMD_MASK; - - for (i = 0; i < pmds; i++, addr += PMD_SIZE) - set_pmd(pmd+i, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); - __flush_tlb_all(); - - return (void *)vaddr; -continue_outer_loop: - ; - } - printk(KERN_ERR "early_ioremap(0x%lx, %lu) failed\n", addr, size); - - return NULL; -} - -/* - * To avoid virtual aliases later: - */ -__meminit void early_iounmap(void *addr, unsigned long size) -{ - unsigned long vaddr; - pmd_t *pmd; - int i, pmds; - - vaddr = (unsigned long)addr; - pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; - pmd = level2_kernel_pgt + pmd_index(vaddr); - - for (i = 0; i < pmds; i++) - pmd_clear(pmd + i); - - __flush_tlb_all(); -} - static unsigned long __meminit phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) { diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 0561fde56a5..092b3d72498 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -381,8 +381,6 @@ void unxlate_dev_mem_ptr(unsigned long phys, void *addr) return; } -#ifdef CONFIG_X86_32 - int __initdata early_ioremap_debug; static int __init early_ioremap_debug_setup(char *str) @@ -483,6 +481,7 @@ static void __init __early_set_fixmap(enum fixed_addresses idx, return; } pte = early_ioremap_pte(addr); + if (pgprot_val(flags)) set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); else @@ -624,5 +623,3 @@ void __this_fixmap_does_not_exist(void) { WARN_ON(1); } - -#endif /* CONFIG_X86_32 */ diff --git a/include/asm-x86/fixmap_64.h b/include/asm-x86/fixmap_64.h index 626098823a0..c7dcc365f47 100644 --- a/include/asm-x86/fixmap_64.h +++ b/include/asm-x86/fixmap_64.h @@ -49,6 +49,19 @@ enum fixed_addresses { #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT FIX_OHCI1394_BASE, #endif + __end_of_permanent_fixed_addresses, + /* + * 256 temporary boot-time mappings, used by early_ioremap(), + * before ioremap() is functional. 
+ * + * We round it up to the next 512 pages boundary so that we + * can have a single pgd entry and a single pte table: + */ +#define NR_FIX_BTMAPS 64 +#define FIX_BTMAPS_NESTING 4 + FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 512 - + (__end_of_permanent_fixed_addresses & 511), + FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1, __end_of_fixed_addresses }; diff --git a/include/asm-x86/io.h b/include/asm-x86/io.h index 8e9eca93f9b..c63563df4ac 100644 --- a/include/asm-x86/io.h +++ b/include/asm-x86/io.h @@ -72,4 +72,17 @@ extern int ioremap_change_attr(unsigned long vaddr, unsigned long size, unsigned long prot_val); extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size); +/* + * early_ioremap() and early_iounmap() are for temporary early boot-time + * mappings, before the real ioremap() is functional. + * A boot-time mapping is currently limited to at most 16 pages. + */ +extern void early_ioremap_init(void); +extern void early_ioremap_clear(void); +extern void early_ioremap_reset(void); +extern void *early_ioremap(unsigned long offset, unsigned long size); +extern void early_iounmap(void *addr, unsigned long size); +extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); + + #endif /* _ASM_X86_IO_H */ diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h index d71be8df979..4df44ed5407 100644 --- a/include/asm-x86/io_32.h +++ b/include/asm-x86/io_32.h @@ -121,18 +121,6 @@ static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) extern void iounmap(volatile void __iomem *addr); -/* - * early_ioremap() and early_iounmap() are for temporary early boot-time - * mappings, before the real ioremap() is functional. - * A boot-time mapping is currently limited to at most 16 pages. - */ -extern void early_ioremap_init(void); -extern void early_ioremap_clear(void); -extern void early_ioremap_reset(void); -extern void *early_ioremap(unsigned long offset, unsigned long size); -extern void early_iounmap(void *addr, unsigned long size); -extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); - /* * ISA I/O bus memory addresses are 1:1 with the physical address. */ -- cgit v1.2.3-70-g09d2 From 4f9c11dd49fb73e1ec088b27ed6539681a445988 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 25 Jun 2008 00:19:19 -0400 Subject: x86, 64-bit: adjust mapping of physical pagetables to work with Xen This makes a few of changes to the construction of the initial pagetables to work better with paravirt_ops/Xen. The main areas are: 1. Support non-PSE mapping of memory, since Xen doesn't currently allow 2M pages to be mapped in guests. 2. Make sure that the ioremap alias of all pages are dropped before attaching the new page to the pagetable. This avoids having writable aliases of pagetable pages. 3. Preserve existing pagetable entries, rather than overwriting. Its possible that a fair amount of pagetable has already been constructed, so reuse what's already in place rather than ignoring and overwriting it. The algorithm relies on the invariant that any page which is part of the kernel pagetable is itself mapped in the linear memory area. This way, it can avoid using ioremap on a pagetable page. The invariant holds because it maps memory from low to high addresses, and also allocates memory from low to high. Each allocated page can map at least 2M of address space, so the mapped area will always progress much faster than the allocated area. 
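The "at least 2M" figure is plain page-table arithmetic; a stand-alone user-space check with x86-64 numbers hard-coded for illustration:

#include <stdio.h>

int main(void)
{
	unsigned long page_size = 4096;			/* one newly allocated table page */
	unsigned long pte_size  = 8;			/* one 64-bit page-table entry */
	unsigned long entries   = page_size / pte_size;	/* 512 PTEs fit in that page */
	unsigned long mapped    = entries * page_size;	/* address space they map */

	/* 512 * 4 KB = 2 MB, so every page spent on page tables extends the
	 * mapped region by far more than one page; pmd and pud pages cover
	 * even more, hence "at least". The mapped frontier therefore always
	 * stays ahead of the allocation frontier. */
	printf("one pte page maps %lu MB\n", mapped >> 20);
	return 0;
}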
It relies on the early boot code mapping enough pages to get started. Signed-off-by: Jeremy Fitzhardinge Cc: xen-devel Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 94 ++++++++++++++++++++++++++++++++++++++++++++------- arch/x86/mm/ioremap.c | 2 +- 2 files changed, 83 insertions(+), 13 deletions(-) (limited to 'arch/x86/mm/ioremap.c') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index d5d4b04d48a..363751dc3fb 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -253,6 +253,43 @@ static __meminit void unmap_low_page(void *adr) early_iounmap(adr, PAGE_SIZE); } +static void __meminit +phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end) +{ + unsigned pages = 0; + int i; + pte_t *pte = pte_page + pte_index(addr); + + for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) { + + if (addr >= end) { + if (!after_bootmem) { + for(; i < PTRS_PER_PTE; i++, pte++) + set_pte(pte, __pte(0)); + } + break; + } + + if (pte_val(*pte)) + continue; + + if (0) + printk(" pte=%p addr=%lx pte=%016lx\n", + pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte); + set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL)); + pages++; + } + update_page_count(PG_LEVEL_4K, pages); +} + +static void __meminit +phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end) +{ + pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd); + + phys_pte_init(pte, address, end); +} + static unsigned long __meminit phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) { @@ -261,7 +298,9 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) int i = pmd_index(address); for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) { + unsigned long pte_phys; pmd_t *pmd = pmd_page + pmd_index(address); + pte_t *pte; if (address >= end) { if (!after_bootmem) { @@ -271,12 +310,23 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) break; } - if (pmd_val(*pmd)) + if (pmd_val(*pmd)) { + phys_pte_update(pmd, address, end); + continue; + } + + if (cpu_has_pse) { + pages++; + set_pte((pte_t *)pmd, + pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); continue; + } - pages++; - set_pte((pte_t *)pmd, - pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); + pte = alloc_low_page(&pte_phys); + phys_pte_init(pte, address, end); + unmap_low_page(pte); + + pmd_populate_kernel(&init_mm, pmd, __va(pte_phys)); } update_page_count(PG_LEVEL_2M, pages); return address; @@ -333,11 +383,11 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) pmd = alloc_low_page(&pmd_phys); spin_lock(&init_mm.page_table_lock); - pud_populate(&init_mm, pud, __va(pmd_phys)); last_map_addr = phys_pmd_init(pmd, addr, end); + unmap_low_page(pmd); + pud_populate(&init_mm, pud, __va(pmd_phys)); spin_unlock(&init_mm.page_table_lock); - unmap_low_page(pmd); } __flush_tlb_all(); update_page_count(PG_LEVEL_1G, pages); @@ -345,16 +395,30 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) return last_map_addr; } +static unsigned long __meminit +phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end) +{ + pud_t *pud; + + pud = (pud_t *)pgd_page_vaddr(*pgd); + + return phys_pud_init(pud, addr, end); +} + static void __init find_early_table_space(unsigned long end) { - unsigned long puds, pmds, tables, start; + unsigned long puds, tables, start; puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; tables = round_up(puds * sizeof(pud_t), PAGE_SIZE); if (!direct_gbpages) { 
- pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; + unsigned long pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE); } + if (!cpu_has_pse) { + unsigned long ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; + tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE); + } /* * RED-PEN putting page tables only on node 0 could @@ -526,19 +590,25 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned lon unsigned long pud_phys; pud_t *pud; + next = start + PGDIR_SIZE; + if (next > end) + next = end; + + if (pgd_val(*pgd)) { + last_map_addr = phys_pud_update(pgd, __pa(start), __pa(end)); + continue; + } + if (after_bootmem) pud = pud_offset(pgd, start & PGDIR_MASK); else pud = alloc_low_page(&pud_phys); - next = start + PGDIR_SIZE; - if (next > end) - next = end; last_map_addr = phys_pud_init(pud, __pa(start), __pa(next)); + unmap_low_page(pud); if (!after_bootmem) pgd_populate(&init_mm, pgd_offset_k(start), __va(pud_phys)); - unmap_low_page(pud); } if (!after_bootmem) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 092b3d72498..45e546c4ba7 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -485,7 +485,7 @@ static void __init __early_set_fixmap(enum fixed_addresses idx, if (pgprot_val(flags)) set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); else - pte_clear(NULL, addr, pte); + pte_clear(&init_mm, addr, pte); __flush_tlb_one(addr); } -- cgit v1.2.3-70-g09d2 From a361ee5cb8011763ece7b4add393e206439db8b3 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Thu, 10 Jul 2008 10:09:59 +0200 Subject: x86: fix /dev/mem compatibility under PAT Add ioremap_default(), which gives a sane mapping without worrying about type conflicts. Use it in /dev/mem read in place of ioremap(), as with ioremap(), any mapping of the region (other than UC_MINUS) will cause a conflict and failure of /dev/mem read. Should address the vbetest failure reported at: http://bugzilla.kernel.org/show_bug.cgi?id=11057 Signed-off-by: Venkatesh Pallipadi Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'arch/x86/mm/ioremap.c') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 2b2bb3f9b68..d1b867101e5 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -300,6 +300,29 @@ void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size) } EXPORT_SYMBOL(ioremap_cache); +static void __iomem *ioremap_default(resource_size_t phys_addr, + unsigned long size) +{ + unsigned long flags; + void *ret; + int err; + + /* + * - WB for WB-able memory and no other conflicting mappings + * - UC_MINUS for non-WB-able memory with no other conflicting mappings + * - Inherit from confliting mappings otherwise + */ + err = reserve_memtype(phys_addr, phys_addr + size, -1, &flags); + if (err < 0) + return NULL; + + ret = (void *) __ioremap_caller(phys_addr, size, flags, + __builtin_return_address(0)); + + free_memtype(phys_addr, phys_addr + size); + return (void __iomem *)ret; +} + /** * iounmap - Free a IO remapping * @addr: virtual address from ioremap_* @@ -365,7 +388,7 @@ void *xlate_dev_mem_ptr(unsigned long phys) if (page_is_ram(start >> PAGE_SHIFT)) return __va(phys); - addr = (void *)ioremap(start, PAGE_SIZE); + addr = (void *)ioremap_default(start, PAGE_SIZE); if (addr) addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); -- cgit v1.2.3-70-g09d2
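The user-visible symptom lives entirely outside the kernel. A small user-space reproducer of the path this patch fixes: a plain read() of /dev/mem ends up in xlate_dev_mem_ptr(), which now maps non-RAM pages with ioremap_default() instead of ioremap(). The offset 0xc0000 (legacy video BIOS area) is only an example:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	unsigned char buf[64];
	int fd = open("/dev/mem", O_RDONLY);

	if (fd < 0)
		return 1;
	/* Before this patch, an existing conflicting PAT memtype for the
	 * region could make the kernel's internal ioremap() fail and the
	 * read() return an error instead of data. */
	if (pread(fd, buf, sizeof(buf), 0xc0000) == (ssize_t)sizeof(buf))
		printf("first bytes at 0xc0000: %02x %02x\n", buf[0], buf[1]);
	close(fd);
	return 0;
}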