From 734269521e320ad14ed39ae9b64d482b9028dcd2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Feb 2009 16:29:07 +0900 Subject: vmalloc: call flush_cache_vunmap() from unmap_kernel_range() Impact: proper vcache flush on unmap_kernel_range() flush_cache_vunmap() should be called before pages are unmapped. Add a call to it in unmap_kernel_range(). Signed-off-by: Tejun Heo --- mm/vmalloc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'mm/vmalloc.c') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 75f49d312e8..c37924a2ee3 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1012,6 +1012,8 @@ void __init vmalloc_init(void) void unmap_kernel_range(unsigned long addr, unsigned long size) { unsigned long end = addr + size; + + flush_cache_vunmap(addr, end); vunmap_page_range(addr, end); flush_tlb_kernel_range(addr, end); } -- cgit v1.2.3-70-g09d2 From f0aa6617903648077dffe5cfcf7c4458f4610fa7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Feb 2009 16:29:08 +0900 Subject: vmalloc: implement vm_area_register_early() Impact: allow multiple early vm areas There are places where kernel VM area needs to be allocated before vmalloc is initialized. This is done by allocating static vm_struct, initializing several fields and linking it to vmlist and later vmalloc initialization picking up these from vmlist. This is currently done manually and if there's more than one such areas, there's no defined way to arbitrate who gets which address. This patch implements vm_area_register_early(), which takes vm_area struct with flags and size initialized, assigns address to it and puts it on the vmlist. This way, multiple early vm areas can determine which addresses they should use. The only current user - alpha mm init - is converted to use it. Signed-off-by: Tejun Heo --- arch/alpha/mm/init.c | 20 +++++++++++++------- include/linux/vmalloc.h | 1 + mm/vmalloc.c | 24 ++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 7 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index 5d7a16eab31..df6df025ded 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -189,9 +189,21 @@ callback_init(void * kernel_end) if (alpha_using_srm) { static struct vm_struct console_remap_vm; - unsigned long vaddr = VMALLOC_START; + unsigned long nr_pages = 0; + unsigned long vaddr; unsigned long i, j; + /* calculate needed size */ + for (i = 0; i < crb->map_entries; ++i) + nr_pages += crb->map[i].count; + + /* register the vm area */ + console_remap_vm.flags = VM_ALLOC; + console_remap_vm.size = nr_pages << PAGE_SHIFT; + vm_area_register_early(&console_remap_vm); + + vaddr = (unsigned long)consle_remap_vm.addr; + /* Set up the third level PTEs and update the virtual addresses of the CRB entries. */ for (i = 0; i < crb->map_entries; ++i) { @@ -213,12 +225,6 @@ callback_init(void * kernel_end) vaddr += PAGE_SIZE; } } - - /* Let vmalloc know that we've allocated some space. */ - console_remap_vm.flags = VM_ALLOC; - console_remap_vm.addr = (void *) VMALLOC_START; - console_remap_vm.size = vaddr - VMALLOC_START; - vmlist = &console_remap_vm; } callback_init_done = 1; diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 506e7620a98..bbc05139229 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -106,5 +106,6 @@ extern long vwrite(char *buf, char *addr, unsigned long count); */ extern rwlock_t vmlist_lock; extern struct vm_struct *vmlist; +extern __init void vm_area_register_early(struct vm_struct *vm); #endif /* _LINUX_VMALLOC_H */ diff --git a/mm/vmalloc.c b/mm/vmalloc.c index c37924a2ee3..d206261ad9e 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -982,6 +983,29 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t pro } EXPORT_SYMBOL(vm_map_ram); +/** + * vm_area_register_early - register vmap area early during boot + * @vm: vm_struct to register + * @size: size of area to register + * + * This function is used to register kernel vm area before + * vmalloc_init() is called. @vm->size and @vm->flags should contain + * proper values on entry and other fields should be zero. On return, + * vm->addr contains the allocated address. + * + * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING. + */ +void __init vm_area_register_early(struct vm_struct *vm) +{ + static size_t vm_init_off __initdata; + + vm->addr = (void *)VMALLOC_START + vm_init_off; + vm_init_off = PFN_ALIGN(vm_init_off + vm->size); + + vm->next = vmlist; + vmlist = vm; +} + void __init vmalloc_init(void) { struct vmap_area *va; -- cgit v1.2.3-70-g09d2 From 8fc48985006da4ceba24508db64ec77fc0dfe3bb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Feb 2009 16:29:08 +0900 Subject: vmalloc: add un/map_kernel_range_noflush() Impact: two more public map/unmap functions Implement map_kernel_range_noflush() and unmap_kernel_range_noflush(). These functions respectively map and unmap address range in kernel VM area but doesn't do any vcache or tlb flushing. These will be used by new percpu allocator. Signed-off-by: Tejun Heo Cc: Nick Piggin --- include/linux/vmalloc.h | 3 +++ mm/vmalloc.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 67 insertions(+), 3 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index bbc05139229..599ba798431 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -91,6 +91,9 @@ extern struct vm_struct *remove_vm_area(const void *addr); extern int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages); +extern int map_kernel_range_noflush(unsigned long start, unsigned long size, + pgprot_t prot, struct page **pages); +extern void unmap_kernel_range_noflush(unsigned long addr, unsigned long size); extern void unmap_kernel_range(unsigned long addr, unsigned long size); /* Allocate/destroy a 'vmalloc' VM area. */ diff --git a/mm/vmalloc.c b/mm/vmalloc.c index d206261ad9e..224eca9650a 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -153,8 +153,8 @@ static int vmap_pud_range(pgd_t *pgd, unsigned long addr, * * Ie. pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N] */ -static int vmap_page_range(unsigned long start, unsigned long end, - pgprot_t prot, struct page **pages) +static int vmap_page_range_noflush(unsigned long start, unsigned long end, + pgprot_t prot, struct page **pages) { pgd_t *pgd; unsigned long next; @@ -170,13 +170,22 @@ static int vmap_page_range(unsigned long start, unsigned long end, if (err) break; } while (pgd++, addr = next, addr != end); - flush_cache_vmap(start, end); if (unlikely(err)) return err; return nr; } +static int vmap_page_range(unsigned long start, unsigned long end, + pgprot_t prot, struct page **pages) +{ + int ret; + + ret = vmap_page_range_noflush(start, end, prot, pages); + flush_cache_vmap(start, end); + return ret; +} + static inline int is_vmalloc_or_module_addr(const void *x) { /* @@ -1033,6 +1042,58 @@ void __init vmalloc_init(void) vmap_initialized = true; } +/** + * map_kernel_range_noflush - map kernel VM area with the specified pages + * @addr: start of the VM area to map + * @size: size of the VM area to map + * @prot: page protection flags to use + * @pages: pages to map + * + * Map PFN_UP(@size) pages at @addr. The VM area @addr and @size + * specify should have been allocated using get_vm_area() and its + * friends. + * + * NOTE: + * This function does NOT do any cache flushing. The caller is + * responsible for calling flush_cache_vmap() on to-be-mapped areas + * before calling this function. + * + * RETURNS: + * The number of pages mapped on success, -errno on failure. + */ +int map_kernel_range_noflush(unsigned long addr, unsigned long size, + pgprot_t prot, struct page **pages) +{ + return vmap_page_range_noflush(addr, addr + size, prot, pages); +} + +/** + * unmap_kernel_range_noflush - unmap kernel VM area + * @addr: start of the VM area to unmap + * @size: size of the VM area to unmap + * + * Unmap PFN_UP(@size) pages at @addr. The VM area @addr and @size + * specify should have been allocated using get_vm_area() and its + * friends. + * + * NOTE: + * This function does NOT do any cache flushing. The caller is + * responsible for calling flush_cache_vunmap() on to-be-mapped areas + * before calling this function and flush_tlb_kernel_range() after. + */ +void unmap_kernel_range_noflush(unsigned long addr, unsigned long size) +{ + vunmap_page_range(addr, addr + size); +} + +/** + * unmap_kernel_range - unmap kernel VM area and flush cache and TLB + * @addr: start of the VM area to unmap + * @size: size of the VM area to unmap + * + * Similar to unmap_kernel_range_noflush() but flushes vcache before + * the unmapping and tlb after. + */ void unmap_kernel_range(unsigned long addr, unsigned long size) { unsigned long end = addr + size; -- cgit v1.2.3-70-g09d2 From c0c0a29379b5848aec2e8f1c58d853d3cb7118b8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Feb 2009 11:57:21 +0900 Subject: vmalloc: add @align to vm_area_register_early() Impact: allow larger alignment for early vmalloc area allocation Some early vmalloc users might want larger alignment, for example, for custom large page mapping. Add @align to vm_area_register_early(). While at it, drop docbook comment on non-existent @size. Signed-off-by: Tejun Heo Cc: Nick Piggin Cc: Ivan Kokshaysky --- arch/alpha/mm/init.c | 2 +- include/linux/vmalloc.h | 2 +- mm/percpu.c | 2 +- mm/vmalloc.c | 11 +++++++---- 4 files changed, 10 insertions(+), 7 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index df6df025ded..91eddd8505d 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -200,7 +200,7 @@ callback_init(void * kernel_end) /* register the vm area */ console_remap_vm.flags = VM_ALLOC; console_remap_vm.size = nr_pages << PAGE_SHIFT; - vm_area_register_early(&console_remap_vm); + vm_area_register_early(&console_remap_vm, PAGE_SIZE); vaddr = (unsigned long)consle_remap_vm.addr; diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 599ba798431..2f6994fdf0e 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -109,6 +109,6 @@ extern long vwrite(char *buf, char *addr, unsigned long count); */ extern rwlock_t vmlist_lock; extern struct vm_struct *vmlist; -extern __init void vm_area_register_early(struct vm_struct *vm); +extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); #endif /* _LINUX_VMALLOC_H */ diff --git a/mm/percpu.c b/mm/percpu.c index ed92caa2aa3..41e7a5f5ab1 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -860,7 +860,7 @@ size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn, /* init and register vm area */ static_vm.flags = VM_ALLOC; static_vm.size = pcpu_chunk_size; - vm_area_register_early(&static_vm); + vm_area_register_early(&static_vm, PAGE_SIZE); /* init static_chunk */ static_chunk = alloc_bootmem(pcpu_chunk_struct_size); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 224eca9650a..366ae9ea6af 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -995,7 +995,7 @@ EXPORT_SYMBOL(vm_map_ram); /** * vm_area_register_early - register vmap area early during boot * @vm: vm_struct to register - * @size: size of area to register + * @align: requested alignment * * This function is used to register kernel vm area before * vmalloc_init() is called. @vm->size and @vm->flags should contain @@ -1004,12 +1004,15 @@ EXPORT_SYMBOL(vm_map_ram); * * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING. */ -void __init vm_area_register_early(struct vm_struct *vm) +void __init vm_area_register_early(struct vm_struct *vm, size_t align) { static size_t vm_init_off __initdata; + unsigned long addr; + + addr = ALIGN(VMALLOC_START + vm_init_off, align); + vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START; - vm->addr = (void *)VMALLOC_START + vm_init_off; - vm_init_off = PFN_ALIGN(vm_init_off + vm->size); + vm->addr = (void *)addr; vm->next = vmlist; vmlist = vm; -- cgit v1.2.3-70-g09d2 From 34754b69a6f87aa6aa2860525a82f12532f83afd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 25 Feb 2009 16:04:03 +0100 Subject: x86: make vmap yell louder when it is used under irqs_disabled() Signed-off-by: Ingo Molnar --- arch/x86/kernel/alternative.c | 6 +++--- mm/vmalloc.c | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index a84ac7b570e..6907b8e85d5 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -498,12 +498,12 @@ void *text_poke_early(void *addr, const void *opcode, size_t len) */ void *__kprobes text_poke(void *addr, const void *opcode, size_t len) { - unsigned long flags; char *vaddr; int nr_pages = 2; struct page *pages[2]; int i; + might_sleep(); if (!core_kernel_text((unsigned long)addr)) { pages[0] = vmalloc_to_page(addr); pages[1] = vmalloc_to_page(addr + PAGE_SIZE); @@ -517,9 +517,9 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len) nr_pages = 1; vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); BUG_ON(!vaddr); - local_irq_save(flags); + local_irq_disable(); memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len); - local_irq_restore(flags); + local_irq_enable(); vunmap(vaddr); sync_core(); /* Could also do a CLFLUSH here to speed up CPU recovery; but diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 4dd2636d0b9..f83a70167b9 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1257,6 +1257,7 @@ EXPORT_SYMBOL(vfree); void vunmap(const void *addr) { BUG_ON(in_interrupt()); + might_sleep(); __vunmap(addr, 0); } EXPORT_SYMBOL(vunmap); @@ -1276,6 +1277,8 @@ void *vmap(struct page **pages, unsigned int count, { struct vm_struct *area; + might_sleep(); + if (count > num_physpages) return NULL; -- cgit v1.2.3-70-g09d2