From c1279c4ef37a06ba708e6b1f6fd98b45c52770f6 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 6 Jan 2009 14:39:18 -0800 Subject: mm: vmalloc tweak failure printk If we can't service a vmalloc allocation, show size of the allocation that actually failed. Useful for debugging. Signed-off-by: Glauber Costa Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 7465f22fec0..2644afb9d6a 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -381,8 +381,9 @@ found: goto retry; } if (printk_ratelimit()) - printk(KERN_WARNING "vmap allocation failed: " - "use vmalloc= to increase size.\n"); + printk(KERN_WARNING + "vmap allocation for size %lu failed: " + "use vmalloc= to increase size.\n", size); return ERR_PTR(-EBUSY); } -- cgit v1.2.3-70-g09d2 From 848778483351e90f9a2c587bdbe0c78b17c1e30b Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 6 Jan 2009 14:39:19 -0800 Subject: mm: vmalloc improve vmallocinfo If we do that, output of files like /proc/vmallocinfo will show things like "vmalloc_32", "vmalloc_user", or whomever the caller was as the caller. This info is not as useful as the real caller of the allocation. So, proposal is to call __vmalloc_node node directly, with matching parameters to save the caller information Signed-off-by: Glauber Costa Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 2644afb9d6a..b62ea569aa4 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1376,7 +1376,8 @@ void *vmalloc_user(unsigned long size) struct vm_struct *area; void *ret; - ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL); + ret = __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, + PAGE_KERNEL, -1, __builtin_return_address(0)); if (ret) { area = find_vm_area(ret); area->flags |= VM_USERMAP; @@ -1421,7 +1422,8 @@ EXPORT_SYMBOL(vmalloc_node); void *vmalloc_exec(unsigned long size) { - return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC); + return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC, + -1, __builtin_return_address(0)); } #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32) @@ -1441,7 +1443,8 @@ void *vmalloc_exec(unsigned long size) */ void *vmalloc_32(unsigned long size) { - return __vmalloc(size, GFP_VMALLOC32, PAGE_KERNEL); + return __vmalloc_node(size, GFP_VMALLOC32, PAGE_KERNEL, + -1, __builtin_return_address(0)); } EXPORT_SYMBOL(vmalloc_32); @@ -1457,7 +1460,8 @@ void *vmalloc_32_user(unsigned long size) struct vm_struct *area; void *ret; - ret = __vmalloc(size, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL); + ret = __vmalloc_node(size, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL, + -1, __builtin_return_address(0)); if (ret) { area = find_vm_area(ret); area->flags |= VM_USERMAP; -- cgit v1.2.3-70-g09d2 From e97a630eb0f5b8b380fd67504de6cedebb489003 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 6 Jan 2009 14:39:19 -0800 Subject: mm: vmalloc use mutex for purge The vmalloc purge lock can be a mutex so we can sleep while a purge is going on (purge involves a global kernel TLB invalidate, so it can take quite a while). Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index b62ea569aa4..78689cba178 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -473,7 +474,7 @@ static atomic_t vmap_lazy_nr = ATOMIC_INIT(0); static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, int sync, int force_flush) { - static DEFINE_SPINLOCK(purge_lock); + static DEFINE_MUTEX(purge_lock); LIST_HEAD(valist); struct vmap_area *va; int nr = 0; @@ -484,10 +485,10 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, * the case that isn't actually used at the moment anyway. */ if (!sync && !force_flush) { - if (!spin_trylock(&purge_lock)) + if (!mutex_trylock(&purge_lock)) return; } else - spin_lock(&purge_lock); + mutex_lock(&purge_lock); rcu_read_lock(); list_for_each_entry_rcu(va, &vmap_area_list, list) { @@ -519,7 +520,7 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, __free_vmap_area(va); spin_unlock(&vmap_area_lock); } - spin_unlock(&purge_lock); + mutex_unlock(&purge_lock); } /* -- cgit v1.2.3-70-g09d2 From cd52858c73f9f7df859a08fb08496ca39b9b3d8d Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 6 Jan 2009 14:39:20 -0800 Subject: mm: vmalloc make lazy unmapping configurable Lazy unmapping in the vmalloc code has now opened the possibility for use after free bugs to go undetected. We can catch those by forcing an unmap and flush (which is going to be slow, but that's what happens). Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'mm/vmalloc.c') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 78689cba178..c5db9a7264d 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -434,6 +434,27 @@ static void unmap_vmap_area(struct vmap_area *va) vunmap_page_range(va->va_start, va->va_end); } +static void vmap_debug_free_range(unsigned long start, unsigned long end) +{ + /* + * Unmap page tables and force a TLB flush immediately if + * CONFIG_DEBUG_PAGEALLOC is set. This catches use after free + * bugs similarly to those in linear kernel virtual address + * space after a page has been freed. + * + * All the lazy freeing logic is still retained, in order to + * minimise intrusiveness of this debugging feature. + * + * This is going to be *slow* (linear kernel virtual address + * debugging doesn't do a broadcast TLB flush so it is a lot + * faster). + */ +#ifdef CONFIG_DEBUG_PAGEALLOC + vunmap_page_range(start, end); + flush_tlb_kernel_range(start, end); +#endif +} + /* * lazy_max_pages is the maximum amount of virtual address space we gather up * before attempting to purge with a TLB flush. @@ -914,6 +935,7 @@ void vm_unmap_ram(const void *mem, unsigned int count) BUG_ON(addr & (PAGE_SIZE-1)); debug_check_no_locks_freed(mem, size); + vmap_debug_free_range(addr, addr+size); if (likely(count <= VMAP_MAX_ALLOC)) vb_free(mem, size); @@ -1130,6 +1152,8 @@ struct vm_struct *remove_vm_area(const void *addr) if (va && va->flags & VM_VM_AREA) { struct vm_struct *vm = va->private; struct vm_struct *tmp, **p; + + vmap_debug_free_range(va->va_start, va->va_end); free_unmap_vmap_area(va); vm->size -= PAGE_SIZE; -- cgit v1.2.3-70-g09d2