From cef2ac3f6c8ab532e49cf69d05f540931ad8ee64 Mon Sep 17 00:00:00 2001 From: HATAYAMA Daisuke Date: Wed, 3 Jul 2013 15:02:17 -0700 Subject: vmalloc: make find_vm_area check in range Currently, __find_vmap_area searches for the kernel VM area starting at a given address. This patch changes this behavior so that it searches for the kernel VM area to which the address belongs. This change is needed by remap_vmalloc_range_partial to be introduced in later patch that receives any position of kernel VM area as target address. This patch changes the condition (addr > va->va_start) to the equivalent (addr >= va->va_end) by taking advantage of the fact that each kernel VM area is non-overlapping. Signed-off-by: HATAYAMA Daisuke Acked-by: KOSAKI Motohiro Cc: Vivek Goyal Cc: Atsushi Kumagai Cc: Lisa Mitchell Cc: Zhang Yanfei Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/vmalloc.c') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index d365724feb0..3875fa2f0f6 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -292,7 +292,7 @@ static struct vmap_area *__find_vmap_area(unsigned long addr) va = rb_entry(n, struct vmap_area, rb_node); if (addr < va->va_start) n = n->rb_left; - else if (addr > va->va_start) + else if (addr >= va->va_end) n = n->rb_right; else return va; -- cgit v1.2.3-70-g09d2 From e69e9d4aee712a22665f008ae0550bb3d7c7f7c1 Mon Sep 17 00:00:00 2001 From: HATAYAMA Daisuke Date: Wed, 3 Jul 2013 15:02:18 -0700 Subject: vmalloc: introduce remap_vmalloc_range_partial We want to allocate ELF note segment buffer on the 2nd kernel in vmalloc space and remap it to user-space in order to reduce the risk that memory allocation fails on system with huge number of CPUs and so with huge ELF note segment that exceeds 11-order block size. Although there's already remap_vmalloc_range for the purpose of remapping vmalloc memory to user-space, we need to specify user-space range via vma. 
Mmap on /proc/vmcore needs to remap range across multiple objects, so the interface that requires vma to cover full range is problematic. This patch introduces remap_vmalloc_range_partial that receives user-space range as a pair of base address and size and can be used for mmap on /proc/vmcore case. remap_vmalloc_range is rewritten using remap_vmalloc_range_partial. [akpm@linux-foundation.org: use PAGE_ALIGNED()] Signed-off-by: HATAYAMA Daisuke Cc: KOSAKI Motohiro Cc: Vivek Goyal Cc: Atsushi Kumagai Cc: Lisa Mitchell Cc: Zhang Yanfei Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 4 +++ mm/vmalloc.c | 67 +++++++++++++++++++++++++++++++++---------------- 2 files changed, 49 insertions(+), 22 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 7d5773a99f2..dd0a2c81052 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -82,6 +82,10 @@ extern void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot); extern void vunmap(const void *addr); +extern int remap_vmalloc_range_partial(struct vm_area_struct *vma, + unsigned long uaddr, void *kaddr, + unsigned long size); + extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, unsigned long pgoff); void vmalloc_sync_all(void); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 3875fa2f0f6..b7259906a80 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1476,10 +1476,9 @@ static void __vunmap(const void *addr, int deallocate_pages) if (!addr) return; - if ((PAGE_SIZE-1) & (unsigned long)addr) { - WARN(1, KERN_ERR "Trying to vfree() bad address (%p)\n", addr); + if (WARN(!PAGE_ALIGNED(addr), "Trying to vfree() bad address (%p)\n", + addr)); return; - } area = remove_vm_area(addr); if (unlikely(!area)) { @@ -2148,42 +2147,43 @@ finished: } /** - * remap_vmalloc_range - map vmalloc pages to userspace - * @vma: vma to cover (map full range of vma) - * @addr: vmalloc memory - * 
@pgoff: number of pages into addr before first page to map + * remap_vmalloc_range_partial - map vmalloc pages to userspace + * @vma: vma to cover + * @uaddr: target user address to start at + * @kaddr: virtual address of vmalloc kernel memory + * @size: size of map area * * Returns: 0 for success, -Exxx on failure * - * This function checks that addr is a valid vmalloc'ed area, and - * that it is big enough to cover the vma. Will return failure if - * that criteria isn't met. + * This function checks that @kaddr is a valid vmalloc'ed area, + * and that it is big enough to cover the range starting at + * @uaddr in @vma. Will return failure if that criteria isn't + * met. * * Similar to remap_pfn_range() (see mm/memory.c) */ -int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, - unsigned long pgoff) +int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr, + void *kaddr, unsigned long size) { struct vm_struct *area; - unsigned long uaddr = vma->vm_start; - unsigned long usize = vma->vm_end - vma->vm_start; - if ((PAGE_SIZE-1) & (unsigned long)addr) + size = PAGE_ALIGN(size); + + if (!PAGE_ALIGNED(uaddr) || !PAGE_ALIGNED(kaddr)) return -EINVAL; - area = find_vm_area(addr); + area = find_vm_area(kaddr); if (!area) return -EINVAL; if (!(area->flags & VM_USERMAP)) return -EINVAL; - if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE) + if (kaddr + size > area->addr + area->size) return -EINVAL; - addr += pgoff << PAGE_SHIFT; do { - struct page *page = vmalloc_to_page(addr); + struct page *page = vmalloc_to_page(kaddr); int ret; ret = vm_insert_page(vma, uaddr, page); @@ -2191,14 +2191,37 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, return ret; uaddr += PAGE_SIZE; - addr += PAGE_SIZE; - usize -= PAGE_SIZE; - } while (usize > 0); + kaddr += PAGE_SIZE; + size -= PAGE_SIZE; + } while (size > 0); vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; return 0; } +EXPORT_SYMBOL(remap_vmalloc_range_partial); + +/** + * 
remap_vmalloc_range - map vmalloc pages to userspace + * @vma: vma to cover (map full range of vma) + * @addr: vmalloc memory + * @pgoff: number of pages into addr before first page to map + * + * Returns: 0 for success, -Exxx on failure + * + * This function checks that addr is a valid vmalloc'ed area, and + * that it is big enough to cover the vma. Will return failure if + * that criteria isn't met. + * + * Similar to remap_pfn_range() (see mm/memory.c) + */ +int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, + unsigned long pgoff) +{ + return remap_vmalloc_range_partial(vma, vma->vm_start, + addr + (pgoff << PAGE_SHIFT), + vma->vm_end - vma->vm_start); +} EXPORT_SYMBOL(remap_vmalloc_range); /* -- cgit v1.2.3-70-g09d2 From d82b1d85760a8344d06272da67f0684243235fac Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Wed, 3 Jul 2013 15:04:47 -0700 Subject: mm, vmalloc: only call setup_vmalloc_vm() only in __get_vm_area_node() Now insert_vmalloc_vm only calls these two functions: - setup_vmalloc_vm: fill vm_struct and vmap_area instances - clear_vm_unlist: clear VM_UNLIST bit in vm_struct->flags So in __get_vm_area_node(), if the VM_UNLIST bit is unset in flags, that is, in the else branch here, we don't need to clear the VM_UNLIST bit in vm->flags since this bit is obviously not set. That is to say, we can simply call setup_vmalloc_vm instead of insert_vmalloc_vm here. And then we can even remove the if test here. 
Signed-off-by: Zhang Yanfei Acked-by: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index b7259906a80..d23e70ec45a 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1367,16 +1367,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, return NULL; } - /* - * When this function is called from __vmalloc_node_range, - * we add VM_UNLIST flag to avoid accessing uninitialized - * members of vm_struct such as pages and nr_pages fields. - * They will be set later. - */ - if (flags & VM_UNLIST) - setup_vmalloc_vm(area, va, flags, caller); - else - insert_vmalloc_vm(area, va, flags, caller); + setup_vmalloc_vm(area, va, flags, caller); return area; } -- cgit v1.2.3-70-g09d2 From 3645cb4a4eb2002dad17b314559badf8a20e55a7 Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Wed, 3 Jul 2013 15:04:48 -0700 Subject: mm, vmalloc: call setup_vmalloc_vm() instead of insert_vmalloc_vm() Here we pass flags with only VM_ALLOC bit set, it is unnecessary to call clear_vm_unlist to clear VM_UNLIST bit. So use setup_vmalloc_vm instead of insert_vmalloc_vm. 
Signed-off-by: Zhang Yanfei Acked-by: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index d23e70ec45a..db48d513598 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2526,8 +2526,8 @@ found: /* insert all vm's */ for (area = 0; area < nr_vms; area++) - insert_vmalloc_vm(vms[area], vas[area], VM_ALLOC, - pcpu_get_vm_areas); + setup_vmalloc_vm(vms[area], vas[area], VM_ALLOC, + pcpu_get_vm_areas); kfree(vas); return vms; -- cgit v1.2.3-70-g09d2 From f6d480059bedaf4feb06466c770f5fcace9eca31 Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Wed, 3 Jul 2013 15:04:49 -0700 Subject: mm, vmalloc: remove insert_vmalloc_vm() Now this function is nowhere used, we can remove it directly. Signed-off-by: Zhang Yanfei Acked-by: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index db48d513598..bd60bffd9ae 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1322,13 +1322,6 @@ static void clear_vm_unlist(struct vm_struct *vm) vm->flags &= ~VM_UNLIST; } -static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, - unsigned long flags, const void *caller) -{ - setup_vmalloc_vm(vm, va, flags, caller); - clear_vm_unlist(vm); -} - static struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long align, unsigned long flags, unsigned long start, unsigned long end, int node, gfp_t gfp_mask, const void *caller) -- cgit v1.2.3-70-g09d2 From 0f2d4a8e27108ad3b2555396b06392be590fe287 Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Wed, 3 Jul 2013 15:04:50 -0700 Subject: mm, vmalloc: use clamp() to simplify code Signed-off-by: Zhang Yanfei Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 12 ++---------- 1 file changed, 2 
insertions(+), 10 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index bd60bffd9ae..91a10472a39 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1330,16 +1330,8 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, struct vm_struct *area; BUG_ON(in_interrupt()); - if (flags & VM_IOREMAP) { - int bit = fls(size); - - if (bit > IOREMAP_MAX_ORDER) - bit = IOREMAP_MAX_ORDER; - else if (bit < PAGE_SHIFT) - bit = PAGE_SHIFT; - - align = 1ul << bit; - } + if (flags & VM_IOREMAP) + align = 1ul << clamp(fls(size), PAGE_SHIFT, IOREMAP_MAX_ORDER); size = PAGE_ALIGN(size); if (unlikely(!size)) -- cgit v1.2.3-70-g09d2 From ab15d9b4cbc2b6497023f554a152c2573ca53671 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 8 Jul 2013 15:59:53 -0700 Subject: mm/vmalloc.c: unbreak __vunmap() There is an extra semi-colon so the function always returns. Signed-off-by: Dan Carpenter Acked-by: Zhang Yanfei Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/vmalloc.c') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 91a10472a39..96b77a98254 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1453,7 +1453,7 @@ static void __vunmap(const void *addr, int deallocate_pages) return; if (WARN(!PAGE_ALIGNED(addr), "Trying to vfree() bad address (%p)\n", - addr)); + addr)) return; area = remove_vm_area(addr); -- cgit v1.2.3-70-g09d2 From 3fcd76e8028e0be37b02a2002b4f56755daeda06 Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Mon, 8 Jul 2013 15:59:54 -0700 Subject: mm/vmalloc.c: remove dead code in vb_alloc Space in a vmap block that was once allocated is considered dirty and not made available for allocation again before the whole block is recycled. The result is that free space within a vmap block is always contiguous. So if a vmap block has enough free space for allocation, the allocation is impossible to fail. 
Thus, the fragmented block purging was never invoked from vb_alloc(). So remove this dead code. [ Same patches also sent by: Chanho Min Johannes Weiner but git doesn't do "multiple authors" ] Signed-off-by: Zhang Yanfei Cc: Johannes Weiner Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 96b77a98254..a35f4f5bb90 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -910,7 +910,6 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) struct vmap_block *vb; unsigned long addr = 0; unsigned int order; - int purge = 0; BUG_ON(size & ~PAGE_MASK); BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); @@ -934,17 +933,7 @@ again: if (vb->free < 1UL << order) goto next; - i = bitmap_find_free_region(vb->alloc_map, - VMAP_BBMAP_BITS, order); - - if (i < 0) { - if (vb->free + vb->dirty == VMAP_BBMAP_BITS) { - /* fragmented and no outstanding allocations */ - BUG_ON(vb->dirty != VMAP_BBMAP_BITS); - purge = 1; - } - goto next; - } + i = VMAP_BBMAP_BITS - vb->free; addr = vb->va->va_start + (i << PAGE_SHIFT); BUG_ON(addr_to_vb_idx(addr) != addr_to_vb_idx(vb->va->va_start)); @@ -960,9 +949,6 @@ next: spin_unlock(&vb->lock); } - if (purge) - purge_fragmented_blocks_thiscpu(); - put_cpu_var(vmap_block_queue); rcu_read_unlock(); -- cgit v1.2.3-70-g09d2 From 9da3f59fbdb57c9447ddb42681f6ab98faef353a Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Mon, 8 Jul 2013 15:59:55 -0700 Subject: mm/vmalloc.c: remove unused purge_fragmented_blocks_thiscpu This function is nowhere used now, so remove it. 
Signed-off-by: Zhang Yanfei Cc: Johannes Weiner Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index a35f4f5bb90..99d045a0a0e 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -891,11 +891,6 @@ static void purge_fragmented_blocks(int cpu) } } -static void purge_fragmented_blocks_thiscpu(void) -{ - purge_fragmented_blocks(smp_processor_id()); -} - static void purge_fragmented_blocks_allcpus(void) { int cpu; -- cgit v1.2.3-70-g09d2 From b8e748b6c32999f221ea4786557b8e7e6c4e4e7a Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Mon, 8 Jul 2013 15:59:56 -0700 Subject: mm/vmalloc.c: remove alloc_map from vmap_block As we have removed the dead code in the vb_alloc, it seems there is no place to use the alloc_map. So there is no reason to maintain the alloc_map in vmap_block. Signed-off-by: Zhang Yanfei Cc: Johannes Weiner Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 99d045a0a0e..7ac2a1f8358 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -754,7 +754,6 @@ struct vmap_block { struct vmap_area *va; struct vmap_block_queue *vbq; unsigned long free, dirty; - DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS); DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS); struct list_head free_list; struct rcu_head rcu_head; @@ -820,7 +819,6 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask) vb->va = va; vb->free = VMAP_BBMAP_BITS; vb->dirty = 0; - bitmap_zero(vb->alloc_map, VMAP_BBMAP_BITS); bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS); INIT_LIST_HEAD(&vb->free_list); @@ -873,7 +871,6 @@ static void purge_fragmented_blocks(int cpu) if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) { vb->free = 0; /* prevent further allocs after releasing lock */ vb->dirty = 
VMAP_BBMAP_BITS; /* prevent purging it again */ - bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS); bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS); spin_lock(&vbq->lock); list_del_rcu(&vb->free_list); -- cgit v1.2.3-70-g09d2 From 46c001a2753f47ffa621131baa3409e636515347 Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Mon, 8 Jul 2013 15:59:57 -0700 Subject: mm/vmalloc.c: emit the failure message before return Use goto to jump to the fail label to give a failure message before returning NULL. This makes the failure handling in this function consistent. Signed-off-by: Zhang Yanfei Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/vmalloc.c') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 7ac2a1f8358..d81b9f70d92 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1642,7 +1642,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller); if (!addr) - return NULL; + goto fail; /* * In this function, newly allocated vm_struct has VM_UNLIST flag. -- cgit v1.2.3-70-g09d2 From 20fc02b477c526c6a85f84e3770373778ff2f97e Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Mon, 8 Jul 2013 15:59:58 -0700 Subject: mm/vmalloc.c: rename VM_UNLIST to VM_UNINITIALIZED VM_UNLIST was used to indicate that the vm_struct is not listed in vmlist. But after commit 4341fa454796 ("mm, vmalloc: remove list management of vmlist after initializing vmalloc"), the meaning of this flag changed. It now means the vm_struct is not fully initialized. So renaming it to VM_UNINITIALIZED seems more reasonable. Also change clear_vm_unlist to clear_vm_uninitialized_flag. 
Signed-off-by: Zhang Yanfei Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 12 ++++++------ mm/vmalloc.c | 18 +++++++++--------- 2 files changed, 15 insertions(+), 15 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index dd0a2c81052..4b8a89189a2 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -10,12 +10,12 @@ struct vm_area_struct; /* vma defining user mapping in mm_types.h */ /* bits in flags of vmalloc's vm_struct below */ -#define VM_IOREMAP 0x00000001 /* ioremap() and friends */ -#define VM_ALLOC 0x00000002 /* vmalloc() */ -#define VM_MAP 0x00000004 /* vmap()ed pages */ -#define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ -#define VM_VPAGES 0x00000010 /* buffer for pages was vmalloc'ed */ -#define VM_UNLIST 0x00000020 /* vm_struct is not listed in vmlist */ +#define VM_IOREMAP 0x00000001 /* ioremap() and friends */ +#define VM_ALLOC 0x00000002 /* vmalloc() */ +#define VM_MAP 0x00000004 /* vmap()ed pages */ +#define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ +#define VM_VPAGES 0x00000010 /* buffer for pages was vmalloc'ed */ +#define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ /* bits [20..32] reserved for arch specific ioremap internals */ /* diff --git a/mm/vmalloc.c b/mm/vmalloc.c index d81b9f70d92..af40068271c 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1289,15 +1289,15 @@ static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, spin_unlock(&vmap_area_lock); } -static void clear_vm_unlist(struct vm_struct *vm) +static void clear_vm_uninitialized_flag(struct vm_struct *vm) { /* - * Before removing VM_UNLIST, + * Before removing VM_UNINITIALIZED, * we should make sure that vm has proper values. * Pair with smp_rmb() in show_numa_info(). 
*/ smp_wmb(); - vm->flags &= ~VM_UNLIST; + vm->flags &= ~VM_UNINITIALIZED; } static struct vm_struct *__get_vm_area_node(unsigned long size, @@ -1635,7 +1635,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, if (!size || (size >> PAGE_SHIFT) > totalram_pages) goto fail; - area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNLIST, + area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED, start, end, node, gfp_mask, caller); if (!area) goto fail; @@ -1645,11 +1645,11 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, goto fail; /* - * In this function, newly allocated vm_struct has VM_UNLIST flag. - * It means that vm_struct is not fully initialized. + * In this function, newly allocated vm_struct has VM_UNINITIALIZED + * flag. It means that vm_struct is not fully initialized. * Now, it is fully initialized, so remove this flag here. */ - clear_vm_unlist(area); + clear_vm_uninitialized_flag(area); /* * A ref_count = 3 is needed because the vm_struct and vmap_area @@ -2569,9 +2569,9 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v) if (!counters) return; - /* Pair with smp_wmb() in clear_vm_unlist() */ + /* Pair with smp_wmb() in clear_vm_uninitialized_flag() */ smp_rmb(); - if (v->flags & VM_UNLIST) + if (v->flags & VM_UNINITIALIZED) return; memset(counters, 0, nr_node_ids * sizeof(unsigned int)); -- cgit v1.2.3-70-g09d2 From d157a55815ffff48caec311dfb543ce8a79e283e Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Mon, 8 Jul 2013 15:59:59 -0700 Subject: mm/vmalloc.c: check VM_UNINITIALIZED flag in s_show instead of show_numa_info We should check the VM_UNINITIALIZED flag in s_show(). If this flag is set, the vm_struct is not fully initialized, so it is unnecessary to try to show the information contained in vm_struct. We checked this flag in show_numa_info(), but I think it's better to check it earlier. 
Signed-off-by: Zhang Yanfei Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index af40068271c..318c5007f22 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2569,11 +2569,6 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v) if (!counters) return; - /* Pair with smp_wmb() in clear_vm_uninitialized_flag() */ - smp_rmb(); - if (v->flags & VM_UNINITIALIZED) - return; - memset(counters, 0, nr_node_ids * sizeof(unsigned int)); for (nr = 0; nr < v->nr_pages; nr++) @@ -2602,6 +2597,11 @@ static int s_show(struct seq_file *m, void *p) v = va->vm; + /* Pair with smp_wmb() in clear_vm_uninitialized_flag() */ + smp_rmb(); + if (v->flags & VM_UNINITIALIZED) + return 0; + seq_printf(m, "0x%pK-0x%pK %7ld", v->addr, v->addr + v->size, v->size); -- cgit v1.2.3-70-g09d2 From 59d3132f8abdc18301898febf205d00db5f0458c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 8 Jul 2013 16:00:08 -0700 Subject: vfree: don't schedule free_work() if llist_add() returns false vfree() only needs schedule_work(&p->wq) if p->list was empty, otherwise vfree_deferred->wq is already pending or it is running and didn't do llist_del_all() yet. 
Signed-off-by: Oleg Nesterov Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 318c5007f22..a649186669a 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1477,7 +1477,6 @@ static void __vunmap(const void *addr, int deallocate_pages) * conventions for vfree() arch-depenedent would be a really bad idea) * * NOTE: assumes that the object at *addr has a size >= sizeof(llist_node) - * */ void vfree(const void *addr) { @@ -1489,8 +1488,8 @@ void vfree(const void *addr) return; if (unlikely(in_interrupt())) { struct vfree_deferred *p = &__get_cpu_var(vfree_deferred); - llist_add((struct llist_node *)addr, &p->list); - schedule_work(&p->wq); + if (llist_add((struct llist_node *)addr, &p->list)) + schedule_work(&p->wq); } else __vunmap(addr, 1); } -- cgit v1.2.3-70-g09d2 From bcb615a81b1765864c71c50afb56631e7a1e5283 Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Mon, 8 Jul 2013 16:00:19 -0700 Subject: mm/vmalloc.c: fix an overflow bug in alloc_vmap_area() When searching for a vmap area in the vmalloc space, we check whether (addr + size - 1) is less than addr, which would indicate an overflow. But we assign (addr + size) to vmap_area->va_end. So if we come across the below case: (addr + size - 1) : not overflow (addr + size) : overflow we will assign an overflowed value (e.g. 0) to vmap_area->va_end, and this will trigger a BUG in __insert_vmap_area, causing a system panic. So using (addr + size) to check for the overflow should be the correct behaviour, not (addr + size - 1). 
Signed-off-by: Zhang Yanfei Reported-by: Ghennadi Procopciuc Tested-by: Daniel Baluta Cc: David Rientjes Cc: Minchan Kim Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index a649186669a..13a54953a27 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -388,12 +388,12 @@ nocache: addr = ALIGN(first->va_end, align); if (addr < vstart) goto nocache; - if (addr + size - 1 < addr) + if (addr + size < addr) goto overflow; } else { addr = ALIGN(vstart, align); - if (addr + size - 1 < addr) + if (addr + size < addr) goto overflow; n = vmap_area_root.rb_node; @@ -420,7 +420,7 @@ nocache: if (addr + cached_hole_size < first->va_start) cached_hole_size = first->va_start - addr; addr = ALIGN(first->va_end, align); - if (addr + size - 1 < addr) + if (addr + size < addr) goto overflow; if (list_is_last(&first->list, &vmap_area_list)) -- cgit v1.2.3-70-g09d2