From ef691947d8a3d479e67652312783aedcf629320a Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 1 Dec 2010 15:45:48 -0800 Subject: vmalloc: remove vmalloc_sync_all() from alloc_vm_area() There's no need for it: it will get faulted into the current pagetable as needed. Signed-off-by: Jeremy Fitzhardinge --- mm/vmalloc.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 5d6030235d7..fdf4b1e88e5 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2148,10 +2148,6 @@ struct vm_struct *alloc_vm_area(size_t size) return NULL; } - /* Make sure the pagetables are constructed in process kernel - mappings */ - vmalloc_sync_all(); - return area; } EXPORT_SYMBOL_GPL(alloc_vm_area); -- cgit v1.2.3-70-g09d2 From 248ac0e1943ad1796393d281b096184719eb3f97 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 24 May 2011 17:11:43 -0700 Subject: mm/vmalloc: remove guard page from between vmap blocks The vmap allocator is used to, among other things, allocate per-cpu vmap blocks, where each vmap block is naturally aligned to its own size. Obviously, leaving a guard page after each vmap area forbids packing vmap blocks efficiently and can make the kernel run out of possible vmap blocks long before overall vmap space is exhausted. The new interface to map a user-supplied page array into linear vmalloc space (vm_map_ram) insists on allocating from a vmap block (instead of falling back to a custom area) when the area size is below a certain threshold. With heavy users of this interface (e.g. XFS) and limited vmalloc space on 32-bit, vmap block exhaustion is a real problem. Remove the guard page from the core vmap allocator. vmalloc and the old vmap interface enforce a guard page on their own at a higher level. Note that without this patch, we had accidental guard pages after those vm_map_ram areas that happened to be at the end of a vmap block, but not between every area. This patch removes this accidental guard page only. If we want guard pages after every vm_map_ram area, this should be done separately. And just like with vmalloc and the old interface on a different level, not in the core allocator. Mel pointed out: "If necessary, the guard page could be reintroduced as a debugging-only option (CONFIG_DEBUG_PAGEALLOC?). Otherwise it seems reasonable." Signed-off-by: Johannes Weiner Cc: Nick Piggin Cc: Dave Chinner Acked-by: Mel Gorman Cc: Hugh Dickins Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 5d6030235d7..4581ddcdda5 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -375,7 +375,7 @@ nocache: /* find starting point for our search */ if (free_vmap_cache) { first = rb_entry(free_vmap_cache, struct vmap_area, rb_node); - addr = ALIGN(first->va_end + PAGE_SIZE, align); + addr = ALIGN(first->va_end, align); if (addr < vstart) goto nocache; if (addr + size - 1 < addr) @@ -406,10 +406,10 @@ nocache: } /* from the starting point, walk areas until a suitable hole is found */ - while (addr + size >= first->va_start && addr + size <= vend) { + while (addr + size > first->va_start && addr + size <= vend) { if (addr + cached_hole_size < first->va_start) cached_hole_size = first->va_start - addr; - addr = ALIGN(first->va_end + PAGE_SIZE, align); + addr = ALIGN(first->va_end, align); if (addr + size - 1 < addr) goto overflow; -- cgit v1.2.3-70-g09d2 From 22943ab116af1ead4dc112ec408a93cf1365b34a Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 24 May 2011 17:12:18 -0700 Subject: mm: print vmalloc() state after allocation failures I was tracking down a page allocation failure that ended up in vmalloc(). Since vmalloc() uses 0-order pages, if somebody asks for an insane amount of memory, we'll still get a warning with "order:0" in it. That's not very useful. During recovery, vmalloc() also nicely frees all of the memory that it got up to the point of the failure. That is wonderful, but it also quickly hides any issues. We have a much different sitation if vmalloc() repeatedly fails 10GB in to: vmalloc(100 * 1<<30); versus repeatedly failing 4096 bytes in to a: vmalloc(8192); This patch will print out messages that look like this: [ 68.123503] vmalloc: allocation failure, allocated 6680576 of 13426688 bytes [ 68.124218] bash: page allocation failure: order:0, mode:0xd2 [ 68.124811] Pid: 3770, comm: bash Not tainted 2.6.39-rc3-00082-g85f2e68-dirty #333 [ 68.125579] Call Trace: [ 68.125853] [] warn_alloc_failed+0x146/0x170 [ 68.126464] [] ? printk+0x6c/0x70 [ 68.126791] [] ? alloc_pages_current+0x94/0xe0 [ 68.127661] [] __vmalloc_node_range+0x237/0x290 ... The 'order' variable is added for clarity when calling warn_alloc_failed() to avoid having an unexplained '0' as an argument. The 'tmp_mask' is because adding an open-coded '| __GFP_NOWARN' would take us over 80 columns for the alloc_pages_node() call. If we are going to add a line, it might as well be one that makes the sucker easier to read. As a side issue, I also noticed that ctl_ioctl() does vmalloc() based solely on an unverified value passed in from userspace. Granted, it's under CAP_SYS_ADMIN, but it still frightens me a bit. Signed-off-by: Dave Hansen Cc: Johannes Weiner Cc: David Rientjes Cc: Michal Nazarewicz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 4581ddcdda5..b5ccf3158d8 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1534,6 +1534,7 @@ static void *__vmalloc_node(unsigned long size, unsigned long align, static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot, int node, void *caller) { + const int order = 0; struct page **pages; unsigned int nr_pages, array_size, i; gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO; @@ -1560,11 +1561,12 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, for (i = 0; i < area->nr_pages; i++) { struct page *page; + gfp_t tmp_mask = gfp_mask | __GFP_NOWARN; if (node < 0) - page = alloc_page(gfp_mask); + page = alloc_page(tmp_mask); else - page = alloc_pages_node(node, gfp_mask, 0); + page = alloc_pages_node(node, tmp_mask, order); if (unlikely(!page)) { /* Successfully allocated i pages, free them in __vunmap() */ @@ -1579,6 +1581,9 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, return area->addr; fail: + warn_alloc_failed(gfp_mask, order, "vmalloc: allocation failure, " + "allocated %ld of %ld bytes\n", + (area->nr_pages*PAGE_SIZE), area->size); vfree(area->addr); return NULL; } -- cgit v1.2.3-70-g09d2