mmap: don't return ENOMEM when mapcount is temporarily exceeded in munmap()

On ia64, the following test program exit abnormally, because glibc thread library called abort(). ======================================================== (gdb) bt #0 0xa000000000010620 in __kernel_syscall_via_break () #1 0x20000000003208e0 in raise () from /lib/libc.so.6.1 #2 0x2000000000324090 in abort () from /lib/libc.so.6.1 #3 0x200000000027c3e0 in __deallocate_stack () from /lib/libpthread.so.0 #4 0x200000000027f7c0 in start_thread () from /lib/libpthread.so.0 #5 0x200000000047ef60 in __clone2 () from /lib/libc.so.6.1 ======================================================== The fact is, glibc call munmap() when thread exitng time for freeing stack, and it assume munlock() never fail. However, munmap() often make vma splitting and it with many mapcount make -ENOMEM. Oh well, that's crazy, because stack unmapping never increase mapcount. The maxcount exceeding is only temporary. internal temporary exceeding shouldn't make ENOMEM. This patch does it. test_max_mapcount.c ================================================================== #include<stdio.h> #include<stdlib.h> #include<string.h> #include<pthread.h> #include<errno.h> #include<unistd.h> #define THREAD_NUM 30000 #define MAL_SIZE (8*1024*1024) void *wait_thread(void *args) { void *addr; addr = malloc(MAL_SIZE); sleep(10); return NULL; } void *wait_thread2(void *args) { sleep(60); return NULL; } int main(int argc, char *argv[]) { int i; pthread_t thread[THREAD_NUM], th; int ret, count = 0; pthread_attr_t attr; ret = pthread_attr_init(&attr); if(ret) { perror("pthread_attr_init"); } ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); if(ret) { perror("pthread_attr_setdetachstate"); } for (i = 0; i < THREAD_NUM; i++) { ret = pthread_create(&th, &attr, wait_thread, NULL); if(ret) { fprintf(stderr, "[%d] ", count); perror("pthread_create"); } else { printf("[%d] create OK.\n", count); } count++; ret = pthread_create(&thread[i], &attr, wait_thread2, NULL); if(ret) { fprintf(stderr, "[%d] ", count); perror("pthread_create"); } else { printf("[%d] create OK.\n", count); } count++; } sleep(3600); return 0; } ================================================================== [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> 2009-12-14 17:57:56 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-12-15 08:53:11 -0800
commit: 659ace584e7a9fdda872eab4d6d7be1e0afb6cae (patch)
tree: e9d8ca3d7429bfa48e823a6f105d76254e34daf0 /mm
parent: bb86a7338b7a864c03e1736a8d370b10254b0300 (diff)
1 files changed, 28 insertions, 8 deletions
diff --git a/mm/mmap.c b/mm/mmap.c
index ed70a68e882..02c09f33df8 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1811,10 +1811,10 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 /*
- * Split a vma into two pieces at address 'addr', a new vma is allocated
- * either for the first part or the tail.
+ * __split_vma() bypasses sysctl_max_map_count checking.  We use this on the
+ * munmap path where it doesn't make sense to fail.
  */
-int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
+static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 	      unsigned long addr, int new_below)
 {
 	struct mempolicy *pol;
@@ -1824,9 +1824,6 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 					~(huge_page_mask(hstate_vma(vma)))))
 		return -EINVAL;
 
-	if (mm->map_count >= sysctl_max_map_count)
-		return -ENOMEM;
-
 	new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 	if (!new)
 		return -ENOMEM;
@@ -1866,6 +1863,19 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 	return 0;
 }
 
+/*
+ * Split a vma into two pieces at address 'addr', a new vma is allocated
+ * either for the first part or the tail.
+ */
+int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
+	      unsigned long addr, int new_below)
+{
+	if (mm->map_count >= sysctl_max_map_count)
+		return -ENOMEM;
+
+	return __split_vma(mm, vma, addr, new_below);
+}
+
 /* Munmap is split into 2 main parts -- this part which finds
  * what needs doing, and the areas themselves, which do the
  * work.  This now handles partial unmappings.
@@ -1901,7 +1911,17 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
 	 * places tmp vma above, and higher split_vma places tmp vma below.
 	 */
 	if (start > vma->vm_start) {
-		int error = split_vma(mm, vma, start, 0);
+		int error;
+
+		/*
+		 * Make sure that map_count on return from munmap() will
+		 * not exceed its limit; but let map_count go just above
+		 * its limit temporarily, to help free resources as expected.
+		 */
+		if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
+			return -ENOMEM;
+
+		error = __split_vma(mm, vma, start, 0);
 		if (error)
 			return error;
 		prev = vma;
@@ -1910,7 +1930,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
 	/* Does it split the last one? */
 	last = find_vma(mm, end);
 	if (last && end > last->vm_start) {
-		int error = split_vma(mm, last, end, 1);
+		int error = __split_vma(mm, last, end, 1);
 		if (error)
 			return error;
 	}
author	KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>	2009-12-14 17:57:56 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-12-15 08:53:11 -0800
commit	659ace584e7a9fdda872eab4d6d7be1e0afb6cae (patch)
tree	e9d8ca3d7429bfa48e823a6f105d76254e34daf0 /mm
parent	bb86a7338b7a864c03e1736a8d370b10254b0300 (diff)