mm: avoid anon_vma_chain allocation under anon_vma lock

Hugh Dickins points out that lockdep (correctly) spots a potential deadlock on the anon_vma lock, because we now do a GFP_KERNEL allocation of anon_vma_chain while doing anon_vma_clone(). The problem is that page reclaim will want to take the anon_vma lock of any anonymous pages that it will try to reclaim. So re-organize the code in anon_vma_clone() slightly: first do just a GFP_NOWAIT allocation, which will usually work fine. But if that fails, let's just drop the lock and re-do the allocation, now with GFP_KERNEL. End result: not only do we avoid the locking problem, this also ends up getting better concurrency in case the allocation does need to block. Tim Chen reports that with all these anon_vma locking tweaks, we're now almost back up to the spinlock performance. Reported-and-tested-by: Hugh Dickins <hughd@google.com> Tested-by: Tim Chen <tim.c.chen@linux.intel.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Andi Kleen <ak@linux.intel.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Linus Torvalds <torvalds@linux-foundation.org> 2011-06-17 19:05:36 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2011-06-17 19:24:11 -0700
commit: dd34739c03f2f9a79403d33419c2e61e11b4c403 (patch)
tree: 74e4e43b78d54994e88aef7df96f0f8f4b080934 /mm
parent: eee2acbae95555006307395d8a6c91452d62851d (diff)
1 files changed, 12 insertions, 8 deletions
diff --git a/mm/rmap.c b/mm/rmap.c
index 68756a77ef8..27dfd3b82b0 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -112,9 +112,9 @@ static inline void anon_vma_free(struct anon_vma *anon_vma)
 	kmem_cache_free(anon_vma_cachep, anon_vma);
 }
 
-static inline struct anon_vma_chain *anon_vma_chain_alloc(void)
+static inline struct anon_vma_chain *anon_vma_chain_alloc(gfp_t gfp)
 {
-	return kmem_cache_alloc(anon_vma_chain_cachep, GFP_KERNEL);
+	return kmem_cache_alloc(anon_vma_chain_cachep, gfp);
 }
 
 static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
@@ -159,7 +159,7 @@ int anon_vma_prepare(struct vm_area_struct *vma)
 		struct mm_struct *mm = vma->vm_mm;
 		struct anon_vma *allocated;
 
-		avc = anon_vma_chain_alloc();
+		avc = anon_vma_chain_alloc(GFP_KERNEL);
 		if (!avc)
 			goto out_enomem;
 
@@ -253,9 +253,14 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 	list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
 		struct anon_vma *anon_vma;
 
-		avc = anon_vma_chain_alloc();
-		if (!avc)
-			goto enomem_failure;
+		avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN);
+		if (unlikely(!avc)) {
+			unlock_anon_vma_root(root);
+			root = NULL;
+			avc = anon_vma_chain_alloc(GFP_KERNEL);
+			if (!avc)
+				goto enomem_failure;
+		}
 		anon_vma = pavc->anon_vma;
 		root = lock_anon_vma_root(root, anon_vma);
 		anon_vma_chain_link(dst, avc, anon_vma);
@@ -264,7 +269,6 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 	return 0;
 
  enomem_failure:
-	unlock_anon_vma_root(root);
 	unlink_anon_vmas(dst);
 	return -ENOMEM;
 }
@@ -294,7 +298,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
 	anon_vma = anon_vma_alloc();
 	if (!anon_vma)
 		goto out_error;
-	avc = anon_vma_chain_alloc();
+	avc = anon_vma_chain_alloc(GFP_KERNEL);
 	if (!avc)
 		goto out_error_free_anon_vma;
author	Linus Torvalds <torvalds@linux-foundation.org>	2011-06-17 19:05:36 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-06-17 19:24:11 -0700
commit	dd34739c03f2f9a79403d33419c2e61e11b4c403 (patch)
tree	74e4e43b78d54994e88aef7df96f0f8f4b080934 /mm
parent	eee2acbae95555006307395d8a6c91452d62851d (diff)