From b34df792b4e9e311db47fad27949095d0629c197 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:14 +0000 Subject: FS-Cache: Use radix tree preload correctly in tracking of pages to be stored __fscache_write_page() attempts to load the radix tree preallocation pool for the CPU it is on before calling radix_tree_insert(), as the insertion must be done inside a pair of spinlocks. Use of the preallocation pool, however, is contingent on the radix tree being initialised without __GFP_WAIT specified. __fscache_acquire_cookie() was passing GFP_NOFS to INIT_RADIX_TREE() - but that includes __GFP_WAIT. The solution is to AND out __GFP_WAIT. Additionally, the banner comment to radix_tree_preload() is altered to make note of this prerequisite. Possibly there should be a WARN_ON() too. Without this fix, I have seen the following recursive deadlock caused by radix_tree_insert() attempting to allocate memory inside the spinlocked region, which resulted in FS-Cache being called back into to release memory - which required the spinlock already held. ============================================= [ INFO: possible recursive locking detected ] 2.6.32-rc6-cachefs #24 --------------------------------------------- nfsiod/7916 is trying to acquire lock: (&cookie->lock){+.+.-.}, at: [] __fscache_uncache_page+0xdb/0x160 [fscache] but task is already holding lock: (&cookie->lock){+.+.-.}, at: [] __fscache_write_page+0x15c/0x3f3 [fscache] other info that might help us debug this: 5 locks held by nfsiod/7916: #0: (nfsiod){+.+.+.}, at: [] worker_thread+0x19a/0x2e2 #1: (&task->u.tk_work#2){+.+.+.}, at: [] worker_thread+0x19a/0x2e2 #2: (&cookie->lock){+.+.-.}, at: [] __fscache_write_page+0x15c/0x3f3 [fscache] #3: (&object->lock#2){+.+.-.}, at: [] __fscache_write_page+0x197/0x3f3 [fscache] #4: (&cookie->stores_lock){+.+...}, at: [] __fscache_write_page+0x19f/0x3f3 [fscache] stack backtrace: Pid: 7916, comm: nfsiod Not tainted 2.6.32-rc6-cachefs #24 Call Trace: [] __lock_acquire+0x1649/0x16e3 [] ? __lock_acquire+0x7b7/0x16e3 [] ? dump_trace+0x248/0x257 [] lock_acquire+0x57/0x6d [] ? __fscache_uncache_page+0xdb/0x160 [fscache] [] _spin_lock+0x2c/0x3b [] ? __fscache_uncache_page+0xdb/0x160 [fscache] [] __fscache_uncache_page+0xdb/0x160 [fscache] [] ? __fscache_check_page_write+0x0/0x71 [fscache] [] nfs_fscache_release_page+0x86/0xc4 [nfs] [] nfs_release_page+0x3c/0x41 [nfs] [] try_to_release_page+0x32/0x3b [] shrink_page_list+0x316/0x4ac [] ? mark_held_locks+0x52/0x70 [] ? _spin_unlock_irq+0x2b/0x31 [] shrink_inactive_list+0x392/0x67c [] ? mark_held_locks+0x52/0x70 [] shrink_list+0x8d/0x8f [] shrink_zone+0x278/0x33c [] ? ktime_get_ts+0xad/0xba [] try_to_free_pages+0x22e/0x392 [] ? isolate_pages_global+0x0/0x212 [] __alloc_pages_nodemask+0x3dc/0x5cf [] cache_alloc_refill+0x34d/0x6c1 [] ? radix_tree_node_alloc+0x52/0x5c [] kmem_cache_alloc+0xb2/0x118 [] radix_tree_node_alloc+0x52/0x5c [] radix_tree_insert+0x57/0x19c [] __fscache_write_page+0x1e3/0x3f3 [fscache] [] __nfs_readpage_to_fscache+0x58/0x11e [nfs] [] nfs_readpage_release+0x34/0x9b [nfs] [] nfs_readpage_release_full+0x32/0x4b [nfs] [] rpc_release_calldata+0x12/0x14 [sunrpc] [] rpc_free_task+0x59/0x61 [sunrpc] [] rpc_async_release+0x10/0x12 [sunrpc] [] worker_thread+0x1ef/0x2e2 [] ? worker_thread+0x19a/0x2e2 [] ? thread_return+0x3e/0x101 [] ? rpc_async_release+0x0/0x12 [sunrpc] [] ? autoremove_wake_function+0x0/0x34 [] ? trace_hardirqs_on+0xd/0xf [] ? worker_thread+0x0/0x2e2 [] kthread+0x7a/0x82 [] child_rip+0xa/0x20 [] ? restore_args+0x0/0x30 [] ? add_wait_queue+0x15/0x44 [] ? kthread+0x0/0x82 [] ? child_rip+0x0/0x20 Signed-off-by: David Howells --- lib/radix-tree.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib/radix-tree.c') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 23abbd93cae..ae610685556 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -200,6 +200,9 @@ radix_tree_node_free(struct radix_tree_node *node) * ensure that the addition of a single element in the tree cannot fail. On * success, return zero, with preemption disabled. On error, return -ENOMEM * with preemption not disabled. + * + * To make use of this facility, the radix tree must be initialised without + * __GFP_WAIT being passed to INIT_RADIX_TREE(). */ int radix_tree_preload(gfp_t gfp_mask) { -- cgit v1.2.3-70-g09d2 From 285e728b0ac55b53a673114096168d6f74930167 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:29 +0000 Subject: FS-Cache: Don't delete pending pages from the page-store tracking tree Don't delete pending pages from the page-store tracking tree, but rather send them for another write as they've presumably been updated. Signed-off-by: David Howells --- fs/fscache/page.c | 7 +++++-- lib/radix-tree.c | 2 -- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'lib/radix-tree.c') diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 3ea8897bc21..022a5da8e13 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -57,8 +57,11 @@ static void fscache_end_page_write(struct fscache_object *object, /* delete the page from the tree if it is now no longer * pending */ spin_lock(&cookie->stores_lock); - fscache_stat(&fscache_n_store_radix_deletes); - xpage = radix_tree_delete(&cookie->stores, page->index); + if (!radix_tree_tag_get(&cookie->stores, page->index, + FSCACHE_COOKIE_PENDING_TAG)) { + fscache_stat(&fscache_n_store_radix_deletes); + xpage = radix_tree_delete(&cookie->stores, page->index); + } spin_unlock(&cookie->stores_lock); wake_up_bit(&cookie->flags, 0); } diff --git a/lib/radix-tree.c b/lib/radix-tree.c index ae610685556..92cdd9936e3 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -546,7 +546,6 @@ out: } EXPORT_SYMBOL(radix_tree_tag_clear); -#ifndef __KERNEL__ /* Only the test harness uses this at present */ /** * radix_tree_tag_get - get a tag on a radix tree node * @root: radix tree root @@ -609,7 +608,6 @@ int radix_tree_tag_get(struct radix_tree_root *root, } } EXPORT_SYMBOL(radix_tree_tag_get); -#endif /** * radix_tree_next_hole - find the next hole (not-present entry) -- cgit v1.2.3-70-g09d2