From 4fbf4291aa15926cd4fdca0ffe9122e89d0459db Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:04 +0000 Subject: FS-Cache: Allow the current state of all objects to be dumped Allow the current state of all fscache objects to be dumped by doing: cat /proc/fs/fscache/objects By default, all objects and all fields will be shown. This can be restricted by adding a suitable key to one of the caller's keyrings (such as the session keyring): keyctl add user fscache:objlist "" @s The are: K Show hexdump of object key (don't show if not given) A Show hexdump of object aux data (don't show if not given) And paired restrictions: C Show objects that have a cookie c Show objects that don't have a cookie B Show objects that are busy b Show objects that aren't busy W Show objects that have pending writes w Show objects that don't have pending writes R Show objects that have outstanding reads r Show objects that don't have outstanding reads S Show objects that have slow work queued s Show objects that don't have slow work queued If neither side of a restriction pair is given, then both are implied. For example: keyctl add user fscache:objlist KB @s shows objects that are busy, and lists their object keys, but does not dump their auxiliary data. It also implies "CcWwRrSs", but as 'B' is given, 'b' is not implied. Signed-off-by: David Howells --- fs/cachefiles/interface.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/cachefiles/interface.c') diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 431accd475a..dd7f852746c 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -331,6 +331,7 @@ static void cachefiles_put_object(struct fscache_object *_object) } cache = object->fscache.cache; + fscache_object_destroy(&object->fscache); kmem_cache_free(cachefiles_object_jar, object); fscache_object_destroyed(cache); } -- cgit v1.2.3-70-g09d2 From a17754fb8c28af19cd70dcbec6d5b0773b94e0c1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:52 +0000 Subject: CacheFiles: Don't write a full page if there's only a partial page to cache cachefiles_write_page() writes a full page to the backing file for the last page of the netfs file, even if the netfs file's last page is only a partial page. This causes the EOF on the backing file to be extended beyond the EOF of the netfs, and thus the backing file will be truncated by cachefiles_attr_changed() called from cachefiles_lookup_object(). So we need to limit the write we make to the backing file on that last page such that it doesn't push the EOF too far. Also, if a backing file that has a partial page at the end is expanded, we discard the partial page and refetch it on the basis that we then have a hole in the file with invalid data, and should the power go out... A better way to deal with this could be to record a note that the partial page contains invalid data until the correct data is written into it. This isn't a problem for netfs's that discard the whole backing file if the file size changes (such as NFS). Signed-off-by: David Howells --- fs/cachefiles/interface.c | 20 +++++++++++++++++--- fs/cachefiles/rdwr.c | 23 +++++++++++++++++++---- include/linux/fscache-cache.h | 3 +++ 3 files changed, 39 insertions(+), 7 deletions(-) (limited to 'fs/cachefiles/interface.c') diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index dd7f852746c..8e67abf0598 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -404,12 +404,26 @@ static int cachefiles_attr_changed(struct fscache_object *_object) if (oi_size == ni_size) return 0; - newattrs.ia_size = ni_size; - newattrs.ia_valid = ATTR_SIZE; - cachefiles_begin_secure(cache, &saved_cred); mutex_lock(&object->backer->d_inode->i_mutex); + + /* if there's an extension to a partial page at the end of the backing + * file, we need to discard the partial page so that we pick up new + * data after it */ + if (oi_size & ~PAGE_MASK && ni_size > oi_size) { + _debug("discard tail %llx", oi_size); + newattrs.ia_valid = ATTR_SIZE; + newattrs.ia_size = oi_size & PAGE_MASK; + ret = notify_change(object->backer, &newattrs); + if (ret < 0) + goto truncate_failed; + } + + newattrs.ia_valid = ATTR_SIZE; + newattrs.ia_size = ni_size; ret = notify_change(object->backer, &newattrs); + +truncate_failed: mutex_unlock(&object->backer->d_inode->i_mutex); cachefiles_end_secure(cache, saved_cred); diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 3304646dae8..5a84fd7109a 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -803,7 +803,8 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) struct cachefiles_cache *cache; mm_segment_t old_fs; struct file *file; - loff_t pos; + loff_t pos, eof; + size_t len; void *data; int ret; @@ -837,15 +838,29 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) ret = -EIO; if (file->f_op->write) { pos = (loff_t) page->index << PAGE_SHIFT; + + /* we mustn't write more data than we have, so we have + * to beware of a partial page at EOF */ + eof = object->fscache.store_limit_l; + len = PAGE_SIZE; + if (eof & ~PAGE_MASK) { + ASSERTCMP(pos, <, eof); + if (eof - pos < PAGE_SIZE) { + _debug("cut short %llx to %llx", + pos, eof); + len = eof - pos; + ASSERTCMP(pos + len, ==, eof); + } + } + data = kmap(page); old_fs = get_fs(); set_fs(KERNEL_DS); ret = file->f_op->write( - file, (const void __user *) data, PAGE_SIZE, - &pos); + file, (const void __user *) data, len, &pos); set_fs(old_fs); kunmap(page); - if (ret != PAGE_SIZE) + if (ret != len) ret = -EIO; } fput(file); diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 907bb56c588..5db50002f3b 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -395,6 +395,7 @@ struct fscache_object { struct rb_node objlist_link; /* link in global object list */ #endif pgoff_t store_limit; /* current storage limit */ + loff_t store_limit_l; /* current storage limit */ }; extern const char *fscache_object_states[]; @@ -439,6 +440,7 @@ void fscache_object_init(struct fscache_object *object, object->events = object->event_mask = 0; object->flags = 0; object->store_limit = 0; + object->store_limit_l = 0; object->cache = cache; object->cookie = cookie; object->parent = NULL; @@ -491,6 +493,7 @@ static inline void fscache_object_lookup_error(struct fscache_object *object) static inline void fscache_set_store_limit(struct fscache_object *object, loff_t i_size) { + object->store_limit_l = i_size; object->store_limit = i_size >> PAGE_SHIFT; if (i_size & ~PAGE_MASK) object->store_limit++; -- cgit v1.2.3-70-g09d2 From fee096deb4f33897937b974cb2c5168bab7935be Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:12:05 +0000 Subject: CacheFiles: Catch an overly long wait for an old active object Catch an overly long wait for an old, dying active object when we want to replace it with a new one. The probability is that all the slow-work threads are hogged, and the delete can't get a look in. What we do instead is: (1) if there's nothing in the slow work queue, we sleep until either the dying object has finished dying or there is something in the slow work queue behind which we can queue our object. (2) if there is something in the slow work queue, we return ETIMEDOUT to fscache_lookup_object(), which then puts us back on the slow work queue, presumably behind the deletion that we're blocked by. We are then deferred for a while until we work our way back through the queue - without blocking a slow-work thread unnecessarily. A backtrace similar to the following may appear in the log without this patch: INFO: task kslowd004:5711 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. kslowd004 D 0000000000000000 0 5711 2 0x00000080 ffff88000340bb80 0000000000000046 ffff88002550d000 0000000000000000 ffff88002550d000 0000000000000007 ffff88000340bfd8 ffff88002550d2a8 000000000000ddf0 00000000000118c0 00000000000118c0 ffff88002550d2a8 Call Trace: [] ? trace_hardirqs_on+0xd/0xf [] ? cachefiles_wait_bit+0x0/0xd [cachefiles] [] cachefiles_wait_bit+0x9/0xd [cachefiles] [] __wait_on_bit+0x43/0x76 [] ? ext3_xattr_get+0x1ec/0x270 [] out_of_line_wait_on_bit+0x69/0x74 [] ? cachefiles_wait_bit+0x0/0xd [cachefiles] [] ? wake_bit_function+0x0/0x2e [] cachefiles_mark_object_active+0x203/0x23b [cachefiles] [] cachefiles_walk_to_object+0x558/0x827 [cachefiles] [] cachefiles_lookup_object+0xac/0x12a [cachefiles] [] fscache_lookup_object+0x1c7/0x214 [fscache] [] fscache_object_state_machine+0xa5/0x52d [fscache] [] fscache_object_slow_work_execute+0x5f/0xa0 [fscache] [] slow_work_execute+0x18f/0x2d1 [] slow_work_thread+0x1c5/0x308 [] ? autoremove_wake_function+0x0/0x34 [] ? slow_work_thread+0x0/0x308 [] kthread+0x7a/0x82 [] child_rip+0xa/0x20 [] ? restore_args+0x0/0x30 [] ? kthread+0x0/0x82 [] ? child_rip+0x0/0x20 1 lock held by kslowd004/5711: #0: (&sb->s_type->i_mutex_key#7/1){+.+.+.}, at: [] cachefiles_walk_to_object+0x1b3/0x827 [cachefiles] Signed-off-by: David Howells --- Documentation/filesystems/caching/fscache.txt | 1 + fs/cachefiles/interface.c | 6 +- fs/cachefiles/namei.c | 87 +++++++++++++++++++++------ fs/fscache/internal.h | 1 + fs/fscache/object.c | 10 ++- fs/fscache/stats.c | 4 +- include/linux/fscache-cache.h | 6 +- 7 files changed, 90 insertions(+), 25 deletions(-) (limited to 'fs/cachefiles/interface.c') diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index 3c23411956b..a91e2e2095b 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt @@ -235,6 +235,7 @@ proc files. neg=N Number of negative lookups made pos=N Number of positive lookups made crt=N Number of objects created by lookup + tmo=N Number of lookups timed out and requeued Updates n=N Number of update cookie requests seen nul=N Number of upd reqs given a NULL parent run=N Number of upd reqs granted CPU time diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 8e67abf0598..9d3c426044a 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -114,8 +114,9 @@ nomem_lookup_data: /* * attempt to look up the nominated node in this cache + * - return -ETIMEDOUT to be scheduled again */ -static void cachefiles_lookup_object(struct fscache_object *_object) +static int cachefiles_lookup_object(struct fscache_object *_object) { struct cachefiles_lookup_data *lookup_data; struct cachefiles_object *parent, *object; @@ -145,13 +146,14 @@ static void cachefiles_lookup_object(struct fscache_object *_object) object->fscache.cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) cachefiles_attr_changed(&object->fscache); - if (ret < 0) { + if (ret < 0 && ret != -ETIMEDOUT) { printk(KERN_WARNING "CacheFiles: Lookup failed error %d\n", ret); fscache_object_lookup_error(&object->fscache); } _leave(" [%d]", ret); + return ret; } /* diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 00a0cda8f47..14ac4806e29 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -21,12 +21,6 @@ #include #include "internal.h" -static int cachefiles_wait_bit(void *flags) -{ - schedule(); - return 0; -} - #define CACHEFILES_KEYBUF_SIZE 512 /* @@ -100,8 +94,8 @@ static noinline void cachefiles_printk_object(struct cachefiles_object *object, /* * record the fact that an object is now active */ -static void cachefiles_mark_object_active(struct cachefiles_cache *cache, - struct cachefiles_object *object) +static int cachefiles_mark_object_active(struct cachefiles_cache *cache, + struct cachefiles_object *object) { struct cachefiles_object *xobject; struct rb_node **_p, *_parent = NULL; @@ -139,8 +133,8 @@ try_again: rb_insert_color(&object->active_node, &cache->active_nodes); write_unlock(&cache->active_lock); - _leave(""); - return; + _leave(" = 0"); + return 0; /* an old object from a previous incarnation is hogging the slot - we * need to wait for it to be destroyed */ @@ -155,13 +149,64 @@ wait_for_old_object: atomic_inc(&xobject->usage); write_unlock(&cache->active_lock); - _debug(">>> wait"); - wait_on_bit(&xobject->flags, CACHEFILES_OBJECT_ACTIVE, - cachefiles_wait_bit, TASK_UNINTERRUPTIBLE); - _debug("<<< waited"); + if (test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)) { + wait_queue_head_t *wq; + + signed long timeout = 60 * HZ; + wait_queue_t wait; + bool requeue; + + /* if the object we're waiting for is queued for processing, + * then just put ourselves on the queue behind it */ + if (slow_work_is_queued(&xobject->fscache.work)) { + _debug("queue OBJ%x behind OBJ%x immediately", + object->fscache.debug_id, + xobject->fscache.debug_id); + goto requeue; + } + + /* otherwise we sleep until either the object we're waiting for + * is done, or the slow-work facility wants the thread back to + * do other work */ + wq = bit_waitqueue(&xobject->flags, CACHEFILES_OBJECT_ACTIVE); + init_wait(&wait); + requeue = false; + do { + prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE); + if (!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)) + break; + requeue = slow_work_sleep_till_thread_needed( + &object->fscache.work, &timeout); + } while (timeout > 0 && !requeue); + finish_wait(wq, &wait); + + if (requeue && + test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)) { + _debug("queue OBJ%x behind OBJ%x after wait", + object->fscache.debug_id, + xobject->fscache.debug_id); + goto requeue; + } + + if (timeout <= 0) { + printk(KERN_ERR "\n"); + printk(KERN_ERR "CacheFiles: Error: Overlong" + " wait for old active object to go away\n"); + cachefiles_printk_object(object, xobject); + goto requeue; + } + } + + ASSERT(!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)); cache->cache.ops->put_object(&xobject->fscache); goto try_again; + +requeue: + clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags); + cache->cache.ops->put_object(&xobject->fscache); + _leave(" = -ETIMEDOUT"); + return -ETIMEDOUT; } /* @@ -466,12 +511,15 @@ lookup_again: } /* note that we're now using this object */ - cachefiles_mark_object_active(cache, object); + ret = cachefiles_mark_object_active(cache, object); mutex_unlock(&dir->d_inode->i_mutex); dput(dir); dir = NULL; + if (ret == -ETIMEDOUT) + goto mark_active_timed_out; + _debug("=== OBTAINED_OBJECT ==="); if (object->new) { @@ -515,6 +563,10 @@ create_error: cachefiles_io_error(cache, "Create/mkdir failed"); goto error; +mark_active_timed_out: + _debug("mark active timed out"); + goto release_dentry; + check_error: _debug("check error %d", ret); write_lock(&cache->active_lock); @@ -522,7 +574,7 @@ check_error: clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags); wake_up_bit(&object->flags, CACHEFILES_OBJECT_ACTIVE); write_unlock(&cache->active_lock); - +release_dentry: dput(object->dentry); object->dentry = NULL; goto error_out; @@ -543,9 +595,6 @@ error: error_out2: dput(dir); error_out: - if (ret == -ENOSPC) - ret = -ENOBUFS; - _leave(" = error %d", -ret); return ret; } diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 5b49a373689..0ca2566e038 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -215,6 +215,7 @@ extern atomic_t fscache_n_object_no_alloc; extern atomic_t fscache_n_object_lookups; extern atomic_t fscache_n_object_lookups_negative; extern atomic_t fscache_n_object_lookups_positive; +extern atomic_t fscache_n_object_lookups_timed_out; extern atomic_t fscache_n_object_created; extern atomic_t fscache_n_object_avail; extern atomic_t fscache_n_object_dead; diff --git a/fs/fscache/object.c b/fs/fscache/object.c index f3f952cf887..e513ac599c8 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -468,6 +468,7 @@ static void fscache_lookup_object(struct fscache_object *object) { struct fscache_cookie *cookie = object->cookie; struct fscache_object *parent; + int ret; _enter(""); @@ -493,12 +494,19 @@ static void fscache_lookup_object(struct fscache_object *object) fscache_stat(&fscache_n_object_lookups); fscache_stat(&fscache_n_cop_lookup_object); - object->cache->ops->lookup_object(object); + ret = object->cache->ops->lookup_object(object); fscache_stat_d(&fscache_n_cop_lookup_object); if (test_bit(FSCACHE_OBJECT_EV_ERROR, &object->events)) set_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags); + if (ret == -ETIMEDOUT) { + /* probably stuck behind another object, so move this one to + * the back of the queue */ + fscache_stat(&fscache_n_object_lookups_timed_out); + set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); + } + _leave(""); } diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 05f77caf4a2..46435f3aae6 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -98,6 +98,7 @@ atomic_t fscache_n_object_no_alloc; atomic_t fscache_n_object_lookups; atomic_t fscache_n_object_lookups_negative; atomic_t fscache_n_object_lookups_positive; +atomic_t fscache_n_object_lookups_timed_out; atomic_t fscache_n_object_created; atomic_t fscache_n_object_avail; atomic_t fscache_n_object_dead; @@ -160,10 +161,11 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_acquires_nobufs), atomic_read(&fscache_n_acquires_oom)); - seq_printf(m, "Lookups: n=%u neg=%u pos=%u crt=%u\n", + seq_printf(m, "Lookups: n=%u neg=%u pos=%u crt=%u tmo=%u\n", atomic_read(&fscache_n_object_lookups), atomic_read(&fscache_n_object_lookups_negative), atomic_read(&fscache_n_object_lookups_positive), + atomic_read(&fscache_n_object_lookups_timed_out), atomic_read(&fscache_n_object_created)); seq_printf(m, "Updates: n=%u nul=%u run=%u\n", diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 5db50002f3b..7be0c6fbe88 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -234,8 +234,10 @@ struct fscache_cache_ops { struct fscache_object *(*alloc_object)(struct fscache_cache *cache, struct fscache_cookie *cookie); - /* look up the object for a cookie */ - void (*lookup_object)(struct fscache_object *object); + /* look up the object for a cookie + * - return -ETIMEDOUT to be requeued + */ + int (*lookup_object)(struct fscache_object *object); /* finished looking up */ void (*lookup_complete)(struct fscache_object *object); -- cgit v1.2.3-70-g09d2 From 14e69647c868459bcb910f771851ca7c699efd21 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:12:08 +0000 Subject: CacheFiles: Don't log lookup/create failing with ENOBUFS Don't log the CacheFiles lookup/create object routined failing with ENOBUFS as under high memory load or high cache load they can do this quite a lot. This error simply means that the requested object cannot be created on disk due to lack of space, or due to failure of the backing filesystem to find sufficient resources. Signed-off-by: David Howells --- fs/cachefiles/interface.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/cachefiles/interface.c') diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 9d3c426044a..27089311fbe 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -147,8 +147,9 @@ static int cachefiles_lookup_object(struct fscache_object *_object) cachefiles_attr_changed(&object->fscache); if (ret < 0 && ret != -ETIMEDOUT) { - printk(KERN_WARNING "CacheFiles: Lookup failed error %d\n", - ret); + if (ret != -ENOBUFS) + printk(KERN_WARNING + "CacheFiles: Lookup failed error %d\n", ret); fscache_object_lookup_error(&object->fscache); } -- cgit v1.2.3-70-g09d2