diff options
Diffstat (limited to 'fs')
56 files changed, 10413 insertions, 367 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index ae3b34a2ea6..86b203fc3c5 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -66,6 +66,13 @@ config GENERIC_ACL bool select FS_POSIX_ACL +menu "Caches" + +source "fs/fscache/Kconfig" +source "fs/cachefiles/Kconfig" + +endmenu + if BLOCK menu "CD-ROM/DVD Filesystems" diff --git a/fs/Makefile b/fs/Makefile index 15f73014a20..70b2aed8713 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -63,6 +63,7 @@ obj-$(CONFIG_PROFILING) += dcookies.o obj-$(CONFIG_DLM) += dlm/ # Do not add any filesystems before this line +obj-$(CONFIG_FSCACHE) += fscache/ obj-$(CONFIG_REISERFS_FS) += reiserfs/ obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 obj-$(CONFIG_EXT2_FS) += ext2/ @@ -116,6 +117,7 @@ obj-$(CONFIG_AFS_FS) += afs/ obj-$(CONFIG_BEFS_FS) += befs/ obj-$(CONFIG_HOSTFS) += hostfs/ obj-$(CONFIG_HPPFS) += hppfs/ +obj-$(CONFIG_CACHEFILES) += cachefiles/ obj-$(CONFIG_DEBUG_FS) += debugfs/ obj-$(CONFIG_OCFS2_FS) += ocfs2/ obj-$(CONFIG_BTRFS_FS) += btrfs/ diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig index e7b522fe15e..5c4e61d3c77 100644 --- a/fs/afs/Kconfig +++ b/fs/afs/Kconfig @@ -19,3 +19,11 @@ config AFS_DEBUG See <file:Documentation/filesystems/afs.txt> for more information. If unsure, say N. + +config AFS_FSCACHE + bool "Provide AFS client caching support (EXPERIMENTAL)" + depends on EXPERIMENTAL + depends on AFS_FS=m && FSCACHE || AFS_FS=y && FSCACHE=y + help + Say Y here if you want AFS data to be cached locally on disk through + the generic filesystem cache manager diff --git a/fs/afs/Makefile b/fs/afs/Makefile index a66671082cf..4f64b95d57b 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile @@ -2,7 +2,10 @@ # Makefile for Red Hat Linux AFS client. # +afs-cache-$(CONFIG_AFS_FSCACHE) := cache.o + kafs-objs := \ + $(afs-cache-y) \ callback.o \ cell.o \ cmservice.o \ diff --git a/fs/afs/cache.c b/fs/afs/cache.c index de0d7de69ed..e2b1d3f1651 100644 --- a/fs/afs/cache.c +++ b/fs/afs/cache.c @@ -1,6 +1,6 @@ /* AFS caching stuff * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -9,248 +9,395 @@ * 2 of the License, or (at your option) any later version. */ -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_cell_cache_match(void *target, - const void *entry); -static void afs_cell_cache_update(void *source, void *entry); - -struct cachefs_index_def afs_cache_cell_index_def = { - .name = "cell_ix", - .data_size = sizeof(struct afs_cache_cell), - .keys[0] = { CACHEFS_INDEX_KEYS_ASCIIZ, 64 }, - .match = afs_cell_cache_match, - .update = afs_cell_cache_update, +#include <linux/slab.h> +#include <linux/sched.h> +#include "internal.h" + +static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t buflen); +static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t buflen); +static enum fscache_checkaux afs_cell_cache_check_aux(void *cookie_netfs_data, + const void *buffer, + uint16_t buflen); + +static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t buflen); +static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t buflen); +static enum fscache_checkaux afs_vlocation_cache_check_aux( + void *cookie_netfs_data, const void *buffer, uint16_t buflen); + +static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t buflen); + +static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t buflen); +static void afs_vnode_cache_get_attr(const void *cookie_netfs_data, + uint64_t *size); +static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t buflen); +static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data, + const void *buffer, + uint16_t buflen); +static void afs_vnode_cache_now_uncached(void *cookie_netfs_data); + +struct fscache_netfs afs_cache_netfs = { + .name = "afs", + .version = 0, +}; + +struct fscache_cookie_def afs_cell_cache_index_def = { + .name = "AFS.cell", + .type = FSCACHE_COOKIE_TYPE_INDEX, + .get_key = afs_cell_cache_get_key, + .get_aux = afs_cell_cache_get_aux, + .check_aux = afs_cell_cache_check_aux, +}; + +struct fscache_cookie_def afs_vlocation_cache_index_def = { + .name = "AFS.vldb", + .type = FSCACHE_COOKIE_TYPE_INDEX, + .get_key = afs_vlocation_cache_get_key, + .get_aux = afs_vlocation_cache_get_aux, + .check_aux = afs_vlocation_cache_check_aux, +}; + +struct fscache_cookie_def afs_volume_cache_index_def = { + .name = "AFS.volume", + .type = FSCACHE_COOKIE_TYPE_INDEX, + .get_key = afs_volume_cache_get_key, +}; + +struct fscache_cookie_def afs_vnode_cache_index_def = { + .name = "AFS.vnode", + .type = FSCACHE_COOKIE_TYPE_DATAFILE, + .get_key = afs_vnode_cache_get_key, + .get_attr = afs_vnode_cache_get_attr, + .get_aux = afs_vnode_cache_get_aux, + .check_aux = afs_vnode_cache_check_aux, + .now_uncached = afs_vnode_cache_now_uncached, }; -#endif /* - * match a cell record obtained from the cache + * set the key for the index entry */ -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_cell_cache_match(void *target, - const void *entry) +static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) { - const struct afs_cache_cell *ccell = entry; - struct afs_cell *cell = target; + const struct afs_cell *cell = cookie_netfs_data; + uint16_t klen; - _enter("{%s},{%s}", ccell->name, cell->name); + _enter("%p,%p,%u", cell, buffer, bufmax); - if (strncmp(ccell->name, cell->name, sizeof(ccell->name)) == 0) { - _leave(" = SUCCESS"); - return CACHEFS_MATCH_SUCCESS; - } + klen = strlen(cell->name); + if (klen > bufmax) + return 0; - _leave(" = FAILED"); - return CACHEFS_MATCH_FAILED; + memcpy(buffer, cell->name, klen); + return klen; } -#endif /* - * update a cell record in the cache + * provide new auxilliary cache data */ -#ifdef AFS_CACHING_SUPPORT -static void afs_cell_cache_update(void *source, void *entry) +static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) { - struct afs_cache_cell *ccell = entry; - struct afs_cell *cell = source; + const struct afs_cell *cell = cookie_netfs_data; + uint16_t dlen; - _enter("%p,%p", source, entry); + _enter("%p,%p,%u", cell, buffer, bufmax); - strncpy(ccell->name, cell->name, sizeof(ccell->name)); + dlen = cell->vl_naddrs * sizeof(cell->vl_addrs[0]); + dlen = min(dlen, bufmax); + dlen &= ~(sizeof(cell->vl_addrs[0]) - 1); - memcpy(ccell->vl_servers, - cell->vl_addrs, - min(sizeof(ccell->vl_servers), sizeof(cell->vl_addrs))); + memcpy(buffer, cell->vl_addrs, dlen); + return dlen; +} +/* + * check that the auxilliary data indicates that the entry is still valid + */ +static enum fscache_checkaux afs_cell_cache_check_aux(void *cookie_netfs_data, + const void *buffer, + uint16_t buflen) +{ + _leave(" = OKAY"); + return FSCACHE_CHECKAUX_OKAY; } -#endif - -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_vlocation_cache_match(void *target, - const void *entry); -static void afs_vlocation_cache_update(void *source, void *entry); - -struct cachefs_index_def afs_vlocation_cache_index_def = { - .name = "vldb", - .data_size = sizeof(struct afs_cache_vlocation), - .keys[0] = { CACHEFS_INDEX_KEYS_ASCIIZ, 64 }, - .match = afs_vlocation_cache_match, - .update = afs_vlocation_cache_update, -}; -#endif +/*****************************************************************************/ /* - * match a VLDB record stored in the cache - * - may also load target from entry + * set the key for the index entry */ -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_vlocation_cache_match(void *target, - const void *entry) +static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) { - const struct afs_cache_vlocation *vldb = entry; - struct afs_vlocation *vlocation = target; + const struct afs_vlocation *vlocation = cookie_netfs_data; + uint16_t klen; + + _enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax); + + klen = strnlen(vlocation->vldb.name, sizeof(vlocation->vldb.name)); + if (klen > bufmax) + return 0; - _enter("{%s},{%s}", vlocation->vldb.name, vldb->name); + memcpy(buffer, vlocation->vldb.name, klen); - if (strncmp(vlocation->vldb.name, vldb->name, sizeof(vldb->name)) == 0 - ) { - if (!vlocation->valid || - vlocation->vldb.rtime == vldb->rtime + _leave(" = %u", klen); + return klen; +} + +/* + * provide new auxilliary cache data + */ +static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) +{ + const struct afs_vlocation *vlocation = cookie_netfs_data; + uint16_t dlen; + + _enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax); + + dlen = sizeof(struct afs_cache_vlocation); + dlen -= offsetof(struct afs_cache_vlocation, nservers); + if (dlen > bufmax) + return 0; + + memcpy(buffer, (uint8_t *)&vlocation->vldb.nservers, dlen); + + _leave(" = %u", dlen); + return dlen; +} + +/* + * check that the auxilliary data indicates that the entry is still valid + */ +static +enum fscache_checkaux afs_vlocation_cache_check_aux(void *cookie_netfs_data, + const void *buffer, + uint16_t buflen) +{ + const struct afs_cache_vlocation *cvldb; + struct afs_vlocation *vlocation = cookie_netfs_data; + uint16_t dlen; + + _enter("{%s},%p,%u", vlocation->vldb.name, buffer, buflen); + + /* check the size of the data is what we're expecting */ + dlen = sizeof(struct afs_cache_vlocation); + dlen -= offsetof(struct afs_cache_vlocation, nservers); + if (dlen != buflen) + return FSCACHE_CHECKAUX_OBSOLETE; + + cvldb = container_of(buffer, struct afs_cache_vlocation, nservers); + + /* if what's on disk is more valid than what's in memory, then use the + * VL record from the cache */ + if (!vlocation->valid || vlocation->vldb.rtime == cvldb->rtime) { + memcpy((uint8_t *)&vlocation->vldb.nservers, buffer, dlen); + vlocation->valid = 1; + _leave(" = SUCCESS [c->m]"); + return FSCACHE_CHECKAUX_OKAY; + } + + /* need to update the cache if the cached info differs */ + if (memcmp(&vlocation->vldb, buffer, dlen) != 0) { + /* delete if the volume IDs for this name differ */ + if (memcmp(&vlocation->vldb.vid, &cvldb->vid, + sizeof(cvldb->vid)) != 0 ) { - vlocation->vldb = *vldb; - vlocation->valid = 1; - _leave(" = SUCCESS [c->m]"); - return CACHEFS_MATCH_SUCCESS; - } else if (memcmp(&vlocation->vldb, vldb, sizeof(*vldb)) != 0) { - /* delete if VIDs for this name differ */ - if (memcmp(&vlocation->vldb.vid, - &vldb->vid, - sizeof(vldb->vid)) != 0) { - _leave(" = DELETE"); - return CACHEFS_MATCH_SUCCESS_DELETE; - } - - _leave(" = UPDATE"); - return CACHEFS_MATCH_SUCCESS_UPDATE; - } else { - _leave(" = SUCCESS"); - return CACHEFS_MATCH_SUCCESS; + _leave(" = OBSOLETE"); + return FSCACHE_CHECKAUX_OBSOLETE; } + + _leave(" = UPDATE"); + return FSCACHE_CHECKAUX_NEEDS_UPDATE; } - _leave(" = FAILED"); - return CACHEFS_MATCH_FAILED; + _leave(" = OKAY"); + return FSCACHE_CHECKAUX_OKAY; } -#endif +/*****************************************************************************/ /* - * update a VLDB record stored in the cache + * set the key for the volume index entry */ -#ifdef AFS_CACHING_SUPPORT -static void afs_vlocation_cache_update(void *source, void *entry) +static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) { - struct afs_cache_vlocation *vldb = entry; - struct afs_vlocation *vlocation = source; + const struct afs_volume *volume = cookie_netfs_data; + uint16_t klen; + + _enter("{%u},%p,%u", volume->type, buffer, bufmax); + + klen = sizeof(volume->type); + if (klen > bufmax) + return 0; - _enter(""); + memcpy(buffer, &volume->type, sizeof(volume->type)); + + _leave(" = %u", klen); + return klen; - *vldb = vlocation->vldb; } -#endif - -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_volume_cache_match(void *target, - const void *entry); -static void afs_volume_cache_update(void *source, void *entry); - -struct cachefs_index_def afs_volume_cache_index_def = { - .name = "volume", - .data_size = sizeof(struct afs_cache_vhash), - .keys[0] = { CACHEFS_INDEX_KEYS_BIN, 1 }, - .keys[1] = { CACHEFS_INDEX_KEYS_BIN, 1 }, - .match = afs_volume_cache_match, - .update = afs_volume_cache_update, -}; -#endif +/*****************************************************************************/ /* - * match a volume hash record stored in the cache + * set the key for the index entry */ -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_volume_cache_match(void *target, - const void *entry) +static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) { - const struct afs_cache_vhash *vhash = entry; - struct afs_volume *volume = target; + const struct afs_vnode *vnode = cookie_netfs_data; + uint16_t klen; - _enter("{%u},{%u}", volume->type, vhash->vtype); + _enter("{%x,%x,%llx},%p,%u", + vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version, + buffer, bufmax); - if (volume->type == vhash->vtype) { - _leave(" = SUCCESS"); - return CACHEFS_MATCH_SUCCESS; - } + klen = sizeof(vnode->fid.vnode); + if (klen > bufmax) + return 0; + + memcpy(buffer, &vnode->fid.vnode, sizeof(vnode->fid.vnode)); - _leave(" = FAILED"); - return CACHEFS_MATCH_FAILED; + _leave(" = %u", klen); + return klen; } -#endif /* - * update a volume hash record stored in the cache + * provide updated file attributes */ -#ifdef AFS_CACHING_SUPPORT -static void afs_volume_cache_update(void *source, void *entry) +static void afs_vnode_cache_get_attr(const void *cookie_netfs_data, + uint64_t *size) { - struct afs_cache_vhash *vhash = entry; - struct afs_volume *volume = source; + const struct afs_vnode *vnode = cookie_netfs_data; - _enter(""); + _enter("{%x,%x,%llx},", + vnode->fid.vnode, vnode->fid.unique, + vnode->status.data_version); - vhash->vtype = volume->type; + *size = vnode->status.size; } -#endif - -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_vnode_cache_match(void *target, - const void *entry); -static void afs_vnode_cache_update(void *source, void *entry); - -struct cachefs_index_def afs_vnode_cache_index_def = { - .name = "vnode", - .data_size = sizeof(struct afs_cache_vnode), - .keys[0] = { CACHEFS_INDEX_KEYS_BIN, 4 }, - .match = afs_vnode_cache_match, - .update = afs_vnode_cache_update, -}; -#endif /* - * match a vnode record stored in the cache + * provide new auxilliary cache data + */ +static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) +{ + const struct afs_vnode *vnode = cookie_netfs_data; + uint16_t dlen; + + _enter("{%x,%x,%Lx},%p,%u", + vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version, + buffer, bufmax); + + dlen = sizeof(vnode->fid.unique) + sizeof(vnode->status.data_version); + if (dlen > bufmax) + return 0; + + memcpy(buffer, &vnode->fid.unique, sizeof(vnode->fid.unique)); + buffer += sizeof(vnode->fid.unique); + memcpy(buffer, &vnode->status.data_version, + sizeof(vnode->status.data_version)); + + _leave(" = %u", dlen); + return dlen; +} + +/* + * check that the auxilliary data indicates that the entry is still valid */ -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_vnode_cache_match(void *target, - const void *entry) +static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data, + const void *buffer, + uint16_t buflen) { - const struct afs_cache_vnode *cvnode = entry; - struct afs_vnode *vnode = target; - - _enter("{%x,%x,%Lx},{%x,%x,%Lx}", - vnode->fid.vnode, - vnode->fid.unique, - vnode->status.version, - cvnode->vnode_id, - cvnode->vnode_unique, - cvnode->data_version); - - if (vnode->fid.vnode != cvnode->vnode_id) { - _leave(" = FAILED"); - return CACHEFS_MATCH_FAILED; + struct afs_vnode *vnode = cookie_netfs_data; + uint16_t dlen; + + _enter("{%x,%x,%llx},%p,%u", + vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version, + buffer, buflen); + + /* check the size of the data is what we're expecting */ + dlen = sizeof(vnode->fid.unique) + sizeof(vnode->status.data_version); + if (dlen != buflen) { + _leave(" = OBSOLETE [len %hx != %hx]", dlen, buflen); + return FSCACHE_CHECKAUX_OBSOLETE; } - if (vnode->fid.unique != cvnode->vnode_unique || - vnode->status.version != cvnode->data_version) { - _leave(" = DELETE"); - return CACHEFS_MATCH_SUCCESS_DELETE; + if (memcmp(buffer, + &vnode->fid.unique, + sizeof(vnode->fid.unique) + ) != 0) { + unsigned unique; + + memcpy(&unique, buffer, sizeof(unique)); + + _leave(" = OBSOLETE [uniq %x != %x]", + unique, vnode->fid.unique); + return FSCACHE_CHECKAUX_OBSOLETE; + } + + if (memcmp(buffer + sizeof(vnode->fid.unique), + &vnode->status.data_version, + sizeof(vnode->status.data_version) + ) != 0) { + afs_dataversion_t version; + + memcpy(&version, buffer + sizeof(vnode->fid.unique), + sizeof(version)); + + _leave(" = OBSOLETE [vers %llx != %llx]", + version, vnode->status.data_version); + return FSCACHE_CHECKAUX_OBSOLETE; } _leave(" = SUCCESS"); - return CACHEFS_MATCH_SUCCESS; + return FSCACHE_CHECKAUX_OKAY; } -#endif /* - * update a vnode record stored in the cache + * indication the cookie is no longer uncached + * - this function is called when the backing store currently caching a cookie + * is removed + * - the netfs should use this to clean up any markers indicating cached pages + * - this is mandatory for any object that may have data */ -#ifdef AFS_CACHING_SUPPORT -static void afs_vnode_cache_update(void *source, void *entry) +static void afs_vnode_cache_now_uncached(void *cookie_netfs_data) { - struct afs_cache_vnode *cvnode = entry; - struct afs_vnode *vnode = source; + struct afs_vnode *vnode = cookie_netfs_data; + struct pagevec pvec; + pgoff_t first; + int loop, nr_pages; + + _enter("{%x,%x,%Lx}", + vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version); + + pagevec_init(&pvec, 0); + first = 0; + + for (;;) { + /* grab a bunch of pages to clean */ + nr_pages = pagevec_lookup(&pvec, vnode->vfs_inode.i_mapping, + first, + PAGEVEC_SIZE - pagevec_count(&pvec)); + if (!nr_pages) + break; - _enter(""); + for (loop = 0; loop < nr_pages; loop++) + ClearPageFsCache(pvec.pages[loop]); + + first = pvec.pages[nr_pages - 1]->index + 1; + + pvec.nr = nr_pages; + pagevec_release(&pvec); + cond_resched(); + } - cvnode->vnode_id = vnode->fid.vnode; - cvnode->vnode_unique = vnode->fid.unique; - cvnode->data_version = vnode->status.version; + _leave(""); } -#endif diff --git a/fs/afs/cache.h b/fs/afs/cache.h index 36a3642cf90..5c4f6b499e9 100644 --- a/fs/afs/cache.h +++ b/fs/afs/cache.h @@ -1,6 +1,6 @@ /* AFS local cache management interface * - * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -9,15 +9,4 @@ * 2 of the License, or (at your option) any later version. */ -#ifndef AFS_CACHE_H -#define AFS_CACHE_H - -#undef AFS_CACHING_SUPPORT - -#include <linux/mm.h> -#ifdef AFS_CACHING_SUPPORT -#include <linux/cachefs.h> -#endif -#include "types.h" - -#endif /* AFS_CACHE_H */ +#include <linux/fscache.h> diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 5e1df14e16b..e19c13f059e 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -147,12 +147,11 @@ struct afs_cell *afs_cell_create(const char *name, char *vllist) if (ret < 0) goto error; -#ifdef AFS_CACHING_SUPPORT - /* put it up for caching */ - cachefs_acquire_cookie(afs_cache_netfs.primary_index, - &afs_vlocation_cache_index_def, - cell, - &cell->cache); +#ifdef CONFIG_AFS_FSCACHE + /* put it up for caching (this never returns an error) */ + cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index, + &afs_cell_cache_index_def, + cell); #endif /* add to the cell lists */ @@ -362,10 +361,9 @@ static void afs_cell_destroy(struct afs_cell *cell) list_del_init(&cell->proc_link); up_write(&afs_proc_cells_sem); -#ifdef AFS_CACHING_SUPPORT - cachefs_relinquish_cookie(cell->cache, 0); +#ifdef CONFIG_AFS_FSCACHE + fscache_relinquish_cookie(cell->cache, 0); #endif - key_put(cell->anonymous_key); kfree(cell); diff --git a/fs/afs/file.c b/fs/afs/file.c index a3901769a96..7a1d942ef68 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -23,6 +23,9 @@ static void afs_invalidatepage(struct page *page, unsigned long offset); static int afs_releasepage(struct page *page, gfp_t gfp_flags); static int afs_launder_page(struct page *page); +static int afs_readpages(struct file *filp, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages); + const struct file_operations afs_file_operations = { .open = afs_open, .release = afs_release, @@ -46,6 +49,7 @@ const struct inode_operations afs_file_inode_operations = { const struct address_space_operations afs_fs_aops = { .readpage = afs_readpage, + .readpages = afs_readpages, .set_page_dirty = afs_set_page_dirty, .launder_page = afs_launder_page, .releasepage = afs_releasepage, @@ -101,37 +105,18 @@ int afs_release(struct inode *inode, struct file *file) /* * deal with notification that a page was read from the cache */ -#ifdef AFS_CACHING_SUPPORT -static void afs_readpage_read_complete(void *cookie_data, - struct page *page, - void *data, - int error) +static void afs_file_readpage_read_complete(struct page *page, + void *data, + int error) { - _enter("%p,%p,%p,%d", cookie_data, page, data, error); + _enter("%p,%p,%d", page, data, error); - if (error) - SetPageError(page); - else + /* if the read completes with an error, we just unlock the page and let + * the VM reissue the readpage */ + if (!error) SetPageUptodate(page); unlock_page(page); - } -#endif - -/* - * deal with notification that a page was written to the cache - */ -#ifdef AFS_CACHING_SUPPORT -static void afs_readpage_write_complete(void *cookie_data, - struct page *page, - void *data, - int error) -{ - _enter("%p,%p,%p,%d", cookie_data, page, data, error); - - unlock_page(page); -} -#endif /* * AFS read page from file, directory or symlink @@ -161,9 +146,9 @@ static int afs_readpage(struct file *file, struct page *page) if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) goto error; -#ifdef AFS_CACHING_SUPPORT /* is it cached? */ - ret = cachefs_read_or_alloc_page(vnode->cache, +#ifdef CONFIG_AFS_FSCACHE + ret = fscache_read_or_alloc_page(vnode->cache, page, afs_file_readpage_read_complete, NULL, @@ -171,20 +156,21 @@ static int afs_readpage(struct file *file, struct page *page) #else ret = -ENOBUFS; #endif - switch (ret) { - /* read BIO submitted and wb-journal entry found */ - case 1: - BUG(); // TODO - handle wb-journal match - /* read BIO submitted (page in cache) */ case 0: break; - /* no page available in cache */ - case -ENOBUFS: + /* page not yet cached */ case -ENODATA: + _debug("cache said ENODATA"); + goto go_on; + + /* page will not be cached */ + case -ENOBUFS: + _debug("cache said ENOBUFS"); default: + go_on: offset = page->index << PAGE_CACHE_SHIFT; len = min_t(size_t, i_size_read(inode) - offset, PAGE_SIZE); @@ -198,27 +184,25 @@ static int afs_readpage(struct file *file, struct page *page) set_bit(AFS_VNODE_DELETED, &vnode->flags); ret = -ESTALE; } -#ifdef AFS_CACHING_SUPPORT - cachefs_uncache_page(vnode->cache, page); + +#ifdef CONFIG_AFS_FSCACHE + fscache_uncache_page(vnode->cache, page); #endif + BUG_ON(PageFsCache(page)); goto error; } SetPageUptodate(page); -#ifdef AFS_CACHING_SUPPORT - if (cachefs_write_page(vnode->cache, - page, - afs_file_readpage_write_complete, - NULL, - GFP_KERNEL) != 0 - ) { - cachefs_uncache_page(vnode->cache, page); - unlock_page(page); + /* send the page to the cache */ +#ifdef CONFIG_AFS_FSCACHE + if (PageFsCache(page) && + fscache_write_page(vnode->cache, page, GFP_KERNEL) != 0) { + fscache_uncache_page(vnode->cache, page); + BUG_ON(PageFsCache(page)); } -#else - unlock_page(page); #endif + unlock_page(page); } _leave(" = 0"); @@ -232,34 +216,59 @@ error: } /* - * invalidate part or all of a page + * read a set of pages */ -static void afs_invalidatepage(struct page *page, unsigned long offset) +static int afs_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) { - int ret = 1; + struct afs_vnode *vnode; + int ret = 0; - _enter("{%lu},%lu", page->index, offset); + _enter(",{%lu},,%d", mapping->host->i_ino, nr_pages); - BUG_ON(!PageLocked(page)); + vnode = AFS_FS_I(mapping->host); + if (vnode->flags & AFS_VNODE_DELETED) { + _leave(" = -ESTALE"); + return -ESTALE; + } - if (PagePrivate(page)) { - /* We release buffers only if the entire page is being - * invalidated. - * The get_block cached value has been unconditionally - * invalidated, so real IO is not possible anymore. - */ - if (offset == 0) { - BUG_ON(!PageLocked(page)); - - ret = 0; - if (!PageWriteback(page)) - ret = page->mapping->a_ops->releasepage(page, - 0); - /* possibly should BUG_ON(!ret); - neilb */ - } + /* attempt to read as many of the pages as possible */ +#ifdef CONFIG_AFS_FSCACHE + ret = fscache_read_or_alloc_pages(vnode->cache, + mapping, + pages, + &nr_pages, + afs_file_readpage_read_complete, + NULL, + mapping_gfp_mask(mapping)); +#else + ret = -ENOBUFS; +#endif + + switch (ret) { + /* all pages are being read from the cache */ + case 0: + BUG_ON(!list_empty(pages)); + BUG_ON(nr_pages != 0); + _leave(" = 0 [reading all]"); + return 0; + + /* there were pages that couldn't be read from the cache */ + case -ENODATA: + case -ENOBUFS: + break; + + /* other error */ + default: + _leave(" = %d", ret); + return ret; } - _leave(" = %d", ret); + /* load the missing pages from the network */ + ret = read_cache_pages(mapping, pages, (void *) afs_readpage, file); + + _leave(" = %d [netting]", ret); + return ret; } /* @@ -273,25 +282,82 @@ static int afs_launder_page(struct page *page) } /* - * release a page and cleanup its private data + * invalidate part or all of a page + * - release a page and clean up its private data if offset is 0 (indicating + * the entire page) + */ +static void afs_invalidatepage(struct page *page, unsigned long offset) +{ + struct afs_writeback *wb = (struct afs_writeback *) page_private(page); + + _enter("{%lu},%lu", page->index, offset); + + BUG_ON(!PageLocked(page)); + + /* we clean up only if the entire page is being invalidated */ + if (offset == 0) { +#ifdef CONFIG_AFS_FSCACHE + if (PageFsCache(page)) { + struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); + fscache_wait_on_page_write(vnode->cache, page); + fscache_uncache_page(vnode->cache, page); + ClearPageFsCache(page); + } +#endif + + if (PagePrivate(page)) { + if (wb && !PageWriteback(page)) { + set_page_private(page, 0); + afs_put_writeback(wb); + } + + if (!page_private(page)) + ClearPagePrivate(page); + } + } + + _leave(""); +} + +/* + * release a page and clean up its private state if it's not busy + * - return true if the page can now be released, false if not */ static int afs_releasepage(struct page *page, gfp_t gfp_flags) { + struct afs_writeback *wb = (struct afs_writeback *) page_private(page); struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); - struct afs_writeback *wb; _enter("{{%x:%u}[%lu],%lx},%x", vnode->fid.vid, vnode->fid.vnode, page->index, page->flags, gfp_flags); + /* deny if page is being written to the cache and the caller hasn't + * elected to wait */ +#ifdef CONFIG_AFS_FSCACHE + if (PageFsCache(page)) { + if (fscache_check_page_write(vnode->cache, page)) { + if (!(gfp_flags & __GFP_WAIT)) { + _leave(" = F [cache busy]"); + return 0; + } + fscache_wait_on_page_write(vnode->cache, page); + } + + fscache_uncache_page(vnode->cache, page); + ClearPageFsCache(page); + } +#endif + if (PagePrivate(page)) { - wb = (struct afs_writeback *) page_private(page); - ASSERT(wb != NULL); - set_page_private(page, 0); + if (wb) { + set_page_private(page, 0); + afs_put_writeback(wb); + } ClearPagePrivate(page); - afs_put_writeback(wb); } - _leave(" = 0"); - return 0; + /* indicate that the page can be released */ + _leave(" = T"); + return 1; } diff --git a/fs/afs/inode.c b/fs/afs/inode.c index bb47217f6a1..c048f065875 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -61,6 +61,11 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) return -EBADMSG; } +#ifdef CONFIG_AFS_FSCACHE + if (vnode->status.size != inode->i_size) + fscache_attr_changed(vnode->cache); +#endif + inode->i_nlink = vnode->status.nlink; inode->i_uid = vnode->status.owner; inode->i_gid = 0; @@ -149,15 +154,6 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, return inode; } -#ifdef AFS_CACHING_SUPPORT - /* set up caching before reading the status, as fetch-status reads the - * first page of symlinks to see if they're really mntpts */ - cachefs_acquire_cookie(vnode->volume->cache, - NULL, - vnode, - &vnode->cache); -#endif - if (!status) { /* it's a remotely extant inode */ set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); @@ -183,6 +179,15 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, } } + /* set up caching before mapping the status, as map-status reads the + * first page of symlinks to see if they're really mountpoints */ + inode->i_size = vnode->status.size; +#ifdef CONFIG_AFS_FSCACHE + vnode->cache = fscache_acquire_cookie(vnode->volume->cache, + &afs_vnode_cache_index_def, + vnode); +#endif + ret = afs_inode_map_status(vnode, key); if (ret < 0) goto bad_inode; @@ -196,6 +201,10 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, /* failure */ bad_inode: +#ifdef CONFIG_AFS_FSCACHE + fscache_relinquish_cookie(vnode->cache, 0); + vnode->cache = NULL; +#endif iget_failed(inode); _leave(" = %d [bad]", ret); return ERR_PTR(ret); @@ -340,8 +349,8 @@ void afs_clear_inode(struct inode *inode) ASSERT(list_empty(&vnode->writebacks)); ASSERT(!vnode->cb_promised); -#ifdef AFS_CACHING_SUPPORT - cachefs_relinquish_cookie(vnode->cache, 0); +#ifdef CONFIG_AFS_FSCACHE + fscache_relinquish_cookie(vnode->cache, 0); vnode->cache = NULL; #endif diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 67f259d99cd..106be66dafd 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -21,6 +21,7 @@ #include "afs.h" #include "afs_vl.h" +#include "cache.h" #define AFS_CELL_MAX_ADDRS 15 @@ -193,8 +194,8 @@ struct afs_cell { struct key *anonymous_key; /* anonymous user key for this cell */ struct list_head proc_link; /* /proc cell list link */ struct proc_dir_entry *proc_dir; /* /proc dir for this cell */ -#ifdef AFS_CACHING_SUPPORT - struct cachefs_cookie *cache; /* caching cookie */ +#ifdef CONFIG_AFS_FSCACHE + struct fscache_cookie *cache; /* caching cookie */ #endif /* server record management */ @@ -249,8 +250,8 @@ struct afs_vlocation { struct list_head grave; /* link in master graveyard list */ struct list_head update; /* link in master update list */ struct afs_cell *cell; /* cell to which volume belongs */ -#ifdef AFS_CACHING_SUPPORT - struct cachefs_cookie *cache; /* caching cookie */ +#ifdef CONFIG_AFS_FSCACHE + struct fscache_cookie *cache; /* caching cookie */ #endif struct afs_cache_vlocation vldb; /* volume information DB record */ struct afs_volume *vols[3]; /* volume access record pointer (index by type) */ @@ -302,8 +303,8 @@ struct afs_volume { atomic_t usage; struct afs_cell *cell; /* cell to which belongs (unrefd ptr) */ struct afs_vlocation *vlocation; /* volume location */ -#ifdef AFS_CACHING_SUPPORT - struct cachefs_cookie *cache; /* caching cookie */ +#ifdef CONFIG_AFS_FSCACHE + struct fscache_cookie *cache; /* caching cookie */ #endif afs_volid_t vid; /* volume ID */ afs_voltype_t type; /* type of volume */ @@ -333,8 +334,8 @@ struct afs_vnode { struct afs_server *server; /* server currently supplying this file */ struct afs_fid fid; /* the file identifier for this inode */ struct afs_file_status status; /* AFS status info for this file */ -#ifdef AFS_CACHING_SUPPORT - struct cachefs_cookie *cache; /* caching cookie */ +#ifdef CONFIG_AFS_FSCACHE + struct fscache_cookie *cache; /* caching cookie */ #endif struct afs_permits *permits; /* cache of permits so far obtained */ struct mutex permits_lock; /* lock for altering permits list */ @@ -428,6 +429,22 @@ struct afs_uuid { /*****************************************************************************/ /* + * cache.c + */ +#ifdef CONFIG_AFS_FSCACHE +extern struct fscache_netfs afs_cache_netfs; +extern struct fscache_cookie_def afs_cell_cache_index_def; +extern struct fscache_cookie_def afs_vlocation_cache_index_def; +extern struct fscache_cookie_def afs_volume_cache_index_def; +extern struct fscache_cookie_def afs_vnode_cache_index_def; +#else +#define afs_cell_cache_index_def (*(struct fscache_cookie_def *) NULL) +#define afs_vlocation_cache_index_def (*(struct fscache_cookie_def *) NULL) +#define afs_volume_cache_index_def (*(struct fscache_cookie_def *) NULL) +#define afs_vnode_cache_index_def (*(struct fscache_cookie_def *) NULL) +#endif + +/* * callback.c */ extern void afs_init_callback_state(struct afs_server *); @@ -446,9 +463,6 @@ extern void afs_callback_update_kill(void); */ extern struct rw_semaphore afs_proc_cells_sem; extern struct list_head afs_proc_cells; -#ifdef AFS_CACHING_SUPPORT -extern struct cachefs_index_def afs_cache_cell_index_def; -#endif #define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0) extern int afs_cell_init(char *); @@ -554,9 +568,6 @@ extern void afs_clear_inode(struct inode *); * main.c */ extern struct afs_uuid afs_uuid; -#ifdef AFS_CACHING_SUPPORT -extern struct cachefs_netfs afs_cache_netfs; -#endif /* * misc.c @@ -637,10 +648,6 @@ extern int afs_get_MAC_address(u8 *, size_t); /* * vlclient.c */ -#ifdef AFS_CACHING_SUPPORT -extern struct cachefs_index_def afs_vlocation_cache_index_def; -#endif - extern int afs_vl_get_entry_by_name(struct in_addr *, struct key *, const char *, struct afs_cache_vlocation *, const struct afs_wait_mode *); @@ -664,12 +671,6 @@ extern void afs_vlocation_purge(void); /* * vnode.c */ -#ifdef AFS_CACHING_SUPPORT -extern struct cachefs_index_def afs_vnode_cache_index_def; -#endif - -extern struct afs_timer_ops afs_vnode_cb_timed_out_ops; - static inline struct afs_vnode *AFS_FS_I(struct inode *inode) { return container_of(inode, struct afs_vnode, vfs_inode); @@ -711,10 +712,6 @@ extern int afs_vnode_release_lock(struct afs_vnode *, struct key *); /* * volume.c */ -#ifdef AFS_CACHING_SUPPORT -extern struct cachefs_index_def afs_volume_cache_index_def; -#endif - #define afs_get_volume(V) do { atomic_inc(&(V)->usage); } while(0) extern void afs_put_volume(struct afs_volume *); diff --git a/fs/afs/main.c b/fs/afs/main.c index 2d3e5d4fb9f..66d54d348c5 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -1,6 +1,6 @@ /* AFS client file system * - * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2002,5 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -29,18 +29,6 @@ static char *rootcell; module_param(rootcell, charp, 0); MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list"); -#ifdef AFS_CACHING_SUPPORT -static struct cachefs_netfs_operations afs_cache_ops = { - .get_page_cookie = afs_cache_get_page_cookie, -}; - -struct cachefs_netfs afs_cache_netfs = { - .name = "afs", - .version = 0, - .ops = &afs_cache_ops, -}; -#endif - struct afs_uuid afs_uuid; /* @@ -104,10 +92,9 @@ static int __init afs_init(void) if (ret < 0) return ret; -#ifdef AFS_CACHING_SUPPORT +#ifdef CONFIG_AFS_FSCACHE /* we want to be able to cache */ - ret = cachefs_register_netfs(&afs_cache_netfs, - &afs_cache_cell_index_def); + ret = fscache_register_netfs(&afs_cache_netfs); if (ret < 0) goto error_cache; #endif @@ -142,8 +129,8 @@ error_fs: error_open_socket: error_vl_update_init: error_cell_init: -#ifdef AFS_CACHING_SUPPORT - cachefs_unregister_netfs(&afs_cache_netfs); +#ifdef CONFIG_AFS_FSCACHE + fscache_unregister_netfs(&afs_cache_netfs); error_cache: #endif afs_callback_update_kill(); @@ -175,8 +162,8 @@ static void __exit afs_exit(void) afs_vlocation_purge(); flush_scheduled_work(); afs_cell_purge(); -#ifdef AFS_CACHING_SUPPORT - cachefs_unregister_netfs(&afs_cache_netfs); +#ifdef CONFIG_AFS_FSCACHE + fscache_unregister_netfs(&afs_cache_netfs); #endif afs_proc_cleanup(); rcu_barrier(); diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 78db4953a80..2b9e2d03a39 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -173,9 +173,9 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) if (PageError(page)) goto error; - buf = kmap(page); + buf = kmap_atomic(page, KM_USER0); memcpy(devname, buf, size); - kunmap(page); + kunmap_atomic(buf, KM_USER0); page_cache_release(page); page = NULL; diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index 849fc3160cb..ec2a7431e45 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c @@ -281,9 +281,8 @@ static void afs_vlocation_apply_update(struct afs_vlocation *vl, vl->vldb = *vldb; -#ifdef AFS_CACHING_SUPPORT - /* update volume entry in local cache */ - cachefs_update_cookie(vl->cache); +#ifdef CONFIG_AFS_FSCACHE + fscache_update_cookie(vl->cache); #endif } @@ -304,11 +303,9 @@ static int afs_vlocation_fill_in_record(struct afs_vlocation *vl, memset(&vldb, 0, sizeof(vldb)); /* see if we have an in-cache copy (will set vl->valid if there is) */ -#ifdef AFS_CACHING_SUPPORT - cachefs_acquire_cookie(cell->cache, - &afs_volume_cache_index_def, - vlocation, - &vl->cache); +#ifdef CONFIG_AFS_FSCACHE + vl->cache = fscache_acquire_cookie(vl->cell->cache, + &afs_vlocation_cache_index_def, vl); #endif if (vl->valid) { @@ -420,6 +417,11 @@ fill_in_record: spin_unlock(&vl->lock); wake_up(&vl->waitq); + /* update volume entry in local cache */ +#ifdef CONFIG_AFS_FSCACHE + fscache_update_cookie(vl->cache); +#endif + /* schedule for regular updates */ afs_vlocation_queue_for_updates(vl); goto success; @@ -465,7 +467,7 @@ found_in_memory: spin_unlock(&vl->lock); success: - _leave(" = %p",vl); + _leave(" = %p", vl); return vl; error_abandon: @@ -523,10 +525,9 @@ static void afs_vlocation_destroy(struct afs_vlocation *vl) { _enter("%p", vl); -#ifdef AFS_CACHING_SUPPORT - cachefs_relinquish_cookie(vl->cache, 0); +#ifdef CONFIG_AFS_FSCACHE + fscache_relinquish_cookie(vl->cache, 0); #endif - afs_put_cell(vl->cell); kfree(vl); } diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 8bab0e3437f..a353e69e239 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -124,13 +124,11 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params) } /* attach the cache and volume location */ -#ifdef AFS_CACHING_SUPPORT - cachefs_acquire_cookie(vlocation->cache, - &afs_vnode_cache_index_def, - volume, - &volume->cache); +#ifdef CONFIG_AFS_FSCACHE + volume->cache = fscache_acquire_cookie(vlocation->cache, + &afs_volume_cache_index_def, + volume); #endif - afs_get_vlocation(vlocation); volume->vlocation = vlocation; @@ -194,8 +192,8 @@ void afs_put_volume(struct afs_volume *volume) up_write(&vlocation->cell->vl_sem); /* finish cleaning up the volume */ -#ifdef AFS_CACHING_SUPPORT - cachefs_relinquish_cookie(volume->cache, 0); +#ifdef CONFIG_AFS_FSCACHE + fscache_relinquish_cookie(volume->cache, 0); #endif afs_put_vlocation(vlocation); diff --git a/fs/afs/write.c b/fs/afs/write.c index 3fb36d43362..c2e7a7ff008 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -780,3 +780,24 @@ int afs_fsync(struct file *file, struct dentry *dentry, int datasync) _leave(" = %d", ret); return ret; } + +/* + * notification that a previously read-only page is about to become writable + * - if it returns an error, the caller will deliver a bus error signal + */ +int afs_page_mkwrite(struct vm_area_struct *vma, struct page *page) +{ + struct afs_vnode *vnode = AFS_FS_I(vma->vm_file->f_mapping->host); + + _enter("{{%x:%u}},{%lx}", + vnode->fid.vid, vnode->fid.vnode, page->index); + + /* wait for the page to be written to the cache before we allow it to + * be modified */ +#ifdef CONFIG_AFS_FSCACHE + fscache_wait_on_page_write(vnode->cache, page); +#endif + + _leave(" = 0"); + return 0; +} diff --git a/fs/cachefiles/Kconfig b/fs/cachefiles/Kconfig new file mode 100644 index 00000000000..80e9c6167f0 --- /dev/null +++ b/fs/cachefiles/Kconfig @@ -0,0 +1,39 @@ + +config CACHEFILES + tristate "Filesystem caching on files" + depends on FSCACHE && BLOCK + help + This permits use of a mounted filesystem as a cache for other + filesystems - primarily networking filesystems - thus allowing fast + local disk to enhance the speed of slower devices. + + See Documentation/filesystems/caching/cachefiles.txt for more + information. + +config CACHEFILES_DEBUG + bool "Debug CacheFiles" + depends on CACHEFILES + help + This permits debugging to be dynamically enabled in the filesystem + caching on files module. If this is set, the debugging output may be + enabled by setting bits in /sys/modules/cachefiles/parameter/debug or + by including a debugging specifier in /etc/cachefilesd.conf. + +config CACHEFILES_HISTOGRAM + bool "Gather latency information on CacheFiles" + depends on CACHEFILES && PROC_FS + help + + This option causes latency information to be gathered on CacheFiles + operation and exported through file: + + /proc/fs/cachefiles/histogram + + The generation of this histogram adds a certain amount of overhead to + execution as there are a number of points at which data is gathered, + and on a multi-CPU system these may be on cachelines that keep + bouncing between CPUs. On the other hand, the histogram may be + useful for debugging purposes. Saying 'N' here is recommended. + + See Documentation/filesystems/caching/cachefiles.txt for more + information. diff --git a/fs/cachefiles/Makefile b/fs/cachefiles/Makefile new file mode 100644 index 00000000000..32cbab0ffce --- /dev/null +++ b/fs/cachefiles/Makefile @@ -0,0 +1,18 @@ +# +# Makefile for caching in a mounted filesystem +# + +cachefiles-y := \ + bind.o \ + daemon.o \ + interface.o \ + key.o \ + main.o \ + namei.o \ + rdwr.o \ + security.o \ + xattr.o + +cachefiles-$(CONFIG_CACHEFILES_HISTOGRAM) += proc.o + +obj-$(CONFIG_CACHEFILES) := cachefiles.o diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c new file mode 100644 index 00000000000..3797e0077b3 --- /dev/null +++ b/fs/cachefiles/bind.c @@ -0,0 +1,286 @@ +/* Bind and unbind a cache from the filesystem backing it + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/completion.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/namei.h> +#include <linux/mount.h> +#include <linux/statfs.h> +#include <linux/ctype.h> +#include "internal.h" + +static int cachefiles_daemon_add_cache(struct cachefiles_cache *caches); + +/* + * bind a directory as a cache + */ +int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args) +{ + _enter("{%u,%u,%u,%u,%u,%u},%s", + cache->frun_percent, + cache->fcull_percent, + cache->fstop_percent, + cache->brun_percent, + cache->bcull_percent, + cache->bstop_percent, + args); + + /* start by checking things over */ + ASSERT(cache->fstop_percent >= 0 && + cache->fstop_percent < cache->fcull_percent && + cache->fcull_percent < cache->frun_percent && + cache->frun_percent < 100); + + ASSERT(cache->bstop_percent >= 0 && + cache->bstop_percent < cache->bcull_percent && + cache->bcull_percent < cache->brun_percent && + cache->brun_percent < 100); + + if (*args) { + kerror("'bind' command doesn't take an argument"); + return -EINVAL; + } + + if (!cache->rootdirname) { + kerror("No cache directory specified"); + return -EINVAL; + } + + /* don't permit already bound caches to be re-bound */ + if (test_bit(CACHEFILES_READY, &cache->flags)) { + kerror("Cache already bound"); + return -EBUSY; + } + + /* make sure we have copies of the tag and dirname strings */ + if (!cache->tag) { + /* the tag string is released by the fops->release() + * function, so we don't release it on error here */ + cache->tag = kstrdup("CacheFiles", GFP_KERNEL); + if (!cache->tag) + return -ENOMEM; + } + + /* add the cache */ + return cachefiles_daemon_add_cache(cache); +} + +/* + * add a cache + */ +static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache) +{ + struct cachefiles_object *fsdef; + struct nameidata nd; + struct kstatfs stats; + struct dentry *graveyard, *cachedir, *root; + const struct cred *saved_cred; + int ret; + + _enter(""); + + /* we want to work under the module's security ID */ + ret = cachefiles_get_security_ID(cache); + if (ret < 0) + return ret; + + cachefiles_begin_secure(cache, &saved_cred); + + /* allocate the root index object */ + ret = -ENOMEM; + + fsdef = kmem_cache_alloc(cachefiles_object_jar, GFP_KERNEL); + if (!fsdef) + goto error_root_object; + + ASSERTCMP(fsdef->backer, ==, NULL); + + atomic_set(&fsdef->usage, 1); + fsdef->type = FSCACHE_COOKIE_TYPE_INDEX; + + _debug("- fsdef %p", fsdef); + + /* look up the directory at the root of the cache */ + memset(&nd, 0, sizeof(nd)); + + ret = path_lookup(cache->rootdirname, LOOKUP_DIRECTORY, &nd); + if (ret < 0) + goto error_open_root; + + cache->mnt = mntget(nd.path.mnt); + root = dget(nd.path.dentry); + path_put(&nd.path); + + /* check parameters */ + ret = -EOPNOTSUPP; + if (!root->d_inode || + !root->d_inode->i_op || + !root->d_inode->i_op->lookup || + !root->d_inode->i_op->mkdir || + !root->d_inode->i_op->setxattr || + !root->d_inode->i_op->getxattr || + !root->d_sb || + !root->d_sb->s_op || + !root->d_sb->s_op->statfs || + !root->d_sb->s_op->sync_fs) + goto error_unsupported; + + ret = -EROFS; + if (root->d_sb->s_flags & MS_RDONLY) + goto error_unsupported; + + /* determine the security of the on-disk cache as this governs + * security ID of files we create */ + ret = cachefiles_determine_cache_security(cache, root, &saved_cred); + if (ret < 0) + goto error_unsupported; + + /* get the cache size and blocksize */ + ret = vfs_statfs(root, &stats); + if (ret < 0) + goto error_unsupported; + + ret = -ERANGE; + if (stats.f_bsize <= 0) + goto error_unsupported; + + ret = -EOPNOTSUPP; + if (stats.f_bsize > PAGE_SIZE) + goto error_unsupported; + + cache->bsize = stats.f_bsize; + cache->bshift = 0; + if (stats.f_bsize < PAGE_SIZE) + cache->bshift = PAGE_SHIFT - ilog2(stats.f_bsize); + + _debug("blksize %u (shift %u)", + cache->bsize, cache->bshift); + + _debug("size %llu, avail %llu", + (unsigned long long) stats.f_blocks, + (unsigned long long) stats.f_bavail); + + /* set up caching limits */ + do_div(stats.f_files, 100); + cache->fstop = stats.f_files * cache->fstop_percent; + cache->fcull = stats.f_files * cache->fcull_percent; + cache->frun = stats.f_files * cache->frun_percent; + + _debug("limits {%llu,%llu,%llu} files", + (unsigned long long) cache->frun, + (unsigned long long) cache->fcull, + (unsigned long long) cache->fstop); + + stats.f_blocks >>= cache->bshift; + do_div(stats.f_blocks, 100); + cache->bstop = stats.f_blocks * cache->bstop_percent; + cache->bcull = stats.f_blocks * cache->bcull_percent; + cache->brun = stats.f_blocks * cache->brun_percent; + + _debug("limits {%llu,%llu,%llu} blocks", + (unsigned long long) cache->brun, + (unsigned long long) cache->bcull, + (unsigned long long) cache->bstop); + + /* get the cache directory and check its type */ + cachedir = cachefiles_get_directory(cache, root, "cache"); + if (IS_ERR(cachedir)) { + ret = PTR_ERR(cachedir); + goto error_unsupported; + } + + fsdef->dentry = cachedir; + fsdef->fscache.cookie = NULL; + + ret = cachefiles_check_object_type(fsdef); + if (ret < 0) + goto error_unsupported; + + /* get the graveyard directory */ + graveyard = cachefiles_get_directory(cache, root, "graveyard"); + if (IS_ERR(graveyard)) { + ret = PTR_ERR(graveyard); + goto error_unsupported; + } + + cache->graveyard = graveyard; + + /* publish the cache */ + fscache_init_cache(&cache->cache, + &cachefiles_cache_ops, + "%s", + fsdef->dentry->d_sb->s_id); + + fscache_object_init(&fsdef->fscache, NULL, &cache->cache); + + ret = fscache_add_cache(&cache->cache, &fsdef->fscache, cache->tag); + if (ret < 0) + goto error_add_cache; + + /* done */ + set_bit(CACHEFILES_READY, &cache->flags); + dput(root); + + printk(KERN_INFO "CacheFiles:" + " File cache on %s registered\n", + cache->cache.identifier); + + /* check how much space the cache has */ + cachefiles_has_space(cache, 0, 0); + cachefiles_end_secure(cache, saved_cred); + return 0; + +error_add_cache: + dput(cache->graveyard); + cache->graveyard = NULL; +error_unsupported: + mntput(cache->mnt); + cache->mnt = NULL; + dput(fsdef->dentry); + fsdef->dentry = NULL; + dput(root); +error_open_root: + kmem_cache_free(cachefiles_object_jar, fsdef); +error_root_object: + cachefiles_end_secure(cache, saved_cred); + kerror("Failed to register: %d", ret); + return ret; +} + +/* + * unbind a cache on fd release + */ +void cachefiles_daemon_unbind(struct cachefiles_cache *cache) +{ + _enter(""); + + if (test_bit(CACHEFILES_READY, &cache->flags)) { + printk(KERN_INFO "CacheFiles:" + " File cache on %s unregistering\n", + cache->cache.identifier); + + fscache_withdraw_cache(&cache->cache); + } + + dput(cache->graveyard); + mntput(cache->mnt); + + kfree(cache->rootdirname); + kfree(cache->secctx); + kfree(cache->tag); + + _leave(""); +} diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c new file mode 100644 index 00000000000..4618516dd99 --- /dev/null +++ b/fs/cachefiles/daemon.c @@ -0,0 +1,755 @@ +/* Daemon interface + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/completion.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/namei.h> +#include <linux/poll.h> +#include <linux/mount.h> +#include <linux/statfs.h> +#include <linux/ctype.h> +#include <linux/fs_struct.h> +#include "internal.h" + +static int cachefiles_daemon_open(struct inode *, struct file *); +static int cachefiles_daemon_release(struct inode *, struct file *); +static ssize_t cachefiles_daemon_read(struct file *, char __user *, size_t, + loff_t *); +static ssize_t cachefiles_daemon_write(struct file *, const char __user *, + size_t, loff_t *); +static unsigned int cachefiles_daemon_poll(struct file *, + struct poll_table_struct *); +static int cachefiles_daemon_frun(struct cachefiles_cache *, char *); +static int cachefiles_daemon_fcull(struct cachefiles_cache *, char *); +static int cachefiles_daemon_fstop(struct cachefiles_cache *, char *); +static int cachefiles_daemon_brun(struct cachefiles_cache *, char *); +static int cachefiles_daemon_bcull(struct cachefiles_cache *, char *); +static int cachefiles_daemon_bstop(struct cachefiles_cache *, char *); +static int cachefiles_daemon_cull(struct cachefiles_cache *, char *); +static int cachefiles_daemon_debug(struct cachefiles_cache *, char *); +static int cachefiles_daemon_dir(struct cachefiles_cache *, char *); +static int cachefiles_daemon_inuse(struct cachefiles_cache *, char *); +static int cachefiles_daemon_secctx(struct cachefiles_cache *, char *); +static int cachefiles_daemon_tag(struct cachefiles_cache *, char *); + +static unsigned long cachefiles_open; + +const struct file_operations cachefiles_daemon_fops = { + .owner = THIS_MODULE, + .open = cachefiles_daemon_open, + .release = cachefiles_daemon_release, + .read = cachefiles_daemon_read, + .write = cachefiles_daemon_write, + .poll = cachefiles_daemon_poll, +}; + +struct cachefiles_daemon_cmd { + char name[8]; + int (*handler)(struct cachefiles_cache *cache, char *args); +}; + +static const struct cachefiles_daemon_cmd cachefiles_daemon_cmds[] = { + { "bind", cachefiles_daemon_bind }, + { "brun", cachefiles_daemon_brun }, + { "bcull", cachefiles_daemon_bcull }, + { "bstop", cachefiles_daemon_bstop }, + { "cull", cachefiles_daemon_cull }, + { "debug", cachefiles_daemon_debug }, + { "dir", cachefiles_daemon_dir }, + { "frun", cachefiles_daemon_frun }, + { "fcull", cachefiles_daemon_fcull }, + { "fstop", cachefiles_daemon_fstop }, + { "inuse", cachefiles_daemon_inuse }, + { "secctx", cachefiles_daemon_secctx }, + { "tag", cachefiles_daemon_tag }, + { "", NULL } +}; + + +/* + * do various checks + */ +static int cachefiles_daemon_open(struct inode *inode, struct file *file) +{ + struct cachefiles_cache *cache; + + _enter(""); + + /* only the superuser may do this */ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + /* the cachefiles device may only be open once at a time */ + if (xchg(&cachefiles_open, 1) == 1) + return -EBUSY; + + /* allocate a cache record */ + cache = kzalloc(sizeof(struct cachefiles_cache), GFP_KERNEL); + if (!cache) { + cachefiles_open = 0; + return -ENOMEM; + } + + mutex_init(&cache->daemon_mutex); + cache->active_nodes = RB_ROOT; + rwlock_init(&cache->active_lock); + init_waitqueue_head(&cache->daemon_pollwq); + + /* set default caching limits + * - limit at 1% free space and/or free files + * - cull below 5% free space and/or free files + * - cease culling above 7% free space and/or free files + */ + cache->frun_percent = 7; + cache->fcull_percent = 5; + cache->fstop_percent = 1; + cache->brun_percent = 7; + cache->bcull_percent = 5; + cache->bstop_percent = 1; + + file->private_data = cache; + cache->cachefilesd = file; + return 0; +} + +/* + * release a cache + */ +static int cachefiles_daemon_release(struct inode *inode, struct file *file) +{ + struct cachefiles_cache *cache = file->private_data; + + _enter(""); + + ASSERT(cache); + + set_bit(CACHEFILES_DEAD, &cache->flags); + + cachefiles_daemon_unbind(cache); + + ASSERT(!cache->active_nodes.rb_node); + + /* clean up the control file interface */ + cache->cachefilesd = NULL; + file->private_data = NULL; + cachefiles_open = 0; + + kfree(cache); + + _leave(""); + return 0; +} + +/* + * read the cache state + */ +static ssize_t cachefiles_daemon_read(struct file *file, char __user *_buffer, + size_t buflen, loff_t *pos) +{ + struct cachefiles_cache *cache = file->private_data; + char buffer[256]; + int n; + + //_enter(",,%zu,", buflen); + + if (!test_bit(CACHEFILES_READY, &cache->flags)) + return 0; + + /* check how much space the cache has */ + cachefiles_has_space(cache, 0, 0); + + /* summarise */ + clear_bit(CACHEFILES_STATE_CHANGED, &cache->flags); + + n = snprintf(buffer, sizeof(buffer), + "cull=%c" + " frun=%llx" + " fcull=%llx" + " fstop=%llx" + " brun=%llx" + " bcull=%llx" + " bstop=%llx", + test_bit(CACHEFILES_CULLING, &cache->flags) ? '1' : '0', + (unsigned long long) cache->frun, + (unsigned long long) cache->fcull, + (unsigned long long) cache->fstop, + (unsigned long long) cache->brun, + (unsigned long long) cache->bcull, + (unsigned long long) cache->bstop + ); + + if (n > buflen) + return -EMSGSIZE; + + if (copy_to_user(_buffer, buffer, n) != 0) + return -EFAULT; + + return n; +} + +/* + * command the cache + */ +static ssize_t cachefiles_daemon_write(struct file *file, + const char __user *_data, + size_t datalen, + loff_t *pos) +{ + const struct cachefiles_daemon_cmd *cmd; + struct cachefiles_cache *cache = file->private_data; + ssize_t ret; + char *data, *args, *cp; + + //_enter(",,%zu,", datalen); + + ASSERT(cache); + + if (test_bit(CACHEFILES_DEAD, &cache->flags)) + return -EIO; + + if (datalen < 0 || datalen > PAGE_SIZE - 1) + return -EOPNOTSUPP; + + /* drag the command string into the kernel so we can parse it */ + data = kmalloc(datalen + 1, GFP_KERNEL); + if (!data) + return -ENOMEM; + + ret = -EFAULT; + if (copy_from_user(data, _data, datalen) != 0) + goto error; + + data[datalen] = '\0'; + + ret = -EINVAL; + if (memchr(data, '\0', datalen)) + goto error; + + /* strip any newline */ + cp = memchr(data, '\n', datalen); + if (cp) { + if (cp == data) + goto error; + + *cp = '\0'; + } + + /* parse the command */ + ret = -EOPNOTSUPP; + + for (args = data; *args; args++) + if (isspace(*args)) + break; + if (*args) { + if (args == data) + goto error; + *args = '\0'; + for (args++; isspace(*args); args++) + continue; + } + + /* run the appropriate command handler */ + for (cmd = cachefiles_daemon_cmds; cmd->name[0]; cmd++) + if (strcmp(cmd->name, data) == 0) + goto found_command; + +error: + kfree(data); + //_leave(" = %zd", ret); + return ret; + +found_command: + mutex_lock(&cache->daemon_mutex); + + ret = -EIO; + if (!test_bit(CACHEFILES_DEAD, &cache->flags)) + ret = cmd->handler(cache, args); + + mutex_unlock(&cache->daemon_mutex); + + if (ret == 0) + ret = datalen; + goto error; +} + +/* + * poll for culling state + * - use POLLOUT to indicate culling state + */ +static unsigned int cachefiles_daemon_poll(struct file *file, + struct poll_table_struct *poll) +{ + struct cachefiles_cache *cache = file->private_data; + unsigned int mask; + + poll_wait(file, &cache->daemon_pollwq, poll); + mask = 0; + + if (test_bit(CACHEFILES_STATE_CHANGED, &cache->flags)) + mask |= POLLIN; + + if (test_bit(CACHEFILES_CULLING, &cache->flags)) + mask |= POLLOUT; + + return mask; +} + +/* + * give a range error for cache space constraints + * - can be tail-called + */ +static int cachefiles_daemon_range_error(struct cachefiles_cache *cache, + char *args) +{ + kerror("Free space limits must be in range" + " 0%%<=stop<cull<run<100%%"); + + return -EINVAL; +} + +/* + * set the percentage of files at which to stop culling + * - command: "frun <N>%" + */ +static int cachefiles_daemon_frun(struct cachefiles_cache *cache, char *args) +{ + unsigned long frun; + + _enter(",%s", args); + + if (!*args) + return -EINVAL; + + frun = simple_strtoul(args, &args, 10); + if (args[0] != '%' || args[1] != '\0') + return -EINVAL; + + if (frun <= cache->fcull_percent || frun >= 100) + return cachefiles_daemon_range_error(cache, args); + + cache->frun_percent = frun; + return 0; +} + +/* + * set the percentage of files at which to start culling + * - command: "fcull <N>%" + */ +static int cachefiles_daemon_fcull(struct cachefiles_cache *cache, char *args) +{ + unsigned long fcull; + + _enter(",%s", args); + + if (!*args) + return -EINVAL; + + fcull = simple_strtoul(args, &args, 10); + if (args[0] != '%' || args[1] != '\0') + return -EINVAL; + + if (fcull <= cache->fstop_percent || fcull >= cache->frun_percent) + return cachefiles_daemon_range_error(cache, args); + + cache->fcull_percent = fcull; + return 0; +} + +/* + * set the percentage of files at which to stop allocating + * - command: "fstop <N>%" + */ +static int cachefiles_daemon_fstop(struct cachefiles_cache *cache, char *args) +{ + unsigned long fstop; + + _enter(",%s", args); + + if (!*args) + return -EINVAL; + + fstop = simple_strtoul(args, &args, 10); + if (args[0] != '%' || args[1] != '\0') + return -EINVAL; + + if (fstop < 0 || fstop >= cache->fcull_percent) + return cachefiles_daemon_range_error(cache, args); + + cache->fstop_percent = fstop; + return 0; +} + +/* + * set the percentage of blocks at which to stop culling + * - command: "brun <N>%" + */ +static int cachefiles_daemon_brun(struct cachefiles_cache *cache, char *args) +{ + unsigned long brun; + + _enter(",%s", args); + + if (!*args) + return -EINVAL; + + brun = simple_strtoul(args, &args, 10); + if (args[0] != '%' || args[1] != '\0') + return -EINVAL; + + if (brun <= cache->bcull_percent || brun >= 100) + return cachefiles_daemon_range_error(cache, args); + + cache->brun_percent = brun; + return 0; +} + +/* + * set the percentage of blocks at which to start culling + * - command: "bcull <N>%" + */ +static int cachefiles_daemon_bcull(struct cachefiles_cache *cache, char *args) +{ + unsigned long bcull; + + _enter(",%s", args); + + if (!*args) + return -EINVAL; + + bcull = simple_strtoul(args, &args, 10); + if (args[0] != '%' || args[1] != '\0') + return -EINVAL; + + if (bcull <= cache->bstop_percent || bcull >= cache->brun_percent) + return cachefiles_daemon_range_error(cache, args); + + cache->bcull_percent = bcull; + return 0; +} + +/* + * set the percentage of blocks at which to stop allocating + * - command: "bstop <N>%" + */ +static int cachefiles_daemon_bstop(struct cachefiles_cache *cache, char *args) +{ + unsigned long bstop; + + _enter(",%s", args); + + if (!*args) + return -EINVAL; + + bstop = simple_strtoul(args, &args, 10); + if (args[0] != '%' || args[1] != '\0') + return -EINVAL; + + if (bstop < 0 || bstop >= cache->bcull_percent) + return cachefiles_daemon_range_error(cache, args); + + cache->bstop_percent = bstop; + return 0; +} + +/* + * set the cache directory + * - command: "dir <name>" + */ +static int cachefiles_daemon_dir(struct cachefiles_cache *cache, char *args) +{ + char *dir; + + _enter(",%s", args); + + if (!*args) { + kerror("Empty directory specified"); + return -EINVAL; + } + + if (cache->rootdirname) { + kerror("Second cache directory specified"); + return -EEXIST; + } + + dir = kstrdup(args, GFP_KERNEL); + if (!dir) + return -ENOMEM; + + cache->rootdirname = dir; + return 0; +} + +/* + * set the cache security context + * - command: "secctx <ctx>" + */ +static int cachefiles_daemon_secctx(struct cachefiles_cache *cache, char *args) +{ + char *secctx; + + _enter(",%s", args); + + if (!*args) { + kerror("Empty security context specified"); + return -EINVAL; + } + + if (cache->secctx) { + kerror("Second security context specified"); + return -EINVAL; + } + + secctx = kstrdup(args, GFP_KERNEL); + if (!secctx) + return -ENOMEM; + + cache->secctx = secctx; + return 0; +} + +/* + * set the cache tag + * - command: "tag <name>" + */ +static int cachefiles_daemon_tag(struct cachefiles_cache *cache, char *args) +{ + char *tag; + + _enter(",%s", args); + + if (!*args) { + kerror("Empty tag specified"); + return -EINVAL; + } + + if (cache->tag) + return -EEXIST; + + tag = kstrdup(args, GFP_KERNEL); + if (!tag) + return -ENOMEM; + + cache->tag = tag; + return 0; +} + +/* + * request a node in the cache be culled from the current working directory + * - command: "cull <name>" + */ +static int cachefiles_daemon_cull(struct cachefiles_cache *cache, char *args) +{ + struct fs_struct *fs; + struct dentry *dir; + const struct cred *saved_cred; + int ret; + + _enter(",%s", args); + + if (strchr(args, '/')) + goto inval; + + if (!test_bit(CACHEFILES_READY, &cache->flags)) { + kerror("cull applied to unready cache"); + return -EIO; + } + + if (test_bit(CACHEFILES_DEAD, &cache->flags)) { + kerror("cull applied to dead cache"); + return -EIO; + } + + /* extract the directory dentry from the cwd */ + fs = current->fs; + read_lock(&fs->lock); + dir = dget(fs->pwd.dentry); + read_unlock(&fs->lock); + + if (!S_ISDIR(dir->d_inode->i_mode)) + goto notdir; + + cachefiles_begin_secure(cache, &saved_cred); + ret = cachefiles_cull(cache, dir, args); + cachefiles_end_secure(cache, saved_cred); + + dput(dir); + _leave(" = %d", ret); + return ret; + +notdir: + dput(dir); + kerror("cull command requires dirfd to be a directory"); + return -ENOTDIR; + +inval: + kerror("cull command requires dirfd and filename"); + return -EINVAL; +} + +/* + * set debugging mode + * - command: "debug <mask>" + */ +static int cachefiles_daemon_debug(struct cachefiles_cache *cache, char *args) +{ + unsigned long mask; + + _enter(",%s", args); + + mask = simple_strtoul(args, &args, 0); + if (args[0] != '\0') + goto inval; + + cachefiles_debug = mask; + _leave(" = 0"); + return 0; + +inval: + kerror("debug command requires mask"); + return -EINVAL; +} + +/* + * find out whether an object in the current working directory is in use or not + * - command: "inuse <name>" + */ +static int cachefiles_daemon_inuse(struct cachefiles_cache *cache, char *args) +{ + struct fs_struct *fs; + struct dentry *dir; + const struct cred *saved_cred; + int ret; + + //_enter(",%s", args); + + if (strchr(args, '/')) + goto inval; + + if (!test_bit(CACHEFILES_READY, &cache->flags)) { + kerror("inuse applied to unready cache"); + return -EIO; + } + + if (test_bit(CACHEFILES_DEAD, &cache->flags)) { + kerror("inuse applied to dead cache"); + return -EIO; + } + + /* extract the directory dentry from the cwd */ + fs = current->fs; + read_lock(&fs->lock); + dir = dget(fs->pwd.dentry); + read_unlock(&fs->lock); + + if (!S_ISDIR(dir->d_inode->i_mode)) + goto notdir; + + cachefiles_begin_secure(cache, &saved_cred); + ret = cachefiles_check_in_use(cache, dir, args); + cachefiles_end_secure(cache, saved_cred); + + dput(dir); + //_leave(" = %d", ret); + return ret; + +notdir: + dput(dir); + kerror("inuse command requires dirfd to be a directory"); + return -ENOTDIR; + +inval: + kerror("inuse command requires dirfd and filename"); + return -EINVAL; +} + +/* + * see if we have space for a number of pages and/or a number of files in the + * cache + */ +int cachefiles_has_space(struct cachefiles_cache *cache, + unsigned fnr, unsigned bnr) +{ + struct kstatfs stats; + int ret; + + //_enter("{%llu,%llu,%llu,%llu,%llu,%llu},%u,%u", + // (unsigned long long) cache->frun, + // (unsigned long long) cache->fcull, + // (unsigned long long) cache->fstop, + // (unsigned long long) cache->brun, + // (unsigned long long) cache->bcull, + // (unsigned long long) cache->bstop, + // fnr, bnr); + + /* find out how many pages of blockdev are available */ + memset(&stats, 0, sizeof(stats)); + + ret = vfs_statfs(cache->mnt->mnt_root, &stats); + if (ret < 0) { + if (ret == -EIO) + cachefiles_io_error(cache, "statfs failed"); + _leave(" = %d", ret); + return ret; + } + + stats.f_bavail >>= cache->bshift; + + //_debug("avail %llu,%llu", + // (unsigned long long) stats.f_ffree, + // (unsigned long long) stats.f_bavail); + + /* see if there is sufficient space */ + if (stats.f_ffree > fnr) + stats.f_ffree -= fnr; + else + stats.f_ffree = 0; + + if (stats.f_bavail > bnr) + stats.f_bavail -= bnr; + else + stats.f_bavail = 0; + + ret = -ENOBUFS; + if (stats.f_ffree < cache->fstop || + stats.f_bavail < cache->bstop) + goto begin_cull; + + ret = 0; + if (stats.f_ffree < cache->fcull || + stats.f_bavail < cache->bcull) + goto begin_cull; + + if (test_bit(CACHEFILES_CULLING, &cache->flags) && + stats.f_ffree >= cache->frun && + stats.f_bavail >= cache->brun && + test_and_clear_bit(CACHEFILES_CULLING, &cache->flags) + ) { + _debug("cease culling"); + cachefiles_state_changed(cache); + } + + //_leave(" = 0"); + return 0; + +begin_cull: + if (!test_and_set_bit(CACHEFILES_CULLING, &cache->flags)) { + _debug("### CULL CACHE ###"); + cachefiles_state_changed(cache); + } + + _leave(" = %d", ret); + return ret; +} diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c new file mode 100644 index 00000000000..1e962348d11 --- /dev/null +++ b/fs/cachefiles/interface.c @@ -0,0 +1,449 @@ +/* FS-Cache interface to CacheFiles + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/mount.h> +#include <linux/buffer_head.h> +#include "internal.h" + +#define list_to_page(head) (list_entry((head)->prev, struct page, lru)) + +struct cachefiles_lookup_data { + struct cachefiles_xattr *auxdata; /* auxiliary data */ + char *key; /* key path */ +}; + +static int cachefiles_attr_changed(struct fscache_object *_object); + +/* + * allocate an object record for a cookie lookup and prepare the lookup data + */ +static struct fscache_object *cachefiles_alloc_object( + struct fscache_cache *_cache, + struct fscache_cookie *cookie) +{ + struct cachefiles_lookup_data *lookup_data; + struct cachefiles_object *object; + struct cachefiles_cache *cache; + struct cachefiles_xattr *auxdata; + unsigned keylen, auxlen; + void *buffer; + char *key; + + cache = container_of(_cache, struct cachefiles_cache, cache); + + _enter("{%s},%p,", cache->cache.identifier, cookie); + + lookup_data = kmalloc(sizeof(*lookup_data), GFP_KERNEL); + if (!lookup_data) + goto nomem_lookup_data; + + /* create a new object record and a temporary leaf image */ + object = kmem_cache_alloc(cachefiles_object_jar, GFP_KERNEL); + if (!object) + goto nomem_object; + + ASSERTCMP(object->backer, ==, NULL); + + BUG_ON(test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)); + atomic_set(&object->usage, 1); + + fscache_object_init(&object->fscache, cookie, &cache->cache); + + object->type = cookie->def->type; + + /* get hold of the raw key + * - stick the length on the front and leave space on the back for the + * encoder + */ + buffer = kmalloc((2 + 512) + 3, GFP_KERNEL); + if (!buffer) + goto nomem_buffer; + + keylen = cookie->def->get_key(cookie->netfs_data, buffer + 2, 512); + ASSERTCMP(keylen, <, 512); + + *(uint16_t *)buffer = keylen; + ((char *)buffer)[keylen + 2] = 0; + ((char *)buffer)[keylen + 3] = 0; + ((char *)buffer)[keylen + 4] = 0; + + /* turn the raw key into something that can work with as a filename */ + key = cachefiles_cook_key(buffer, keylen + 2, object->type); + if (!key) + goto nomem_key; + + /* get hold of the auxiliary data and prepend the object type */ + auxdata = buffer; + auxlen = 0; + if (cookie->def->get_aux) { + auxlen = cookie->def->get_aux(cookie->netfs_data, + auxdata->data, 511); + ASSERTCMP(auxlen, <, 511); + } + + auxdata->len = auxlen + 1; + auxdata->type = cookie->def->type; + + lookup_data->auxdata = auxdata; + lookup_data->key = key; + object->lookup_data = lookup_data; + + _leave(" = %p [%p]", &object->fscache, lookup_data); + return &object->fscache; + +nomem_key: + kfree(buffer); +nomem_buffer: + BUG_ON(test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)); + kmem_cache_free(cachefiles_object_jar, object); + fscache_object_destroyed(&cache->cache); +nomem_object: + kfree(lookup_data); +nomem_lookup_data: + _leave(" = -ENOMEM"); + return ERR_PTR(-ENOMEM); +} + +/* + * attempt to look up the nominated node in this cache + */ +static void cachefiles_lookup_object(struct fscache_object *_object) +{ + struct cachefiles_lookup_data *lookup_data; + struct cachefiles_object *parent, *object; + struct cachefiles_cache *cache; + const struct cred *saved_cred; + int ret; + + _enter("{OBJ%x}", _object->debug_id); + + cache = container_of(_object->cache, struct cachefiles_cache, cache); + parent = container_of(_object->parent, + struct cachefiles_object, fscache); + object = container_of(_object, struct cachefiles_object, fscache); + lookup_data = object->lookup_data; + + ASSERTCMP(lookup_data, !=, NULL); + + /* look up the key, creating any missing bits */ + cachefiles_begin_secure(cache, &saved_cred); + ret = cachefiles_walk_to_object(parent, object, + lookup_data->key, + lookup_data->auxdata); + cachefiles_end_secure(cache, saved_cred); + + /* polish off by setting the attributes of non-index files */ + if (ret == 0 && + object->fscache.cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) + cachefiles_attr_changed(&object->fscache); + + if (ret < 0) { + printk(KERN_WARNING "CacheFiles: Lookup failed error %d\n", + ret); + fscache_object_lookup_error(&object->fscache); + } + + _leave(" [%d]", ret); +} + +/* + * indication of lookup completion + */ +static void cachefiles_lookup_complete(struct fscache_object *_object) +{ + struct cachefiles_object *object; + + object = container_of(_object, struct cachefiles_object, fscache); + + _enter("{OBJ%x,%p}", object->fscache.debug_id, object->lookup_data); + + if (object->lookup_data) { + kfree(object->lookup_data->key); + kfree(object->lookup_data->auxdata); + kfree(object->lookup_data); + object->lookup_data = NULL; + } +} + +/* + * increment the usage count on an inode object (may fail if unmounting) + */ +static +struct fscache_object *cachefiles_grab_object(struct fscache_object *_object) +{ + struct cachefiles_object *object = + container_of(_object, struct cachefiles_object, fscache); + + _enter("{OBJ%x,%d}", _object->debug_id, atomic_read(&object->usage)); + +#ifdef CACHEFILES_DEBUG_SLAB + ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000); +#endif + + atomic_inc(&object->usage); + return &object->fscache; +} + +/* + * update the auxilliary data for an object object on disk + */ +static void cachefiles_update_object(struct fscache_object *_object) +{ + struct cachefiles_object *object; + struct cachefiles_xattr *auxdata; + struct cachefiles_cache *cache; + struct fscache_cookie *cookie; + const struct cred *saved_cred; + unsigned auxlen; + + _enter("{OBJ%x}", _object->debug_id); + + object = container_of(_object, struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, struct cachefiles_cache, + cache); + cookie = object->fscache.cookie; + + if (!cookie->def->get_aux) { + _leave(" [no aux]"); + return; + } + + auxdata = kmalloc(2 + 512 + 3, GFP_KERNEL); + if (!auxdata) { + _leave(" [nomem]"); + return; + } + + auxlen = cookie->def->get_aux(cookie->netfs_data, auxdata->data, 511); + ASSERTCMP(auxlen, <, 511); + + auxdata->len = auxlen + 1; + auxdata->type = cookie->def->type; + + cachefiles_begin_secure(cache, &saved_cred); + cachefiles_update_object_xattr(object, auxdata); + cachefiles_end_secure(cache, saved_cred); + kfree(auxdata); + _leave(""); +} + +/* + * discard the resources pinned by an object and effect retirement if + * requested + */ +static void cachefiles_drop_object(struct fscache_object *_object) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + const struct cred *saved_cred; + + ASSERT(_object); + + object = container_of(_object, struct cachefiles_object, fscache); + + _enter("{OBJ%x,%d}", + object->fscache.debug_id, atomic_read(&object->usage)); + + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + +#ifdef CACHEFILES_DEBUG_SLAB + ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000); +#endif + + /* delete retired objects */ + if (object->fscache.state == FSCACHE_OBJECT_RECYCLING && + _object != cache->cache.fsdef + ) { + _debug("- retire object OBJ%x", object->fscache.debug_id); + cachefiles_begin_secure(cache, &saved_cred); + cachefiles_delete_object(cache, object); + cachefiles_end_secure(cache, saved_cred); + } + + /* close the filesystem stuff attached to the object */ + if (object->backer != object->dentry) + dput(object->backer); + object->backer = NULL; + + /* note that the object is now inactive */ + if (test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)) { + write_lock(&cache->active_lock); + if (!test_and_clear_bit(CACHEFILES_OBJECT_ACTIVE, + &object->flags)) + BUG(); + rb_erase(&object->active_node, &cache->active_nodes); + wake_up_bit(&object->flags, CACHEFILES_OBJECT_ACTIVE); + write_unlock(&cache->active_lock); + } + + dput(object->dentry); + object->dentry = NULL; + + _leave(""); +} + +/* + * dispose of a reference to an object + */ +static void cachefiles_put_object(struct fscache_object *_object) +{ + struct cachefiles_object *object; + struct fscache_cache *cache; + + ASSERT(_object); + + object = container_of(_object, struct cachefiles_object, fscache); + + _enter("{OBJ%x,%d}", + object->fscache.debug_id, atomic_read(&object->usage)); + +#ifdef CACHEFILES_DEBUG_SLAB + ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000); +#endif + + ASSERTIFCMP(object->fscache.parent, + object->fscache.parent->n_children, >, 0); + + if (atomic_dec_and_test(&object->usage)) { + _debug("- kill object OBJ%x", object->fscache.debug_id); + + ASSERT(!test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)); + ASSERTCMP(object->fscache.parent, ==, NULL); + ASSERTCMP(object->backer, ==, NULL); + ASSERTCMP(object->dentry, ==, NULL); + ASSERTCMP(object->fscache.n_ops, ==, 0); + ASSERTCMP(object->fscache.n_children, ==, 0); + + if (object->lookup_data) { + kfree(object->lookup_data->key); + kfree(object->lookup_data->auxdata); + kfree(object->lookup_data); + object->lookup_data = NULL; + } + + cache = object->fscache.cache; + kmem_cache_free(cachefiles_object_jar, object); + fscache_object_destroyed(cache); + } + + _leave(""); +} + +/* + * sync a cache + */ +static void cachefiles_sync_cache(struct fscache_cache *_cache) +{ + struct cachefiles_cache *cache; + const struct cred *saved_cred; + int ret; + + _enter("%p", _cache); + + cache = container_of(_cache, struct cachefiles_cache, cache); + + /* make sure all pages pinned by operations on behalf of the netfs are + * written to disc */ + cachefiles_begin_secure(cache, &saved_cred); + ret = fsync_super(cache->mnt->mnt_sb); + cachefiles_end_secure(cache, saved_cred); + + if (ret == -EIO) + cachefiles_io_error(cache, + "Attempt to sync backing fs superblock" + " returned error %d", + ret); +} + +/* + * notification the attributes on an object have changed + * - called with reads/writes excluded by FS-Cache + */ +static int cachefiles_attr_changed(struct fscache_object *_object) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + const struct cred *saved_cred; + struct iattr newattrs; + uint64_t ni_size; + loff_t oi_size; + int ret; + + _object->cookie->def->get_attr(_object->cookie->netfs_data, &ni_size); + + _enter("{OBJ%x},[%llu]", + _object->debug_id, (unsigned long long) ni_size); + + object = container_of(_object, struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + + if (ni_size == object->i_size) + return 0; + + if (!object->backer) + return -ENOBUFS; + + ASSERT(S_ISREG(object->backer->d_inode->i_mode)); + + fscache_set_store_limit(&object->fscache, ni_size); + + oi_size = i_size_read(object->backer->d_inode); + if (oi_size == ni_size) + return 0; + + newattrs.ia_size = ni_size; + newattrs.ia_valid = ATTR_SIZE; + + cachefiles_begin_secure(cache, &saved_cred); + mutex_lock(&object->backer->d_inode->i_mutex); + ret = notify_change(object->backer, &newattrs); + mutex_unlock(&object->backer->d_inode->i_mutex); + cachefiles_end_secure(cache, saved_cred); + + if (ret == -EIO) { + fscache_set_store_limit(&object->fscache, 0); + cachefiles_io_error_obj(object, "Size set failed"); + ret = -ENOBUFS; + } + + _leave(" = %d", ret); + return ret; +} + +/* + * dissociate a cache from all the pages it was backing + */ +static void cachefiles_dissociate_pages(struct fscache_cache *cache) +{ + _enter(""); +} + +const struct fscache_cache_ops cachefiles_cache_ops = { + .name = "cachefiles", + .alloc_object = cachefiles_alloc_object, + .lookup_object = cachefiles_lookup_object, + .lookup_complete = cachefiles_lookup_complete, + .grab_object = cachefiles_grab_object, + .update_object = cachefiles_update_object, + .drop_object = cachefiles_drop_object, + .put_object = cachefiles_put_object, + .sync_cache = cachefiles_sync_cache, + .attr_changed = cachefiles_attr_changed, + .read_or_alloc_page = cachefiles_read_or_alloc_page, + .read_or_alloc_pages = cachefiles_read_or_alloc_pages, + .allocate_page = cachefiles_allocate_page, + .allocate_pages = cachefiles_allocate_pages, + .write_page = cachefiles_write_page, + .uncache_page = cachefiles_uncache_page, + .dissociate_pages = cachefiles_dissociate_pages, +}; diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h new file mode 100644 index 00000000000..19218e1463d --- /dev/null +++ b/fs/cachefiles/internal.h @@ -0,0 +1,360 @@ +/* General netfs cache on cache files internal defs + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/fscache-cache.h> +#include <linux/timer.h> +#include <linux/wait.h> +#include <linux/workqueue.h> +#include <linux/security.h> + +struct cachefiles_cache; +struct cachefiles_object; + +extern unsigned cachefiles_debug; +#define CACHEFILES_DEBUG_KENTER 1 +#define CACHEFILES_DEBUG_KLEAVE 2 +#define CACHEFILES_DEBUG_KDEBUG 4 + +/* + * node records + */ +struct cachefiles_object { + struct fscache_object fscache; /* fscache handle */ + struct cachefiles_lookup_data *lookup_data; /* cached lookup data */ + struct dentry *dentry; /* the file/dir representing this object */ + struct dentry *backer; /* backing file */ + loff_t i_size; /* object size */ + unsigned long flags; +#define CACHEFILES_OBJECT_ACTIVE 0 /* T if marked active */ + atomic_t usage; /* object usage count */ + uint8_t type; /* object type */ + uint8_t new; /* T if object new */ + spinlock_t work_lock; + struct rb_node active_node; /* link in active tree (dentry is key) */ +}; + +extern struct kmem_cache *cachefiles_object_jar; + +/* + * Cache files cache definition + */ +struct cachefiles_cache { + struct fscache_cache cache; /* FS-Cache record */ + struct vfsmount *mnt; /* mountpoint holding the cache */ + struct dentry *graveyard; /* directory into which dead objects go */ + struct file *cachefilesd; /* manager daemon handle */ + const struct cred *cache_cred; /* security override for accessing cache */ + struct mutex daemon_mutex; /* command serialisation mutex */ + wait_queue_head_t daemon_pollwq; /* poll waitqueue for daemon */ + struct rb_root active_nodes; /* active nodes (can't be culled) */ + rwlock_t active_lock; /* lock for active_nodes */ + atomic_t gravecounter; /* graveyard uniquifier */ + unsigned frun_percent; /* when to stop culling (% files) */ + unsigned fcull_percent; /* when to start culling (% files) */ + unsigned fstop_percent; /* when to stop allocating (% files) */ + unsigned brun_percent; /* when to stop culling (% blocks) */ + unsigned bcull_percent; /* when to start culling (% blocks) */ + unsigned bstop_percent; /* when to stop allocating (% blocks) */ + unsigned bsize; /* cache's block size */ + unsigned bshift; /* min(ilog2(PAGE_SIZE / bsize), 0) */ + uint64_t frun; /* when to stop culling */ + uint64_t fcull; /* when to start culling */ + uint64_t fstop; /* when to stop allocating */ + sector_t brun; /* when to stop culling */ + sector_t bcull; /* when to start culling */ + sector_t bstop; /* when to stop allocating */ + unsigned long flags; +#define CACHEFILES_READY 0 /* T if cache prepared */ +#define CACHEFILES_DEAD 1 /* T if cache dead */ +#define CACHEFILES_CULLING 2 /* T if cull engaged */ +#define CACHEFILES_STATE_CHANGED 3 /* T if state changed (poll trigger) */ + char *rootdirname; /* name of cache root directory */ + char *secctx; /* LSM security context */ + char *tag; /* cache binding tag */ +}; + +/* + * backing file read tracking + */ +struct cachefiles_one_read { + wait_queue_t monitor; /* link into monitored waitqueue */ + struct page *back_page; /* backing file page we're waiting for */ + struct page *netfs_page; /* netfs page we're going to fill */ + struct fscache_retrieval *op; /* retrieval op covering this */ + struct list_head op_link; /* link in op's todo list */ +}; + +/* + * backing file write tracking + */ +struct cachefiles_one_write { + struct page *netfs_page; /* netfs page to copy */ + struct cachefiles_object *object; + struct list_head obj_link; /* link in object's lists */ + fscache_rw_complete_t end_io_func; + void *context; +}; + +/* + * auxiliary data xattr buffer + */ +struct cachefiles_xattr { + uint16_t len; + uint8_t type; + uint8_t data[]; +}; + +/* + * note change of state for daemon + */ +static inline void cachefiles_state_changed(struct cachefiles_cache *cache) +{ + set_bit(CACHEFILES_STATE_CHANGED, &cache->flags); + wake_up_all(&cache->daemon_pollwq); +} + +/* + * cf-bind.c + */ +extern int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args); +extern void cachefiles_daemon_unbind(struct cachefiles_cache *cache); + +/* + * cf-daemon.c + */ +extern const struct file_operations cachefiles_daemon_fops; + +extern int cachefiles_has_space(struct cachefiles_cache *cache, + unsigned fnr, unsigned bnr); + +/* + * cf-interface.c + */ +extern const struct fscache_cache_ops cachefiles_cache_ops; + +/* + * cf-key.c + */ +extern char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type); + +/* + * cf-namei.c + */ +extern int cachefiles_delete_object(struct cachefiles_cache *cache, + struct cachefiles_object *object); +extern int cachefiles_walk_to_object(struct cachefiles_object *parent, + struct cachefiles_object *object, + const char *key, + struct cachefiles_xattr *auxdata); +extern struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, + struct dentry *dir, + const char *name); + +extern int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, + char *filename); + +extern int cachefiles_check_in_use(struct cachefiles_cache *cache, + struct dentry *dir, char *filename); + +/* + * cf-proc.c + */ +#ifdef CONFIG_CACHEFILES_HISTOGRAM +extern atomic_t cachefiles_lookup_histogram[HZ]; +extern atomic_t cachefiles_mkdir_histogram[HZ]; +extern atomic_t cachefiles_create_histogram[HZ]; + +extern int __init cachefiles_proc_init(void); +extern void cachefiles_proc_cleanup(void); +static inline +void cachefiles_hist(atomic_t histogram[], unsigned long start_jif) +{ + unsigned long jif = jiffies - start_jif; + if (jif >= HZ) + jif = HZ - 1; + atomic_inc(&histogram[jif]); +} + +#else +#define cachefiles_proc_init() (0) +#define cachefiles_proc_cleanup() do {} while (0) +#define cachefiles_hist(hist, start_jif) do {} while (0) +#endif + +/* + * cf-rdwr.c + */ +extern int cachefiles_read_or_alloc_page(struct fscache_retrieval *, + struct page *, gfp_t); +extern int cachefiles_read_or_alloc_pages(struct fscache_retrieval *, + struct list_head *, unsigned *, + gfp_t); +extern int cachefiles_allocate_page(struct fscache_retrieval *, struct page *, + gfp_t); +extern int cachefiles_allocate_pages(struct fscache_retrieval *, + struct list_head *, unsigned *, gfp_t); +extern int cachefiles_write_page(struct fscache_storage *, struct page *); +extern void cachefiles_uncache_page(struct fscache_object *, struct page *); + +/* + * cf-security.c + */ +extern int cachefiles_get_security_ID(struct cachefiles_cache *cache); +extern int cachefiles_determine_cache_security(struct cachefiles_cache *cache, + struct dentry *root, + const struct cred **_saved_cred); + +static inline void cachefiles_begin_secure(struct cachefiles_cache *cache, + const struct cred **_saved_cred) +{ + *_saved_cred = override_creds(cache->cache_cred); +} + +static inline void cachefiles_end_secure(struct cachefiles_cache *cache, + const struct cred *saved_cred) +{ + revert_creds(saved_cred); +} + +/* + * cf-xattr.c + */ +extern int cachefiles_check_object_type(struct cachefiles_object *object); +extern int cachefiles_set_object_xattr(struct cachefiles_object *object, + struct cachefiles_xattr *auxdata); +extern int cachefiles_update_object_xattr(struct cachefiles_object *object, + struct cachefiles_xattr *auxdata); +extern int cachefiles_check_object_xattr(struct cachefiles_object *object, + struct cachefiles_xattr *auxdata); +extern int cachefiles_remove_object_xattr(struct cachefiles_cache *cache, + struct dentry *dentry); + + +/* + * error handling + */ +#define kerror(FMT, ...) printk(KERN_ERR "CacheFiles: "FMT"\n", ##__VA_ARGS__) + +#define cachefiles_io_error(___cache, FMT, ...) \ +do { \ + kerror("I/O Error: " FMT, ##__VA_ARGS__); \ + fscache_io_error(&(___cache)->cache); \ + set_bit(CACHEFILES_DEAD, &(___cache)->flags); \ +} while (0) + +#define cachefiles_io_error_obj(object, FMT, ...) \ +do { \ + struct cachefiles_cache *___cache; \ + \ + ___cache = container_of((object)->fscache.cache, \ + struct cachefiles_cache, cache); \ + cachefiles_io_error(___cache, FMT, ##__VA_ARGS__); \ +} while (0) + + +/* + * debug tracing + */ +#define dbgprintk(FMT, ...) \ + printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) + +/* make sure we maintain the format strings, even when debugging is disabled */ +static inline void _dbprintk(const char *fmt, ...) + __attribute__((format(printf, 1, 2))); +static inline void _dbprintk(const char *fmt, ...) +{ +} + +#define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) +#define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) +#define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__) + + +#if defined(__KDEBUG) +#define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__) +#define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__) +#define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__) + +#elif defined(CONFIG_CACHEFILES_DEBUG) +#define _enter(FMT, ...) \ +do { \ + if (cachefiles_debug & CACHEFILES_DEBUG_KENTER) \ + kenter(FMT, ##__VA_ARGS__); \ +} while (0) + +#define _leave(FMT, ...) \ +do { \ + if (cachefiles_debug & CACHEFILES_DEBUG_KLEAVE) \ + kleave(FMT, ##__VA_ARGS__); \ +} while (0) + +#define _debug(FMT, ...) \ +do { \ + if (cachefiles_debug & CACHEFILES_DEBUG_KDEBUG) \ + kdebug(FMT, ##__VA_ARGS__); \ +} while (0) + +#else +#define _enter(FMT, ...) _dbprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) +#define _leave(FMT, ...) _dbprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) +#define _debug(FMT, ...) _dbprintk(FMT, ##__VA_ARGS__) +#endif + +#if 1 /* defined(__KDEBUGALL) */ + +#define ASSERT(X) \ +do { \ + if (unlikely(!(X))) { \ + printk(KERN_ERR "\n"); \ + printk(KERN_ERR "CacheFiles: Assertion failed\n"); \ + BUG(); \ + } \ +} while (0) + +#define ASSERTCMP(X, OP, Y) \ +do { \ + if (unlikely(!((X) OP (Y)))) { \ + printk(KERN_ERR "\n"); \ + printk(KERN_ERR "CacheFiles: Assertion failed\n"); \ + printk(KERN_ERR "%lx " #OP " %lx is false\n", \ + (unsigned long)(X), (unsigned long)(Y)); \ + BUG(); \ + } \ +} while (0) + +#define ASSERTIF(C, X) \ +do { \ + if (unlikely((C) && !(X))) { \ + printk(KERN_ERR "\n"); \ + printk(KERN_ERR "CacheFiles: Assertion failed\n"); \ + BUG(); \ + } \ +} while (0) + +#define ASSERTIFCMP(C, X, OP, Y) \ +do { \ + if (unlikely((C) && !((X) OP (Y)))) { \ + printk(KERN_ERR "\n"); \ + printk(KERN_ERR "CacheFiles: Assertion failed\n"); \ + printk(KERN_ERR "%lx " #OP " %lx is false\n", \ + (unsigned long)(X), (unsigned long)(Y)); \ + BUG(); \ + } \ +} while (0) + +#else + +#define ASSERT(X) do {} while (0) +#define ASSERTCMP(X, OP, Y) do {} while (0) +#define ASSERTIF(C, X) do {} while (0) +#define ASSERTIFCMP(C, X, OP, Y) do {} while (0) + +#endif diff --git a/fs/cachefiles/key.c b/fs/cachefiles/key.c new file mode 100644 index 00000000000..81b8b2b3a67 --- /dev/null +++ b/fs/cachefiles/key.c @@ -0,0 +1,159 @@ +/* Key to pathname encoder + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/slab.h> +#include "internal.h" + +static const char cachefiles_charmap[64] = + "0123456789" /* 0 - 9 */ + "abcdefghijklmnopqrstuvwxyz" /* 10 - 35 */ + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" /* 36 - 61 */ + "_-" /* 62 - 63 */ + ; + +static const char cachefiles_filecharmap[256] = { + /* we skip space and tab and control chars */ + [33 ... 46] = 1, /* '!' -> '.' */ + /* we skip '/' as it's significant to pathwalk */ + [48 ... 127] = 1, /* '0' -> '~' */ +}; + +/* + * turn the raw key into something cooked + * - the raw key should include the length in the two bytes at the front + * - the key may be up to 514 bytes in length (including the length word) + * - "base64" encode the strange keys, mapping 3 bytes of raw to four of + * cooked + * - need to cut the cooked key into 252 char lengths (189 raw bytes) + */ +char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type) +{ + unsigned char csum, ch; + unsigned int acc; + char *key; + int loop, len, max, seg, mark, print; + + _enter(",%d", keylen); + + BUG_ON(keylen < 2 || keylen > 514); + + csum = raw[0] + raw[1]; + print = 1; + for (loop = 2; loop < keylen; loop++) { + ch = raw[loop]; + csum += ch; + print &= cachefiles_filecharmap[ch]; + } + + if (print) { + /* if the path is usable ASCII, then we render it directly */ + max = keylen - 2; + max += 2; /* two base64'd length chars on the front */ + max += 5; /* @checksum/M */ + max += 3 * 2; /* maximum number of segment dividers (".../M") + * is ((514 + 251) / 252) = 3 + */ + max += 1; /* NUL on end */ + } else { + /* calculate the maximum length of the cooked key */ + keylen = (keylen + 2) / 3; + + max = keylen * 4; + max += 5; /* @checksum/M */ + max += 3 * 2; /* maximum number of segment dividers (".../M") + * is ((514 + 188) / 189) = 3 + */ + max += 1; /* NUL on end */ + } + + max += 1; /* 2nd NUL on end */ + + _debug("max: %d", max); + + key = kmalloc(max, GFP_KERNEL); + if (!key) + return NULL; + + len = 0; + + /* build the cooked key */ + sprintf(key, "@%02x%c+", (unsigned) csum, 0); + len = 5; + mark = len - 1; + + if (print) { + acc = *(uint16_t *) raw; + raw += 2; + + key[len + 1] = cachefiles_charmap[acc & 63]; + acc >>= 6; + key[len] = cachefiles_charmap[acc & 63]; + len += 2; + + seg = 250; + for (loop = keylen; loop > 0; loop--) { + if (seg <= 0) { + key[len++] = '\0'; + mark = len; + key[len++] = '+'; + seg = 252; + } + + key[len++] = *raw++; + ASSERT(len < max); + } + + switch (type) { + case FSCACHE_COOKIE_TYPE_INDEX: type = 'I'; break; + case FSCACHE_COOKIE_TYPE_DATAFILE: type = 'D'; break; + default: type = 'S'; break; + } + } else { + seg = 252; + for (loop = keylen; loop > 0; loop--) { + if (seg <= 0) { + key[len++] = '\0'; + mark = len; + key[len++] = '+'; + seg = 252; + } + + acc = *raw++; + acc |= *raw++ << 8; + acc |= *raw++ << 16; + + _debug("acc: %06x", acc); + + key[len++] = cachefiles_charmap[acc & 63]; + acc >>= 6; + key[len++] = cachefiles_charmap[acc & 63]; + acc >>= 6; + key[len++] = cachefiles_charmap[acc & 63]; + acc >>= 6; + key[len++] = cachefiles_charmap[acc & 63]; + + ASSERT(len < max); + } + + switch (type) { + case FSCACHE_COOKIE_TYPE_INDEX: type = 'J'; break; + case FSCACHE_COOKIE_TYPE_DATAFILE: type = 'E'; break; + default: type = 'T'; break; + } + } + + key[mark] = type; + key[len++] = 0; + key[len] = 0; + + _leave(" = %p %d", key, len); + return key; +} diff --git a/fs/cachefiles/main.c b/fs/cachefiles/main.c new file mode 100644 index 00000000000..4bfa8cf43bf --- /dev/null +++ b/fs/cachefiles/main.c @@ -0,0 +1,106 @@ +/* Network filesystem caching backend to use cache files on a premounted + * filesystem + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/completion.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/namei.h> +#include <linux/mount.h> +#include <linux/statfs.h> +#include <linux/sysctl.h> +#include <linux/miscdevice.h> +#include "internal.h" + +unsigned cachefiles_debug; +module_param_named(debug, cachefiles_debug, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(cachefiles_debug, "CacheFiles debugging mask"); + +MODULE_DESCRIPTION("Mounted-filesystem based cache"); +MODULE_AUTHOR("Red Hat, Inc."); +MODULE_LICENSE("GPL"); + +struct kmem_cache *cachefiles_object_jar; + +static struct miscdevice cachefiles_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "cachefiles", + .fops = &cachefiles_daemon_fops, +}; + +static void cachefiles_object_init_once(void *_object) +{ + struct cachefiles_object *object = _object; + + memset(object, 0, sizeof(*object)); + spin_lock_init(&object->work_lock); +} + +/* + * initialise the fs caching module + */ +static int __init cachefiles_init(void) +{ + int ret; + + ret = misc_register(&cachefiles_dev); + if (ret < 0) + goto error_dev; + + /* create an object jar */ + ret = -ENOMEM; + cachefiles_object_jar = + kmem_cache_create("cachefiles_object_jar", + sizeof(struct cachefiles_object), + 0, + SLAB_HWCACHE_ALIGN, + cachefiles_object_init_once); + if (!cachefiles_object_jar) { + printk(KERN_NOTICE + "CacheFiles: Failed to allocate an object jar\n"); + goto error_object_jar; + } + + ret = cachefiles_proc_init(); + if (ret < 0) + goto error_proc; + + printk(KERN_INFO "CacheFiles: Loaded\n"); + return 0; + +error_proc: + kmem_cache_destroy(cachefiles_object_jar); +error_object_jar: + misc_deregister(&cachefiles_dev); +error_dev: + kerror("failed to register: %d", ret); + return ret; +} + +fs_initcall(cachefiles_init); + +/* + * clean up on module removal + */ +static void __exit cachefiles_exit(void) +{ + printk(KERN_INFO "CacheFiles: Unloading\n"); + + cachefiles_proc_cleanup(); + kmem_cache_destroy(cachefiles_object_jar); + misc_deregister(&cachefiles_dev); +} + +module_exit(cachefiles_exit); diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c new file mode 100644 index 00000000000..4ce818ae39e --- /dev/null +++ b/fs/cachefiles/namei.c @@ -0,0 +1,771 @@ +/* CacheFiles path walking and related routines + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/fsnotify.h> +#include <linux/quotaops.h> +#include <linux/xattr.h> +#include <linux/mount.h> +#include <linux/namei.h> +#include <linux/security.h> +#include "internal.h" + +static int cachefiles_wait_bit(void *flags) +{ + schedule(); + return 0; +} + +/* + * record the fact that an object is now active + */ +static void cachefiles_mark_object_active(struct cachefiles_cache *cache, + struct cachefiles_object *object) +{ + struct cachefiles_object *xobject; + struct rb_node **_p, *_parent = NULL; + struct dentry *dentry; + + _enter(",%p", object); + +try_again: + write_lock(&cache->active_lock); + + if (test_and_set_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)) + BUG(); + + dentry = object->dentry; + _p = &cache->active_nodes.rb_node; + while (*_p) { + _parent = *_p; + xobject = rb_entry(_parent, + struct cachefiles_object, active_node); + + ASSERT(xobject != object); + + if (xobject->dentry > dentry) + _p = &(*_p)->rb_left; + else if (xobject->dentry < dentry) + _p = &(*_p)->rb_right; + else + goto wait_for_old_object; + } + + rb_link_node(&object->active_node, _parent, _p); + rb_insert_color(&object->active_node, &cache->active_nodes); + + write_unlock(&cache->active_lock); + _leave(""); + return; + + /* an old object from a previous incarnation is hogging the slot - we + * need to wait for it to be destroyed */ +wait_for_old_object: + if (xobject->fscache.state < FSCACHE_OBJECT_DYING) { + printk(KERN_ERR "\n"); + printk(KERN_ERR "CacheFiles: Error:" + " Unexpected object collision\n"); + printk(KERN_ERR "xobject: OBJ%x\n", + xobject->fscache.debug_id); + printk(KERN_ERR "xobjstate=%s\n", + fscache_object_states[xobject->fscache.state]); + printk(KERN_ERR "xobjflags=%lx\n", xobject->fscache.flags); + printk(KERN_ERR "xobjevent=%lx [%lx]\n", + xobject->fscache.events, xobject->fscache.event_mask); + printk(KERN_ERR "xops=%u inp=%u exc=%u\n", + xobject->fscache.n_ops, xobject->fscache.n_in_progress, + xobject->fscache.n_exclusive); + printk(KERN_ERR "xcookie=%p [pr=%p nd=%p fl=%lx]\n", + xobject->fscache.cookie, + xobject->fscache.cookie->parent, + xobject->fscache.cookie->netfs_data, + xobject->fscache.cookie->flags); + printk(KERN_ERR "xparent=%p\n", + xobject->fscache.parent); + printk(KERN_ERR "object: OBJ%x\n", + object->fscache.debug_id); + printk(KERN_ERR "cookie=%p [pr=%p nd=%p fl=%lx]\n", + object->fscache.cookie, + object->fscache.cookie->parent, + object->fscache.cookie->netfs_data, + object->fscache.cookie->flags); + printk(KERN_ERR "parent=%p\n", + object->fscache.parent); + BUG(); + } + atomic_inc(&xobject->usage); + write_unlock(&cache->active_lock); + + _debug(">>> wait"); + wait_on_bit(&xobject->flags, CACHEFILES_OBJECT_ACTIVE, + cachefiles_wait_bit, TASK_UNINTERRUPTIBLE); + _debug("<<< waited"); + + cache->cache.ops->put_object(&xobject->fscache); + goto try_again; +} + +/* + * delete an object representation from the cache + * - file backed objects are unlinked + * - directory backed objects are stuffed into the graveyard for userspace to + * delete + * - unlocks the directory mutex + */ +static int cachefiles_bury_object(struct cachefiles_cache *cache, + struct dentry *dir, + struct dentry *rep) +{ + struct dentry *grave, *trap; + char nbuffer[8 + 8 + 1]; + int ret; + + _enter(",'%*.*s','%*.*s'", + dir->d_name.len, dir->d_name.len, dir->d_name.name, + rep->d_name.len, rep->d_name.len, rep->d_name.name); + + /* non-directories can just be unlinked */ + if (!S_ISDIR(rep->d_inode->i_mode)) { + _debug("unlink stale object"); + ret = vfs_unlink(dir->d_inode, rep); + + mutex_unlock(&dir->d_inode->i_mutex); + + if (ret == -EIO) + cachefiles_io_error(cache, "Unlink failed"); + + _leave(" = %d", ret); + return ret; + } + + /* directories have to be moved to the graveyard */ + _debug("move stale object to graveyard"); + mutex_unlock(&dir->d_inode->i_mutex); + +try_again: + /* first step is to make up a grave dentry in the graveyard */ + sprintf(nbuffer, "%08x%08x", + (uint32_t) get_seconds(), + (uint32_t) atomic_inc_return(&cache->gravecounter)); + + /* do the multiway lock magic */ + trap = lock_rename(cache->graveyard, dir); + + /* do some checks before getting the grave dentry */ + if (rep->d_parent != dir) { + /* the entry was probably culled when we dropped the parent dir + * lock */ + unlock_rename(cache->graveyard, dir); + _leave(" = 0 [culled?]"); + return 0; + } + + if (!S_ISDIR(cache->graveyard->d_inode->i_mode)) { + unlock_rename(cache->graveyard, dir); + cachefiles_io_error(cache, "Graveyard no longer a directory"); + return -EIO; + } + + if (trap == rep) { + unlock_rename(cache->graveyard, dir); + cachefiles_io_error(cache, "May not make directory loop"); + return -EIO; + } + + if (d_mountpoint(rep)) { + unlock_rename(cache->graveyard, dir); + cachefiles_io_error(cache, "Mountpoint in cache"); + return -EIO; + } + + grave = lookup_one_len(nbuffer, cache->graveyard, strlen(nbuffer)); + if (IS_ERR(grave)) { + unlock_rename(cache->graveyard, dir); + + if (PTR_ERR(grave) == -ENOMEM) { + _leave(" = -ENOMEM"); + return -ENOMEM; + } + + cachefiles_io_error(cache, "Lookup error %ld", + PTR_ERR(grave)); + return -EIO; + } + + if (grave->d_inode) { + unlock_rename(cache->graveyard, dir); + dput(grave); + grave = NULL; + cond_resched(); + goto try_again; + } + + if (d_mountpoint(grave)) { + unlock_rename(cache->graveyard, dir); + dput(grave); + cachefiles_io_error(cache, "Mountpoint in graveyard"); + return -EIO; + } + + /* target should not be an ancestor of source */ + if (trap == grave) { + unlock_rename(cache->graveyard, dir); + dput(grave); + cachefiles_io_error(cache, "May not make directory loop"); + return -EIO; + } + + /* attempt the rename */ + ret = vfs_rename(dir->d_inode, rep, cache->graveyard->d_inode, grave); + if (ret != 0 && ret != -ENOMEM) + cachefiles_io_error(cache, "Rename failed with error %d", ret); + + unlock_rename(cache->graveyard, dir); + dput(grave); + _leave(" = 0"); + return 0; +} + +/* + * delete an object representation from the cache + */ +int cachefiles_delete_object(struct cachefiles_cache *cache, + struct cachefiles_object *object) +{ + struct dentry *dir; + int ret; + + _enter(",{%p}", object->dentry); + + ASSERT(object->dentry); + ASSERT(object->dentry->d_inode); + ASSERT(object->dentry->d_parent); + + dir = dget_parent(object->dentry); + + mutex_lock(&dir->d_inode->i_mutex); + ret = cachefiles_bury_object(cache, dir, object->dentry); + + dput(dir); + _leave(" = %d", ret); + return ret; +} + +/* + * walk from the parent object to the child object through the backing + * filesystem, creating directories as we go + */ +int cachefiles_walk_to_object(struct cachefiles_object *parent, + struct cachefiles_object *object, + const char *key, + struct cachefiles_xattr *auxdata) +{ + struct cachefiles_cache *cache; + struct dentry *dir, *next = NULL; + unsigned long start; + const char *name; + int ret, nlen; + + _enter("{%p},,%s,", parent->dentry, key); + + cache = container_of(parent->fscache.cache, + struct cachefiles_cache, cache); + + ASSERT(parent->dentry); + ASSERT(parent->dentry->d_inode); + + if (!(S_ISDIR(parent->dentry->d_inode->i_mode))) { + // TODO: convert file to dir + _leave("looking up in none directory"); + return -ENOBUFS; + } + + dir = dget(parent->dentry); + +advance: + /* attempt to transit the first directory component */ + name = key; + nlen = strlen(key); + + /* key ends in a double NUL */ + key = key + nlen + 1; + if (!*key) + key = NULL; + +lookup_again: + /* search the current directory for the element name */ + _debug("lookup '%s'", name); + + mutex_lock(&dir->d_inode->i_mutex); + + start = jiffies; + next = lookup_one_len(name, dir, nlen); + cachefiles_hist(cachefiles_lookup_histogram, start); + if (IS_ERR(next)) + goto lookup_error; + + _debug("next -> %p %s", next, next->d_inode ? "positive" : "negative"); + + if (!key) + object->new = !next->d_inode; + + /* if this element of the path doesn't exist, then the lookup phase + * failed, and we can release any readers in the certain knowledge that + * there's nothing for them to actually read */ + if (!next->d_inode) + fscache_object_lookup_negative(&object->fscache); + + /* we need to create the object if it's negative */ + if (key || object->type == FSCACHE_COOKIE_TYPE_INDEX) { + /* index objects and intervening tree levels must be subdirs */ + if (!next->d_inode) { + ret = cachefiles_has_space(cache, 1, 0); + if (ret < 0) + goto create_error; + + start = jiffies; + ret = vfs_mkdir(dir->d_inode, next, 0); + cachefiles_hist(cachefiles_mkdir_histogram, start); + if (ret < 0) + goto create_error; + + ASSERT(next->d_inode); + + _debug("mkdir -> %p{%p{ino=%lu}}", + next, next->d_inode, next->d_inode->i_ino); + + } else if (!S_ISDIR(next->d_inode->i_mode)) { + kerror("inode %lu is not a directory", + next->d_inode->i_ino); + ret = -ENOBUFS; + goto error; + } + + } else { + /* non-index objects start out life as files */ + if (!next->d_inode) { + ret = cachefiles_has_space(cache, 1, 0); + if (ret < 0) + goto create_error; + + start = jiffies; + ret = vfs_create(dir->d_inode, next, S_IFREG, NULL); + cachefiles_hist(cachefiles_create_histogram, start); + if (ret < 0) + goto create_error; + + ASSERT(next->d_inode); + + _debug("create -> %p{%p{ino=%lu}}", + next, next->d_inode, next->d_inode->i_ino); + + } else if (!S_ISDIR(next->d_inode->i_mode) && + !S_ISREG(next->d_inode->i_mode) + ) { + kerror("inode %lu is not a file or directory", + next->d_inode->i_ino); + ret = -ENOBUFS; + goto error; + } + } + + /* process the next component */ + if (key) { + _debug("advance"); + mutex_unlock(&dir->d_inode->i_mutex); + dput(dir); + dir = next; + next = NULL; + goto advance; + } + + /* we've found the object we were looking for */ + object->dentry = next; + + /* if we've found that the terminal object exists, then we need to + * check its attributes and delete it if it's out of date */ + if (!object->new) { + _debug("validate '%*.*s'", + next->d_name.len, next->d_name.len, next->d_name.name); + + ret = cachefiles_check_object_xattr(object, auxdata); + if (ret == -ESTALE) { + /* delete the object (the deleter drops the directory + * mutex) */ + object->dentry = NULL; + + ret = cachefiles_bury_object(cache, dir, next); + dput(next); + next = NULL; + + if (ret < 0) + goto delete_error; + + _debug("redo lookup"); + goto lookup_again; + } + } + + /* note that we're now using this object */ + cachefiles_mark_object_active(cache, object); + + mutex_unlock(&dir->d_inode->i_mutex); + dput(dir); + dir = NULL; + + _debug("=== OBTAINED_OBJECT ==="); + + if (object->new) { + /* attach data to a newly constructed terminal object */ + ret = cachefiles_set_object_xattr(object, auxdata); + if (ret < 0) + goto check_error; + } else { + /* always update the atime on an object we've just looked up + * (this is used to keep track of culling, and atimes are only + * updated by read, write and readdir but not lookup or + * open) */ + touch_atime(cache->mnt, next); + } + + /* open a file interface onto a data file */ + if (object->type != FSCACHE_COOKIE_TYPE_INDEX) { + if (S_ISREG(object->dentry->d_inode->i_mode)) { + const struct address_space_operations *aops; + + ret = -EPERM; + aops = object->dentry->d_inode->i_mapping->a_ops; + if (!aops->bmap) + goto check_error; + + object->backer = object->dentry; + } else { + BUG(); // TODO: open file in data-class subdir + } + } + + object->new = 0; + fscache_obtained_object(&object->fscache); + + _leave(" = 0 [%lu]", object->dentry->d_inode->i_ino); + return 0; + +create_error: + _debug("create error %d", ret); + if (ret == -EIO) + cachefiles_io_error(cache, "Create/mkdir failed"); + goto error; + +check_error: + _debug("check error %d", ret); + write_lock(&cache->active_lock); + rb_erase(&object->active_node, &cache->active_nodes); + clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags); + wake_up_bit(&object->flags, CACHEFILES_OBJECT_ACTIVE); + write_unlock(&cache->active_lock); + + dput(object->dentry); + object->dentry = NULL; + goto error_out; + +delete_error: + _debug("delete error %d", ret); + goto error_out2; + +lookup_error: + _debug("lookup error %ld", PTR_ERR(next)); + ret = PTR_ERR(next); + if (ret == -EIO) + cachefiles_io_error(cache, "Lookup failed"); + next = NULL; +error: + mutex_unlock(&dir->d_inode->i_mutex); + dput(next); +error_out2: + dput(dir); +error_out: + if (ret == -ENOSPC) + ret = -ENOBUFS; + + _leave(" = error %d", -ret); + return ret; +} + +/* + * get a subdirectory + */ +struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, + struct dentry *dir, + const char *dirname) +{ + struct dentry *subdir; + unsigned long start; + int ret; + + _enter(",,%s", dirname); + + /* search the current directory for the element name */ + mutex_lock(&dir->d_inode->i_mutex); + + start = jiffies; + subdir = lookup_one_len(dirname, dir, strlen(dirname)); + cachefiles_hist(cachefiles_lookup_histogram, start); + if (IS_ERR(subdir)) { + if (PTR_ERR(subdir) == -ENOMEM) + goto nomem_d_alloc; + goto lookup_error; + } + + _debug("subdir -> %p %s", + subdir, subdir->d_inode ? "positive" : "negative"); + + /* we need to create the subdir if it doesn't exist yet */ + if (!subdir->d_inode) { + ret = cachefiles_has_space(cache, 1, 0); + if (ret < 0) + goto mkdir_error; + + _debug("attempt mkdir"); + + ret = vfs_mkdir(dir->d_inode, subdir, 0700); + if (ret < 0) + goto mkdir_error; + + ASSERT(subdir->d_inode); + + _debug("mkdir -> %p{%p{ino=%lu}}", + subdir, + subdir->d_inode, + subdir->d_inode->i_ino); + } + + mutex_unlock(&dir->d_inode->i_mutex); + + /* we need to make sure the subdir is a directory */ + ASSERT(subdir->d_inode); + + if (!S_ISDIR(subdir->d_inode->i_mode)) { + kerror("%s is not a directory", dirname); + ret = -EIO; + goto check_error; + } + + ret = -EPERM; + if (!subdir->d_inode->i_op || + !subdir->d_inode->i_op->setxattr || + !subdir->d_inode->i_op->getxattr || + !subdir->d_inode->i_op->lookup || + !subdir->d_inode->i_op->mkdir || + !subdir->d_inode->i_op->create || + !subdir->d_inode->i_op->rename || + !subdir->d_inode->i_op->rmdir || + !subdir->d_inode->i_op->unlink) + goto check_error; + + _leave(" = [%lu]", subdir->d_inode->i_ino); + return subdir; + +check_error: + dput(subdir); + _leave(" = %d [check]", ret); + return ERR_PTR(ret); + +mkdir_error: + mutex_unlock(&dir->d_inode->i_mutex); + dput(subdir); + kerror("mkdir %s failed with error %d", dirname, ret); + return ERR_PTR(ret); + +lookup_error: + mutex_unlock(&dir->d_inode->i_mutex); + ret = PTR_ERR(subdir); + kerror("Lookup %s failed with error %d", dirname, ret); + return ERR_PTR(ret); + +nomem_d_alloc: + mutex_unlock(&dir->d_inode->i_mutex); + _leave(" = -ENOMEM"); + return ERR_PTR(-ENOMEM); +} + +/* + * find out if an object is in use or not + * - if finds object and it's not in use: + * - returns a pointer to the object and a reference on it + * - returns with the directory locked + */ +static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache, + struct dentry *dir, + char *filename) +{ + struct cachefiles_object *object; + struct rb_node *_n; + struct dentry *victim; + unsigned long start; + int ret; + + //_enter(",%*.*s/,%s", + // dir->d_name.len, dir->d_name.len, dir->d_name.name, filename); + + /* look up the victim */ + mutex_lock_nested(&dir->d_inode->i_mutex, 1); + + start = jiffies; + victim = lookup_one_len(filename, dir, strlen(filename)); + cachefiles_hist(cachefiles_lookup_histogram, start); + if (IS_ERR(victim)) + goto lookup_error; + + //_debug("victim -> %p %s", + // victim, victim->d_inode ? "positive" : "negative"); + + /* if the object is no longer there then we probably retired the object + * at the netfs's request whilst the cull was in progress + */ + if (!victim->d_inode) { + mutex_unlock(&dir->d_inode->i_mutex); + dput(victim); + _leave(" = -ENOENT [absent]"); + return ERR_PTR(-ENOENT); + } + + /* check to see if we're using this object */ + read_lock(&cache->active_lock); + + _n = cache->active_nodes.rb_node; + + while (_n) { + object = rb_entry(_n, struct cachefiles_object, active_node); + + if (object->dentry > victim) + _n = _n->rb_left; + else if (object->dentry < victim) + _n = _n->rb_right; + else + goto object_in_use; + } + + read_unlock(&cache->active_lock); + + //_leave(" = %p", victim); + return victim; + +object_in_use: + read_unlock(&cache->active_lock); + mutex_unlock(&dir->d_inode->i_mutex); + dput(victim); + //_leave(" = -EBUSY [in use]"); + return ERR_PTR(-EBUSY); + +lookup_error: + mutex_unlock(&dir->d_inode->i_mutex); + ret = PTR_ERR(victim); + if (ret == -ENOENT) { + /* file or dir now absent - probably retired by netfs */ + _leave(" = -ESTALE [absent]"); + return ERR_PTR(-ESTALE); + } + + if (ret == -EIO) { + cachefiles_io_error(cache, "Lookup failed"); + } else if (ret != -ENOMEM) { + kerror("Internal error: %d", ret); + ret = -EIO; + } + + _leave(" = %d", ret); + return ERR_PTR(ret); +} + +/* + * cull an object if it's not in use + * - called only by cache manager daemon + */ +int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, + char *filename) +{ + struct dentry *victim; + int ret; + + _enter(",%*.*s/,%s", + dir->d_name.len, dir->d_name.len, dir->d_name.name, filename); + + victim = cachefiles_check_active(cache, dir, filename); + if (IS_ERR(victim)) + return PTR_ERR(victim); + + _debug("victim -> %p %s", + victim, victim->d_inode ? "positive" : "negative"); + + /* okay... the victim is not being used so we can cull it + * - start by marking it as stale + */ + _debug("victim is cullable"); + + ret = cachefiles_remove_object_xattr(cache, victim); + if (ret < 0) + goto error_unlock; + + /* actually remove the victim (drops the dir mutex) */ + _debug("bury"); + + ret = cachefiles_bury_object(cache, dir, victim); + if (ret < 0) + goto error; + + dput(victim); + _leave(" = 0"); + return 0; + +error_unlock: + mutex_unlock(&dir->d_inode->i_mutex); +error: + dput(victim); + if (ret == -ENOENT) { + /* file or dir now absent - probably retired by netfs */ + _leave(" = -ESTALE [absent]"); + return -ESTALE; + } + + if (ret != -ENOMEM) { + kerror("Internal error: %d", ret); + ret = -EIO; + } + + _leave(" = %d", ret); + return ret; +} + +/* + * find out if an object is in use or not + * - called only by cache manager daemon + * - returns -EBUSY or 0 to indicate whether an object is in use or not + */ +int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir, + char *filename) +{ + struct dentry *victim; + + //_enter(",%*.*s/,%s", + // dir->d_name.len, dir->d_name.len, dir->d_name.name, filename); + + victim = cachefiles_check_active(cache, dir, filename); + if (IS_ERR(victim)) + return PTR_ERR(victim); + + mutex_unlock(&dir->d_inode->i_mutex); + dput(victim); + //_leave(" = 0"); + return 0; +} diff --git a/fs/cachefiles/proc.c b/fs/cachefiles/proc.c new file mode 100644 index 00000000000..eccd3394119 --- /dev/null +++ b/fs/cachefiles/proc.c @@ -0,0 +1,134 @@ +/* CacheFiles statistics + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include "internal.h" + +atomic_t cachefiles_lookup_histogram[HZ]; +atomic_t cachefiles_mkdir_histogram[HZ]; +atomic_t cachefiles_create_histogram[HZ]; + +/* + * display the latency histogram + */ +static int cachefiles_histogram_show(struct seq_file *m, void *v) +{ + unsigned long index; + unsigned x, y, z, t; + + switch ((unsigned long) v) { + case 1: + seq_puts(m, "JIFS SECS LOOKUPS MKDIRS CREATES\n"); + return 0; + case 2: + seq_puts(m, "===== ===== ========= ========= =========\n"); + return 0; + default: + index = (unsigned long) v - 3; + x = atomic_read(&cachefiles_lookup_histogram[index]); + y = atomic_read(&cachefiles_mkdir_histogram[index]); + z = atomic_read(&cachefiles_create_histogram[index]); + if (x == 0 && y == 0 && z == 0) + return 0; + + t = (index * 1000) / HZ; + + seq_printf(m, "%4lu 0.%03u %9u %9u %9u\n", index, t, x, y, z); + return 0; + } +} + +/* + * set up the iterator to start reading from the first line + */ +static void *cachefiles_histogram_start(struct seq_file *m, loff_t *_pos) +{ + if ((unsigned long long)*_pos >= HZ + 2) + return NULL; + if (*_pos == 0) + *_pos = 1; + return (void *)(unsigned long) *_pos; +} + +/* + * move to the next line + */ +static void *cachefiles_histogram_next(struct seq_file *m, void *v, loff_t *pos) +{ + (*pos)++; + return (unsigned long long)*pos > HZ + 2 ? + NULL : (void *)(unsigned long) *pos; +} + +/* + * clean up after reading + */ +static void cachefiles_histogram_stop(struct seq_file *m, void *v) +{ +} + +static const struct seq_operations cachefiles_histogram_ops = { + .start = cachefiles_histogram_start, + .stop = cachefiles_histogram_stop, + .next = cachefiles_histogram_next, + .show = cachefiles_histogram_show, +}; + +/* + * open "/proc/fs/cachefiles/XXX" which provide statistics summaries + */ +static int cachefiles_histogram_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &cachefiles_histogram_ops); +} + +static const struct file_operations cachefiles_histogram_fops = { + .owner = THIS_MODULE, + .open = cachefiles_histogram_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +/* + * initialise the /proc/fs/cachefiles/ directory + */ +int __init cachefiles_proc_init(void) +{ + _enter(""); + + if (!proc_mkdir("fs/cachefiles", NULL)) + goto error_dir; + + if (!proc_create("fs/cachefiles/histogram", S_IFREG | 0444, NULL, + &cachefiles_histogram_fops)) + goto error_histogram; + + _leave(" = 0"); + return 0; + +error_histogram: + remove_proc_entry("fs/cachefiles", NULL); +error_dir: + _leave(" = -ENOMEM"); + return -ENOMEM; +} + +/* + * clean up the /proc/fs/cachefiles/ directory + */ +void cachefiles_proc_cleanup(void) +{ + remove_proc_entry("fs/cachefiles/histogram", NULL); + remove_proc_entry("fs/cachefiles", NULL); +} diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c new file mode 100644 index 00000000000..a69787e7dd9 --- /dev/null +++ b/fs/cachefiles/rdwr.c @@ -0,0 +1,879 @@ +/* Storage object read/write + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/mount.h> +#include <linux/file.h> +#include "internal.h" + +/* + * detect wake up events generated by the unlocking of pages in which we're + * interested + * - we use this to detect read completion of backing pages + * - the caller holds the waitqueue lock + */ +static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode, + int sync, void *_key) +{ + struct cachefiles_one_read *monitor = + container_of(wait, struct cachefiles_one_read, monitor); + struct cachefiles_object *object; + struct wait_bit_key *key = _key; + struct page *page = wait->private; + + ASSERT(key); + + _enter("{%lu},%u,%d,{%p,%u}", + monitor->netfs_page->index, mode, sync, + key->flags, key->bit_nr); + + if (key->flags != &page->flags || + key->bit_nr != PG_locked) + return 0; + + _debug("--- monitor %p %lx ---", page, page->flags); + + if (!PageUptodate(page) && !PageError(page)) + dump_stack(); + + /* remove from the waitqueue */ + list_del(&wait->task_list); + + /* move onto the action list and queue for FS-Cache thread pool */ + ASSERT(monitor->op); + + object = container_of(monitor->op->op.object, + struct cachefiles_object, fscache); + + spin_lock(&object->work_lock); + list_add_tail(&monitor->op_link, &monitor->op->to_do); + spin_unlock(&object->work_lock); + + fscache_enqueue_retrieval(monitor->op); + return 0; +} + +/* + * copy data from backing pages to netfs pages to complete a read operation + * - driven by FS-Cache's thread pool + */ +static void cachefiles_read_copier(struct fscache_operation *_op) +{ + struct cachefiles_one_read *monitor; + struct cachefiles_object *object; + struct fscache_retrieval *op; + struct pagevec pagevec; + int error, max; + + op = container_of(_op, struct fscache_retrieval, op); + object = container_of(op->op.object, + struct cachefiles_object, fscache); + + _enter("{ino=%lu}", object->backer->d_inode->i_ino); + + pagevec_init(&pagevec, 0); + + max = 8; + spin_lock_irq(&object->work_lock); + + while (!list_empty(&op->to_do)) { + monitor = list_entry(op->to_do.next, + struct cachefiles_one_read, op_link); + list_del(&monitor->op_link); + + spin_unlock_irq(&object->work_lock); + + _debug("- copy {%lu}", monitor->back_page->index); + + error = -EIO; + if (PageUptodate(monitor->back_page)) { + copy_highpage(monitor->netfs_page, monitor->back_page); + + pagevec_add(&pagevec, monitor->netfs_page); + fscache_mark_pages_cached(monitor->op, &pagevec); + error = 0; + } + + if (error) + cachefiles_io_error_obj( + object, + "Readpage failed on backing file %lx", + (unsigned long) monitor->back_page->flags); + + page_cache_release(monitor->back_page); + + fscache_end_io(op, monitor->netfs_page, error); + page_cache_release(monitor->netfs_page); + fscache_put_retrieval(op); + kfree(monitor); + + /* let the thread pool have some air occasionally */ + max--; + if (max < 0 || need_resched()) { + if (!list_empty(&op->to_do)) + fscache_enqueue_retrieval(op); + _leave(" [maxed out]"); + return; + } + + spin_lock_irq(&object->work_lock); + } + + spin_unlock_irq(&object->work_lock); + _leave(""); +} + +/* + * read the corresponding page to the given set from the backing file + * - an uncertain page is simply discarded, to be tried again another time + */ +static int cachefiles_read_backing_file_one(struct cachefiles_object *object, + struct fscache_retrieval *op, + struct page *netpage, + struct pagevec *pagevec) +{ + struct cachefiles_one_read *monitor; + struct address_space *bmapping; + struct page *newpage, *backpage; + int ret; + + _enter(""); + + pagevec_reinit(pagevec); + + _debug("read back %p{%lu,%d}", + netpage, netpage->index, page_count(netpage)); + + monitor = kzalloc(sizeof(*monitor), GFP_KERNEL); + if (!monitor) + goto nomem; + + monitor->netfs_page = netpage; + monitor->op = fscache_get_retrieval(op); + + init_waitqueue_func_entry(&monitor->monitor, cachefiles_read_waiter); + + /* attempt to get hold of the backing page */ + bmapping = object->backer->d_inode->i_mapping; + newpage = NULL; + + for (;;) { + backpage = find_get_page(bmapping, netpage->index); + if (backpage) + goto backing_page_already_present; + + if (!newpage) { + newpage = page_cache_alloc_cold(bmapping); + if (!newpage) + goto nomem_monitor; + } + + ret = add_to_page_cache(newpage, bmapping, + netpage->index, GFP_KERNEL); + if (ret == 0) + goto installed_new_backing_page; + if (ret != -EEXIST) + goto nomem_page; + } + + /* we've installed a new backing page, so now we need to add it + * to the LRU list and start it reading */ +installed_new_backing_page: + _debug("- new %p", newpage); + + backpage = newpage; + newpage = NULL; + + page_cache_get(backpage); + pagevec_add(pagevec, backpage); + __pagevec_lru_add_file(pagevec); + +read_backing_page: + ret = bmapping->a_ops->readpage(NULL, backpage); + if (ret < 0) + goto read_error; + + /* set the monitor to transfer the data across */ +monitor_backing_page: + _debug("- monitor add"); + + /* install the monitor */ + page_cache_get(monitor->netfs_page); + page_cache_get(backpage); + monitor->back_page = backpage; + monitor->monitor.private = backpage; + add_page_wait_queue(backpage, &monitor->monitor); + monitor = NULL; + + /* but the page may have been read before the monitor was installed, so + * the monitor may miss the event - so we have to ensure that we do get + * one in such a case */ + if (trylock_page(backpage)) { + _debug("jumpstart %p {%lx}", backpage, backpage->flags); + unlock_page(backpage); + } + goto success; + + /* if the backing page is already present, it can be in one of + * three states: read in progress, read failed or read okay */ +backing_page_already_present: + _debug("- present"); + + if (newpage) { + page_cache_release(newpage); + newpage = NULL; + } + + if (PageError(backpage)) + goto io_error; + + if (PageUptodate(backpage)) + goto backing_page_already_uptodate; + + if (!trylock_page(backpage)) + goto monitor_backing_page; + _debug("read %p {%lx}", backpage, backpage->flags); + goto read_backing_page; + + /* the backing page is already up to date, attach the netfs + * page to the pagecache and LRU and copy the data across */ +backing_page_already_uptodate: + _debug("- uptodate"); + + pagevec_add(pagevec, netpage); + fscache_mark_pages_cached(op, pagevec); + + copy_highpage(netpage, backpage); + fscache_end_io(op, netpage, 0); + +success: + _debug("success"); + ret = 0; + +out: + if (backpage) + page_cache_release(backpage); + if (monitor) { + fscache_put_retrieval(monitor->op); + kfree(monitor); + } + _leave(" = %d", ret); + return ret; + +read_error: + _debug("read error %d", ret); + if (ret == -ENOMEM) + goto out; +io_error: + cachefiles_io_error_obj(object, "Page read error on backing file"); + ret = -ENOBUFS; + goto out; + +nomem_page: + page_cache_release(newpage); +nomem_monitor: + fscache_put_retrieval(monitor->op); + kfree(monitor); +nomem: + _leave(" = -ENOMEM"); + return -ENOMEM; +} + +/* + * read a page from the cache or allocate a block in which to store it + * - cache withdrawal is prevented by the caller + * - returns -EINTR if interrupted + * - returns -ENOMEM if ran out of memory + * - returns -ENOBUFS if no buffers can be made available + * - returns -ENOBUFS if page is beyond EOF + * - if the page is backed by a block in the cache: + * - a read will be started which will call the callback on completion + * - 0 will be returned + * - else if the page is unbacked: + * - the metadata will be retained + * - -ENODATA will be returned + */ +int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, + struct page *page, + gfp_t gfp) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + struct pagevec pagevec; + struct inode *inode; + sector_t block0, block; + unsigned shift; + int ret; + + object = container_of(op->op.object, + struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + + _enter("{%p},{%lx},,,", object, page->index); + + if (!object->backer) + return -ENOBUFS; + + inode = object->backer->d_inode; + ASSERT(S_ISREG(inode->i_mode)); + ASSERT(inode->i_mapping->a_ops->bmap); + ASSERT(inode->i_mapping->a_ops->readpages); + + /* calculate the shift required to use bmap */ + if (inode->i_sb->s_blocksize > PAGE_SIZE) + return -ENOBUFS; + + shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; + + op->op.flags = FSCACHE_OP_FAST; + op->op.processor = cachefiles_read_copier; + + pagevec_init(&pagevec, 0); + + /* we assume the absence or presence of the first block is a good + * enough indication for the page as a whole + * - TODO: don't use bmap() for this as it is _not_ actually good + * enough for this as it doesn't indicate errors, but it's all we've + * got for the moment + */ + block0 = page->index; + block0 <<= shift; + + block = inode->i_mapping->a_ops->bmap(inode->i_mapping, block0); + _debug("%llx -> %llx", + (unsigned long long) block0, + (unsigned long long) block); + + if (block) { + /* submit the apparently valid page to the backing fs to be + * read from disk */ + ret = cachefiles_read_backing_file_one(object, op, page, + &pagevec); + } else if (cachefiles_has_space(cache, 0, 1) == 0) { + /* there's space in the cache we can use */ + pagevec_add(&pagevec, page); + fscache_mark_pages_cached(op, &pagevec); + ret = -ENODATA; + } else { + ret = -ENOBUFS; + } + + _leave(" = %d", ret); + return ret; +} + +/* + * read the corresponding pages to the given set from the backing file + * - any uncertain pages are simply discarded, to be tried again another time + */ +static int cachefiles_read_backing_file(struct cachefiles_object *object, + struct fscache_retrieval *op, + struct list_head *list, + struct pagevec *mark_pvec) +{ + struct cachefiles_one_read *monitor = NULL; + struct address_space *bmapping = object->backer->d_inode->i_mapping; + struct pagevec lru_pvec; + struct page *newpage = NULL, *netpage, *_n, *backpage = NULL; + int ret = 0; + + _enter(""); + + pagevec_init(&lru_pvec, 0); + + list_for_each_entry_safe(netpage, _n, list, lru) { + list_del(&netpage->lru); + + _debug("read back %p{%lu,%d}", + netpage, netpage->index, page_count(netpage)); + + if (!monitor) { + monitor = kzalloc(sizeof(*monitor), GFP_KERNEL); + if (!monitor) + goto nomem; + + monitor->op = fscache_get_retrieval(op); + init_waitqueue_func_entry(&monitor->monitor, + cachefiles_read_waiter); + } + + for (;;) { + backpage = find_get_page(bmapping, netpage->index); + if (backpage) + goto backing_page_already_present; + + if (!newpage) { + newpage = page_cache_alloc_cold(bmapping); + if (!newpage) + goto nomem; + } + + ret = add_to_page_cache(newpage, bmapping, + netpage->index, GFP_KERNEL); + if (ret == 0) + goto installed_new_backing_page; + if (ret != -EEXIST) + goto nomem; + } + + /* we've installed a new backing page, so now we need to add it + * to the LRU list and start it reading */ + installed_new_backing_page: + _debug("- new %p", newpage); + + backpage = newpage; + newpage = NULL; + + page_cache_get(backpage); + if (!pagevec_add(&lru_pvec, backpage)) + __pagevec_lru_add_file(&lru_pvec); + + reread_backing_page: + ret = bmapping->a_ops->readpage(NULL, backpage); + if (ret < 0) + goto read_error; + + /* add the netfs page to the pagecache and LRU, and set the + * monitor to transfer the data across */ + monitor_backing_page: + _debug("- monitor add"); + + ret = add_to_page_cache(netpage, op->mapping, netpage->index, + GFP_KERNEL); + if (ret < 0) { + if (ret == -EEXIST) { + page_cache_release(netpage); + continue; + } + goto nomem; + } + + page_cache_get(netpage); + if (!pagevec_add(&lru_pvec, netpage)) + __pagevec_lru_add_file(&lru_pvec); + + /* install a monitor */ + page_cache_get(netpage); + monitor->netfs_page = netpage; + + page_cache_get(backpage); + monitor->back_page = backpage; + monitor->monitor.private = backpage; + add_page_wait_queue(backpage, &monitor->monitor); + monitor = NULL; + + /* but the page may have been read before the monitor was + * installed, so the monitor may miss the event - so we have to + * ensure that we do get one in such a case */ + if (trylock_page(backpage)) { + _debug("2unlock %p {%lx}", backpage, backpage->flags); + unlock_page(backpage); + } + + page_cache_release(backpage); + backpage = NULL; + + page_cache_release(netpage); + netpage = NULL; + continue; + + /* if the backing page is already present, it can be in one of + * three states: read in progress, read failed or read okay */ + backing_page_already_present: + _debug("- present %p", backpage); + + if (PageError(backpage)) + goto io_error; + + if (PageUptodate(backpage)) + goto backing_page_already_uptodate; + + _debug("- not ready %p{%lx}", backpage, backpage->flags); + + if (!trylock_page(backpage)) + goto monitor_backing_page; + + if (PageError(backpage)) { + _debug("error %lx", backpage->flags); + unlock_page(backpage); + goto io_error; + } + + if (PageUptodate(backpage)) + goto backing_page_already_uptodate_unlock; + + /* we've locked a page that's neither up to date nor erroneous, + * so we need to attempt to read it again */ + goto reread_backing_page; + + /* the backing page is already up to date, attach the netfs + * page to the pagecache and LRU and copy the data across */ + backing_page_already_uptodate_unlock: + _debug("uptodate %lx", backpage->flags); + unlock_page(backpage); + backing_page_already_uptodate: + _debug("- uptodate"); + + ret = add_to_page_cache(netpage, op->mapping, netpage->index, + GFP_KERNEL); + if (ret < 0) { + if (ret == -EEXIST) { + page_cache_release(netpage); + continue; + } + goto nomem; + } + + copy_highpage(netpage, backpage); + + page_cache_release(backpage); + backpage = NULL; + + if (!pagevec_add(mark_pvec, netpage)) + fscache_mark_pages_cached(op, mark_pvec); + + page_cache_get(netpage); + if (!pagevec_add(&lru_pvec, netpage)) + __pagevec_lru_add_file(&lru_pvec); + + fscache_end_io(op, netpage, 0); + page_cache_release(netpage); + netpage = NULL; + continue; + } + + netpage = NULL; + + _debug("out"); + +out: + /* tidy up */ + pagevec_lru_add_file(&lru_pvec); + + if (newpage) + page_cache_release(newpage); + if (netpage) + page_cache_release(netpage); + if (backpage) + page_cache_release(backpage); + if (monitor) { + fscache_put_retrieval(op); + kfree(monitor); + } + + list_for_each_entry_safe(netpage, _n, list, lru) { + list_del(&netpage->lru); + page_cache_release(netpage); + } + + _leave(" = %d", ret); + return ret; + +nomem: + _debug("nomem"); + ret = -ENOMEM; + goto out; + +read_error: + _debug("read error %d", ret); + if (ret == -ENOMEM) + goto out; +io_error: + cachefiles_io_error_obj(object, "Page read error on backing file"); + ret = -ENOBUFS; + goto out; +} + +/* + * read a list of pages from the cache or allocate blocks in which to store + * them + */ +int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, + struct list_head *pages, + unsigned *nr_pages, + gfp_t gfp) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + struct list_head backpages; + struct pagevec pagevec; + struct inode *inode; + struct page *page, *_n; + unsigned shift, nrbackpages; + int ret, ret2, space; + + object = container_of(op->op.object, + struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + + _enter("{OBJ%x,%d},,%d,,", + object->fscache.debug_id, atomic_read(&op->op.usage), + *nr_pages); + + if (!object->backer) + return -ENOBUFS; + + space = 1; + if (cachefiles_has_space(cache, 0, *nr_pages) < 0) + space = 0; + + inode = object->backer->d_inode; + ASSERT(S_ISREG(inode->i_mode)); + ASSERT(inode->i_mapping->a_ops->bmap); + ASSERT(inode->i_mapping->a_ops->readpages); + + /* calculate the shift required to use bmap */ + if (inode->i_sb->s_blocksize > PAGE_SIZE) + return -ENOBUFS; + + shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; + + pagevec_init(&pagevec, 0); + + op->op.flags = FSCACHE_OP_FAST; + op->op.processor = cachefiles_read_copier; + + INIT_LIST_HEAD(&backpages); + nrbackpages = 0; + + ret = space ? -ENODATA : -ENOBUFS; + list_for_each_entry_safe(page, _n, pages, lru) { + sector_t block0, block; + + /* we assume the absence or presence of the first block is a + * good enough indication for the page as a whole + * - TODO: don't use bmap() for this as it is _not_ actually + * good enough for this as it doesn't indicate errors, but + * it's all we've got for the moment + */ + block0 = page->index; + block0 <<= shift; + + block = inode->i_mapping->a_ops->bmap(inode->i_mapping, + block0); + _debug("%llx -> %llx", + (unsigned long long) block0, + (unsigned long long) block); + + if (block) { + /* we have data - add it to the list to give to the + * backing fs */ + list_move(&page->lru, &backpages); + (*nr_pages)--; + nrbackpages++; + } else if (space && pagevec_add(&pagevec, page) == 0) { + fscache_mark_pages_cached(op, &pagevec); + ret = -ENODATA; + } + } + + if (pagevec_count(&pagevec) > 0) + fscache_mark_pages_cached(op, &pagevec); + + if (list_empty(pages)) + ret = 0; + + /* submit the apparently valid pages to the backing fs to be read from + * disk */ + if (nrbackpages > 0) { + ret2 = cachefiles_read_backing_file(object, op, &backpages, + &pagevec); + if (ret2 == -ENOMEM || ret2 == -EINTR) + ret = ret2; + } + + if (pagevec_count(&pagevec) > 0) + fscache_mark_pages_cached(op, &pagevec); + + _leave(" = %d [nr=%u%s]", + ret, *nr_pages, list_empty(pages) ? " empty" : ""); + return ret; +} + +/* + * allocate a block in the cache in which to store a page + * - cache withdrawal is prevented by the caller + * - returns -EINTR if interrupted + * - returns -ENOMEM if ran out of memory + * - returns -ENOBUFS if no buffers can be made available + * - returns -ENOBUFS if page is beyond EOF + * - otherwise: + * - the metadata will be retained + * - 0 will be returned + */ +int cachefiles_allocate_page(struct fscache_retrieval *op, + struct page *page, + gfp_t gfp) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + struct pagevec pagevec; + int ret; + + object = container_of(op->op.object, + struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + + _enter("%p,{%lx},", object, page->index); + + ret = cachefiles_has_space(cache, 0, 1); + if (ret == 0) { + pagevec_init(&pagevec, 0); + pagevec_add(&pagevec, page); + fscache_mark_pages_cached(op, &pagevec); + } else { + ret = -ENOBUFS; + } + + _leave(" = %d", ret); + return ret; +} + +/* + * allocate blocks in the cache in which to store a set of pages + * - cache withdrawal is prevented by the caller + * - returns -EINTR if interrupted + * - returns -ENOMEM if ran out of memory + * - returns -ENOBUFS if some buffers couldn't be made available + * - returns -ENOBUFS if some pages are beyond EOF + * - otherwise: + * - -ENODATA will be returned + * - metadata will be retained for any page marked + */ +int cachefiles_allocate_pages(struct fscache_retrieval *op, + struct list_head *pages, + unsigned *nr_pages, + gfp_t gfp) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + struct pagevec pagevec; + struct page *page; + int ret; + + object = container_of(op->op.object, + struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + + _enter("%p,,,%d,", object, *nr_pages); + + ret = cachefiles_has_space(cache, 0, *nr_pages); + if (ret == 0) { + pagevec_init(&pagevec, 0); + + list_for_each_entry(page, pages, lru) { + if (pagevec_add(&pagevec, page) == 0) + fscache_mark_pages_cached(op, &pagevec); + } + + if (pagevec_count(&pagevec) > 0) + fscache_mark_pages_cached(op, &pagevec); + ret = -ENODATA; + } else { + ret = -ENOBUFS; + } + + _leave(" = %d", ret); + return ret; +} + +/* + * request a page be stored in the cache + * - cache withdrawal is prevented by the caller + * - this request may be ignored if there's no cache block available, in which + * case -ENOBUFS will be returned + * - if the op is in progress, 0 will be returned + */ +int cachefiles_write_page(struct fscache_storage *op, struct page *page) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + mm_segment_t old_fs; + struct file *file; + loff_t pos; + void *data; + int ret; + + ASSERT(op != NULL); + ASSERT(page != NULL); + + object = container_of(op->op.object, + struct cachefiles_object, fscache); + + _enter("%p,%p{%lx},,,", object, page, page->index); + + if (!object->backer) { + _leave(" = -ENOBUFS"); + return -ENOBUFS; + } + + ASSERT(S_ISREG(object->backer->d_inode->i_mode)); + + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + + /* write the page to the backing filesystem and let it store it in its + * own time */ + dget(object->backer); + mntget(cache->mnt); + file = dentry_open(object->backer, cache->mnt, O_RDWR, + cache->cache_cred); + if (IS_ERR(file)) { + ret = PTR_ERR(file); + } else { + ret = -EIO; + if (file->f_op->write) { + pos = (loff_t) page->index << PAGE_SHIFT; + data = kmap(page); + old_fs = get_fs(); + set_fs(KERNEL_DS); + ret = file->f_op->write( + file, (const void __user *) data, PAGE_SIZE, + &pos); + set_fs(old_fs); + kunmap(page); + if (ret != PAGE_SIZE) + ret = -EIO; + } + fput(file); + } + + if (ret < 0) { + if (ret == -EIO) + cachefiles_io_error_obj( + object, "Write page to backing file failed"); + ret = -ENOBUFS; + } + + _leave(" = %d", ret); + return ret; +} + +/* + * detach a backing block from a page + * - cache withdrawal is prevented by the caller + */ +void cachefiles_uncache_page(struct fscache_object *_object, struct page *page) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + + object = container_of(_object, struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + + _enter("%p,{%lu}", object, page->index); + + spin_unlock(&object->fscache.cookie->lock); +} diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c new file mode 100644 index 00000000000..b5808cdb223 --- /dev/null +++ b/fs/cachefiles/security.c @@ -0,0 +1,116 @@ +/* CacheFiles security management + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/fs.h> +#include <linux/cred.h> +#include "internal.h" + +/* + * determine the security context within which we access the cache from within + * the kernel + */ +int cachefiles_get_security_ID(struct cachefiles_cache *cache) +{ + struct cred *new; + int ret; + + _enter("{%s}", cache->secctx); + + new = prepare_kernel_cred(current); + if (!new) { + ret = -ENOMEM; + goto error; + } + + if (cache->secctx) { + ret = set_security_override_from_ctx(new, cache->secctx); + if (ret < 0) { + put_cred(new); + printk(KERN_ERR "CacheFiles:" + " Security denies permission to nominate" + " security context: error %d\n", + ret); + goto error; + } + } + + cache->cache_cred = new; + ret = 0; +error: + _leave(" = %d", ret); + return ret; +} + +/* + * see if mkdir and create can be performed in the root directory + */ +static int cachefiles_check_cache_dir(struct cachefiles_cache *cache, + struct dentry *root) +{ + int ret; + + ret = security_inode_mkdir(root->d_inode, root, 0); + if (ret < 0) { + printk(KERN_ERR "CacheFiles:" + " Security denies permission to make dirs: error %d", + ret); + return ret; + } + + ret = security_inode_create(root->d_inode, root, 0); + if (ret < 0) + printk(KERN_ERR "CacheFiles:" + " Security denies permission to create files: error %d", + ret); + + return ret; +} + +/* + * check the security details of the on-disk cache + * - must be called with security override in force + */ +int cachefiles_determine_cache_security(struct cachefiles_cache *cache, + struct dentry *root, + const struct cred **_saved_cred) +{ + struct cred *new; + int ret; + + _enter(""); + + /* duplicate the cache creds for COW (the override is currently in + * force, so we can use prepare_creds() to do this) */ + new = prepare_creds(); + if (!new) + return -ENOMEM; + + cachefiles_end_secure(cache, *_saved_cred); + + /* use the cache root dir's security context as the basis with + * which create files */ + ret = set_create_files_as(new, root->d_inode); + if (ret < 0) { + _leave(" = %d [cfa]", ret); + return ret; + } + + put_cred(cache->cache_cred); + cache->cache_cred = new; + + cachefiles_begin_secure(cache, _saved_cred); + ret = cachefiles_check_cache_dir(cache, root); + + if (ret == -EOPNOTSUPP) + ret = 0; + _leave(" = %d", ret); + return ret; +} diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c new file mode 100644 index 00000000000..f3e7a0bf068 --- /dev/null +++ b/fs/cachefiles/xattr.c @@ -0,0 +1,291 @@ +/* CacheFiles extended attribute management + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/fsnotify.h> +#include <linux/quotaops.h> +#include <linux/xattr.h> +#include "internal.h" + +static const char cachefiles_xattr_cache[] = + XATTR_USER_PREFIX "CacheFiles.cache"; + +/* + * check the type label on an object + * - done using xattrs + */ +int cachefiles_check_object_type(struct cachefiles_object *object) +{ + struct dentry *dentry = object->dentry; + char type[3], xtype[3]; + int ret; + + ASSERT(dentry); + ASSERT(dentry->d_inode); + + if (!object->fscache.cookie) + strcpy(type, "C3"); + else + snprintf(type, 3, "%02x", object->fscache.cookie->def->type); + + _enter("%p{%s}", object, type); + + /* attempt to install a type label directly */ + ret = vfs_setxattr(dentry, cachefiles_xattr_cache, type, 2, + XATTR_CREATE); + if (ret == 0) { + _debug("SET"); /* we succeeded */ + goto error; + } + + if (ret != -EEXIST) { + kerror("Can't set xattr on %*.*s [%lu] (err %d)", + dentry->d_name.len, dentry->d_name.len, + dentry->d_name.name, dentry->d_inode->i_ino, + -ret); + goto error; + } + + /* read the current type label */ + ret = vfs_getxattr(dentry, cachefiles_xattr_cache, xtype, 3); + if (ret < 0) { + if (ret == -ERANGE) + goto bad_type_length; + + kerror("Can't read xattr on %*.*s [%lu] (err %d)", + dentry->d_name.len, dentry->d_name.len, + dentry->d_name.name, dentry->d_inode->i_ino, + -ret); + goto error; + } + + /* check the type is what we're expecting */ + if (ret != 2) + goto bad_type_length; + + if (xtype[0] != type[0] || xtype[1] != type[1]) + goto bad_type; + + ret = 0; + +error: + _leave(" = %d", ret); + return ret; + +bad_type_length: + kerror("Cache object %lu type xattr length incorrect", + dentry->d_inode->i_ino); + ret = -EIO; + goto error; + +bad_type: + xtype[2] = 0; + kerror("Cache object %*.*s [%lu] type %s not %s", + dentry->d_name.len, dentry->d_name.len, + dentry->d_name.name, dentry->d_inode->i_ino, + xtype, type); + ret = -EIO; + goto error; +} + +/* + * set the state xattr on a cache file + */ +int cachefiles_set_object_xattr(struct cachefiles_object *object, + struct cachefiles_xattr *auxdata) +{ + struct dentry *dentry = object->dentry; + int ret; + + ASSERT(object->fscache.cookie); + ASSERT(dentry); + + _enter("%p,#%d", object, auxdata->len); + + /* attempt to install the cache metadata directly */ + _debug("SET %s #%u", object->fscache.cookie->def->name, auxdata->len); + + ret = vfs_setxattr(dentry, cachefiles_xattr_cache, + &auxdata->type, auxdata->len, + XATTR_CREATE); + if (ret < 0 && ret != -ENOMEM) + cachefiles_io_error_obj( + object, + "Failed to set xattr with error %d", ret); + + _leave(" = %d", ret); + return ret; +} + +/* + * update the state xattr on a cache file + */ +int cachefiles_update_object_xattr(struct cachefiles_object *object, + struct cachefiles_xattr *auxdata) +{ + struct dentry *dentry = object->dentry; + int ret; + + ASSERT(object->fscache.cookie); + ASSERT(dentry); + + _enter("%p,#%d", object, auxdata->len); + + /* attempt to install the cache metadata directly */ + _debug("SET %s #%u", object->fscache.cookie->def->name, auxdata->len); + + ret = vfs_setxattr(dentry, cachefiles_xattr_cache, + &auxdata->type, auxdata->len, + XATTR_REPLACE); + if (ret < 0 && ret != -ENOMEM) + cachefiles_io_error_obj( + object, + "Failed to update xattr with error %d", ret); + + _leave(" = %d", ret); + return ret; +} + +/* + * check the state xattr on a cache file + * - return -ESTALE if the object should be deleted + */ +int cachefiles_check_object_xattr(struct cachefiles_object *object, + struct cachefiles_xattr *auxdata) +{ + struct cachefiles_xattr *auxbuf; + struct dentry *dentry = object->dentry; + int ret; + + _enter("%p,#%d", object, auxdata->len); + + ASSERT(dentry); + ASSERT(dentry->d_inode); + + auxbuf = kmalloc(sizeof(struct cachefiles_xattr) + 512, GFP_KERNEL); + if (!auxbuf) { + _leave(" = -ENOMEM"); + return -ENOMEM; + } + + /* read the current type label */ + ret = vfs_getxattr(dentry, cachefiles_xattr_cache, + &auxbuf->type, 512 + 1); + if (ret < 0) { + if (ret == -ENODATA) + goto stale; /* no attribute - power went off + * mid-cull? */ + + if (ret == -ERANGE) + goto bad_type_length; + + cachefiles_io_error_obj(object, + "Can't read xattr on %lu (err %d)", + dentry->d_inode->i_ino, -ret); + goto error; + } + + /* check the on-disk object */ + if (ret < 1) + goto bad_type_length; + + if (auxbuf->type != auxdata->type) + goto stale; + + auxbuf->len = ret; + + /* consult the netfs */ + if (object->fscache.cookie->def->check_aux) { + enum fscache_checkaux result; + unsigned int dlen; + + dlen = auxbuf->len - 1; + + _debug("checkaux %s #%u", + object->fscache.cookie->def->name, dlen); + + result = fscache_check_aux(&object->fscache, + &auxbuf->data, dlen); + + switch (result) { + /* entry okay as is */ + case FSCACHE_CHECKAUX_OKAY: + goto okay; + + /* entry requires update */ + case FSCACHE_CHECKAUX_NEEDS_UPDATE: + break; + + /* entry requires deletion */ + case FSCACHE_CHECKAUX_OBSOLETE: + goto stale; + + default: + BUG(); + } + + /* update the current label */ + ret = vfs_setxattr(dentry, cachefiles_xattr_cache, + &auxdata->type, auxdata->len, + XATTR_REPLACE); + if (ret < 0) { + cachefiles_io_error_obj(object, + "Can't update xattr on %lu" + " (error %d)", + dentry->d_inode->i_ino, -ret); + goto error; + } + } + +okay: + ret = 0; + +error: + kfree(auxbuf); + _leave(" = %d", ret); + return ret; + +bad_type_length: + kerror("Cache object %lu xattr length incorrect", + dentry->d_inode->i_ino); + ret = -EIO; + goto error; + +stale: + ret = -ESTALE; + goto error; +} + +/* + * remove the object's xattr to mark it stale + */ +int cachefiles_remove_object_xattr(struct cachefiles_cache *cache, + struct dentry *dentry) +{ + int ret; + + ret = vfs_removexattr(dentry, cachefiles_xattr_cache); + if (ret < 0) { + if (ret == -ENOENT || ret == -ENODATA) + ret = 0; + else if (ret != -ENOMEM) + cachefiles_io_error(cache, + "Can't remove xattr from %lu" + " (error %d)", + dentry->d_inode->i_ino, -ret); + } + + _leave(" = %d", ret); + return ret; +} diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig new file mode 100644 index 00000000000..9bbb8ce7bea --- /dev/null +++ b/fs/fscache/Kconfig @@ -0,0 +1,56 @@ + +config FSCACHE + tristate "General filesystem local caching manager" + depends on EXPERIMENTAL + select SLOW_WORK + help + This option enables a generic filesystem caching manager that can be + used by various network and other filesystems to cache data locally. + Different sorts of caches can be plugged in, depending on the + resources available. + + See Documentation/filesystems/caching/fscache.txt for more information. + +config FSCACHE_STATS + bool "Gather statistical information on local caching" + depends on FSCACHE && PROC_FS + help + This option causes statistical information to be gathered on local + caching and exported through file: + + /proc/fs/fscache/stats + + The gathering of statistics adds a certain amount of overhead to + execution as there are a quite a few stats gathered, and on a + multi-CPU system these may be on cachelines that keep bouncing + between CPUs. On the other hand, the stats are very useful for + debugging purposes. Saying 'Y' here is recommended. + + See Documentation/filesystems/caching/fscache.txt for more information. + +config FSCACHE_HISTOGRAM + bool "Gather latency information on local caching" + depends on FSCACHE && PROC_FS + help + This option causes latency information to be gathered on local + caching and exported through file: + + /proc/fs/fscache/histogram + + The generation of this histogram adds a certain amount of overhead to + execution as there are a number of points at which data is gathered, + and on a multi-CPU system these may be on cachelines that keep + bouncing between CPUs. On the other hand, the histogram may be + useful for debugging purposes. Saying 'N' here is recommended. + + See Documentation/filesystems/caching/fscache.txt for more information. + +config FSCACHE_DEBUG + bool "Debug FS-Cache" + depends on FSCACHE + help + This permits debugging to be dynamically enabled in the local caching + management module. If this is set, the debugging output may be + enabled by setting bits in /sys/modules/fscache/parameter/debug. + + See Documentation/filesystems/caching/fscache.txt for more information. diff --git a/fs/fscache/Makefile b/fs/fscache/Makefile new file mode 100644 index 00000000000..91571b95aac --- /dev/null +++ b/fs/fscache/Makefile @@ -0,0 +1,19 @@ +# +# Makefile for general filesystem caching code +# + +fscache-y := \ + cache.o \ + cookie.o \ + fsdef.o \ + main.o \ + netfs.o \ + object.o \ + operation.o \ + page.o + +fscache-$(CONFIG_PROC_FS) += proc.o +fscache-$(CONFIG_FSCACHE_STATS) += stats.o +fscache-$(CONFIG_FSCACHE_HISTOGRAM) += histogram.o + +obj-$(CONFIG_FSCACHE) := fscache.o diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c new file mode 100644 index 00000000000..e21985bbb1f --- /dev/null +++ b/fs/fscache/cache.c @@ -0,0 +1,415 @@ +/* FS-Cache cache handling + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define FSCACHE_DEBUG_LEVEL CACHE +#include <linux/module.h> +#include <linux/slab.h> +#include "internal.h" + +LIST_HEAD(fscache_cache_list); +DECLARE_RWSEM(fscache_addremove_sem); +DECLARE_WAIT_QUEUE_HEAD(fscache_cache_cleared_wq); +EXPORT_SYMBOL(fscache_cache_cleared_wq); + +static LIST_HEAD(fscache_cache_tag_list); + +/* + * look up a cache tag + */ +struct fscache_cache_tag *__fscache_lookup_cache_tag(const char *name) +{ + struct fscache_cache_tag *tag, *xtag; + + /* firstly check for the existence of the tag under read lock */ + down_read(&fscache_addremove_sem); + + list_for_each_entry(tag, &fscache_cache_tag_list, link) { + if (strcmp(tag->name, name) == 0) { + atomic_inc(&tag->usage); + up_read(&fscache_addremove_sem); + return tag; + } + } + + up_read(&fscache_addremove_sem); + + /* the tag does not exist - create a candidate */ + xtag = kzalloc(sizeof(*xtag) + strlen(name) + 1, GFP_KERNEL); + if (!xtag) + /* return a dummy tag if out of memory */ + return ERR_PTR(-ENOMEM); + + atomic_set(&xtag->usage, 1); + strcpy(xtag->name, name); + + /* write lock, search again and add if still not present */ + down_write(&fscache_addremove_sem); + + list_for_each_entry(tag, &fscache_cache_tag_list, link) { + if (strcmp(tag->name, name) == 0) { + atomic_inc(&tag->usage); + up_write(&fscache_addremove_sem); + kfree(xtag); + return tag; + } + } + + list_add_tail(&xtag->link, &fscache_cache_tag_list); + up_write(&fscache_addremove_sem); + return xtag; +} + +/* + * release a reference to a cache tag + */ +void __fscache_release_cache_tag(struct fscache_cache_tag *tag) +{ + if (tag != ERR_PTR(-ENOMEM)) { + down_write(&fscache_addremove_sem); + + if (atomic_dec_and_test(&tag->usage)) + list_del_init(&tag->link); + else + tag = NULL; + + up_write(&fscache_addremove_sem); + + kfree(tag); + } +} + +/* + * select a cache in which to store an object + * - the cache addremove semaphore must be at least read-locked by the caller + * - the object will never be an index + */ +struct fscache_cache *fscache_select_cache_for_object( + struct fscache_cookie *cookie) +{ + struct fscache_cache_tag *tag; + struct fscache_object *object; + struct fscache_cache *cache; + + _enter(""); + + if (list_empty(&fscache_cache_list)) { + _leave(" = NULL [no cache]"); + return NULL; + } + + /* we check the parent to determine the cache to use */ + spin_lock(&cookie->lock); + + /* the first in the parent's backing list should be the preferred + * cache */ + if (!hlist_empty(&cookie->backing_objects)) { + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + cache = object->cache; + if (object->state >= FSCACHE_OBJECT_DYING || + test_bit(FSCACHE_IOERROR, &cache->flags)) + cache = NULL; + + spin_unlock(&cookie->lock); + _leave(" = %p [parent]", cache); + return cache; + } + + /* the parent is unbacked */ + if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) { + /* cookie not an index and is unbacked */ + spin_unlock(&cookie->lock); + _leave(" = NULL [cookie ub,ni]"); + return NULL; + } + + spin_unlock(&cookie->lock); + + if (!cookie->def->select_cache) + goto no_preference; + + /* ask the netfs for its preference */ + tag = cookie->def->select_cache(cookie->parent->netfs_data, + cookie->netfs_data); + if (!tag) + goto no_preference; + + if (tag == ERR_PTR(-ENOMEM)) { + _leave(" = NULL [nomem tag]"); + return NULL; + } + + if (!tag->cache) { + _leave(" = NULL [unbacked tag]"); + return NULL; + } + + if (test_bit(FSCACHE_IOERROR, &tag->cache->flags)) + return NULL; + + _leave(" = %p [specific]", tag->cache); + return tag->cache; + +no_preference: + /* netfs has no preference - just select first cache */ + cache = list_entry(fscache_cache_list.next, + struct fscache_cache, link); + _leave(" = %p [first]", cache); + return cache; +} + +/** + * fscache_init_cache - Initialise a cache record + * @cache: The cache record to be initialised + * @ops: The cache operations to be installed in that record + * @idfmt: Format string to define identifier + * @...: sprintf-style arguments + * + * Initialise a record of a cache and fill in the name. + * + * See Documentation/filesystems/caching/backend-api.txt for a complete + * description. + */ +void fscache_init_cache(struct fscache_cache *cache, + const struct fscache_cache_ops *ops, + const char *idfmt, + ...) +{ + va_list va; + + memset(cache, 0, sizeof(*cache)); + + cache->ops = ops; + + va_start(va, idfmt); + vsnprintf(cache->identifier, sizeof(cache->identifier), idfmt, va); + va_end(va); + + INIT_WORK(&cache->op_gc, fscache_operation_gc); + INIT_LIST_HEAD(&cache->link); + INIT_LIST_HEAD(&cache->object_list); + INIT_LIST_HEAD(&cache->op_gc_list); + spin_lock_init(&cache->object_list_lock); + spin_lock_init(&cache->op_gc_list_lock); +} +EXPORT_SYMBOL(fscache_init_cache); + +/** + * fscache_add_cache - Declare a cache as being open for business + * @cache: The record describing the cache + * @ifsdef: The record of the cache object describing the top-level index + * @tagname: The tag describing this cache + * + * Add a cache to the system, making it available for netfs's to use. + * + * See Documentation/filesystems/caching/backend-api.txt for a complete + * description. + */ +int fscache_add_cache(struct fscache_cache *cache, + struct fscache_object *ifsdef, + const char *tagname) +{ + struct fscache_cache_tag *tag; + + BUG_ON(!cache->ops); + BUG_ON(!ifsdef); + + cache->flags = 0; + ifsdef->event_mask = ULONG_MAX & ~(1 << FSCACHE_OBJECT_EV_CLEARED); + ifsdef->state = FSCACHE_OBJECT_ACTIVE; + + if (!tagname) + tagname = cache->identifier; + + BUG_ON(!tagname[0]); + + _enter("{%s.%s},,%s", cache->ops->name, cache->identifier, tagname); + + /* we use the cache tag to uniquely identify caches */ + tag = __fscache_lookup_cache_tag(tagname); + if (IS_ERR(tag)) + goto nomem; + + if (test_and_set_bit(FSCACHE_TAG_RESERVED, &tag->flags)) + goto tag_in_use; + + cache->kobj = kobject_create_and_add(tagname, fscache_root); + if (!cache->kobj) + goto error; + + ifsdef->cookie = &fscache_fsdef_index; + ifsdef->cache = cache; + cache->fsdef = ifsdef; + + down_write(&fscache_addremove_sem); + + tag->cache = cache; + cache->tag = tag; + + /* add the cache to the list */ + list_add(&cache->link, &fscache_cache_list); + + /* add the cache's netfs definition index object to the cache's + * list */ + spin_lock(&cache->object_list_lock); + list_add_tail(&ifsdef->cache_link, &cache->object_list); + spin_unlock(&cache->object_list_lock); + + /* add the cache's netfs definition index object to the top level index + * cookie as a known backing object */ + spin_lock(&fscache_fsdef_index.lock); + + hlist_add_head(&ifsdef->cookie_link, + &fscache_fsdef_index.backing_objects); + + atomic_inc(&fscache_fsdef_index.usage); + + /* done */ + spin_unlock(&fscache_fsdef_index.lock); + up_write(&fscache_addremove_sem); + + printk(KERN_NOTICE "FS-Cache: Cache \"%s\" added (type %s)\n", + cache->tag->name, cache->ops->name); + kobject_uevent(cache->kobj, KOBJ_ADD); + + _leave(" = 0 [%s]", cache->identifier); + return 0; + +tag_in_use: + printk(KERN_ERR "FS-Cache: Cache tag '%s' already in use\n", tagname); + __fscache_release_cache_tag(tag); + _leave(" = -EXIST"); + return -EEXIST; + +error: + __fscache_release_cache_tag(tag); + _leave(" = -EINVAL"); + return -EINVAL; + +nomem: + _leave(" = -ENOMEM"); + return -ENOMEM; +} +EXPORT_SYMBOL(fscache_add_cache); + +/** + * fscache_io_error - Note a cache I/O error + * @cache: The record describing the cache + * + * Note that an I/O error occurred in a cache and that it should no longer be + * used for anything. This also reports the error into the kernel log. + * + * See Documentation/filesystems/caching/backend-api.txt for a complete + * description. + */ +void fscache_io_error(struct fscache_cache *cache) +{ + set_bit(FSCACHE_IOERROR, &cache->flags); + + printk(KERN_ERR "FS-Cache: Cache %s stopped due to I/O error\n", + cache->ops->name); +} +EXPORT_SYMBOL(fscache_io_error); + +/* + * request withdrawal of all the objects in a cache + * - all the objects being withdrawn are moved onto the supplied list + */ +static void fscache_withdraw_all_objects(struct fscache_cache *cache, + struct list_head *dying_objects) +{ + struct fscache_object *object; + + spin_lock(&cache->object_list_lock); + + while (!list_empty(&cache->object_list)) { + object = list_entry(cache->object_list.next, + struct fscache_object, cache_link); + list_move_tail(&object->cache_link, dying_objects); + + _debug("withdraw %p", object->cookie); + + spin_lock(&object->lock); + spin_unlock(&cache->object_list_lock); + fscache_raise_event(object, FSCACHE_OBJECT_EV_WITHDRAW); + spin_unlock(&object->lock); + + cond_resched(); + spin_lock(&cache->object_list_lock); + } + + spin_unlock(&cache->object_list_lock); +} + +/** + * fscache_withdraw_cache - Withdraw a cache from the active service + * @cache: The record describing the cache + * + * Withdraw a cache from service, unbinding all its cache objects from the + * netfs cookies they're currently representing. + * + * See Documentation/filesystems/caching/backend-api.txt for a complete + * description. + */ +void fscache_withdraw_cache(struct fscache_cache *cache) +{ + LIST_HEAD(dying_objects); + + _enter(""); + + printk(KERN_NOTICE "FS-Cache: Withdrawing cache \"%s\"\n", + cache->tag->name); + + /* make the cache unavailable for cookie acquisition */ + if (test_and_set_bit(FSCACHE_CACHE_WITHDRAWN, &cache->flags)) + BUG(); + + down_write(&fscache_addremove_sem); + list_del_init(&cache->link); + cache->tag->cache = NULL; + up_write(&fscache_addremove_sem); + + /* make sure all pages pinned by operations on behalf of the netfs are + * written to disk */ + cache->ops->sync_cache(cache); + + /* dissociate all the netfs pages backed by this cache from the block + * mappings in the cache */ + cache->ops->dissociate_pages(cache); + + /* we now have to destroy all the active objects pertaining to this + * cache - which we do by passing them off to thread pool to be + * disposed of */ + _debug("destroy"); + + fscache_withdraw_all_objects(cache, &dying_objects); + + /* wait for all extant objects to finish their outstanding operations + * and go away */ + _debug("wait for finish"); + wait_event(fscache_cache_cleared_wq, + atomic_read(&cache->object_count) == 0); + _debug("wait for clearance"); + wait_event(fscache_cache_cleared_wq, + list_empty(&cache->object_list)); + _debug("cleared"); + ASSERT(list_empty(&dying_objects)); + + kobject_put(cache->kobj); + + clear_bit(FSCACHE_TAG_RESERVED, &cache->tag->flags); + fscache_release_cache_tag(cache->tag); + cache->tag = NULL; + + _leave(""); +} +EXPORT_SYMBOL(fscache_withdraw_cache); diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c new file mode 100644 index 00000000000..72fd18f6c71 --- /dev/null +++ b/fs/fscache/cookie.c @@ -0,0 +1,500 @@ +/* netfs cookie management + * + * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * See Documentation/filesystems/caching/netfs-api.txt for more information on + * the netfs API. + */ + +#define FSCACHE_DEBUG_LEVEL COOKIE +#include <linux/module.h> +#include <linux/slab.h> +#include "internal.h" + +struct kmem_cache *fscache_cookie_jar; + +static atomic_t fscache_object_debug_id = ATOMIC_INIT(0); + +static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie); +static int fscache_alloc_object(struct fscache_cache *cache, + struct fscache_cookie *cookie); +static int fscache_attach_object(struct fscache_cookie *cookie, + struct fscache_object *object); + +/* + * initialise an cookie jar slab element prior to any use + */ +void fscache_cookie_init_once(void *_cookie) +{ + struct fscache_cookie *cookie = _cookie; + + memset(cookie, 0, sizeof(*cookie)); + spin_lock_init(&cookie->lock); + INIT_HLIST_HEAD(&cookie->backing_objects); +} + +/* + * request a cookie to represent an object (index, datafile, xattr, etc) + * - parent specifies the parent object + * - the top level index cookie for each netfs is stored in the fscache_netfs + * struct upon registration + * - def points to the definition + * - the netfs_data will be passed to the functions pointed to in *def + * - all attached caches will be searched to see if they contain this object + * - index objects aren't stored on disk until there's a dependent file that + * needs storing + * - other objects are stored in a selected cache immediately, and all the + * indices forming the path to it are instantiated if necessary + * - we never let on to the netfs about errors + * - we may set a negative cookie pointer, but that's okay + */ +struct fscache_cookie *__fscache_acquire_cookie( + struct fscache_cookie *parent, + const struct fscache_cookie_def *def, + void *netfs_data) +{ + struct fscache_cookie *cookie; + + BUG_ON(!def); + + _enter("{%s},{%s},%p", + parent ? (char *) parent->def->name : "<no-parent>", + def->name, netfs_data); + + fscache_stat(&fscache_n_acquires); + + /* if there's no parent cookie, then we don't create one here either */ + if (!parent) { + fscache_stat(&fscache_n_acquires_null); + _leave(" [no parent]"); + return NULL; + } + + /* validate the definition */ + BUG_ON(!def->get_key); + BUG_ON(!def->name[0]); + + BUG_ON(def->type == FSCACHE_COOKIE_TYPE_INDEX && + parent->def->type != FSCACHE_COOKIE_TYPE_INDEX); + + /* allocate and initialise a cookie */ + cookie = kmem_cache_alloc(fscache_cookie_jar, GFP_KERNEL); + if (!cookie) { + fscache_stat(&fscache_n_acquires_oom); + _leave(" [ENOMEM]"); + return NULL; + } + + atomic_set(&cookie->usage, 1); + atomic_set(&cookie->n_children, 0); + + atomic_inc(&parent->usage); + atomic_inc(&parent->n_children); + + cookie->def = def; + cookie->parent = parent; + cookie->netfs_data = netfs_data; + cookie->flags = 0; + + INIT_RADIX_TREE(&cookie->stores, GFP_NOFS); + + switch (cookie->def->type) { + case FSCACHE_COOKIE_TYPE_INDEX: + fscache_stat(&fscache_n_cookie_index); + break; + case FSCACHE_COOKIE_TYPE_DATAFILE: + fscache_stat(&fscache_n_cookie_data); + break; + default: + fscache_stat(&fscache_n_cookie_special); + break; + } + + /* if the object is an index then we need do nothing more here - we + * create indices on disk when we need them as an index may exist in + * multiple caches */ + if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) { + if (fscache_acquire_non_index_cookie(cookie) < 0) { + atomic_dec(&parent->n_children); + __fscache_cookie_put(cookie); + fscache_stat(&fscache_n_acquires_nobufs); + _leave(" = NULL"); + return NULL; + } + } + + fscache_stat(&fscache_n_acquires_ok); + _leave(" = %p", cookie); + return cookie; +} +EXPORT_SYMBOL(__fscache_acquire_cookie); + +/* + * acquire a non-index cookie + * - this must make sure the index chain is instantiated and instantiate the + * object representation too + */ +static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie) +{ + struct fscache_object *object; + struct fscache_cache *cache; + uint64_t i_size; + int ret; + + _enter(""); + + cookie->flags = 1 << FSCACHE_COOKIE_UNAVAILABLE; + + /* now we need to see whether the backing objects for this cookie yet + * exist, if not there'll be nothing to search */ + down_read(&fscache_addremove_sem); + + if (list_empty(&fscache_cache_list)) { + up_read(&fscache_addremove_sem); + _leave(" = 0 [no caches]"); + return 0; + } + + /* select a cache in which to store the object */ + cache = fscache_select_cache_for_object(cookie->parent); + if (!cache) { + up_read(&fscache_addremove_sem); + fscache_stat(&fscache_n_acquires_no_cache); + _leave(" = -ENOMEDIUM [no cache]"); + return -ENOMEDIUM; + } + + _debug("cache %s", cache->tag->name); + + cookie->flags = + (1 << FSCACHE_COOKIE_LOOKING_UP) | + (1 << FSCACHE_COOKIE_CREATING) | + (1 << FSCACHE_COOKIE_NO_DATA_YET); + + /* ask the cache to allocate objects for this cookie and its parent + * chain */ + ret = fscache_alloc_object(cache, cookie); + if (ret < 0) { + up_read(&fscache_addremove_sem); + _leave(" = %d", ret); + return ret; + } + + /* pass on how big the object we're caching is supposed to be */ + cookie->def->get_attr(cookie->netfs_data, &i_size); + + spin_lock(&cookie->lock); + if (hlist_empty(&cookie->backing_objects)) { + spin_unlock(&cookie->lock); + goto unavailable; + } + + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + fscache_set_store_limit(object, i_size); + + /* initiate the process of looking up all the objects in the chain + * (done by fscache_initialise_object()) */ + fscache_enqueue_object(object); + + spin_unlock(&cookie->lock); + + /* we may be required to wait for lookup to complete at this point */ + if (!fscache_defer_lookup) { + _debug("non-deferred lookup %p", &cookie->flags); + wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP, + fscache_wait_bit, TASK_UNINTERRUPTIBLE); + _debug("complete"); + if (test_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags)) + goto unavailable; + } + + up_read(&fscache_addremove_sem); + _leave(" = 0 [deferred]"); + return 0; + +unavailable: + up_read(&fscache_addremove_sem); + _leave(" = -ENOBUFS"); + return -ENOBUFS; +} + +/* + * recursively allocate cache object records for a cookie/cache combination + * - caller must be holding the addremove sem + */ +static int fscache_alloc_object(struct fscache_cache *cache, + struct fscache_cookie *cookie) +{ + struct fscache_object *object; + struct hlist_node *_n; + int ret; + + _enter("%p,%p{%s}", cache, cookie, cookie->def->name); + + spin_lock(&cookie->lock); + hlist_for_each_entry(object, _n, &cookie->backing_objects, + cookie_link) { + if (object->cache == cache) + goto object_already_extant; + } + spin_unlock(&cookie->lock); + + /* ask the cache to allocate an object (we may end up with duplicate + * objects at this stage, but we sort that out later) */ + object = cache->ops->alloc_object(cache, cookie); + if (IS_ERR(object)) { + fscache_stat(&fscache_n_object_no_alloc); + ret = PTR_ERR(object); + goto error; + } + + fscache_stat(&fscache_n_object_alloc); + + object->debug_id = atomic_inc_return(&fscache_object_debug_id); + + _debug("ALLOC OBJ%x: %s {%lx}", + object->debug_id, cookie->def->name, object->events); + + ret = fscache_alloc_object(cache, cookie->parent); + if (ret < 0) + goto error_put; + + /* only attach if we managed to allocate all we needed, otherwise + * discard the object we just allocated and instead use the one + * attached to the cookie */ + if (fscache_attach_object(cookie, object) < 0) + cache->ops->put_object(object); + + _leave(" = 0"); + return 0; + +object_already_extant: + ret = -ENOBUFS; + if (object->state >= FSCACHE_OBJECT_DYING) { + spin_unlock(&cookie->lock); + goto error; + } + spin_unlock(&cookie->lock); + _leave(" = 0 [found]"); + return 0; + +error_put: + cache->ops->put_object(object); +error: + _leave(" = %d", ret); + return ret; +} + +/* + * attach a cache object to a cookie + */ +static int fscache_attach_object(struct fscache_cookie *cookie, + struct fscache_object *object) +{ + struct fscache_object *p; + struct fscache_cache *cache = object->cache; + struct hlist_node *_n; + int ret; + + _enter("{%s},{OBJ%x}", cookie->def->name, object->debug_id); + + spin_lock(&cookie->lock); + + /* there may be multiple initial creations of this object, but we only + * want one */ + ret = -EEXIST; + hlist_for_each_entry(p, _n, &cookie->backing_objects, cookie_link) { + if (p->cache == object->cache) { + if (p->state >= FSCACHE_OBJECT_DYING) + ret = -ENOBUFS; + goto cant_attach_object; + } + } + + /* pin the parent object */ + spin_lock_nested(&cookie->parent->lock, 1); + hlist_for_each_entry(p, _n, &cookie->parent->backing_objects, + cookie_link) { + if (p->cache == object->cache) { + if (p->state >= FSCACHE_OBJECT_DYING) { + ret = -ENOBUFS; + spin_unlock(&cookie->parent->lock); + goto cant_attach_object; + } + object->parent = p; + spin_lock(&p->lock); + p->n_children++; + spin_unlock(&p->lock); + break; + } + } + spin_unlock(&cookie->parent->lock); + + /* attach to the cache's object list */ + if (list_empty(&object->cache_link)) { + spin_lock(&cache->object_list_lock); + list_add(&object->cache_link, &cache->object_list); + spin_unlock(&cache->object_list_lock); + } + + /* attach to the cookie */ + object->cookie = cookie; + atomic_inc(&cookie->usage); + hlist_add_head(&object->cookie_link, &cookie->backing_objects); + ret = 0; + +cant_attach_object: + spin_unlock(&cookie->lock); + _leave(" = %d", ret); + return ret; +} + +/* + * update the index entries backing a cookie + */ +void __fscache_update_cookie(struct fscache_cookie *cookie) +{ + struct fscache_object *object; + struct hlist_node *_p; + + fscache_stat(&fscache_n_updates); + + if (!cookie) { + fscache_stat(&fscache_n_updates_null); + _leave(" [no cookie]"); + return; + } + + _enter("{%s}", cookie->def->name); + + BUG_ON(!cookie->def->get_aux); + + spin_lock(&cookie->lock); + + /* update the index entry on disk in each cache backing this cookie */ + hlist_for_each_entry(object, _p, + &cookie->backing_objects, cookie_link) { + fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE); + } + + spin_unlock(&cookie->lock); + _leave(""); +} +EXPORT_SYMBOL(__fscache_update_cookie); + +/* + * release a cookie back to the cache + * - the object will be marked as recyclable on disk if retire is true + * - all dependents of this cookie must have already been unregistered + * (indices/files/pages) + */ +void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire) +{ + struct fscache_cache *cache; + struct fscache_object *object; + unsigned long event; + + fscache_stat(&fscache_n_relinquishes); + + if (!cookie) { + fscache_stat(&fscache_n_relinquishes_null); + _leave(" [no cookie]"); + return; + } + + _enter("%p{%s,%p},%d", + cookie, cookie->def->name, cookie->netfs_data, retire); + + if (atomic_read(&cookie->n_children) != 0) { + printk(KERN_ERR "FS-Cache: Cookie '%s' still has children\n", + cookie->def->name); + BUG(); + } + + /* wait for the cookie to finish being instantiated (or to fail) */ + if (test_bit(FSCACHE_COOKIE_CREATING, &cookie->flags)) { + fscache_stat(&fscache_n_relinquishes_waitcrt); + wait_on_bit(&cookie->flags, FSCACHE_COOKIE_CREATING, + fscache_wait_bit, TASK_UNINTERRUPTIBLE); + } + + event = retire ? FSCACHE_OBJECT_EV_RETIRE : FSCACHE_OBJECT_EV_RELEASE; + + /* detach pointers back to the netfs */ + spin_lock(&cookie->lock); + + cookie->netfs_data = NULL; + cookie->def = NULL; + + /* break links with all the active objects */ + while (!hlist_empty(&cookie->backing_objects)) { + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, + cookie_link); + + _debug("RELEASE OBJ%x", object->debug_id); + + /* detach each cache object from the object cookie */ + spin_lock(&object->lock); + hlist_del_init(&object->cookie_link); + + cache = object->cache; + object->cookie = NULL; + fscache_raise_event(object, event); + spin_unlock(&object->lock); + + if (atomic_dec_and_test(&cookie->usage)) + /* the cookie refcount shouldn't be reduced to 0 yet */ + BUG(); + } + + spin_unlock(&cookie->lock); + + if (cookie->parent) { + ASSERTCMP(atomic_read(&cookie->parent->usage), >, 0); + ASSERTCMP(atomic_read(&cookie->parent->n_children), >, 0); + atomic_dec(&cookie->parent->n_children); + } + + /* finally dispose of the cookie */ + ASSERTCMP(atomic_read(&cookie->usage), >, 0); + fscache_cookie_put(cookie); + + _leave(""); +} +EXPORT_SYMBOL(__fscache_relinquish_cookie); + +/* + * destroy a cookie + */ +void __fscache_cookie_put(struct fscache_cookie *cookie) +{ + struct fscache_cookie *parent; + + _enter("%p", cookie); + + for (;;) { + _debug("FREE COOKIE %p", cookie); + parent = cookie->parent; + BUG_ON(!hlist_empty(&cookie->backing_objects)); + kmem_cache_free(fscache_cookie_jar, cookie); + + if (!parent) + break; + + cookie = parent; + BUG_ON(atomic_read(&cookie->usage) <= 0); + if (!atomic_dec_and_test(&cookie->usage)) + break; + } + + _leave(""); +} diff --git a/fs/fscache/fsdef.c b/fs/fscache/fsdef.c new file mode 100644 index 00000000000..f5b4baee735 --- /dev/null +++ b/fs/fscache/fsdef.c @@ -0,0 +1,144 @@ +/* Filesystem index definition + * + * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define FSCACHE_DEBUG_LEVEL CACHE +#include <linux/module.h> +#include "internal.h" + +static uint16_t fscache_fsdef_netfs_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax); + +static uint16_t fscache_fsdef_netfs_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax); + +static +enum fscache_checkaux fscache_fsdef_netfs_check_aux(void *cookie_netfs_data, + const void *data, + uint16_t datalen); + +/* + * The root index is owned by FS-Cache itself. + * + * When a netfs requests caching facilities, FS-Cache will, if one doesn't + * already exist, create an entry in the root index with the key being the name + * of the netfs ("AFS" for example), and the auxiliary data holding the index + * structure version supplied by the netfs: + * + * FSDEF + * | + * +-----------+ + * | | + * NFS AFS + * [v=1] [v=1] + * + * If an entry with the appropriate name does already exist, the version is + * compared. If the version is different, the entire subtree from that entry + * will be discarded and a new entry created. + * + * The new entry will be an index, and a cookie referring to it will be passed + * to the netfs. This is then the root handle by which the netfs accesses the + * cache. It can create whatever objects it likes in that index, including + * further indices. + */ +static struct fscache_cookie_def fscache_fsdef_index_def = { + .name = ".FS-Cache", + .type = FSCACHE_COOKIE_TYPE_INDEX, +}; + +struct fscache_cookie fscache_fsdef_index = { + .usage = ATOMIC_INIT(1), + .lock = __SPIN_LOCK_UNLOCKED(fscache_fsdef_index.lock), + .backing_objects = HLIST_HEAD_INIT, + .def = &fscache_fsdef_index_def, +}; +EXPORT_SYMBOL(fscache_fsdef_index); + +/* + * Definition of an entry in the root index. Each entry is an index, keyed to + * a specific netfs and only applicable to a particular version of the index + * structure used by that netfs. + */ +struct fscache_cookie_def fscache_fsdef_netfs_def = { + .name = "FSDEF.netfs", + .type = FSCACHE_COOKIE_TYPE_INDEX, + .get_key = fscache_fsdef_netfs_get_key, + .get_aux = fscache_fsdef_netfs_get_aux, + .check_aux = fscache_fsdef_netfs_check_aux, +}; + +/* + * get the key data for an FSDEF index record - this is the name of the netfs + * for which this entry is created + */ +static uint16_t fscache_fsdef_netfs_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) +{ + const struct fscache_netfs *netfs = cookie_netfs_data; + unsigned klen; + + _enter("{%s.%u},", netfs->name, netfs->version); + + klen = strlen(netfs->name); + if (klen > bufmax) + return 0; + + memcpy(buffer, netfs->name, klen); + return klen; +} + +/* + * get the auxiliary data for an FSDEF index record - this is the index + * structure version number of the netfs for which this version is created + */ +static uint16_t fscache_fsdef_netfs_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) +{ + const struct fscache_netfs *netfs = cookie_netfs_data; + unsigned dlen; + + _enter("{%s.%u},", netfs->name, netfs->version); + + dlen = sizeof(uint32_t); + if (dlen > bufmax) + return 0; + + memcpy(buffer, &netfs->version, dlen); + return dlen; +} + +/* + * check that the index structure version number stored in the auxiliary data + * matches the one the netfs gave us + */ +static enum fscache_checkaux fscache_fsdef_netfs_check_aux( + void *cookie_netfs_data, + const void *data, + uint16_t datalen) +{ + struct fscache_netfs *netfs = cookie_netfs_data; + uint32_t version; + + _enter("{%s},,%hu", netfs->name, datalen); + + if (datalen != sizeof(version)) { + _leave(" = OBSOLETE [dl=%d v=%zu]", datalen, sizeof(version)); + return FSCACHE_CHECKAUX_OBSOLETE; + } + + memcpy(&version, data, sizeof(version)); + if (version != netfs->version) { + _leave(" = OBSOLETE [ver=%x net=%x]", version, netfs->version); + return FSCACHE_CHECKAUX_OBSOLETE; + } + + _leave(" = OKAY"); + return FSCACHE_CHECKAUX_OKAY; +} diff --git a/fs/fscache/histogram.c b/fs/fscache/histogram.c new file mode 100644 index 00000000000..bad496748a5 --- /dev/null +++ b/fs/fscache/histogram.c @@ -0,0 +1,109 @@ +/* FS-Cache latency histogram + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#define FSCACHE_DEBUG_LEVEL THREAD +#include <linux/module.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include "internal.h" + +atomic_t fscache_obj_instantiate_histogram[HZ]; +atomic_t fscache_objs_histogram[HZ]; +atomic_t fscache_ops_histogram[HZ]; +atomic_t fscache_retrieval_delay_histogram[HZ]; +atomic_t fscache_retrieval_histogram[HZ]; + +/* + * display the time-taken histogram + */ +static int fscache_histogram_show(struct seq_file *m, void *v) +{ + unsigned long index; + unsigned n[5], t; + + switch ((unsigned long) v) { + case 1: + seq_puts(m, "JIFS SECS OBJ INST OP RUNS OBJ RUNS " + " RETRV DLY RETRIEVLS\n"); + return 0; + case 2: + seq_puts(m, "===== ===== ========= ========= =========" + " ========= =========\n"); + return 0; + default: + index = (unsigned long) v - 3; + n[0] = atomic_read(&fscache_obj_instantiate_histogram[index]); + n[1] = atomic_read(&fscache_ops_histogram[index]); + n[2] = atomic_read(&fscache_objs_histogram[index]); + n[3] = atomic_read(&fscache_retrieval_delay_histogram[index]); + n[4] = atomic_read(&fscache_retrieval_histogram[index]); + if (!(n[0] | n[1] | n[2] | n[3] | n[4])) + return 0; + + t = (index * 1000) / HZ; + + seq_printf(m, "%4lu 0.%03u %9u %9u %9u %9u %9u\n", + index, t, n[0], n[1], n[2], n[3], n[4]); + return 0; + } +} + +/* + * set up the iterator to start reading from the first line + */ +static void *fscache_histogram_start(struct seq_file *m, loff_t *_pos) +{ + if ((unsigned long long)*_pos >= HZ + 2) + return NULL; + if (*_pos == 0) + *_pos = 1; + return (void *)(unsigned long) *_pos; +} + +/* + * move to the next line + */ +static void *fscache_histogram_next(struct seq_file *m, void *v, loff_t *pos) +{ + (*pos)++; + return (unsigned long long)*pos > HZ + 2 ? + NULL : (void *)(unsigned long) *pos; +} + +/* + * clean up after reading + */ +static void fscache_histogram_stop(struct seq_file *m, void *v) +{ +} + +static const struct seq_operations fscache_histogram_ops = { + .start = fscache_histogram_start, + .stop = fscache_histogram_stop, + .next = fscache_histogram_next, + .show = fscache_histogram_show, +}; + +/* + * open "/proc/fs/fscache/histogram" to provide latency data + */ +static int fscache_histogram_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &fscache_histogram_ops); +} + +const struct file_operations fscache_histogram_fops = { + .owner = THIS_MODULE, + .open = fscache_histogram_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h new file mode 100644 index 00000000000..e0cbd16f6dc --- /dev/null +++ b/fs/fscache/internal.h @@ -0,0 +1,380 @@ +/* Internal definitions for FS-Cache + * + * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * Lock order, in the order in which multiple locks should be obtained: + * - fscache_addremove_sem + * - cookie->lock + * - cookie->parent->lock + * - cache->object_list_lock + * - object->lock + * - object->parent->lock + * - fscache_thread_lock + * + */ + +#include <linux/fscache-cache.h> +#include <linux/sched.h> + +#define FSCACHE_MIN_THREADS 4 +#define FSCACHE_MAX_THREADS 32 + +/* + * fsc-cache.c + */ +extern struct list_head fscache_cache_list; +extern struct rw_semaphore fscache_addremove_sem; + +extern struct fscache_cache *fscache_select_cache_for_object( + struct fscache_cookie *); + +/* + * fsc-cookie.c + */ +extern struct kmem_cache *fscache_cookie_jar; + +extern void fscache_cookie_init_once(void *); +extern void __fscache_cookie_put(struct fscache_cookie *); + +/* + * fsc-fsdef.c + */ +extern struct fscache_cookie fscache_fsdef_index; +extern struct fscache_cookie_def fscache_fsdef_netfs_def; + +/* + * fsc-histogram.c + */ +#ifdef CONFIG_FSCACHE_HISTOGRAM +extern atomic_t fscache_obj_instantiate_histogram[HZ]; +extern atomic_t fscache_objs_histogram[HZ]; +extern atomic_t fscache_ops_histogram[HZ]; +extern atomic_t fscache_retrieval_delay_histogram[HZ]; +extern atomic_t fscache_retrieval_histogram[HZ]; + +static inline void fscache_hist(atomic_t histogram[], unsigned long start_jif) +{ + unsigned long jif = jiffies - start_jif; + if (jif >= HZ) + jif = HZ - 1; + atomic_inc(&histogram[jif]); +} + +extern const struct file_operations fscache_histogram_fops; + +#else +#define fscache_hist(hist, start_jif) do {} while (0) +#endif + +/* + * fsc-main.c + */ +extern unsigned fscache_defer_lookup; +extern unsigned fscache_defer_create; +extern unsigned fscache_debug; +extern struct kobject *fscache_root; + +extern int fscache_wait_bit(void *); +extern int fscache_wait_bit_interruptible(void *); + +/* + * fsc-object.c + */ +extern void fscache_withdrawing_object(struct fscache_cache *, + struct fscache_object *); +extern void fscache_enqueue_object(struct fscache_object *); + +/* + * fsc-operation.c + */ +extern int fscache_submit_exclusive_op(struct fscache_object *, + struct fscache_operation *); +extern int fscache_submit_op(struct fscache_object *, + struct fscache_operation *); +extern void fscache_abort_object(struct fscache_object *); +extern void fscache_start_operations(struct fscache_object *); +extern void fscache_operation_gc(struct work_struct *); + +/* + * fsc-proc.c + */ +#ifdef CONFIG_PROC_FS +extern int __init fscache_proc_init(void); +extern void fscache_proc_cleanup(void); +#else +#define fscache_proc_init() (0) +#define fscache_proc_cleanup() do {} while (0) +#endif + +/* + * fsc-stats.c + */ +#ifdef CONFIG_FSCACHE_STATS +extern atomic_t fscache_n_ops_processed[FSCACHE_MAX_THREADS]; +extern atomic_t fscache_n_objs_processed[FSCACHE_MAX_THREADS]; + +extern atomic_t fscache_n_op_pend; +extern atomic_t fscache_n_op_run; +extern atomic_t fscache_n_op_enqueue; +extern atomic_t fscache_n_op_deferred_release; +extern atomic_t fscache_n_op_release; +extern atomic_t fscache_n_op_gc; + +extern atomic_t fscache_n_attr_changed; +extern atomic_t fscache_n_attr_changed_ok; +extern atomic_t fscache_n_attr_changed_nobufs; +extern atomic_t fscache_n_attr_changed_nomem; +extern atomic_t fscache_n_attr_changed_calls; + +extern atomic_t fscache_n_allocs; +extern atomic_t fscache_n_allocs_ok; +extern atomic_t fscache_n_allocs_wait; +extern atomic_t fscache_n_allocs_nobufs; +extern atomic_t fscache_n_alloc_ops; +extern atomic_t fscache_n_alloc_op_waits; + +extern atomic_t fscache_n_retrievals; +extern atomic_t fscache_n_retrievals_ok; +extern atomic_t fscache_n_retrievals_wait; +extern atomic_t fscache_n_retrievals_nodata; +extern atomic_t fscache_n_retrievals_nobufs; +extern atomic_t fscache_n_retrievals_intr; +extern atomic_t fscache_n_retrievals_nomem; +extern atomic_t fscache_n_retrieval_ops; +extern atomic_t fscache_n_retrieval_op_waits; + +extern atomic_t fscache_n_stores; +extern atomic_t fscache_n_stores_ok; +extern atomic_t fscache_n_stores_again; +extern atomic_t fscache_n_stores_nobufs; +extern atomic_t fscache_n_stores_oom; +extern atomic_t fscache_n_store_ops; +extern atomic_t fscache_n_store_calls; + +extern atomic_t fscache_n_marks; +extern atomic_t fscache_n_uncaches; + +extern atomic_t fscache_n_acquires; +extern atomic_t fscache_n_acquires_null; +extern atomic_t fscache_n_acquires_no_cache; +extern atomic_t fscache_n_acquires_ok; +extern atomic_t fscache_n_acquires_nobufs; +extern atomic_t fscache_n_acquires_oom; + +extern atomic_t fscache_n_updates; +extern atomic_t fscache_n_updates_null; +extern atomic_t fscache_n_updates_run; + +extern atomic_t fscache_n_relinquishes; +extern atomic_t fscache_n_relinquishes_null; +extern atomic_t fscache_n_relinquishes_waitcrt; + +extern atomic_t fscache_n_cookie_index; +extern atomic_t fscache_n_cookie_data; +extern atomic_t fscache_n_cookie_special; + +extern atomic_t fscache_n_object_alloc; +extern atomic_t fscache_n_object_no_alloc; +extern atomic_t fscache_n_object_lookups; +extern atomic_t fscache_n_object_lookups_negative; +extern atomic_t fscache_n_object_lookups_positive; +extern atomic_t fscache_n_object_created; +extern atomic_t fscache_n_object_avail; +extern atomic_t fscache_n_object_dead; + +extern atomic_t fscache_n_checkaux_none; +extern atomic_t fscache_n_checkaux_okay; +extern atomic_t fscache_n_checkaux_update; +extern atomic_t fscache_n_checkaux_obsolete; + +static inline void fscache_stat(atomic_t *stat) +{ + atomic_inc(stat); +} + +extern const struct file_operations fscache_stats_fops; +#else + +#define fscache_stat(stat) do {} while (0) +#endif + +/* + * raise an event on an object + * - if the event is not masked for that object, then the object is + * queued for attention by the thread pool. + */ +static inline void fscache_raise_event(struct fscache_object *object, + unsigned event) +{ + if (!test_and_set_bit(event, &object->events) && + test_bit(event, &object->event_mask)) + fscache_enqueue_object(object); +} + +/* + * drop a reference to a cookie + */ +static inline void fscache_cookie_put(struct fscache_cookie *cookie) +{ + BUG_ON(atomic_read(&cookie->usage) <= 0); + if (atomic_dec_and_test(&cookie->usage)) + __fscache_cookie_put(cookie); +} + +/* + * get an extra reference to a netfs retrieval context + */ +static inline +void *fscache_get_context(struct fscache_cookie *cookie, void *context) +{ + if (cookie->def->get_context) + cookie->def->get_context(cookie->netfs_data, context); + return context; +} + +/* + * release a reference to a netfs retrieval context + */ +static inline +void fscache_put_context(struct fscache_cookie *cookie, void *context) +{ + if (cookie->def->put_context) + cookie->def->put_context(cookie->netfs_data, context); +} + +/*****************************************************************************/ +/* + * debug tracing + */ +#define dbgprintk(FMT, ...) \ + printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) + +/* make sure we maintain the format strings, even when debugging is disabled */ +static inline __attribute__((format(printf, 1, 2))) +void _dbprintk(const char *fmt, ...) +{ +} + +#define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) +#define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) +#define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__) + +#define kjournal(FMT, ...) _dbprintk(FMT, ##__VA_ARGS__) + +#ifdef __KDEBUG +#define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__) +#define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__) +#define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__) + +#elif defined(CONFIG_FSCACHE_DEBUG) +#define _enter(FMT, ...) \ +do { \ + if (__do_kdebug(ENTER)) \ + kenter(FMT, ##__VA_ARGS__); \ +} while (0) + +#define _leave(FMT, ...) \ +do { \ + if (__do_kdebug(LEAVE)) \ + kleave(FMT, ##__VA_ARGS__); \ +} while (0) + +#define _debug(FMT, ...) \ +do { \ + if (__do_kdebug(DEBUG)) \ + kdebug(FMT, ##__VA_ARGS__); \ +} while (0) + +#else +#define _enter(FMT, ...) _dbprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) +#define _leave(FMT, ...) _dbprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) +#define _debug(FMT, ...) _dbprintk(FMT, ##__VA_ARGS__) +#endif + +/* + * determine whether a particular optional debugging point should be logged + * - we need to go through three steps to persuade cpp to correctly join the + * shorthand in FSCACHE_DEBUG_LEVEL with its prefix + */ +#define ____do_kdebug(LEVEL, POINT) \ + unlikely((fscache_debug & \ + (FSCACHE_POINT_##POINT << (FSCACHE_DEBUG_ ## LEVEL * 3)))) +#define ___do_kdebug(LEVEL, POINT) \ + ____do_kdebug(LEVEL, POINT) +#define __do_kdebug(POINT) \ + ___do_kdebug(FSCACHE_DEBUG_LEVEL, POINT) + +#define FSCACHE_DEBUG_CACHE 0 +#define FSCACHE_DEBUG_COOKIE 1 +#define FSCACHE_DEBUG_PAGE 2 +#define FSCACHE_DEBUG_OPERATION 3 + +#define FSCACHE_POINT_ENTER 1 +#define FSCACHE_POINT_LEAVE 2 +#define FSCACHE_POINT_DEBUG 4 + +#ifndef FSCACHE_DEBUG_LEVEL +#define FSCACHE_DEBUG_LEVEL CACHE +#endif + +/* + * assertions + */ +#if 1 /* defined(__KDEBUGALL) */ + +#define ASSERT(X) \ +do { \ + if (unlikely(!(X))) { \ + printk(KERN_ERR "\n"); \ + printk(KERN_ERR "FS-Cache: Assertion failed\n"); \ + BUG(); \ + } \ +} while (0) + +#define ASSERTCMP(X, OP, Y) \ +do { \ + if (unlikely(!((X) OP (Y)))) { \ + printk(KERN_ERR "\n"); \ + printk(KERN_ERR "FS-Cache: Assertion failed\n"); \ + printk(KERN_ERR "%lx " #OP " %lx is false\n", \ + (unsigned long)(X), (unsigned long)(Y)); \ + BUG(); \ + } \ +} while (0) + +#define ASSERTIF(C, X) \ +do { \ + if (unlikely((C) && !(X))) { \ + printk(KERN_ERR "\n"); \ + printk(KERN_ERR "FS-Cache: Assertion failed\n"); \ + BUG(); \ + } \ +} while (0) + +#define ASSERTIFCMP(C, X, OP, Y) \ +do { \ + if (unlikely((C) && !((X) OP (Y)))) { \ + printk(KERN_ERR "\n"); \ + printk(KERN_ERR "FS-Cache: Assertion failed\n"); \ + printk(KERN_ERR "%lx " #OP " %lx is false\n", \ + (unsigned long)(X), (unsigned long)(Y)); \ + BUG(); \ + } \ +} while (0) + +#else + +#define ASSERT(X) do {} while (0) +#define ASSERTCMP(X, OP, Y) do {} while (0) +#define ASSERTIF(C, X) do {} while (0) +#define ASSERTIFCMP(C, X, OP, Y) do {} while (0) + +#endif /* assert or not */ diff --git a/fs/fscache/main.c b/fs/fscache/main.c new file mode 100644 index 00000000000..4de41b59749 --- /dev/null +++ b/fs/fscache/main.c @@ -0,0 +1,124 @@ +/* General filesystem local caching manager + * + * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define FSCACHE_DEBUG_LEVEL CACHE +#include <linux/module.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/completion.h> +#include <linux/slab.h> +#include "internal.h" + +MODULE_DESCRIPTION("FS Cache Manager"); +MODULE_AUTHOR("Red Hat, Inc."); +MODULE_LICENSE("GPL"); + +unsigned fscache_defer_lookup = 1; +module_param_named(defer_lookup, fscache_defer_lookup, uint, + S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(fscache_defer_lookup, + "Defer cookie lookup to background thread"); + +unsigned fscache_defer_create = 1; +module_param_named(defer_create, fscache_defer_create, uint, + S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(fscache_defer_create, + "Defer cookie creation to background thread"); + +unsigned fscache_debug; +module_param_named(debug, fscache_debug, uint, + S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(fscache_debug, + "FS-Cache debugging mask"); + +struct kobject *fscache_root; + +/* + * initialise the fs caching module + */ +static int __init fscache_init(void) +{ + int ret; + + ret = slow_work_register_user(); + if (ret < 0) + goto error_slow_work; + + ret = fscache_proc_init(); + if (ret < 0) + goto error_proc; + + fscache_cookie_jar = kmem_cache_create("fscache_cookie_jar", + sizeof(struct fscache_cookie), + 0, + 0, + fscache_cookie_init_once); + if (!fscache_cookie_jar) { + printk(KERN_NOTICE + "FS-Cache: Failed to allocate a cookie jar\n"); + ret = -ENOMEM; + goto error_cookie_jar; + } + + fscache_root = kobject_create_and_add("fscache", kernel_kobj); + if (!fscache_root) + goto error_kobj; + + printk(KERN_NOTICE "FS-Cache: Loaded\n"); + return 0; + +error_kobj: + kmem_cache_destroy(fscache_cookie_jar); +error_cookie_jar: + fscache_proc_cleanup(); +error_proc: + slow_work_unregister_user(); +error_slow_work: + return ret; +} + +fs_initcall(fscache_init); + +/* + * clean up on module removal + */ +static void __exit fscache_exit(void) +{ + _enter(""); + + kobject_put(fscache_root); + kmem_cache_destroy(fscache_cookie_jar); + fscache_proc_cleanup(); + slow_work_unregister_user(); + printk(KERN_NOTICE "FS-Cache: Unloaded\n"); +} + +module_exit(fscache_exit); + +/* + * wait_on_bit() sleep function for uninterruptible waiting + */ +int fscache_wait_bit(void *flags) +{ + schedule(); + return 0; +} +EXPORT_SYMBOL(fscache_wait_bit); + +/* + * wait_on_bit() sleep function for interruptible waiting + */ +int fscache_wait_bit_interruptible(void *flags) +{ + schedule(); + return signal_pending(current); +} +EXPORT_SYMBOL(fscache_wait_bit_interruptible); diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c new file mode 100644 index 00000000000..e028b8eb1c4 --- /dev/null +++ b/fs/fscache/netfs.c @@ -0,0 +1,103 @@ +/* FS-Cache netfs (client) registration + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#define FSCACHE_DEBUG_LEVEL COOKIE +#include <linux/module.h> +#include <linux/slab.h> +#include "internal.h" + +static LIST_HEAD(fscache_netfs_list); + +/* + * register a network filesystem for caching + */ +int __fscache_register_netfs(struct fscache_netfs *netfs) +{ + struct fscache_netfs *ptr; + int ret; + + _enter("{%s}", netfs->name); + + INIT_LIST_HEAD(&netfs->link); + + /* allocate a cookie for the primary index */ + netfs->primary_index = + kmem_cache_zalloc(fscache_cookie_jar, GFP_KERNEL); + + if (!netfs->primary_index) { + _leave(" = -ENOMEM"); + return -ENOMEM; + } + + /* initialise the primary index cookie */ + atomic_set(&netfs->primary_index->usage, 1); + atomic_set(&netfs->primary_index->n_children, 0); + + netfs->primary_index->def = &fscache_fsdef_netfs_def; + netfs->primary_index->parent = &fscache_fsdef_index; + netfs->primary_index->netfs_data = netfs; + + atomic_inc(&netfs->primary_index->parent->usage); + atomic_inc(&netfs->primary_index->parent->n_children); + + spin_lock_init(&netfs->primary_index->lock); + INIT_HLIST_HEAD(&netfs->primary_index->backing_objects); + + /* check the netfs type is not already present */ + down_write(&fscache_addremove_sem); + + ret = -EEXIST; + list_for_each_entry(ptr, &fscache_netfs_list, link) { + if (strcmp(ptr->name, netfs->name) == 0) + goto already_registered; + } + + list_add(&netfs->link, &fscache_netfs_list); + ret = 0; + + printk(KERN_NOTICE "FS-Cache: Netfs '%s' registered for caching\n", + netfs->name); + +already_registered: + up_write(&fscache_addremove_sem); + + if (ret < 0) { + netfs->primary_index->parent = NULL; + __fscache_cookie_put(netfs->primary_index); + netfs->primary_index = NULL; + } + + _leave(" = %d", ret); + return ret; +} +EXPORT_SYMBOL(__fscache_register_netfs); + +/* + * unregister a network filesystem from the cache + * - all cookies must have been released first + */ +void __fscache_unregister_netfs(struct fscache_netfs *netfs) +{ + _enter("{%s.%u}", netfs->name, netfs->version); + + down_write(&fscache_addremove_sem); + + list_del(&netfs->link); + fscache_relinquish_cookie(netfs->primary_index, 0); + + up_write(&fscache_addremove_sem); + + printk(KERN_NOTICE "FS-Cache: Netfs '%s' unregistered from caching\n", + netfs->name); + + _leave(""); +} +EXPORT_SYMBOL(__fscache_unregister_netfs); diff --git a/fs/fscache/object.c b/fs/fscache/object.c new file mode 100644 index 00000000000..392a41b1b79 --- /dev/null +++ b/fs/fscache/object.c @@ -0,0 +1,810 @@ +/* FS-Cache object state machine handler + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * See Documentation/filesystems/caching/object.txt for a description of the + * object state machine and the in-kernel representations. + */ + +#define FSCACHE_DEBUG_LEVEL COOKIE +#include <linux/module.h> +#include "internal.h" + +const char *fscache_object_states[] = { + [FSCACHE_OBJECT_INIT] = "OBJECT_INIT", + [FSCACHE_OBJECT_LOOKING_UP] = "OBJECT_LOOKING_UP", + [FSCACHE_OBJECT_CREATING] = "OBJECT_CREATING", + [FSCACHE_OBJECT_AVAILABLE] = "OBJECT_AVAILABLE", + [FSCACHE_OBJECT_ACTIVE] = "OBJECT_ACTIVE", + [FSCACHE_OBJECT_UPDATING] = "OBJECT_UPDATING", + [FSCACHE_OBJECT_DYING] = "OBJECT_DYING", + [FSCACHE_OBJECT_LC_DYING] = "OBJECT_LC_DYING", + [FSCACHE_OBJECT_ABORT_INIT] = "OBJECT_ABORT_INIT", + [FSCACHE_OBJECT_RELEASING] = "OBJECT_RELEASING", + [FSCACHE_OBJECT_RECYCLING] = "OBJECT_RECYCLING", + [FSCACHE_OBJECT_WITHDRAWING] = "OBJECT_WITHDRAWING", + [FSCACHE_OBJECT_DEAD] = "OBJECT_DEAD", +}; +EXPORT_SYMBOL(fscache_object_states); + +static void fscache_object_slow_work_put_ref(struct slow_work *); +static int fscache_object_slow_work_get_ref(struct slow_work *); +static void fscache_object_slow_work_execute(struct slow_work *); +static void fscache_initialise_object(struct fscache_object *); +static void fscache_lookup_object(struct fscache_object *); +static void fscache_object_available(struct fscache_object *); +static void fscache_release_object(struct fscache_object *); +static void fscache_withdraw_object(struct fscache_object *); +static void fscache_enqueue_dependents(struct fscache_object *); +static void fscache_dequeue_object(struct fscache_object *); + +const struct slow_work_ops fscache_object_slow_work_ops = { + .get_ref = fscache_object_slow_work_get_ref, + .put_ref = fscache_object_slow_work_put_ref, + .execute = fscache_object_slow_work_execute, +}; +EXPORT_SYMBOL(fscache_object_slow_work_ops); + +/* + * we need to notify the parent when an op completes that we had outstanding + * upon it + */ +static inline void fscache_done_parent_op(struct fscache_object *object) +{ + struct fscache_object *parent = object->parent; + + _enter("OBJ%x {OBJ%x,%x}", + object->debug_id, parent->debug_id, parent->n_ops); + + spin_lock_nested(&parent->lock, 1); + parent->n_ops--; + parent->n_obj_ops--; + if (parent->n_ops == 0) + fscache_raise_event(parent, FSCACHE_OBJECT_EV_CLEARED); + spin_unlock(&parent->lock); +} + +/* + * process events that have been sent to an object's state machine + * - initiates parent lookup + * - does object lookup + * - does object creation + * - does object recycling and retirement + * - does object withdrawal + */ +static void fscache_object_state_machine(struct fscache_object *object) +{ + enum fscache_object_state new_state; + + ASSERT(object != NULL); + + _enter("{OBJ%x,%s,%lx}", + object->debug_id, fscache_object_states[object->state], + object->events); + + switch (object->state) { + /* wait for the parent object to become ready */ + case FSCACHE_OBJECT_INIT: + object->event_mask = + ULONG_MAX & ~(1 << FSCACHE_OBJECT_EV_CLEARED); + fscache_initialise_object(object); + goto done; + + /* look up the object metadata on disk */ + case FSCACHE_OBJECT_LOOKING_UP: + fscache_lookup_object(object); + goto lookup_transit; + + /* create the object metadata on disk */ + case FSCACHE_OBJECT_CREATING: + fscache_lookup_object(object); + goto lookup_transit; + + /* handle an object becoming available; start pending + * operations and queue dependent operations for processing */ + case FSCACHE_OBJECT_AVAILABLE: + fscache_object_available(object); + goto active_transit; + + /* normal running state */ + case FSCACHE_OBJECT_ACTIVE: + goto active_transit; + + /* update the object metadata on disk */ + case FSCACHE_OBJECT_UPDATING: + clear_bit(FSCACHE_OBJECT_EV_UPDATE, &object->events); + fscache_stat(&fscache_n_updates_run); + object->cache->ops->update_object(object); + goto active_transit; + + /* handle an object dying during lookup or creation */ + case FSCACHE_OBJECT_LC_DYING: + object->event_mask &= ~(1 << FSCACHE_OBJECT_EV_UPDATE); + object->cache->ops->lookup_complete(object); + + spin_lock(&object->lock); + object->state = FSCACHE_OBJECT_DYING; + if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, + &object->cookie->flags)) + wake_up_bit(&object->cookie->flags, + FSCACHE_COOKIE_CREATING); + spin_unlock(&object->lock); + + fscache_done_parent_op(object); + + /* wait for completion of all active operations on this object + * and the death of all child objects of this object */ + case FSCACHE_OBJECT_DYING: + dying: + clear_bit(FSCACHE_OBJECT_EV_CLEARED, &object->events); + spin_lock(&object->lock); + _debug("dying OBJ%x {%d,%d}", + object->debug_id, object->n_ops, object->n_children); + if (object->n_ops == 0 && object->n_children == 0) { + object->event_mask &= + ~(1 << FSCACHE_OBJECT_EV_CLEARED); + object->event_mask |= + (1 << FSCACHE_OBJECT_EV_WITHDRAW) | + (1 << FSCACHE_OBJECT_EV_RETIRE) | + (1 << FSCACHE_OBJECT_EV_RELEASE) | + (1 << FSCACHE_OBJECT_EV_ERROR); + } else { + object->event_mask &= + ~((1 << FSCACHE_OBJECT_EV_WITHDRAW) | + (1 << FSCACHE_OBJECT_EV_RETIRE) | + (1 << FSCACHE_OBJECT_EV_RELEASE) | + (1 << FSCACHE_OBJECT_EV_ERROR)); + object->event_mask |= + 1 << FSCACHE_OBJECT_EV_CLEARED; + } + spin_unlock(&object->lock); + fscache_enqueue_dependents(object); + goto terminal_transit; + + /* handle an abort during initialisation */ + case FSCACHE_OBJECT_ABORT_INIT: + _debug("handle abort init %lx", object->events); + object->event_mask &= ~(1 << FSCACHE_OBJECT_EV_UPDATE); + + spin_lock(&object->lock); + fscache_dequeue_object(object); + + object->state = FSCACHE_OBJECT_DYING; + if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, + &object->cookie->flags)) + wake_up_bit(&object->cookie->flags, + FSCACHE_COOKIE_CREATING); + spin_unlock(&object->lock); + goto dying; + + /* handle the netfs releasing an object and possibly marking it + * obsolete too */ + case FSCACHE_OBJECT_RELEASING: + case FSCACHE_OBJECT_RECYCLING: + object->event_mask &= + ~((1 << FSCACHE_OBJECT_EV_WITHDRAW) | + (1 << FSCACHE_OBJECT_EV_RETIRE) | + (1 << FSCACHE_OBJECT_EV_RELEASE) | + (1 << FSCACHE_OBJECT_EV_ERROR)); + fscache_release_object(object); + spin_lock(&object->lock); + object->state = FSCACHE_OBJECT_DEAD; + spin_unlock(&object->lock); + fscache_stat(&fscache_n_object_dead); + goto terminal_transit; + + /* handle the parent cache of this object being withdrawn from + * active service */ + case FSCACHE_OBJECT_WITHDRAWING: + object->event_mask &= + ~((1 << FSCACHE_OBJECT_EV_WITHDRAW) | + (1 << FSCACHE_OBJECT_EV_RETIRE) | + (1 << FSCACHE_OBJECT_EV_RELEASE) | + (1 << FSCACHE_OBJECT_EV_ERROR)); + fscache_withdraw_object(object); + spin_lock(&object->lock); + object->state = FSCACHE_OBJECT_DEAD; + spin_unlock(&object->lock); + fscache_stat(&fscache_n_object_dead); + goto terminal_transit; + + /* complain about the object being woken up once it is + * deceased */ + case FSCACHE_OBJECT_DEAD: + printk(KERN_ERR "FS-Cache:" + " Unexpected event in dead state %lx\n", + object->events & object->event_mask); + BUG(); + + default: + printk(KERN_ERR "FS-Cache: Unknown object state %u\n", + object->state); + BUG(); + } + + /* determine the transition from a lookup state */ +lookup_transit: + switch (fls(object->events & object->event_mask) - 1) { + case FSCACHE_OBJECT_EV_WITHDRAW: + case FSCACHE_OBJECT_EV_RETIRE: + case FSCACHE_OBJECT_EV_RELEASE: + case FSCACHE_OBJECT_EV_ERROR: + new_state = FSCACHE_OBJECT_LC_DYING; + goto change_state; + case FSCACHE_OBJECT_EV_REQUEUE: + goto done; + case -1: + goto done; /* sleep until event */ + default: + goto unsupported_event; + } + + /* determine the transition from an active state */ +active_transit: + switch (fls(object->events & object->event_mask) - 1) { + case FSCACHE_OBJECT_EV_WITHDRAW: + case FSCACHE_OBJECT_EV_RETIRE: + case FSCACHE_OBJECT_EV_RELEASE: + case FSCACHE_OBJECT_EV_ERROR: + new_state = FSCACHE_OBJECT_DYING; + goto change_state; + case FSCACHE_OBJECT_EV_UPDATE: + new_state = FSCACHE_OBJECT_UPDATING; + goto change_state; + case -1: + new_state = FSCACHE_OBJECT_ACTIVE; + goto change_state; /* sleep until event */ + default: + goto unsupported_event; + } + + /* determine the transition from a terminal state */ +terminal_transit: + switch (fls(object->events & object->event_mask) - 1) { + case FSCACHE_OBJECT_EV_WITHDRAW: + new_state = FSCACHE_OBJECT_WITHDRAWING; + goto change_state; + case FSCACHE_OBJECT_EV_RETIRE: + new_state = FSCACHE_OBJECT_RECYCLING; + goto change_state; + case FSCACHE_OBJECT_EV_RELEASE: + new_state = FSCACHE_OBJECT_RELEASING; + goto change_state; + case FSCACHE_OBJECT_EV_ERROR: + new_state = FSCACHE_OBJECT_WITHDRAWING; + goto change_state; + case FSCACHE_OBJECT_EV_CLEARED: + new_state = FSCACHE_OBJECT_DYING; + goto change_state; + case -1: + goto done; /* sleep until event */ + default: + goto unsupported_event; + } + +change_state: + spin_lock(&object->lock); + object->state = new_state; + spin_unlock(&object->lock); + +done: + _leave(" [->%s]", fscache_object_states[object->state]); + return; + +unsupported_event: + printk(KERN_ERR "FS-Cache:" + " Unsupported event %lx [mask %lx] in state %s\n", + object->events, object->event_mask, + fscache_object_states[object->state]); + BUG(); +} + +/* + * execute an object + */ +static void fscache_object_slow_work_execute(struct slow_work *work) +{ + struct fscache_object *object = + container_of(work, struct fscache_object, work); + unsigned long start; + + _enter("{OBJ%x}", object->debug_id); + + clear_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); + + start = jiffies; + fscache_object_state_machine(object); + fscache_hist(fscache_objs_histogram, start); + if (object->events & object->event_mask) + fscache_enqueue_object(object); +} + +/* + * initialise an object + * - check the specified object's parent to see if we can make use of it + * immediately to do a creation + * - we may need to start the process of creating a parent and we need to wait + * for the parent's lookup and creation to complete if it's not there yet + * - an object's cookie is pinned until we clear FSCACHE_COOKIE_CREATING on the + * leaf-most cookies of the object and all its children + */ +static void fscache_initialise_object(struct fscache_object *object) +{ + struct fscache_object *parent; + + _enter(""); + ASSERT(object->cookie != NULL); + ASSERT(object->cookie->parent != NULL); + ASSERT(list_empty(&object->work.link)); + + if (object->events & ((1 << FSCACHE_OBJECT_EV_ERROR) | + (1 << FSCACHE_OBJECT_EV_RELEASE) | + (1 << FSCACHE_OBJECT_EV_RETIRE) | + (1 << FSCACHE_OBJECT_EV_WITHDRAW))) { + _debug("abort init %lx", object->events); + spin_lock(&object->lock); + object->state = FSCACHE_OBJECT_ABORT_INIT; + spin_unlock(&object->lock); + return; + } + + spin_lock(&object->cookie->lock); + spin_lock_nested(&object->cookie->parent->lock, 1); + + parent = object->parent; + if (!parent) { + _debug("no parent"); + set_bit(FSCACHE_OBJECT_EV_WITHDRAW, &object->events); + } else { + spin_lock(&object->lock); + spin_lock_nested(&parent->lock, 1); + _debug("parent %s", fscache_object_states[parent->state]); + + if (parent->state >= FSCACHE_OBJECT_DYING) { + _debug("bad parent"); + set_bit(FSCACHE_OBJECT_EV_WITHDRAW, &object->events); + } else if (parent->state < FSCACHE_OBJECT_AVAILABLE) { + _debug("wait"); + + /* we may get woken up in this state by child objects + * binding on to us, so we need to make sure we don't + * add ourself to the list multiple times */ + if (list_empty(&object->dep_link)) { + object->cache->ops->grab_object(object); + list_add(&object->dep_link, + &parent->dependents); + + /* fscache_acquire_non_index_cookie() uses this + * to wake the chain up */ + if (parent->state == FSCACHE_OBJECT_INIT) + fscache_enqueue_object(parent); + } + } else { + _debug("go"); + parent->n_ops++; + parent->n_obj_ops++; + object->lookup_jif = jiffies; + object->state = FSCACHE_OBJECT_LOOKING_UP; + set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); + } + + spin_unlock(&parent->lock); + spin_unlock(&object->lock); + } + + spin_unlock(&object->cookie->parent->lock); + spin_unlock(&object->cookie->lock); + _leave(""); +} + +/* + * look an object up in the cache from which it was allocated + * - we hold an "access lock" on the parent object, so the parent object cannot + * be withdrawn by either party till we've finished + * - an object's cookie is pinned until we clear FSCACHE_COOKIE_CREATING on the + * leaf-most cookies of the object and all its children + */ +static void fscache_lookup_object(struct fscache_object *object) +{ + struct fscache_cookie *cookie = object->cookie; + struct fscache_object *parent; + + _enter(""); + + parent = object->parent; + ASSERT(parent != NULL); + ASSERTCMP(parent->n_ops, >, 0); + ASSERTCMP(parent->n_obj_ops, >, 0); + + /* make sure the parent is still available */ + ASSERTCMP(parent->state, >=, FSCACHE_OBJECT_AVAILABLE); + + if (parent->state >= FSCACHE_OBJECT_DYING || + test_bit(FSCACHE_IOERROR, &object->cache->flags)) { + _debug("unavailable"); + set_bit(FSCACHE_OBJECT_EV_WITHDRAW, &object->events); + _leave(""); + return; + } + + _debug("LOOKUP \"%s/%s\" in \"%s\"", + parent->cookie->def->name, cookie->def->name, + object->cache->tag->name); + + fscache_stat(&fscache_n_object_lookups); + object->cache->ops->lookup_object(object); + + if (test_bit(FSCACHE_OBJECT_EV_ERROR, &object->events)) + set_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags); + + _leave(""); +} + +/** + * fscache_object_lookup_negative - Note negative cookie lookup + * @object: Object pointing to cookie to mark + * + * Note negative lookup, permitting those waiting to read data from an already + * existing backing object to continue as there's no data for them to read. + */ +void fscache_object_lookup_negative(struct fscache_object *object) +{ + struct fscache_cookie *cookie = object->cookie; + + _enter("{OBJ%x,%s}", + object->debug_id, fscache_object_states[object->state]); + + spin_lock(&object->lock); + if (object->state == FSCACHE_OBJECT_LOOKING_UP) { + fscache_stat(&fscache_n_object_lookups_negative); + + /* transit here to allow write requests to begin stacking up + * and read requests to begin returning ENODATA */ + object->state = FSCACHE_OBJECT_CREATING; + spin_unlock(&object->lock); + + set_bit(FSCACHE_COOKIE_PENDING_FILL, &cookie->flags); + set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); + + _debug("wake up lookup %p", &cookie->flags); + smp_mb__before_clear_bit(); + clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags); + smp_mb__after_clear_bit(); + wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP); + set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); + } else { + ASSERTCMP(object->state, ==, FSCACHE_OBJECT_CREATING); + spin_unlock(&object->lock); + } + + _leave(""); +} +EXPORT_SYMBOL(fscache_object_lookup_negative); + +/** + * fscache_obtained_object - Note successful object lookup or creation + * @object: Object pointing to cookie to mark + * + * Note successful lookup and/or creation, permitting those waiting to write + * data to a backing object to continue. + * + * Note that after calling this, an object's cookie may be relinquished by the + * netfs, and so must be accessed with object lock held. + */ +void fscache_obtained_object(struct fscache_object *object) +{ + struct fscache_cookie *cookie = object->cookie; + + _enter("{OBJ%x,%s}", + object->debug_id, fscache_object_states[object->state]); + + /* if we were still looking up, then we must have a positive lookup + * result, in which case there may be data available */ + spin_lock(&object->lock); + if (object->state == FSCACHE_OBJECT_LOOKING_UP) { + fscache_stat(&fscache_n_object_lookups_positive); + + clear_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); + + object->state = FSCACHE_OBJECT_AVAILABLE; + spin_unlock(&object->lock); + + smp_mb__before_clear_bit(); + clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags); + smp_mb__after_clear_bit(); + wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP); + set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); + } else { + ASSERTCMP(object->state, ==, FSCACHE_OBJECT_CREATING); + fscache_stat(&fscache_n_object_created); + + object->state = FSCACHE_OBJECT_AVAILABLE; + spin_unlock(&object->lock); + set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); + smp_wmb(); + } + + if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, &cookie->flags)) + wake_up_bit(&cookie->flags, FSCACHE_COOKIE_CREATING); + + _leave(""); +} +EXPORT_SYMBOL(fscache_obtained_object); + +/* + * handle an object that has just become available + */ +static void fscache_object_available(struct fscache_object *object) +{ + _enter("{OBJ%x}", object->debug_id); + + spin_lock(&object->lock); + + if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, &object->cookie->flags)) + wake_up_bit(&object->cookie->flags, FSCACHE_COOKIE_CREATING); + + fscache_done_parent_op(object); + if (object->n_in_progress == 0) { + if (object->n_ops > 0) { + ASSERTCMP(object->n_ops, >=, object->n_obj_ops); + ASSERTIF(object->n_ops > object->n_obj_ops, + !list_empty(&object->pending_ops)); + fscache_start_operations(object); + } else { + ASSERT(list_empty(&object->pending_ops)); + } + } + spin_unlock(&object->lock); + + object->cache->ops->lookup_complete(object); + fscache_enqueue_dependents(object); + + fscache_hist(fscache_obj_instantiate_histogram, object->lookup_jif); + fscache_stat(&fscache_n_object_avail); + + _leave(""); +} + +/* + * drop an object's attachments + */ +static void fscache_drop_object(struct fscache_object *object) +{ + struct fscache_object *parent = object->parent; + struct fscache_cache *cache = object->cache; + + _enter("{OBJ%x,%d}", object->debug_id, object->n_children); + + spin_lock(&cache->object_list_lock); + list_del_init(&object->cache_link); + spin_unlock(&cache->object_list_lock); + + cache->ops->drop_object(object); + + if (parent) { + _debug("release parent OBJ%x {%d}", + parent->debug_id, parent->n_children); + + spin_lock(&parent->lock); + parent->n_children--; + if (parent->n_children == 0) + fscache_raise_event(parent, FSCACHE_OBJECT_EV_CLEARED); + spin_unlock(&parent->lock); + object->parent = NULL; + } + + /* this just shifts the object release to the slow work processor */ + object->cache->ops->put_object(object); + + _leave(""); +} + +/* + * release or recycle an object that the netfs has discarded + */ +static void fscache_release_object(struct fscache_object *object) +{ + _enter(""); + + fscache_drop_object(object); +} + +/* + * withdraw an object from active service + */ +static void fscache_withdraw_object(struct fscache_object *object) +{ + struct fscache_cookie *cookie; + bool detached; + + _enter(""); + + spin_lock(&object->lock); + cookie = object->cookie; + if (cookie) { + /* need to get the cookie lock before the object lock, starting + * from the object pointer */ + atomic_inc(&cookie->usage); + spin_unlock(&object->lock); + + detached = false; + spin_lock(&cookie->lock); + spin_lock(&object->lock); + + if (object->cookie == cookie) { + hlist_del_init(&object->cookie_link); + object->cookie = NULL; + detached = true; + } + spin_unlock(&cookie->lock); + fscache_cookie_put(cookie); + if (detached) + fscache_cookie_put(cookie); + } + + spin_unlock(&object->lock); + + fscache_drop_object(object); +} + +/* + * withdraw an object from active service at the behest of the cache + * - need break the links to a cached object cookie + * - called under two situations: + * (1) recycler decides to reclaim an in-use object + * (2) a cache is unmounted + * - have to take care as the cookie can be being relinquished by the netfs + * simultaneously + * - the object is pinned by the caller holding a refcount on it + */ +void fscache_withdrawing_object(struct fscache_cache *cache, + struct fscache_object *object) +{ + bool enqueue = false; + + _enter(",OBJ%x", object->debug_id); + + spin_lock(&object->lock); + if (object->state < FSCACHE_OBJECT_WITHDRAWING) { + object->state = FSCACHE_OBJECT_WITHDRAWING; + enqueue = true; + } + spin_unlock(&object->lock); + + if (enqueue) + fscache_enqueue_object(object); + + _leave(""); +} + +/* + * allow the slow work item processor to get a ref on an object + */ +static int fscache_object_slow_work_get_ref(struct slow_work *work) +{ + struct fscache_object *object = + container_of(work, struct fscache_object, work); + + return object->cache->ops->grab_object(object) ? 0 : -EAGAIN; +} + +/* + * allow the slow work item processor to discard a ref on a work item + */ +static void fscache_object_slow_work_put_ref(struct slow_work *work) +{ + struct fscache_object *object = + container_of(work, struct fscache_object, work); + + return object->cache->ops->put_object(object); +} + +/* + * enqueue an object for metadata-type processing + */ +void fscache_enqueue_object(struct fscache_object *object) +{ + _enter("{OBJ%x}", object->debug_id); + + slow_work_enqueue(&object->work); +} + +/* + * enqueue the dependents of an object for metadata-type processing + * - the caller must hold the object's lock + * - this may cause an already locked object to wind up being processed again + */ +static void fscache_enqueue_dependents(struct fscache_object *object) +{ + struct fscache_object *dep; + + _enter("{OBJ%x}", object->debug_id); + + if (list_empty(&object->dependents)) + return; + + spin_lock(&object->lock); + + while (!list_empty(&object->dependents)) { + dep = list_entry(object->dependents.next, + struct fscache_object, dep_link); + list_del_init(&dep->dep_link); + + + /* sort onto appropriate lists */ + fscache_enqueue_object(dep); + dep->cache->ops->put_object(dep); + + if (!list_empty(&object->dependents)) + cond_resched_lock(&object->lock); + } + + spin_unlock(&object->lock); +} + +/* + * remove an object from whatever queue it's waiting on + * - the caller must hold object->lock + */ +void fscache_dequeue_object(struct fscache_object *object) +{ + _enter("{OBJ%x}", object->debug_id); + + if (!list_empty(&object->dep_link)) { + spin_lock(&object->parent->lock); + list_del_init(&object->dep_link); + spin_unlock(&object->parent->lock); + } + + _leave(""); +} + +/** + * fscache_check_aux - Ask the netfs whether an object on disk is still valid + * @object: The object to ask about + * @data: The auxiliary data for the object + * @datalen: The size of the auxiliary data + * + * This function consults the netfs about the coherency state of an object + */ +enum fscache_checkaux fscache_check_aux(struct fscache_object *object, + const void *data, uint16_t datalen) +{ + enum fscache_checkaux result; + + if (!object->cookie->def->check_aux) { + fscache_stat(&fscache_n_checkaux_none); + return FSCACHE_CHECKAUX_OKAY; + } + + result = object->cookie->def->check_aux(object->cookie->netfs_data, + data, datalen); + switch (result) { + /* entry okay as is */ + case FSCACHE_CHECKAUX_OKAY: + fscache_stat(&fscache_n_checkaux_okay); + break; + + /* entry requires update */ + case FSCACHE_CHECKAUX_NEEDS_UPDATE: + fscache_stat(&fscache_n_checkaux_update); + break; + + /* entry requires deletion */ + case FSCACHE_CHECKAUX_OBSOLETE: + fscache_stat(&fscache_n_checkaux_obsolete); + break; + + default: + BUG(); + } + + return result; +} +EXPORT_SYMBOL(fscache_check_aux); diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c new file mode 100644 index 00000000000..e7f8d53b8b6 --- /dev/null +++ b/fs/fscache/operation.c @@ -0,0 +1,459 @@ +/* FS-Cache worker operation management routines + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * See Documentation/filesystems/caching/operations.txt + */ + +#define FSCACHE_DEBUG_LEVEL OPERATION +#include <linux/module.h> +#include "internal.h" + +atomic_t fscache_op_debug_id; +EXPORT_SYMBOL(fscache_op_debug_id); + +/** + * fscache_enqueue_operation - Enqueue an operation for processing + * @op: The operation to enqueue + * + * Enqueue an operation for processing by the FS-Cache thread pool. + * + * This will get its own ref on the object. + */ +void fscache_enqueue_operation(struct fscache_operation *op) +{ + _enter("{OBJ%x OP%x,%u}", + op->object->debug_id, op->debug_id, atomic_read(&op->usage)); + + ASSERT(op->processor != NULL); + ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE); + ASSERTCMP(atomic_read(&op->usage), >, 0); + + if (list_empty(&op->pend_link)) { + switch (op->flags & FSCACHE_OP_TYPE) { + case FSCACHE_OP_FAST: + _debug("queue fast"); + atomic_inc(&op->usage); + if (!schedule_work(&op->fast_work)) + fscache_put_operation(op); + break; + case FSCACHE_OP_SLOW: + _debug("queue slow"); + slow_work_enqueue(&op->slow_work); + break; + case FSCACHE_OP_MYTHREAD: + _debug("queue for caller's attention"); + break; + default: + printk(KERN_ERR "FS-Cache: Unexpected op type %lx", + op->flags); + BUG(); + break; + } + fscache_stat(&fscache_n_op_enqueue); + } +} +EXPORT_SYMBOL(fscache_enqueue_operation); + +/* + * start an op running + */ +static void fscache_run_op(struct fscache_object *object, + struct fscache_operation *op) +{ + object->n_in_progress++; + if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) + wake_up_bit(&op->flags, FSCACHE_OP_WAITING); + if (op->processor) + fscache_enqueue_operation(op); + fscache_stat(&fscache_n_op_run); +} + +/* + * submit an exclusive operation for an object + * - other ops are excluded from running simultaneously with this one + * - this gets any extra refs it needs on an op + */ +int fscache_submit_exclusive_op(struct fscache_object *object, + struct fscache_operation *op) +{ + int ret; + + _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id); + + spin_lock(&object->lock); + ASSERTCMP(object->n_ops, >=, object->n_in_progress); + ASSERTCMP(object->n_ops, >=, object->n_exclusive); + + ret = -ENOBUFS; + if (fscache_object_is_active(object)) { + op->object = object; + object->n_ops++; + object->n_exclusive++; /* reads and writes must wait */ + + if (object->n_ops > 0) { + atomic_inc(&op->usage); + list_add_tail(&op->pend_link, &object->pending_ops); + fscache_stat(&fscache_n_op_pend); + } else if (!list_empty(&object->pending_ops)) { + atomic_inc(&op->usage); + list_add_tail(&op->pend_link, &object->pending_ops); + fscache_stat(&fscache_n_op_pend); + fscache_start_operations(object); + } else { + ASSERTCMP(object->n_in_progress, ==, 0); + fscache_run_op(object, op); + } + + /* need to issue a new write op after this */ + clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); + ret = 0; + } else if (object->state == FSCACHE_OBJECT_CREATING) { + op->object = object; + object->n_ops++; + object->n_exclusive++; /* reads and writes must wait */ + atomic_inc(&op->usage); + list_add_tail(&op->pend_link, &object->pending_ops); + fscache_stat(&fscache_n_op_pend); + ret = 0; + } else { + /* not allowed to submit ops in any other state */ + BUG(); + } + + spin_unlock(&object->lock); + return ret; +} + +/* + * report an unexpected submission + */ +static void fscache_report_unexpected_submission(struct fscache_object *object, + struct fscache_operation *op, + unsigned long ostate) +{ + static bool once_only; + struct fscache_operation *p; + unsigned n; + + if (once_only) + return; + once_only = true; + + kdebug("unexpected submission OP%x [OBJ%x %s]", + op->debug_id, object->debug_id, + fscache_object_states[object->state]); + kdebug("objstate=%s [%s]", + fscache_object_states[object->state], + fscache_object_states[ostate]); + kdebug("objflags=%lx", object->flags); + kdebug("objevent=%lx [%lx]", object->events, object->event_mask); + kdebug("ops=%u inp=%u exc=%u", + object->n_ops, object->n_in_progress, object->n_exclusive); + + if (!list_empty(&object->pending_ops)) { + n = 0; + list_for_each_entry(p, &object->pending_ops, pend_link) { + ASSERTCMP(p->object, ==, object); + kdebug("%p %p", op->processor, op->release); + n++; + } + + kdebug("n=%u", n); + } + + dump_stack(); +} + +/* + * submit an operation for an object + * - objects may be submitted only in the following states: + * - during object creation (write ops may be submitted) + * - whilst the object is active + * - after an I/O error incurred in one of the two above states (op rejected) + * - this gets any extra refs it needs on an op + */ +int fscache_submit_op(struct fscache_object *object, + struct fscache_operation *op) +{ + unsigned long ostate; + int ret; + + _enter("{OBJ%x OP%x},{%u}", + object->debug_id, op->debug_id, atomic_read(&op->usage)); + + ASSERTCMP(atomic_read(&op->usage), >, 0); + + spin_lock(&object->lock); + ASSERTCMP(object->n_ops, >=, object->n_in_progress); + ASSERTCMP(object->n_ops, >=, object->n_exclusive); + + ostate = object->state; + smp_rmb(); + + if (fscache_object_is_active(object)) { + op->object = object; + object->n_ops++; + + if (object->n_exclusive > 0) { + atomic_inc(&op->usage); + list_add_tail(&op->pend_link, &object->pending_ops); + fscache_stat(&fscache_n_op_pend); + } else if (!list_empty(&object->pending_ops)) { + atomic_inc(&op->usage); + list_add_tail(&op->pend_link, &object->pending_ops); + fscache_stat(&fscache_n_op_pend); + fscache_start_operations(object); + } else { + ASSERTCMP(object->n_exclusive, ==, 0); + fscache_run_op(object, op); + } + ret = 0; + } else if (object->state == FSCACHE_OBJECT_CREATING) { + op->object = object; + object->n_ops++; + atomic_inc(&op->usage); + list_add_tail(&op->pend_link, &object->pending_ops); + fscache_stat(&fscache_n_op_pend); + ret = 0; + } else if (!test_bit(FSCACHE_IOERROR, &object->cache->flags)) { + fscache_report_unexpected_submission(object, op, ostate); + ASSERT(!fscache_object_is_active(object)); + ret = -ENOBUFS; + } else { + ret = -ENOBUFS; + } + + spin_unlock(&object->lock); + return ret; +} + +/* + * queue an object for withdrawal on error, aborting all following asynchronous + * operations + */ +void fscache_abort_object(struct fscache_object *object) +{ + _enter("{OBJ%x}", object->debug_id); + + fscache_raise_event(object, FSCACHE_OBJECT_EV_ERROR); +} + +/* + * jump start the operation processing on an object + * - caller must hold object->lock + */ +void fscache_start_operations(struct fscache_object *object) +{ + struct fscache_operation *op; + bool stop = false; + + while (!list_empty(&object->pending_ops) && !stop) { + op = list_entry(object->pending_ops.next, + struct fscache_operation, pend_link); + + if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) { + if (object->n_in_progress > 0) + break; + stop = true; + } + list_del_init(&op->pend_link); + object->n_in_progress++; + + if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) + wake_up_bit(&op->flags, FSCACHE_OP_WAITING); + if (op->processor) + fscache_enqueue_operation(op); + + /* the pending queue was holding a ref on the object */ + fscache_put_operation(op); + } + + ASSERTCMP(object->n_in_progress, <=, object->n_ops); + + _debug("woke %d ops on OBJ%x", + object->n_in_progress, object->debug_id); +} + +/* + * release an operation + * - queues pending ops if this is the last in-progress op + */ +void fscache_put_operation(struct fscache_operation *op) +{ + struct fscache_object *object; + struct fscache_cache *cache; + + _enter("{OBJ%x OP%x,%d}", + op->object->debug_id, op->debug_id, atomic_read(&op->usage)); + + ASSERTCMP(atomic_read(&op->usage), >, 0); + + if (!atomic_dec_and_test(&op->usage)) + return; + + _debug("PUT OP"); + if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags)) + BUG(); + + fscache_stat(&fscache_n_op_release); + + if (op->release) { + op->release(op); + op->release = NULL; + } + + object = op->object; + + /* now... we may get called with the object spinlock held, so we + * complete the cleanup here only if we can immediately acquire the + * lock, and defer it otherwise */ + if (!spin_trylock(&object->lock)) { + _debug("defer put"); + fscache_stat(&fscache_n_op_deferred_release); + + cache = object->cache; + spin_lock(&cache->op_gc_list_lock); + list_add_tail(&op->pend_link, &cache->op_gc_list); + spin_unlock(&cache->op_gc_list_lock); + schedule_work(&cache->op_gc); + _leave(" [defer]"); + return; + } + + if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) { + ASSERTCMP(object->n_exclusive, >, 0); + object->n_exclusive--; + } + + ASSERTCMP(object->n_in_progress, >, 0); + object->n_in_progress--; + if (object->n_in_progress == 0) + fscache_start_operations(object); + + ASSERTCMP(object->n_ops, >, 0); + object->n_ops--; + if (object->n_ops == 0) + fscache_raise_event(object, FSCACHE_OBJECT_EV_CLEARED); + + spin_unlock(&object->lock); + + kfree(op); + _leave(" [done]"); +} +EXPORT_SYMBOL(fscache_put_operation); + +/* + * garbage collect operations that have had their release deferred + */ +void fscache_operation_gc(struct work_struct *work) +{ + struct fscache_operation *op; + struct fscache_object *object; + struct fscache_cache *cache = + container_of(work, struct fscache_cache, op_gc); + int count = 0; + + _enter(""); + + do { + spin_lock(&cache->op_gc_list_lock); + if (list_empty(&cache->op_gc_list)) { + spin_unlock(&cache->op_gc_list_lock); + break; + } + + op = list_entry(cache->op_gc_list.next, + struct fscache_operation, pend_link); + list_del(&op->pend_link); + spin_unlock(&cache->op_gc_list_lock); + + object = op->object; + + _debug("GC DEFERRED REL OBJ%x OP%x", + object->debug_id, op->debug_id); + fscache_stat(&fscache_n_op_gc); + + ASSERTCMP(atomic_read(&op->usage), ==, 0); + + spin_lock(&object->lock); + if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) { + ASSERTCMP(object->n_exclusive, >, 0); + object->n_exclusive--; + } + + ASSERTCMP(object->n_in_progress, >, 0); + object->n_in_progress--; + if (object->n_in_progress == 0) + fscache_start_operations(object); + + ASSERTCMP(object->n_ops, >, 0); + object->n_ops--; + if (object->n_ops == 0) + fscache_raise_event(object, FSCACHE_OBJECT_EV_CLEARED); + + spin_unlock(&object->lock); + + } while (count++ < 20); + + if (!list_empty(&cache->op_gc_list)) + schedule_work(&cache->op_gc); + + _leave(""); +} + +/* + * allow the slow work item processor to get a ref on an operation + */ +static int fscache_op_get_ref(struct slow_work *work) +{ + struct fscache_operation *op = + container_of(work, struct fscache_operation, slow_work); + + atomic_inc(&op->usage); + return 0; +} + +/* + * allow the slow work item processor to discard a ref on an operation + */ +static void fscache_op_put_ref(struct slow_work *work) +{ + struct fscache_operation *op = + container_of(work, struct fscache_operation, slow_work); + + fscache_put_operation(op); +} + +/* + * execute an operation using the slow thread pool to provide processing context + * - the caller holds a ref to this object, so we don't need to hold one + */ +static void fscache_op_execute(struct slow_work *work) +{ + struct fscache_operation *op = + container_of(work, struct fscache_operation, slow_work); + unsigned long start; + + _enter("{OBJ%x OP%x,%d}", + op->object->debug_id, op->debug_id, atomic_read(&op->usage)); + + ASSERT(op->processor != NULL); + start = jiffies; + op->processor(op); + fscache_hist(fscache_ops_histogram, start); + + _leave(""); +} + +const struct slow_work_ops fscache_op_slow_work_ops = { + .get_ref = fscache_op_get_ref, + .put_ref = fscache_op_put_ref, + .execute = fscache_op_execute, +}; diff --git a/fs/fscache/page.c b/fs/fscache/page.c new file mode 100644 index 00000000000..2568e0eb644 --- /dev/null +++ b/fs/fscache/page.c @@ -0,0 +1,816 @@ +/* Cache page management and data I/O routines + * + * Copyright (C) 2004-2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define FSCACHE_DEBUG_LEVEL PAGE +#include <linux/module.h> +#include <linux/fscache-cache.h> +#include <linux/buffer_head.h> +#include <linux/pagevec.h> +#include "internal.h" + +/* + * check to see if a page is being written to the cache + */ +bool __fscache_check_page_write(struct fscache_cookie *cookie, struct page *page) +{ + void *val; + + rcu_read_lock(); + val = radix_tree_lookup(&cookie->stores, page->index); + rcu_read_unlock(); + + return val != NULL; +} +EXPORT_SYMBOL(__fscache_check_page_write); + +/* + * wait for a page to finish being written to the cache + */ +void __fscache_wait_on_page_write(struct fscache_cookie *cookie, struct page *page) +{ + wait_queue_head_t *wq = bit_waitqueue(&cookie->flags, 0); + + wait_event(*wq, !__fscache_check_page_write(cookie, page)); +} +EXPORT_SYMBOL(__fscache_wait_on_page_write); + +/* + * note that a page has finished being written to the cache + */ +static void fscache_end_page_write(struct fscache_cookie *cookie, struct page *page) +{ + struct page *xpage; + + spin_lock(&cookie->lock); + xpage = radix_tree_delete(&cookie->stores, page->index); + spin_unlock(&cookie->lock); + ASSERT(xpage != NULL); + + wake_up_bit(&cookie->flags, 0); +} + +/* + * actually apply the changed attributes to a cache object + */ +static void fscache_attr_changed_op(struct fscache_operation *op) +{ + struct fscache_object *object = op->object; + + _enter("{OBJ%x OP%x}", object->debug_id, op->debug_id); + + fscache_stat(&fscache_n_attr_changed_calls); + + if (fscache_object_is_active(object) && + object->cache->ops->attr_changed(object) < 0) + fscache_abort_object(object); + + _leave(""); +} + +/* + * notification that the attributes on an object have changed + */ +int __fscache_attr_changed(struct fscache_cookie *cookie) +{ + struct fscache_operation *op; + struct fscache_object *object; + + _enter("%p", cookie); + + ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); + + fscache_stat(&fscache_n_attr_changed); + + op = kzalloc(sizeof(*op), GFP_KERNEL); + if (!op) { + fscache_stat(&fscache_n_attr_changed_nomem); + _leave(" = -ENOMEM"); + return -ENOMEM; + } + + fscache_operation_init(op, NULL); + fscache_operation_init_slow(op, fscache_attr_changed_op); + op->flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_EXCLUSIVE); + + spin_lock(&cookie->lock); + + if (hlist_empty(&cookie->backing_objects)) + goto nobufs; + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + if (fscache_submit_exclusive_op(object, op) < 0) + goto nobufs; + spin_unlock(&cookie->lock); + fscache_stat(&fscache_n_attr_changed_ok); + fscache_put_operation(op); + _leave(" = 0"); + return 0; + +nobufs: + spin_unlock(&cookie->lock); + kfree(op); + fscache_stat(&fscache_n_attr_changed_nobufs); + _leave(" = %d", -ENOBUFS); + return -ENOBUFS; +} +EXPORT_SYMBOL(__fscache_attr_changed); + +/* + * handle secondary execution given to a retrieval op on behalf of the + * cache + */ +static void fscache_retrieval_work(struct work_struct *work) +{ + struct fscache_retrieval *op = + container_of(work, struct fscache_retrieval, op.fast_work); + unsigned long start; + + _enter("{OP%x}", op->op.debug_id); + + start = jiffies; + op->op.processor(&op->op); + fscache_hist(fscache_ops_histogram, start); + fscache_put_operation(&op->op); +} + +/* + * release a retrieval op reference + */ +static void fscache_release_retrieval_op(struct fscache_operation *_op) +{ + struct fscache_retrieval *op = + container_of(_op, struct fscache_retrieval, op); + + _enter("{OP%x}", op->op.debug_id); + + fscache_hist(fscache_retrieval_histogram, op->start_time); + if (op->context) + fscache_put_context(op->op.object->cookie, op->context); + + _leave(""); +} + +/* + * allocate a retrieval op + */ +static struct fscache_retrieval *fscache_alloc_retrieval( + struct address_space *mapping, + fscache_rw_complete_t end_io_func, + void *context) +{ + struct fscache_retrieval *op; + + /* allocate a retrieval operation and attempt to submit it */ + op = kzalloc(sizeof(*op), GFP_NOIO); + if (!op) { + fscache_stat(&fscache_n_retrievals_nomem); + return NULL; + } + + fscache_operation_init(&op->op, fscache_release_retrieval_op); + op->op.flags = FSCACHE_OP_MYTHREAD | (1 << FSCACHE_OP_WAITING); + op->mapping = mapping; + op->end_io_func = end_io_func; + op->context = context; + op->start_time = jiffies; + INIT_WORK(&op->op.fast_work, fscache_retrieval_work); + INIT_LIST_HEAD(&op->to_do); + return op; +} + +/* + * wait for a deferred lookup to complete + */ +static int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie) +{ + unsigned long jif; + + _enter(""); + + if (!test_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags)) { + _leave(" = 0 [imm]"); + return 0; + } + + fscache_stat(&fscache_n_retrievals_wait); + + jif = jiffies; + if (wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP, + fscache_wait_bit_interruptible, + TASK_INTERRUPTIBLE) != 0) { + fscache_stat(&fscache_n_retrievals_intr); + _leave(" = -ERESTARTSYS"); + return -ERESTARTSYS; + } + + ASSERT(!test_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags)); + + smp_rmb(); + fscache_hist(fscache_retrieval_delay_histogram, jif); + _leave(" = 0 [dly]"); + return 0; +} + +/* + * read a page from the cache or allocate a block in which to store it + * - we return: + * -ENOMEM - out of memory, nothing done + * -ERESTARTSYS - interrupted + * -ENOBUFS - no backing object available in which to cache the block + * -ENODATA - no data available in the backing object for this block + * 0 - dispatched a read - it'll call end_io_func() when finished + */ +int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, + struct page *page, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp) +{ + struct fscache_retrieval *op; + struct fscache_object *object; + int ret; + + _enter("%p,%p,,,", cookie, page); + + fscache_stat(&fscache_n_retrievals); + + if (hlist_empty(&cookie->backing_objects)) + goto nobufs; + + ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); + ASSERTCMP(page, !=, NULL); + + if (fscache_wait_for_deferred_lookup(cookie) < 0) + return -ERESTARTSYS; + + op = fscache_alloc_retrieval(page->mapping, end_io_func, context); + if (!op) { + _leave(" = -ENOMEM"); + return -ENOMEM; + } + + spin_lock(&cookie->lock); + + if (hlist_empty(&cookie->backing_objects)) + goto nobufs_unlock; + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + ASSERTCMP(object->state, >, FSCACHE_OBJECT_LOOKING_UP); + + if (fscache_submit_op(object, &op->op) < 0) + goto nobufs_unlock; + spin_unlock(&cookie->lock); + + fscache_stat(&fscache_n_retrieval_ops); + + /* pin the netfs read context in case we need to do the actual netfs + * read because we've encountered a cache read failure */ + fscache_get_context(object->cookie, op->context); + + /* we wait for the operation to become active, and then process it + * *here*, in this thread, and not in the thread pool */ + if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) { + _debug(">>> WT"); + fscache_stat(&fscache_n_retrieval_op_waits); + wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, + fscache_wait_bit, TASK_UNINTERRUPTIBLE); + _debug("<<< GO"); + } + + /* ask the cache to honour the operation */ + if (test_bit(FSCACHE_COOKIE_NO_DATA_YET, &object->cookie->flags)) { + ret = object->cache->ops->allocate_page(op, page, gfp); + if (ret == 0) + ret = -ENODATA; + } else { + ret = object->cache->ops->read_or_alloc_page(op, page, gfp); + } + + if (ret == -ENOMEM) + fscache_stat(&fscache_n_retrievals_nomem); + else if (ret == -ERESTARTSYS) + fscache_stat(&fscache_n_retrievals_intr); + else if (ret == -ENODATA) + fscache_stat(&fscache_n_retrievals_nodata); + else if (ret < 0) + fscache_stat(&fscache_n_retrievals_nobufs); + else + fscache_stat(&fscache_n_retrievals_ok); + + fscache_put_retrieval(op); + _leave(" = %d", ret); + return ret; + +nobufs_unlock: + spin_unlock(&cookie->lock); + kfree(op); +nobufs: + fscache_stat(&fscache_n_retrievals_nobufs); + _leave(" = -ENOBUFS"); + return -ENOBUFS; +} +EXPORT_SYMBOL(__fscache_read_or_alloc_page); + +/* + * read a list of page from the cache or allocate a block in which to store + * them + * - we return: + * -ENOMEM - out of memory, some pages may be being read + * -ERESTARTSYS - interrupted, some pages may be being read + * -ENOBUFS - no backing object or space available in which to cache any + * pages not being read + * -ENODATA - no data available in the backing object for some or all of + * the pages + * 0 - dispatched a read on all pages + * + * end_io_func() will be called for each page read from the cache as it is + * finishes being read + * + * any pages for which a read is dispatched will be removed from pages and + * nr_pages + */ +int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp) +{ + fscache_pages_retrieval_func_t func; + struct fscache_retrieval *op; + struct fscache_object *object; + int ret; + + _enter("%p,,%d,,,", cookie, *nr_pages); + + fscache_stat(&fscache_n_retrievals); + + if (hlist_empty(&cookie->backing_objects)) + goto nobufs; + + ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); + ASSERTCMP(*nr_pages, >, 0); + ASSERT(!list_empty(pages)); + + if (fscache_wait_for_deferred_lookup(cookie) < 0) + return -ERESTARTSYS; + + op = fscache_alloc_retrieval(mapping, end_io_func, context); + if (!op) + return -ENOMEM; + + spin_lock(&cookie->lock); + + if (hlist_empty(&cookie->backing_objects)) + goto nobufs_unlock; + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + if (fscache_submit_op(object, &op->op) < 0) + goto nobufs_unlock; + spin_unlock(&cookie->lock); + + fscache_stat(&fscache_n_retrieval_ops); + + /* pin the netfs read context in case we need to do the actual netfs + * read because we've encountered a cache read failure */ + fscache_get_context(object->cookie, op->context); + + /* we wait for the operation to become active, and then process it + * *here*, in this thread, and not in the thread pool */ + if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) { + _debug(">>> WT"); + fscache_stat(&fscache_n_retrieval_op_waits); + wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, + fscache_wait_bit, TASK_UNINTERRUPTIBLE); + _debug("<<< GO"); + } + + /* ask the cache to honour the operation */ + if (test_bit(FSCACHE_COOKIE_NO_DATA_YET, &object->cookie->flags)) + func = object->cache->ops->allocate_pages; + else + func = object->cache->ops->read_or_alloc_pages; + ret = func(op, pages, nr_pages, gfp); + + if (ret == -ENOMEM) + fscache_stat(&fscache_n_retrievals_nomem); + else if (ret == -ERESTARTSYS) + fscache_stat(&fscache_n_retrievals_intr); + else if (ret == -ENODATA) + fscache_stat(&fscache_n_retrievals_nodata); + else if (ret < 0) + fscache_stat(&fscache_n_retrievals_nobufs); + else + fscache_stat(&fscache_n_retrievals_ok); + + fscache_put_retrieval(op); + _leave(" = %d", ret); + return ret; + +nobufs_unlock: + spin_unlock(&cookie->lock); + kfree(op); +nobufs: + fscache_stat(&fscache_n_retrievals_nobufs); + _leave(" = -ENOBUFS"); + return -ENOBUFS; +} +EXPORT_SYMBOL(__fscache_read_or_alloc_pages); + +/* + * allocate a block in the cache on which to store a page + * - we return: + * -ENOMEM - out of memory, nothing done + * -ERESTARTSYS - interrupted + * -ENOBUFS - no backing object available in which to cache the block + * 0 - block allocated + */ +int __fscache_alloc_page(struct fscache_cookie *cookie, + struct page *page, + gfp_t gfp) +{ + struct fscache_retrieval *op; + struct fscache_object *object; + int ret; + + _enter("%p,%p,,,", cookie, page); + + fscache_stat(&fscache_n_allocs); + + if (hlist_empty(&cookie->backing_objects)) + goto nobufs; + + ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); + ASSERTCMP(page, !=, NULL); + + if (fscache_wait_for_deferred_lookup(cookie) < 0) + return -ERESTARTSYS; + + op = fscache_alloc_retrieval(page->mapping, NULL, NULL); + if (!op) + return -ENOMEM; + + spin_lock(&cookie->lock); + + if (hlist_empty(&cookie->backing_objects)) + goto nobufs_unlock; + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + if (fscache_submit_op(object, &op->op) < 0) + goto nobufs_unlock; + spin_unlock(&cookie->lock); + + fscache_stat(&fscache_n_alloc_ops); + + if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) { + _debug(">>> WT"); + fscache_stat(&fscache_n_alloc_op_waits); + wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, + fscache_wait_bit, TASK_UNINTERRUPTIBLE); + _debug("<<< GO"); + } + + /* ask the cache to honour the operation */ + ret = object->cache->ops->allocate_page(op, page, gfp); + + if (ret < 0) + fscache_stat(&fscache_n_allocs_nobufs); + else + fscache_stat(&fscache_n_allocs_ok); + + fscache_put_retrieval(op); + _leave(" = %d", ret); + return ret; + +nobufs_unlock: + spin_unlock(&cookie->lock); + kfree(op); +nobufs: + fscache_stat(&fscache_n_allocs_nobufs); + _leave(" = -ENOBUFS"); + return -ENOBUFS; +} +EXPORT_SYMBOL(__fscache_alloc_page); + +/* + * release a write op reference + */ +static void fscache_release_write_op(struct fscache_operation *_op) +{ + _enter("{OP%x}", _op->debug_id); +} + +/* + * perform the background storage of a page into the cache + */ +static void fscache_write_op(struct fscache_operation *_op) +{ + struct fscache_storage *op = + container_of(_op, struct fscache_storage, op); + struct fscache_object *object = op->op.object; + struct fscache_cookie *cookie = object->cookie; + struct page *page; + unsigned n; + void *results[1]; + int ret; + + _enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage)); + + spin_lock(&cookie->lock); + spin_lock(&object->lock); + + if (!fscache_object_is_active(object)) { + spin_unlock(&object->lock); + spin_unlock(&cookie->lock); + _leave(""); + return; + } + + fscache_stat(&fscache_n_store_calls); + + /* find a page to store */ + page = NULL; + n = radix_tree_gang_lookup_tag(&cookie->stores, results, 0, 1, + FSCACHE_COOKIE_PENDING_TAG); + if (n != 1) + goto superseded; + page = results[0]; + _debug("gang %d [%lx]", n, page->index); + if (page->index > op->store_limit) + goto superseded; + + radix_tree_tag_clear(&cookie->stores, page->index, + FSCACHE_COOKIE_PENDING_TAG); + + spin_unlock(&object->lock); + spin_unlock(&cookie->lock); + + if (page) { + ret = object->cache->ops->write_page(op, page); + fscache_end_page_write(cookie, page); + page_cache_release(page); + if (ret < 0) + fscache_abort_object(object); + else + fscache_enqueue_operation(&op->op); + } + + _leave(""); + return; + +superseded: + /* this writer is going away and there aren't any more things to + * write */ + _debug("cease"); + clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); + spin_unlock(&object->lock); + spin_unlock(&cookie->lock); + _leave(""); +} + +/* + * request a page be stored in the cache + * - returns: + * -ENOMEM - out of memory, nothing done + * -ENOBUFS - no backing object available in which to cache the page + * 0 - dispatched a write - it'll call end_io_func() when finished + * + * if the cookie still has a backing object at this point, that object can be + * in one of a few states with respect to storage processing: + * + * (1) negative lookup, object not yet created (FSCACHE_COOKIE_CREATING is + * set) + * + * (a) no writes yet (set FSCACHE_COOKIE_PENDING_FILL and queue deferred + * fill op) + * + * (b) writes deferred till post-creation (mark page for writing and + * return immediately) + * + * (2) negative lookup, object created, initial fill being made from netfs + * (FSCACHE_COOKIE_INITIAL_FILL is set) + * + * (a) fill point not yet reached this page (mark page for writing and + * return) + * + * (b) fill point passed this page (queue op to store this page) + * + * (3) object extant (queue op to store this page) + * + * any other state is invalid + */ +int __fscache_write_page(struct fscache_cookie *cookie, + struct page *page, + gfp_t gfp) +{ + struct fscache_storage *op; + struct fscache_object *object; + int ret; + + _enter("%p,%x,", cookie, (u32) page->flags); + + ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); + ASSERT(PageFsCache(page)); + + fscache_stat(&fscache_n_stores); + + op = kzalloc(sizeof(*op), GFP_NOIO); + if (!op) + goto nomem; + + fscache_operation_init(&op->op, fscache_release_write_op); + fscache_operation_init_slow(&op->op, fscache_write_op); + op->op.flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_WAITING); + + ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM); + if (ret < 0) + goto nomem_free; + + ret = -ENOBUFS; + spin_lock(&cookie->lock); + + if (hlist_empty(&cookie->backing_objects)) + goto nobufs; + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + if (test_bit(FSCACHE_IOERROR, &object->cache->flags)) + goto nobufs; + + /* add the page to the pending-storage radix tree on the backing + * object */ + spin_lock(&object->lock); + + _debug("store limit %llx", (unsigned long long) object->store_limit); + + ret = radix_tree_insert(&cookie->stores, page->index, page); + if (ret < 0) { + if (ret == -EEXIST) + goto already_queued; + _debug("insert failed %d", ret); + goto nobufs_unlock_obj; + } + + radix_tree_tag_set(&cookie->stores, page->index, + FSCACHE_COOKIE_PENDING_TAG); + page_cache_get(page); + + /* we only want one writer at a time, but we do need to queue new + * writers after exclusive ops */ + if (test_and_set_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags)) + goto already_pending; + + spin_unlock(&object->lock); + + op->op.debug_id = atomic_inc_return(&fscache_op_debug_id); + op->store_limit = object->store_limit; + + if (fscache_submit_op(object, &op->op) < 0) + goto submit_failed; + + spin_unlock(&cookie->lock); + radix_tree_preload_end(); + fscache_stat(&fscache_n_store_ops); + fscache_stat(&fscache_n_stores_ok); + + /* the slow work queue now carries its own ref on the object */ + fscache_put_operation(&op->op); + _leave(" = 0"); + return 0; + +already_queued: + fscache_stat(&fscache_n_stores_again); +already_pending: + spin_unlock(&object->lock); + spin_unlock(&cookie->lock); + radix_tree_preload_end(); + kfree(op); + fscache_stat(&fscache_n_stores_ok); + _leave(" = 0"); + return 0; + +submit_failed: + radix_tree_delete(&cookie->stores, page->index); + page_cache_release(page); + ret = -ENOBUFS; + goto nobufs; + +nobufs_unlock_obj: + spin_unlock(&object->lock); +nobufs: + spin_unlock(&cookie->lock); + radix_tree_preload_end(); + kfree(op); + fscache_stat(&fscache_n_stores_nobufs); + _leave(" = -ENOBUFS"); + return -ENOBUFS; + +nomem_free: + kfree(op); +nomem: + fscache_stat(&fscache_n_stores_oom); + _leave(" = -ENOMEM"); + return -ENOMEM; +} +EXPORT_SYMBOL(__fscache_write_page); + +/* + * remove a page from the cache + */ +void __fscache_uncache_page(struct fscache_cookie *cookie, struct page *page) +{ + struct fscache_object *object; + + _enter(",%p", page); + + ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); + ASSERTCMP(page, !=, NULL); + + fscache_stat(&fscache_n_uncaches); + + /* cache withdrawal may beat us to it */ + if (!PageFsCache(page)) + goto done; + + /* get the object */ + spin_lock(&cookie->lock); + + if (hlist_empty(&cookie->backing_objects)) { + ClearPageFsCache(page); + goto done_unlock; + } + + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + /* there might now be stuff on disk we could read */ + clear_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); + + /* only invoke the cache backend if we managed to mark the page + * uncached here; this deals with synchronisation vs withdrawal */ + if (TestClearPageFsCache(page) && + object->cache->ops->uncache_page) { + /* the cache backend releases the cookie lock */ + object->cache->ops->uncache_page(object, page); + goto done; + } + +done_unlock: + spin_unlock(&cookie->lock); +done: + _leave(""); +} +EXPORT_SYMBOL(__fscache_uncache_page); + +/** + * fscache_mark_pages_cached - Mark pages as being cached + * @op: The retrieval op pages are being marked for + * @pagevec: The pages to be marked + * + * Mark a bunch of netfs pages as being cached. After this is called, + * the netfs must call fscache_uncache_page() to remove the mark. + */ +void fscache_mark_pages_cached(struct fscache_retrieval *op, + struct pagevec *pagevec) +{ + struct fscache_cookie *cookie = op->op.object->cookie; + unsigned long loop; + +#ifdef CONFIG_FSCACHE_STATS + atomic_add(pagevec->nr, &fscache_n_marks); +#endif + + for (loop = 0; loop < pagevec->nr; loop++) { + struct page *page = pagevec->pages[loop]; + + _debug("- mark %p{%lx}", page, page->index); + if (TestSetPageFsCache(page)) { + static bool once_only; + if (!once_only) { + once_only = true; + printk(KERN_WARNING "FS-Cache:" + " Cookie type %s marked page %lx" + " multiple times\n", + cookie->def->name, page->index); + } + } + } + + if (cookie->def->mark_pages_cached) + cookie->def->mark_pages_cached(cookie->netfs_data, + op->mapping, pagevec); + pagevec_reinit(pagevec); +} +EXPORT_SYMBOL(fscache_mark_pages_cached); diff --git a/fs/fscache/proc.c b/fs/fscache/proc.c new file mode 100644 index 00000000000..beeab44bc31 --- /dev/null +++ b/fs/fscache/proc.c @@ -0,0 +1,68 @@ +/* FS-Cache statistics viewing interface + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define FSCACHE_DEBUG_LEVEL OPERATION +#include <linux/module.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include "internal.h" + +/* + * initialise the /proc/fs/fscache/ directory + */ +int __init fscache_proc_init(void) +{ + _enter(""); + + if (!proc_mkdir("fs/fscache", NULL)) + goto error_dir; + +#ifdef CONFIG_FSCACHE_STATS + if (!proc_create("fs/fscache/stats", S_IFREG | 0444, NULL, + &fscache_stats_fops)) + goto error_stats; +#endif + +#ifdef CONFIG_FSCACHE_HISTOGRAM + if (!proc_create("fs/fscache/histogram", S_IFREG | 0444, NULL, + &fscache_histogram_fops)) + goto error_histogram; +#endif + + _leave(" = 0"); + return 0; + +#ifdef CONFIG_FSCACHE_HISTOGRAM +error_histogram: +#endif +#ifdef CONFIG_FSCACHE_STATS + remove_proc_entry("fs/fscache/stats", NULL); +error_stats: +#endif + remove_proc_entry("fs/fscache", NULL); +error_dir: + _leave(" = -ENOMEM"); + return -ENOMEM; +} + +/* + * clean up the /proc/fs/fscache/ directory + */ +void fscache_proc_cleanup(void) +{ +#ifdef CONFIG_FSCACHE_HISTOGRAM + remove_proc_entry("fs/fscache/histogram", NULL); +#endif +#ifdef CONFIG_FSCACHE_STATS + remove_proc_entry("fs/fscache/stats", NULL); +#endif + remove_proc_entry("fs/fscache", NULL); +} diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c new file mode 100644 index 00000000000..65deb99e756 --- /dev/null +++ b/fs/fscache/stats.c @@ -0,0 +1,212 @@ +/* FS-Cache statistics + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define FSCACHE_DEBUG_LEVEL THREAD +#include <linux/module.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include "internal.h" + +/* + * operation counters + */ +atomic_t fscache_n_op_pend; +atomic_t fscache_n_op_run; +atomic_t fscache_n_op_enqueue; +atomic_t fscache_n_op_requeue; +atomic_t fscache_n_op_deferred_release; +atomic_t fscache_n_op_release; +atomic_t fscache_n_op_gc; + +atomic_t fscache_n_attr_changed; +atomic_t fscache_n_attr_changed_ok; +atomic_t fscache_n_attr_changed_nobufs; +atomic_t fscache_n_attr_changed_nomem; +atomic_t fscache_n_attr_changed_calls; + +atomic_t fscache_n_allocs; +atomic_t fscache_n_allocs_ok; +atomic_t fscache_n_allocs_wait; +atomic_t fscache_n_allocs_nobufs; +atomic_t fscache_n_alloc_ops; +atomic_t fscache_n_alloc_op_waits; + +atomic_t fscache_n_retrievals; +atomic_t fscache_n_retrievals_ok; +atomic_t fscache_n_retrievals_wait; +atomic_t fscache_n_retrievals_nodata; +atomic_t fscache_n_retrievals_nobufs; +atomic_t fscache_n_retrievals_intr; +atomic_t fscache_n_retrievals_nomem; +atomic_t fscache_n_retrieval_ops; +atomic_t fscache_n_retrieval_op_waits; + +atomic_t fscache_n_stores; +atomic_t fscache_n_stores_ok; +atomic_t fscache_n_stores_again; +atomic_t fscache_n_stores_nobufs; +atomic_t fscache_n_stores_oom; +atomic_t fscache_n_store_ops; +atomic_t fscache_n_store_calls; + +atomic_t fscache_n_marks; +atomic_t fscache_n_uncaches; + +atomic_t fscache_n_acquires; +atomic_t fscache_n_acquires_null; +atomic_t fscache_n_acquires_no_cache; +atomic_t fscache_n_acquires_ok; +atomic_t fscache_n_acquires_nobufs; +atomic_t fscache_n_acquires_oom; + +atomic_t fscache_n_updates; +atomic_t fscache_n_updates_null; +atomic_t fscache_n_updates_run; + +atomic_t fscache_n_relinquishes; +atomic_t fscache_n_relinquishes_null; +atomic_t fscache_n_relinquishes_waitcrt; + +atomic_t fscache_n_cookie_index; +atomic_t fscache_n_cookie_data; +atomic_t fscache_n_cookie_special; + +atomic_t fscache_n_object_alloc; +atomic_t fscache_n_object_no_alloc; +atomic_t fscache_n_object_lookups; +atomic_t fscache_n_object_lookups_negative; +atomic_t fscache_n_object_lookups_positive; +atomic_t fscache_n_object_created; +atomic_t fscache_n_object_avail; +atomic_t fscache_n_object_dead; + +atomic_t fscache_n_checkaux_none; +atomic_t fscache_n_checkaux_okay; +atomic_t fscache_n_checkaux_update; +atomic_t fscache_n_checkaux_obsolete; + +/* + * display the general statistics + */ +static int fscache_stats_show(struct seq_file *m, void *v) +{ + seq_puts(m, "FS-Cache statistics\n"); + + seq_printf(m, "Cookies: idx=%u dat=%u spc=%u\n", + atomic_read(&fscache_n_cookie_index), + atomic_read(&fscache_n_cookie_data), + atomic_read(&fscache_n_cookie_special)); + + seq_printf(m, "Objects: alc=%u nal=%u avl=%u ded=%u\n", + atomic_read(&fscache_n_object_alloc), + atomic_read(&fscache_n_object_no_alloc), + atomic_read(&fscache_n_object_avail), + atomic_read(&fscache_n_object_dead)); + seq_printf(m, "ChkAux : non=%u ok=%u upd=%u obs=%u\n", + atomic_read(&fscache_n_checkaux_none), + atomic_read(&fscache_n_checkaux_okay), + atomic_read(&fscache_n_checkaux_update), + atomic_read(&fscache_n_checkaux_obsolete)); + + seq_printf(m, "Pages : mrk=%u unc=%u\n", + atomic_read(&fscache_n_marks), + atomic_read(&fscache_n_uncaches)); + + seq_printf(m, "Acquire: n=%u nul=%u noc=%u ok=%u nbf=%u" + " oom=%u\n", + atomic_read(&fscache_n_acquires), + atomic_read(&fscache_n_acquires_null), + atomic_read(&fscache_n_acquires_no_cache), + atomic_read(&fscache_n_acquires_ok), + atomic_read(&fscache_n_acquires_nobufs), + atomic_read(&fscache_n_acquires_oom)); + + seq_printf(m, "Lookups: n=%u neg=%u pos=%u crt=%u\n", + atomic_read(&fscache_n_object_lookups), + atomic_read(&fscache_n_object_lookups_negative), + atomic_read(&fscache_n_object_lookups_positive), + atomic_read(&fscache_n_object_created)); + + seq_printf(m, "Updates: n=%u nul=%u run=%u\n", + atomic_read(&fscache_n_updates), + atomic_read(&fscache_n_updates_null), + atomic_read(&fscache_n_updates_run)); + + seq_printf(m, "Relinqs: n=%u nul=%u wcr=%u\n", + atomic_read(&fscache_n_relinquishes), + atomic_read(&fscache_n_relinquishes_null), + atomic_read(&fscache_n_relinquishes_waitcrt)); + + seq_printf(m, "AttrChg: n=%u ok=%u nbf=%u oom=%u run=%u\n", + atomic_read(&fscache_n_attr_changed), + atomic_read(&fscache_n_attr_changed_ok), + atomic_read(&fscache_n_attr_changed_nobufs), + atomic_read(&fscache_n_attr_changed_nomem), + atomic_read(&fscache_n_attr_changed_calls)); + + seq_printf(m, "Allocs : n=%u ok=%u wt=%u nbf=%u\n", + atomic_read(&fscache_n_allocs), + atomic_read(&fscache_n_allocs_ok), + atomic_read(&fscache_n_allocs_wait), + atomic_read(&fscache_n_allocs_nobufs)); + seq_printf(m, "Allocs : ops=%u owt=%u\n", + atomic_read(&fscache_n_alloc_ops), + atomic_read(&fscache_n_alloc_op_waits)); + + seq_printf(m, "Retrvls: n=%u ok=%u wt=%u nod=%u nbf=%u" + " int=%u oom=%u\n", + atomic_read(&fscache_n_retrievals), + atomic_read(&fscache_n_retrievals_ok), + atomic_read(&fscache_n_retrievals_wait), + atomic_read(&fscache_n_retrievals_nodata), + atomic_read(&fscache_n_retrievals_nobufs), + atomic_read(&fscache_n_retrievals_intr), + atomic_read(&fscache_n_retrievals_nomem)); + seq_printf(m, "Retrvls: ops=%u owt=%u\n", + atomic_read(&fscache_n_retrieval_ops), + atomic_read(&fscache_n_retrieval_op_waits)); + + seq_printf(m, "Stores : n=%u ok=%u agn=%u nbf=%u oom=%u\n", + atomic_read(&fscache_n_stores), + atomic_read(&fscache_n_stores_ok), + atomic_read(&fscache_n_stores_again), + atomic_read(&fscache_n_stores_nobufs), + atomic_read(&fscache_n_stores_oom)); + seq_printf(m, "Stores : ops=%u run=%u\n", + atomic_read(&fscache_n_store_ops), + atomic_read(&fscache_n_store_calls)); + + seq_printf(m, "Ops : pend=%u run=%u enq=%u\n", + atomic_read(&fscache_n_op_pend), + atomic_read(&fscache_n_op_run), + atomic_read(&fscache_n_op_enqueue)); + seq_printf(m, "Ops : dfr=%u rel=%u gc=%u\n", + atomic_read(&fscache_n_op_deferred_release), + atomic_read(&fscache_n_op_release), + atomic_read(&fscache_n_op_gc)); + return 0; +} + +/* + * open "/proc/fs/fscache/stats" allowing provision of a statistical summary + */ +static int fscache_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, fscache_stats_show, NULL); +} + +const struct file_operations fscache_stats_fops = { + .owner = THIS_MODULE, + .open = fscache_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 36fe20d6eba..e67f3ec0773 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -84,3 +84,11 @@ config ROOT_NFS <file:Documentation/filesystems/nfsroot.txt>. Most people say N here. + +config NFS_FSCACHE + bool "Provide NFS client caching support (EXPERIMENTAL)" + depends on EXPERIMENTAL + depends on NFS_FS=m && FSCACHE || NFS_FS=y && FSCACHE=y + help + Say Y here if you want NFS data to be cached locally on disc through + the general filesystem cache manager diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index ac6170c594a..845159814de 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -15,3 +15,4 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ callback.o callback_xdr.o callback_proc.o \ nfs4namespace.o nfs-$(CONFIG_SYSCTL) += sysctl.o +nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o diff --git a/fs/nfs/client.c b/fs/nfs/client.c index aba38017bde..75c9cd2aa11 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -45,6 +45,7 @@ #include "delegation.h" #include "iostat.h" #include "internal.h" +#include "fscache.h" #define NFSDBG_FACILITY NFSDBG_CLIENT @@ -154,6 +155,8 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ if (!IS_ERR(cred)) clp->cl_machine_cred = cred; + nfs_fscache_get_client_cookie(clp); + return clp; error_3: @@ -187,6 +190,8 @@ static void nfs_free_client(struct nfs_client *clp) nfs4_shutdown_client(clp); + nfs_fscache_release_client_cookie(clp); + /* -EIO all pending I/O */ if (!IS_ERR(clp->cl_rpcclient)) rpc_shutdown_client(clp->cl_rpcclient); @@ -760,6 +765,7 @@ static int nfs_init_server(struct nfs_server *server, /* Initialise the client representation from the mount data */ server->flags = data->flags; + server->options = data->options; if (data->rsize) server->rsize = nfs_block_size(data->rsize, NULL); @@ -1148,6 +1154,7 @@ static int nfs4_init_server(struct nfs_server *server, /* Initialise the client representation from the mount data */ server->flags = data->flags; server->caps |= NFS_CAP_ATOMIC_OPEN; + server->options = data->options; /* Get a client record */ error = nfs4_set_client(server, @@ -1559,7 +1566,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) /* display header on line 1 */ if (v == &nfs_volume_list) { - seq_puts(m, "NV SERVER PORT DEV FSID\n"); + seq_puts(m, "NV SERVER PORT DEV FSID FSC\n"); return 0; } /* display one transport per line on subsequent lines */ @@ -1573,12 +1580,13 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) (unsigned long long) server->fsid.major, (unsigned long long) server->fsid.minor); - seq_printf(m, "v%u %s %s %-7s %-17s\n", + seq_printf(m, "v%u %s %s %-7s %-17s %s\n", clp->rpc_ops->version, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT), dev, - fsid); + fsid, + nfs_server_fscache_state(server)); return 0; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 0abf3f331f5..3523b895eb4 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -35,6 +35,7 @@ #include "delegation.h" #include "internal.h" #include "iostat.h" +#include "fscache.h" #define NFSDBG_FACILITY NFSDBG_FILE @@ -409,6 +410,13 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, return copied; } +/* + * Partially or wholly invalidate a page + * - Release the private state associated with a page if undergoing complete + * page invalidation + * - Called if either PG_private or PG_fscache is set on the page + * - Caller holds page lock + */ static void nfs_invalidate_page(struct page *page, unsigned long offset) { dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %lu)\n", page, offset); @@ -417,23 +425,43 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset) return; /* Cancel any unstarted writes on this page */ nfs_wb_page_cancel(page->mapping->host, page); + + nfs_fscache_invalidate_page(page, page->mapping->host); } +/* + * Attempt to release the private state associated with a page + * - Called if either PG_private or PG_fscache is set on the page + * - Caller holds page lock + * - Return true (may release page) or false (may not) + */ static int nfs_release_page(struct page *page, gfp_t gfp) { dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); /* If PagePrivate() is set, then the page is not freeable */ - return 0; + if (PagePrivate(page)) + return 0; + return nfs_fscache_release_page(page, gfp); } +/* + * Attempt to clear the private state associated with a page when an error + * occurs that requires the cached contents of an inode to be written back or + * destroyed + * - Called if either PG_private or fscache is set on the page + * - Caller holds page lock + * - Return 0 if successful, -error otherwise + */ static int nfs_launder_page(struct page *page) { struct inode *inode = page->mapping->host; + struct nfs_inode *nfsi = NFS_I(inode); dfprintk(PAGECACHE, "NFS: launder_page(%ld, %llu)\n", inode->i_ino, (long long)page_offset(page)); + nfs_fscache_wait_on_page_write(nfsi, page); return nfs_wb_page(inode, page); } @@ -451,6 +479,11 @@ const struct address_space_operations nfs_file_aops = { .launder_page = nfs_launder_page, }; +/* + * Notification that a PTE pointing to an NFS page is about to be made + * writable, implying that someone is about to modify the page through a + * shared-writable mapping + */ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page = vmf->page; @@ -465,6 +498,9 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) filp->f_mapping->host->i_ino, (long long)page_offset(page)); + /* make sure the cache has finished storing the page */ + nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), page); + lock_page(page); mapping = page->mapping; if (mapping != dentry->d_inode->i_mapping) diff --git a/fs/nfs/fscache-index.c b/fs/nfs/fscache-index.c new file mode 100644 index 00000000000..5b1006480bc --- /dev/null +++ b/fs/nfs/fscache-index.c @@ -0,0 +1,337 @@ +/* NFS FS-Cache index structure definition + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_fs_sb.h> +#include <linux/in6.h> + +#include "internal.h" +#include "fscache.h" + +#define NFSDBG_FACILITY NFSDBG_FSCACHE + +/* + * Define the NFS filesystem for FS-Cache. Upon registration FS-Cache sticks + * the cookie for the top-level index object for NFS into here. The top-level + * index can than have other cache objects inserted into it. + */ +struct fscache_netfs nfs_fscache_netfs = { + .name = "nfs", + .version = 0, +}; + +/* + * Register NFS for caching + */ +int nfs_fscache_register(void) +{ + return fscache_register_netfs(&nfs_fscache_netfs); +} + +/* + * Unregister NFS for caching + */ +void nfs_fscache_unregister(void) +{ + fscache_unregister_netfs(&nfs_fscache_netfs); +} + +/* + * Layout of the key for an NFS server cache object. + */ +struct nfs_server_key { + uint16_t nfsversion; /* NFS protocol version */ + uint16_t family; /* address family */ + uint16_t port; /* IP port */ + union { + struct in_addr ipv4_addr; /* IPv4 address */ + struct in6_addr ipv6_addr; /* IPv6 address */ + } addr[0]; +}; + +/* + * Generate a key to describe a server in the main NFS index + * - We return the length of the key, or 0 if we can't generate one + */ +static uint16_t nfs_server_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) +{ + const struct nfs_client *clp = cookie_netfs_data; + const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) &clp->cl_addr; + const struct sockaddr_in *sin = (struct sockaddr_in *) &clp->cl_addr; + struct nfs_server_key *key = buffer; + uint16_t len = sizeof(struct nfs_server_key); + + key->nfsversion = clp->rpc_ops->version; + key->family = clp->cl_addr.ss_family; + + memset(key, 0, len); + + switch (clp->cl_addr.ss_family) { + case AF_INET: + key->port = sin->sin_port; + key->addr[0].ipv4_addr = sin->sin_addr; + len += sizeof(key->addr[0].ipv4_addr); + break; + + case AF_INET6: + key->port = sin6->sin6_port; + key->addr[0].ipv6_addr = sin6->sin6_addr; + len += sizeof(key->addr[0].ipv6_addr); + break; + + default: + printk(KERN_WARNING "NFS: Unknown network family '%d'\n", + clp->cl_addr.ss_family); + len = 0; + break; + } + + return len; +} + +/* + * Define the server object for FS-Cache. This is used to describe a server + * object to fscache_acquire_cookie(). It is keyed by the NFS protocol and + * server address parameters. + */ +const struct fscache_cookie_def nfs_fscache_server_index_def = { + .name = "NFS.server", + .type = FSCACHE_COOKIE_TYPE_INDEX, + .get_key = nfs_server_get_key, +}; + +/* + * Generate a key to describe a superblock key in the main NFS index + */ +static uint16_t nfs_super_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) +{ + const struct nfs_fscache_key *key; + const struct nfs_server *nfss = cookie_netfs_data; + uint16_t len; + + key = nfss->fscache_key; + len = sizeof(key->key) + key->key.uniq_len; + if (len > bufmax) { + len = 0; + } else { + memcpy(buffer, &key->key, sizeof(key->key)); + memcpy(buffer + sizeof(key->key), + key->key.uniquifier, key->key.uniq_len); + } + + return len; +} + +/* + * Define the superblock object for FS-Cache. This is used to describe a + * superblock object to fscache_acquire_cookie(). It is keyed by all the NFS + * parameters that might cause a separate superblock. + */ +const struct fscache_cookie_def nfs_fscache_super_index_def = { + .name = "NFS.super", + .type = FSCACHE_COOKIE_TYPE_INDEX, + .get_key = nfs_super_get_key, +}; + +/* + * Definition of the auxiliary data attached to NFS inode storage objects + * within the cache. + * + * The contents of this struct are recorded in the on-disk local cache in the + * auxiliary data attached to the data storage object backing an inode. This + * permits coherency to be managed when a new inode binds to an already extant + * cache object. + */ +struct nfs_fscache_inode_auxdata { + struct timespec mtime; + struct timespec ctime; + loff_t size; + u64 change_attr; +}; + +/* + * Generate a key to describe an NFS inode in an NFS server's index + */ +static uint16_t nfs_fscache_inode_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) +{ + const struct nfs_inode *nfsi = cookie_netfs_data; + uint16_t nsize; + + /* use the inode's NFS filehandle as the key */ + nsize = nfsi->fh.size; + memcpy(buffer, nfsi->fh.data, nsize); + return nsize; +} + +/* + * Get certain file attributes from the netfs data + * - This function can be absent for an index + * - Not permitted to return an error + * - The netfs data from the cookie being used as the source is presented + */ +static void nfs_fscache_inode_get_attr(const void *cookie_netfs_data, + uint64_t *size) +{ + const struct nfs_inode *nfsi = cookie_netfs_data; + + *size = nfsi->vfs_inode.i_size; +} + +/* + * Get the auxiliary data from netfs data + * - This function can be absent if the index carries no state data + * - Should store the auxiliary data in the buffer + * - Should return the amount of amount stored + * - Not permitted to return an error + * - The netfs data from the cookie being used as the source is presented + */ +static uint16_t nfs_fscache_inode_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) +{ + struct nfs_fscache_inode_auxdata auxdata; + const struct nfs_inode *nfsi = cookie_netfs_data; + + memset(&auxdata, 0, sizeof(auxdata)); + auxdata.size = nfsi->vfs_inode.i_size; + auxdata.mtime = nfsi->vfs_inode.i_mtime; + auxdata.ctime = nfsi->vfs_inode.i_ctime; + + if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) + auxdata.change_attr = nfsi->change_attr; + + if (bufmax > sizeof(auxdata)) + bufmax = sizeof(auxdata); + + memcpy(buffer, &auxdata, bufmax); + return bufmax; +} + +/* + * Consult the netfs about the state of an object + * - This function can be absent if the index carries no state data + * - The netfs data from the cookie being used as the target is + * presented, as is the auxiliary data + */ +static +enum fscache_checkaux nfs_fscache_inode_check_aux(void *cookie_netfs_data, + const void *data, + uint16_t datalen) +{ + struct nfs_fscache_inode_auxdata auxdata; + struct nfs_inode *nfsi = cookie_netfs_data; + + if (datalen != sizeof(auxdata)) + return FSCACHE_CHECKAUX_OBSOLETE; + + memset(&auxdata, 0, sizeof(auxdata)); + auxdata.size = nfsi->vfs_inode.i_size; + auxdata.mtime = nfsi->vfs_inode.i_mtime; + auxdata.ctime = nfsi->vfs_inode.i_ctime; + + if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) + auxdata.change_attr = nfsi->change_attr; + + if (memcmp(data, &auxdata, datalen) != 0) + return FSCACHE_CHECKAUX_OBSOLETE; + + return FSCACHE_CHECKAUX_OKAY; +} + +/* + * Indication from FS-Cache that the cookie is no longer cached + * - This function is called when the backing store currently caching a cookie + * is removed + * - The netfs should use this to clean up any markers indicating cached pages + * - This is mandatory for any object that may have data + */ +static void nfs_fscache_inode_now_uncached(void *cookie_netfs_data) +{ + struct nfs_inode *nfsi = cookie_netfs_data; + struct pagevec pvec; + pgoff_t first; + int loop, nr_pages; + + pagevec_init(&pvec, 0); + first = 0; + + dprintk("NFS: nfs_inode_now_uncached: nfs_inode 0x%p\n", nfsi); + + for (;;) { + /* grab a bunch of pages to unmark */ + nr_pages = pagevec_lookup(&pvec, + nfsi->vfs_inode.i_mapping, + first, + PAGEVEC_SIZE - pagevec_count(&pvec)); + if (!nr_pages) + break; + + for (loop = 0; loop < nr_pages; loop++) + ClearPageFsCache(pvec.pages[loop]); + + first = pvec.pages[nr_pages - 1]->index + 1; + + pvec.nr = nr_pages; + pagevec_release(&pvec); + cond_resched(); + } +} + +/* + * Get an extra reference on a read context. + * - This function can be absent if the completion function doesn't require a + * context. + * - The read context is passed back to NFS in the event that a data read on the + * cache fails with EIO - in which case the server must be contacted to + * retrieve the data, which requires the read context for security. + */ +static void nfs_fh_get_context(void *cookie_netfs_data, void *context) +{ + get_nfs_open_context(context); +} + +/* + * Release an extra reference on a read context. + * - This function can be absent if the completion function doesn't require a + * context. + */ +static void nfs_fh_put_context(void *cookie_netfs_data, void *context) +{ + if (context) + put_nfs_open_context(context); +} + +/* + * Define the inode object for FS-Cache. This is used to describe an inode + * object to fscache_acquire_cookie(). It is keyed by the NFS file handle for + * an inode. + * + * Coherency is managed by comparing the copies of i_size, i_mtime and i_ctime + * held in the cache auxiliary data for the data storage object with those in + * the inode struct in memory. + */ +const struct fscache_cookie_def nfs_fscache_inode_object_def = { + .name = "NFS.fh", + .type = FSCACHE_COOKIE_TYPE_DATAFILE, + .get_key = nfs_fscache_inode_get_key, + .get_attr = nfs_fscache_inode_get_attr, + .get_aux = nfs_fscache_inode_get_aux, + .check_aux = nfs_fscache_inode_check_aux, + .now_uncached = nfs_fscache_inode_now_uncached, + .get_context = nfs_fh_get_context, + .put_context = nfs_fh_put_context, +}; diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c new file mode 100644 index 00000000000..379be678cb7 --- /dev/null +++ b/fs/nfs/fscache.c @@ -0,0 +1,523 @@ +/* NFS filesystem cache interface + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_fs_sb.h> +#include <linux/in6.h> +#include <linux/seq_file.h> + +#include "internal.h" +#include "iostat.h" +#include "fscache.h" + +#define NFSDBG_FACILITY NFSDBG_FSCACHE + +static struct rb_root nfs_fscache_keys = RB_ROOT; +static DEFINE_SPINLOCK(nfs_fscache_keys_lock); + +/* + * Get the per-client index cookie for an NFS client if the appropriate mount + * flag was set + * - We always try and get an index cookie for the client, but get filehandle + * cookies on a per-superblock basis, depending on the mount flags + */ +void nfs_fscache_get_client_cookie(struct nfs_client *clp) +{ + /* create a cache index for looking up filehandles */ + clp->fscache = fscache_acquire_cookie(nfs_fscache_netfs.primary_index, + &nfs_fscache_server_index_def, + clp); + dfprintk(FSCACHE, "NFS: get client cookie (0x%p/0x%p)\n", + clp, clp->fscache); +} + +/* + * Dispose of a per-client cookie + */ +void nfs_fscache_release_client_cookie(struct nfs_client *clp) +{ + dfprintk(FSCACHE, "NFS: releasing client cookie (0x%p/0x%p)\n", + clp, clp->fscache); + + fscache_relinquish_cookie(clp->fscache, 0); + clp->fscache = NULL; +} + +/* + * Get the cache cookie for an NFS superblock. We have to handle + * uniquification here because the cache doesn't do it for us. + */ +void nfs_fscache_get_super_cookie(struct super_block *sb, + struct nfs_parsed_mount_data *data) +{ + struct nfs_fscache_key *key, *xkey; + struct nfs_server *nfss = NFS_SB(sb); + struct rb_node **p, *parent; + const char *uniq = data->fscache_uniq ?: ""; + int diff, ulen; + + ulen = strlen(uniq); + key = kzalloc(sizeof(*key) + ulen, GFP_KERNEL); + if (!key) + return; + + key->nfs_client = nfss->nfs_client; + key->key.super.s_flags = sb->s_flags & NFS_MS_MASK; + key->key.nfs_server.flags = nfss->flags; + key->key.nfs_server.rsize = nfss->rsize; + key->key.nfs_server.wsize = nfss->wsize; + key->key.nfs_server.acregmin = nfss->acregmin; + key->key.nfs_server.acregmax = nfss->acregmax; + key->key.nfs_server.acdirmin = nfss->acdirmin; + key->key.nfs_server.acdirmax = nfss->acdirmax; + key->key.nfs_server.fsid = nfss->fsid; + key->key.rpc_auth.au_flavor = nfss->client->cl_auth->au_flavor; + + key->key.uniq_len = ulen; + memcpy(key->key.uniquifier, uniq, ulen); + + spin_lock(&nfs_fscache_keys_lock); + p = &nfs_fscache_keys.rb_node; + parent = NULL; + while (*p) { + parent = *p; + xkey = rb_entry(parent, struct nfs_fscache_key, node); + + if (key->nfs_client < xkey->nfs_client) + goto go_left; + if (key->nfs_client > xkey->nfs_client) + goto go_right; + + diff = memcmp(&key->key, &xkey->key, sizeof(key->key)); + if (diff < 0) + goto go_left; + if (diff > 0) + goto go_right; + + if (key->key.uniq_len == 0) + goto non_unique; + diff = memcmp(key->key.uniquifier, + xkey->key.uniquifier, + key->key.uniq_len); + if (diff < 0) + goto go_left; + if (diff > 0) + goto go_right; + goto non_unique; + + go_left: + p = &(*p)->rb_left; + continue; + go_right: + p = &(*p)->rb_right; + } + + rb_link_node(&key->node, parent, p); + rb_insert_color(&key->node, &nfs_fscache_keys); + spin_unlock(&nfs_fscache_keys_lock); + nfss->fscache_key = key; + + /* create a cache index for looking up filehandles */ + nfss->fscache = fscache_acquire_cookie(nfss->nfs_client->fscache, + &nfs_fscache_super_index_def, + nfss); + dfprintk(FSCACHE, "NFS: get superblock cookie (0x%p/0x%p)\n", + nfss, nfss->fscache); + return; + +non_unique: + spin_unlock(&nfs_fscache_keys_lock); + kfree(key); + nfss->fscache_key = NULL; + nfss->fscache = NULL; + printk(KERN_WARNING "NFS:" + " Cache request denied due to non-unique superblock keys\n"); +} + +/* + * release a per-superblock cookie + */ +void nfs_fscache_release_super_cookie(struct super_block *sb) +{ + struct nfs_server *nfss = NFS_SB(sb); + + dfprintk(FSCACHE, "NFS: releasing superblock cookie (0x%p/0x%p)\n", + nfss, nfss->fscache); + + fscache_relinquish_cookie(nfss->fscache, 0); + nfss->fscache = NULL; + + if (nfss->fscache_key) { + spin_lock(&nfs_fscache_keys_lock); + rb_erase(&nfss->fscache_key->node, &nfs_fscache_keys); + spin_unlock(&nfs_fscache_keys_lock); + kfree(nfss->fscache_key); + nfss->fscache_key = NULL; + } +} + +/* + * Initialise the per-inode cache cookie pointer for an NFS inode. + */ +void nfs_fscache_init_inode_cookie(struct inode *inode) +{ + NFS_I(inode)->fscache = NULL; + if (S_ISREG(inode->i_mode)) + set_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags); +} + +/* + * Get the per-inode cache cookie for an NFS inode. + */ +static void nfs_fscache_enable_inode_cookie(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + struct nfs_inode *nfsi = NFS_I(inode); + + if (nfsi->fscache || !NFS_FSCACHE(inode)) + return; + + if ((NFS_SB(sb)->options & NFS_OPTION_FSCACHE)) { + nfsi->fscache = fscache_acquire_cookie( + NFS_SB(sb)->fscache, + &nfs_fscache_inode_object_def, + nfsi); + + dfprintk(FSCACHE, "NFS: get FH cookie (0x%p/0x%p/0x%p)\n", + sb, nfsi, nfsi->fscache); + } +} + +/* + * Release a per-inode cookie. + */ +void nfs_fscache_release_inode_cookie(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n", + nfsi, nfsi->fscache); + + fscache_relinquish_cookie(nfsi->fscache, 0); + nfsi->fscache = NULL; +} + +/* + * Retire a per-inode cookie, destroying the data attached to it. + */ +void nfs_fscache_zap_inode_cookie(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + dfprintk(FSCACHE, "NFS: zapping cookie (0x%p/0x%p)\n", + nfsi, nfsi->fscache); + + fscache_relinquish_cookie(nfsi->fscache, 1); + nfsi->fscache = NULL; +} + +/* + * Turn off the cache with regard to a per-inode cookie if opened for writing, + * invalidating all the pages in the page cache relating to the associated + * inode to clear the per-page caching. + */ +static void nfs_fscache_disable_inode_cookie(struct inode *inode) +{ + clear_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags); + + if (NFS_I(inode)->fscache) { + dfprintk(FSCACHE, + "NFS: nfsi 0x%p turning cache off\n", NFS_I(inode)); + + /* Need to invalidate any mapped pages that were read in before + * turning off the cache. + */ + if (inode->i_mapping && inode->i_mapping->nrpages) + invalidate_inode_pages2(inode->i_mapping); + + nfs_fscache_zap_inode_cookie(inode); + } +} + +/* + * wait_on_bit() sleep function for uninterruptible waiting + */ +static int nfs_fscache_wait_bit(void *flags) +{ + schedule(); + return 0; +} + +/* + * Lock against someone else trying to also acquire or relinquish a cookie + */ +static inline void nfs_fscache_inode_lock(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + while (test_and_set_bit(NFS_INO_FSCACHE_LOCK, &nfsi->flags)) + wait_on_bit(&nfsi->flags, NFS_INO_FSCACHE_LOCK, + nfs_fscache_wait_bit, TASK_UNINTERRUPTIBLE); +} + +/* + * Unlock cookie management lock + */ +static inline void nfs_fscache_inode_unlock(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + smp_mb__before_clear_bit(); + clear_bit(NFS_INO_FSCACHE_LOCK, &nfsi->flags); + smp_mb__after_clear_bit(); + wake_up_bit(&nfsi->flags, NFS_INO_FSCACHE_LOCK); +} + +/* + * Decide if we should enable or disable local caching for this inode. + * - For now, with NFS, only regular files that are open read-only will be able + * to use the cache. + * - May be invoked multiple times in parallel by parallel nfs_open() functions. + */ +void nfs_fscache_set_inode_cookie(struct inode *inode, struct file *filp) +{ + if (NFS_FSCACHE(inode)) { + nfs_fscache_inode_lock(inode); + if ((filp->f_flags & O_ACCMODE) != O_RDONLY) + nfs_fscache_disable_inode_cookie(inode); + else + nfs_fscache_enable_inode_cookie(inode); + nfs_fscache_inode_unlock(inode); + } +} + +/* + * Replace a per-inode cookie due to revalidation detecting a file having + * changed on the server. + */ +void nfs_fscache_reset_inode_cookie(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_server *nfss = NFS_SERVER(inode); + struct fscache_cookie *old = nfsi->fscache; + + nfs_fscache_inode_lock(inode); + if (nfsi->fscache) { + /* retire the current fscache cache and get a new one */ + fscache_relinquish_cookie(nfsi->fscache, 1); + + nfsi->fscache = fscache_acquire_cookie( + nfss->nfs_client->fscache, + &nfs_fscache_inode_object_def, + nfsi); + + dfprintk(FSCACHE, + "NFS: revalidation new cookie (0x%p/0x%p/0x%p/0x%p)\n", + nfss, nfsi, old, nfsi->fscache); + } + nfs_fscache_inode_unlock(inode); +} + +/* + * Release the caching state associated with a page, if the page isn't busy + * interacting with the cache. + * - Returns true (can release page) or false (page busy). + */ +int nfs_fscache_release_page(struct page *page, gfp_t gfp) +{ + struct nfs_inode *nfsi = NFS_I(page->mapping->host); + struct fscache_cookie *cookie = nfsi->fscache; + + BUG_ON(!cookie); + + if (fscache_check_page_write(cookie, page)) { + if (!(gfp & __GFP_WAIT)) + return 0; + fscache_wait_on_page_write(cookie, page); + } + + if (PageFsCache(page)) { + dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n", + cookie, page, nfsi); + + fscache_uncache_page(cookie, page); + nfs_add_fscache_stats(page->mapping->host, + NFSIOS_FSCACHE_PAGES_UNCACHED, 1); + } + + return 1; +} + +/* + * Release the caching state associated with a page if undergoing complete page + * invalidation. + */ +void __nfs_fscache_invalidate_page(struct page *page, struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct fscache_cookie *cookie = nfsi->fscache; + + BUG_ON(!cookie); + + dfprintk(FSCACHE, "NFS: fscache invalidatepage (0x%p/0x%p/0x%p)\n", + cookie, page, nfsi); + + fscache_wait_on_page_write(cookie, page); + + BUG_ON(!PageLocked(page)); + fscache_uncache_page(cookie, page); + nfs_add_fscache_stats(page->mapping->host, + NFSIOS_FSCACHE_PAGES_UNCACHED, 1); +} + +/* + * Handle completion of a page being read from the cache. + * - Called in process (keventd) context. + */ +static void nfs_readpage_from_fscache_complete(struct page *page, + void *context, + int error) +{ + dfprintk(FSCACHE, + "NFS: readpage_from_fscache_complete (0x%p/0x%p/%d)\n", + page, context, error); + + /* if the read completes with an error, we just unlock the page and let + * the VM reissue the readpage */ + if (!error) { + SetPageUptodate(page); + unlock_page(page); + } else { + error = nfs_readpage_async(context, page->mapping->host, page); + if (error) + unlock_page(page); + } +} + +/* + * Retrieve a page from fscache + */ +int __nfs_readpage_from_fscache(struct nfs_open_context *ctx, + struct inode *inode, struct page *page) +{ + int ret; + + dfprintk(FSCACHE, + "NFS: readpage_from_fscache(fsc:%p/p:%p(i:%lx f:%lx)/0x%p)\n", + NFS_I(inode)->fscache, page, page->index, page->flags, inode); + + ret = fscache_read_or_alloc_page(NFS_I(inode)->fscache, + page, + nfs_readpage_from_fscache_complete, + ctx, + GFP_KERNEL); + + switch (ret) { + case 0: /* read BIO submitted (page in fscache) */ + dfprintk(FSCACHE, + "NFS: readpage_from_fscache: BIO submitted\n"); + nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK, 1); + return ret; + + case -ENOBUFS: /* inode not in cache */ + case -ENODATA: /* page not in cache */ + nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1); + dfprintk(FSCACHE, + "NFS: readpage_from_fscache %d\n", ret); + return 1; + + default: + dfprintk(FSCACHE, "NFS: readpage_from_fscache %d\n", ret); + nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1); + } + return ret; +} + +/* + * Retrieve a set of pages from fscache + */ +int __nfs_readpages_from_fscache(struct nfs_open_context *ctx, + struct inode *inode, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages) +{ + int ret, npages = *nr_pages; + + dfprintk(FSCACHE, "NFS: nfs_getpages_from_fscache (0x%p/%u/0x%p)\n", + NFS_I(inode)->fscache, npages, inode); + + ret = fscache_read_or_alloc_pages(NFS_I(inode)->fscache, + mapping, pages, nr_pages, + nfs_readpage_from_fscache_complete, + ctx, + mapping_gfp_mask(mapping)); + if (*nr_pages < npages) + nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK, + npages); + if (*nr_pages > 0) + nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, + *nr_pages); + + switch (ret) { + case 0: /* read submitted to the cache for all pages */ + BUG_ON(!list_empty(pages)); + BUG_ON(*nr_pages != 0); + dfprintk(FSCACHE, + "NFS: nfs_getpages_from_fscache: submitted\n"); + + return ret; + + case -ENOBUFS: /* some pages aren't cached and can't be */ + case -ENODATA: /* some pages aren't cached */ + dfprintk(FSCACHE, + "NFS: nfs_getpages_from_fscache: no page: %d\n", ret); + return 1; + + default: + dfprintk(FSCACHE, + "NFS: nfs_getpages_from_fscache: ret %d\n", ret); + } + + return ret; +} + +/* + * Store a newly fetched page in fscache + * - PG_fscache must be set on the page + */ +void __nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync) +{ + int ret; + + dfprintk(FSCACHE, + "NFS: readpage_to_fscache(fsc:%p/p:%p(i:%lx f:%lx)/%d)\n", + NFS_I(inode)->fscache, page, page->index, page->flags, sync); + + ret = fscache_write_page(NFS_I(inode)->fscache, page, GFP_KERNEL); + dfprintk(FSCACHE, + "NFS: readpage_to_fscache: p:%p(i:%lu f:%lx) ret %d\n", + page, page->index, page->flags, ret); + + if (ret != 0) { + fscache_uncache_page(NFS_I(inode)->fscache, page); + nfs_add_fscache_stats(inode, + NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL, 1); + nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED, 1); + } else { + nfs_add_fscache_stats(inode, + NFSIOS_FSCACHE_PAGES_WRITTEN_OK, 1); + } +} diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h new file mode 100644 index 00000000000..6e809bb0ff0 --- /dev/null +++ b/fs/nfs/fscache.h @@ -0,0 +1,220 @@ +/* NFS filesystem cache interface definitions + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _NFS_FSCACHE_H +#define _NFS_FSCACHE_H + +#include <linux/nfs_fs.h> +#include <linux/nfs_mount.h> +#include <linux/nfs4_mount.h> +#include <linux/fscache.h> + +#ifdef CONFIG_NFS_FSCACHE + +/* + * set of NFS FS-Cache objects that form a superblock key + */ +struct nfs_fscache_key { + struct rb_node node; + struct nfs_client *nfs_client; /* the server */ + + /* the elements of the unique key - as used by nfs_compare_super() and + * nfs_compare_mount_options() to distinguish superblocks */ + struct { + struct { + unsigned long s_flags; /* various flags + * (& NFS_MS_MASK) */ + } super; + + struct { + struct nfs_fsid fsid; + int flags; + unsigned int rsize; /* read size */ + unsigned int wsize; /* write size */ + unsigned int acregmin; /* attr cache timeouts */ + unsigned int acregmax; + unsigned int acdirmin; + unsigned int acdirmax; + } nfs_server; + + struct { + rpc_authflavor_t au_flavor; + } rpc_auth; + + /* uniquifier - can be used if nfs_server.flags includes + * NFS_MOUNT_UNSHARED */ + u8 uniq_len; + char uniquifier[0]; + } key; +}; + +/* + * fscache-index.c + */ +extern struct fscache_netfs nfs_fscache_netfs; +extern const struct fscache_cookie_def nfs_fscache_server_index_def; +extern const struct fscache_cookie_def nfs_fscache_super_index_def; +extern const struct fscache_cookie_def nfs_fscache_inode_object_def; + +extern int nfs_fscache_register(void); +extern void nfs_fscache_unregister(void); + +/* + * fscache.c + */ +extern void nfs_fscache_get_client_cookie(struct nfs_client *); +extern void nfs_fscache_release_client_cookie(struct nfs_client *); + +extern void nfs_fscache_get_super_cookie(struct super_block *, + struct nfs_parsed_mount_data *); +extern void nfs_fscache_release_super_cookie(struct super_block *); + +extern void nfs_fscache_init_inode_cookie(struct inode *); +extern void nfs_fscache_release_inode_cookie(struct inode *); +extern void nfs_fscache_zap_inode_cookie(struct inode *); +extern void nfs_fscache_set_inode_cookie(struct inode *, struct file *); +extern void nfs_fscache_reset_inode_cookie(struct inode *); + +extern void __nfs_fscache_invalidate_page(struct page *, struct inode *); +extern int nfs_fscache_release_page(struct page *, gfp_t); + +extern int __nfs_readpage_from_fscache(struct nfs_open_context *, + struct inode *, struct page *); +extern int __nfs_readpages_from_fscache(struct nfs_open_context *, + struct inode *, struct address_space *, + struct list_head *, unsigned *); +extern void __nfs_readpage_to_fscache(struct inode *, struct page *, int); + +/* + * wait for a page to complete writing to the cache + */ +static inline void nfs_fscache_wait_on_page_write(struct nfs_inode *nfsi, + struct page *page) +{ + if (PageFsCache(page)) + fscache_wait_on_page_write(nfsi->fscache, page); +} + +/* + * release the caching state associated with a page if undergoing complete page + * invalidation + */ +static inline void nfs_fscache_invalidate_page(struct page *page, + struct inode *inode) +{ + if (PageFsCache(page)) + __nfs_fscache_invalidate_page(page, inode); +} + +/* + * Retrieve a page from an inode data storage object. + */ +static inline int nfs_readpage_from_fscache(struct nfs_open_context *ctx, + struct inode *inode, + struct page *page) +{ + if (NFS_I(inode)->fscache) + return __nfs_readpage_from_fscache(ctx, inode, page); + return -ENOBUFS; +} + +/* + * Retrieve a set of pages from an inode data storage object. + */ +static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx, + struct inode *inode, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages) +{ + if (NFS_I(inode)->fscache) + return __nfs_readpages_from_fscache(ctx, inode, mapping, pages, + nr_pages); + return -ENOBUFS; +} + +/* + * Store a page newly fetched from the server in an inode data storage object + * in the cache. + */ +static inline void nfs_readpage_to_fscache(struct inode *inode, + struct page *page, + int sync) +{ + if (PageFsCache(page)) + __nfs_readpage_to_fscache(inode, page, sync); +} + +/* + * indicate the client caching state as readable text + */ +static inline const char *nfs_server_fscache_state(struct nfs_server *server) +{ + if (server->fscache && (server->options & NFS_OPTION_FSCACHE)) + return "yes"; + return "no "; +} + + +#else /* CONFIG_NFS_FSCACHE */ +static inline int nfs_fscache_register(void) { return 0; } +static inline void nfs_fscache_unregister(void) {} + +static inline void nfs_fscache_get_client_cookie(struct nfs_client *clp) {} +static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {} + +static inline void nfs_fscache_get_super_cookie( + struct super_block *sb, + struct nfs_parsed_mount_data *data) +{ +} +static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {} + +static inline void nfs_fscache_init_inode_cookie(struct inode *inode) {} +static inline void nfs_fscache_release_inode_cookie(struct inode *inode) {} +static inline void nfs_fscache_zap_inode_cookie(struct inode *inode) {} +static inline void nfs_fscache_set_inode_cookie(struct inode *inode, + struct file *filp) {} +static inline void nfs_fscache_reset_inode_cookie(struct inode *inode) {} + +static inline int nfs_fscache_release_page(struct page *page, gfp_t gfp) +{ + return 1; /* True: may release page */ +} +static inline void nfs_fscache_invalidate_page(struct page *page, + struct inode *inode) {} +static inline void nfs_fscache_wait_on_page_write(struct nfs_inode *nfsi, + struct page *page) {} + +static inline int nfs_readpage_from_fscache(struct nfs_open_context *ctx, + struct inode *inode, + struct page *page) +{ + return -ENOBUFS; +} +static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx, + struct inode *inode, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages) +{ + return -ENOBUFS; +} +static inline void nfs_readpage_to_fscache(struct inode *inode, + struct page *page, int sync) {} + +static inline const char *nfs_server_fscache_state(struct nfs_server *server) +{ + return "no "; +} + +#endif /* CONFIG_NFS_FSCACHE */ +#endif /* _NFS_FSCACHE_H */ diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a834d1d850b..64f87194d39 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -46,6 +46,7 @@ #include "delegation.h" #include "iostat.h" #include "internal.h" +#include "fscache.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -121,6 +122,7 @@ void nfs_clear_inode(struct inode *inode) BUG_ON(!list_empty(&NFS_I(inode)->open_files)); nfs_zap_acl_cache(inode); nfs_access_zap_cache(inode); + nfs_fscache_release_inode_cookie(inode); } /** @@ -355,6 +357,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) nfsi->attrtimeo_timestamp = now; nfsi->access_cache = RB_ROOT; + nfs_fscache_init_inode_cookie(inode); + unlock_new_inode(inode); } else nfs_refresh_inode(inode, fattr); @@ -686,6 +690,7 @@ int nfs_open(struct inode *inode, struct file *filp) ctx->mode = filp->f_mode; nfs_file_set_open_context(filp, ctx); put_nfs_open_context(ctx); + nfs_fscache_set_inode_cookie(inode, filp); return 0; } @@ -786,6 +791,7 @@ static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_spa memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); spin_unlock(&inode->i_lock); nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); + nfs_fscache_reset_inode_cookie(inode); dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); return 0; @@ -1030,6 +1036,7 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) spin_lock(&inode->i_lock); status = nfs_refresh_inode_locked(inode, fattr); spin_unlock(&inode->i_lock); + return status; } @@ -1436,6 +1443,10 @@ static int __init init_nfs_fs(void) { int err; + err = nfs_fscache_register(); + if (err < 0) + goto out7; + err = nfsiod_start(); if (err) goto out6; @@ -1488,6 +1499,8 @@ out4: out5: nfsiod_stop(); out6: + nfs_fscache_unregister(); +out7: return err; } @@ -1498,6 +1511,7 @@ static void __exit exit_nfs_fs(void) nfs_destroy_readpagecache(); nfs_destroy_inodecache(); nfs_destroy_nfspagecache(); + nfs_fscache_unregister(); #ifdef CONFIG_PROC_FS rpc_proc_unregister("nfs"); #endif diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 2041f68ff1c..e4d6a8348ad 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -5,6 +5,8 @@ #include <linux/mount.h> #include <linux/security.h> +#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS) + struct nfs_string; /* Maximum number of readahead requests @@ -37,10 +39,12 @@ struct nfs_parsed_mount_data { int acregmin, acregmax, acdirmin, acdirmax; int namlen; + unsigned int options; unsigned int bsize; unsigned int auth_flavor_len; rpc_authflavor_t auth_flavors[1]; char *client_address; + char *fscache_uniq; struct { struct sockaddr_storage address; diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index a3695281003..a2ab2529b5c 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h @@ -16,6 +16,9 @@ struct nfs_iostats { unsigned long long bytes[__NFSIOS_BYTESMAX]; +#ifdef CONFIG_NFS_FSCACHE + unsigned long long fscache[__NFSIOS_FSCACHEMAX]; +#endif unsigned long events[__NFSIOS_COUNTSMAX]; } ____cacheline_aligned; @@ -57,6 +60,21 @@ static inline void nfs_add_stats(const struct inode *inode, nfs_add_server_stats(NFS_SERVER(inode), stat, addend); } +#ifdef CONFIG_NFS_FSCACHE +static inline void nfs_add_fscache_stats(struct inode *inode, + enum nfs_stat_fscachecounters stat, + unsigned long addend) +{ + struct nfs_iostats *iostats; + int cpu; + + cpu = get_cpu(); + iostats = per_cpu_ptr(NFS_SERVER(inode)->io_stats, cpu); + iostats->fscache[stat] += addend; + put_cpu_no_resched(); +} +#endif + static inline struct nfs_iostats *nfs_alloc_iostats(void) { return alloc_percpu(struct nfs_iostats); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index f856004bb7f..4ace3c50a8e 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -24,6 +24,7 @@ #include "internal.h" #include "iostat.h" +#include "fscache.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -111,8 +112,8 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) } } -static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, - struct page *page) +int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, + struct page *page) { LIST_HEAD(one_request); struct nfs_page *new; @@ -139,6 +140,11 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, static void nfs_readpage_release(struct nfs_page *req) { + struct inode *d_inode = req->wb_context->path.dentry->d_inode; + + if (PageUptodate(req->wb_page)) + nfs_readpage_to_fscache(d_inode, req->wb_page, 0); + unlock_page(req->wb_page); dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", @@ -510,8 +516,15 @@ int nfs_readpage(struct file *file, struct page *page) } else ctx = get_nfs_open_context(nfs_file_open_context(file)); + if (!IS_SYNC(inode)) { + error = nfs_readpage_from_fscache(ctx, inode, page); + if (error == 0) + goto out; + } + error = nfs_readpage_async(ctx, inode, page); +out: put_nfs_open_context(ctx); return error; out_unlock: @@ -584,6 +597,15 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, return -EBADF; } else desc.ctx = get_nfs_open_context(nfs_file_open_context(filp)); + + /* attempt to read as many of the pages as possible from the cache + * - this returns -ENOBUFS immediately if the cookie is negative + */ + ret = nfs_readpages_from_fscache(desc.ctx, inode, mapping, + pages, &nr_pages); + if (ret == 0) + goto read_complete; /* all pages were read */ + if (rsize < PAGE_CACHE_SIZE) nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); else @@ -594,6 +616,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, nfs_pageio_complete(&pgio); npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; nfs_add_stats(inode, NFSIOS_READPAGES, npages); +read_complete: put_nfs_open_context(desc.ctx); out: return ret; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 0942fcbbad3..82eaadbff40 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -60,6 +60,7 @@ #include "delegation.h" #include "iostat.h" #include "internal.h" +#include "fscache.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -76,6 +77,7 @@ enum { Opt_rdirplus, Opt_nordirplus, Opt_sharecache, Opt_nosharecache, Opt_resvport, Opt_noresvport, + Opt_fscache, Opt_nofscache, /* Mount options that take integer arguments */ Opt_port, @@ -93,6 +95,7 @@ enum { Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, Opt_addr, Opt_mountaddr, Opt_clientaddr, Opt_lookupcache, + Opt_fscache_uniq, /* Special mount options */ Opt_userspace, Opt_deprecated, Opt_sloppy, @@ -132,6 +135,9 @@ static const match_table_t nfs_mount_option_tokens = { { Opt_nosharecache, "nosharecache" }, { Opt_resvport, "resvport" }, { Opt_noresvport, "noresvport" }, + { Opt_fscache, "fsc" }, + { Opt_fscache_uniq, "fsc=%s" }, + { Opt_nofscache, "nofsc" }, { Opt_port, "port=%u" }, { Opt_rsize, "rsize=%u" }, @@ -563,6 +569,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, if (clp->rpc_ops->version == 4) seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr); #endif + if (nfss->options & NFS_OPTION_FSCACHE) + seq_printf(m, ",fsc"); } /* @@ -641,6 +649,10 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) totals.events[i] += stats->events[i]; for (i = 0; i < __NFSIOS_BYTESMAX; i++) totals.bytes[i] += stats->bytes[i]; +#ifdef CONFIG_NFS_FSCACHE + for (i = 0; i < __NFSIOS_FSCACHEMAX; i++) + totals.fscache[i] += stats->fscache[i]; +#endif preempt_enable(); } @@ -651,6 +663,13 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) seq_printf(m, "\n\tbytes:\t"); for (i = 0; i < __NFSIOS_BYTESMAX; i++) seq_printf(m, "%Lu ", totals.bytes[i]); +#ifdef CONFIG_NFS_FSCACHE + if (nfss->options & NFS_OPTION_FSCACHE) { + seq_printf(m, "\n\tfsc:\t"); + for (i = 0; i < __NFSIOS_FSCACHEMAX; i++) + seq_printf(m, "%Lu ", totals.bytes[i]); + } +#endif seq_printf(m, "\n"); rpc_print_iostats(m, nfss->client); @@ -1044,6 +1063,24 @@ static int nfs_parse_mount_options(char *raw, case Opt_noresvport: mnt->flags |= NFS_MOUNT_NORESVPORT; break; + case Opt_fscache: + mnt->options |= NFS_OPTION_FSCACHE; + kfree(mnt->fscache_uniq); + mnt->fscache_uniq = NULL; + break; + case Opt_nofscache: + mnt->options &= ~NFS_OPTION_FSCACHE; + kfree(mnt->fscache_uniq); + mnt->fscache_uniq = NULL; + break; + case Opt_fscache_uniq: + string = match_strdup(args); + if (!string) + goto out_nomem; + kfree(mnt->fscache_uniq); + mnt->fscache_uniq = string; + mnt->options |= NFS_OPTION_FSCACHE; + break; /* * options that take numeric values @@ -1870,8 +1907,6 @@ static void nfs_clone_super(struct super_block *sb, nfs_initialise_sb(sb); } -#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS) - static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags) { const struct nfs_server *a = s->s_fs_info; @@ -2036,6 +2071,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, if (!s->s_root) { /* initial superblock/root creation */ nfs_fill_super(s, data); + nfs_fscache_get_super_cookie(s, data); } mntroot = nfs_get_root(s, mntfh); @@ -2056,6 +2092,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, out: kfree(data->nfs_server.hostname); kfree(data->mount_server.hostname); + kfree(data->fscache_uniq); security_free_mnt_opts(&data->lsm_opts); out_free_fh: kfree(mntfh); @@ -2083,6 +2120,7 @@ static void nfs_kill_super(struct super_block *s) bdi_unregister(&server->backing_dev_info); kill_anon_super(s); + nfs_fscache_release_super_cookie(s); nfs_free_server(server); } @@ -2390,6 +2428,7 @@ static int nfs4_get_sb(struct file_system_type *fs_type, if (!s->s_root) { /* initial superblock/root creation */ nfs4_fill_super(s); + nfs_fscache_get_super_cookie(s, data); } mntroot = nfs4_get_root(s, mntfh); @@ -2411,6 +2450,7 @@ out: kfree(data->client_address); kfree(data->nfs_server.export_path); kfree(data->nfs_server.hostname); + kfree(data->fscache_uniq); security_free_mnt_opts(&data->lsm_opts); out_free_fh: kfree(mntfh); @@ -2437,6 +2477,7 @@ static void nfs4_kill_super(struct super_block *sb) kill_anon_super(sb); nfs4_renewd_prepare_shutdown(server); + nfs_fscache_release_super_cookie(sb); nfs_free_server(server); } diff --git a/fs/splice.c b/fs/splice.c index 4ed0ba44a96..dd727d43e5b 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -59,7 +59,8 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe, */ wait_on_page_writeback(page); - if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL)) + if (page_has_private(page) && + !try_to_release_page(page, GFP_KERNEL)) goto out_unlock; /* diff --git a/fs/super.c b/fs/super.c index 2ba481518ba..77cb4ec919b 100644 --- a/fs/super.c +++ b/fs/super.c @@ -287,6 +287,7 @@ int fsync_super(struct super_block *sb) __fsync_super(sb); return sync_blockdev(sb->s_bdev); } +EXPORT_SYMBOL_GPL(fsync_super); /** * generic_shutdown_super - common helper for ->kill_sb() |