diff options
Diffstat (limited to 'drivers/staging/zcache/tmem.c')
-rw-r--r-- | drivers/staging/zcache/tmem.c | 898 |
1 files changed, 0 insertions, 898 deletions
diff --git a/drivers/staging/zcache/tmem.c b/drivers/staging/zcache/tmem.c deleted file mode 100644 index d7e51e4152e..00000000000 --- a/drivers/staging/zcache/tmem.c +++ /dev/null @@ -1,898 +0,0 @@ -/* - * In-kernel transcendent memory (generic implementation) - * - * Copyright (c) 2009-2012, Dan Magenheimer, Oracle Corp. - * - * The primary purpose of Transcedent Memory ("tmem") is to map object-oriented - * "handles" (triples containing a pool id, and object id, and an index), to - * pages in a page-accessible memory (PAM). Tmem references the PAM pages via - * an abstract "pampd" (PAM page-descriptor), which can be operated on by a - * set of functions (pamops). Each pampd contains some representation of - * PAGE_SIZE bytes worth of data. For those familiar with key-value stores, - * the tmem handle is a three-level hierarchical key, and the value is always - * reconstituted (but not necessarily stored) as PAGE_SIZE bytes and is - * referenced in the datastore by the pampd. The hierarchy is required - * to ensure that certain invalidation functions can be performed efficiently - * (i.e. flush all indexes associated with this object_id, or - * flush all objects associated with this pool). - * - * Tmem must support potentially millions of pages and must be able to insert, - * find, and delete these pages at a potential frequency of thousands per - * second concurrently across many CPUs, (and, if used with KVM, across many - * vcpus across many guests). Tmem is tracked with a hierarchy of data - * structures, organized by the elements in the handle-tuple: pool_id, - * object_id, and page index. One or more "clients" (e.g. guests) each - * provide one or more tmem_pools. Each pool, contains a hash table of - * rb_trees of tmem_objs. Each tmem_obj contains a radix-tree-like tree - * of pointers, with intermediate nodes called tmem_objnodes. Each leaf - * pointer in this tree points to a pampd, which is accessible only through - * a small set of callbacks registered by the PAM implementation (see - * tmem_register_pamops). Tmem only needs to memory allocation for objs - * and objnodes and this is done via a set of callbacks that must be - * registered by the tmem host implementation (e.g. see tmem_register_hostops). - */ - -#include <linux/list.h> -#include <linux/spinlock.h> -#include <linux/atomic.h> -#include <linux/export.h> -#if defined(CONFIG_RAMSTER) || defined(CONFIG_RAMSTER_MODULE) -#include <linux/delay.h> -#endif - -#include "tmem.h" - -/* data structure sentinels used for debugging... see tmem.h */ -#define POOL_SENTINEL 0x87658765 -#define OBJ_SENTINEL 0x12345678 -#define OBJNODE_SENTINEL 0xfedcba09 - -/* - * A tmem host implementation must use this function to register callbacks - * for memory allocation. - */ -static struct tmem_hostops tmem_hostops; - -static void tmem_objnode_tree_init(void); - -void tmem_register_hostops(struct tmem_hostops *m) -{ - tmem_objnode_tree_init(); - tmem_hostops = *m; -} - -/* - * A tmem host implementation must use this function to register - * callbacks for a page-accessible memory (PAM) implementation. - */ -static struct tmem_pamops tmem_pamops; - -void tmem_register_pamops(struct tmem_pamops *m) -{ - tmem_pamops = *m; -} - -/* - * Oid's are potentially very sparse and tmem_objs may have an indeterminately - * short life, being added and deleted at a relatively high frequency. - * So an rb_tree is an ideal data structure to manage tmem_objs. But because - * of the potentially huge number of tmem_objs, each pool manages a hashtable - * of rb_trees to reduce search, insert, delete, and rebalancing time. - * Each hashbucket also has a lock to manage concurrent access and no - * searches, inserts, or deletions can be performed unless the lock is held. - * As a result, care must be taken to ensure tmem routines are not called - * recursively; the vast majority of the time, a recursive call may work - * but a deadlock will occur a small fraction of the time due to the - * hashbucket lock. - * - * The following routines manage tmem_objs. In all of these routines, - * the hashbucket lock is already held. - */ - -/* Search for object==oid in pool, returns object if found. */ -static struct tmem_obj *__tmem_obj_find(struct tmem_hashbucket *hb, - struct tmem_oid *oidp, - struct rb_node **parent, - struct rb_node ***link) -{ - struct rb_node *_parent = NULL, **rbnode; - struct tmem_obj *obj = NULL; - - rbnode = &hb->obj_rb_root.rb_node; - while (*rbnode) { - BUG_ON(RB_EMPTY_NODE(*rbnode)); - _parent = *rbnode; - obj = rb_entry(*rbnode, struct tmem_obj, - rb_tree_node); - switch (tmem_oid_compare(oidp, &obj->oid)) { - case 0: /* equal */ - goto out; - case -1: - rbnode = &(*rbnode)->rb_left; - break; - case 1: - rbnode = &(*rbnode)->rb_right; - break; - } - } - - if (parent) - *parent = _parent; - if (link) - *link = rbnode; - obj = NULL; -out: - return obj; -} - -static struct tmem_obj *tmem_obj_find(struct tmem_hashbucket *hb, - struct tmem_oid *oidp) -{ - return __tmem_obj_find(hb, oidp, NULL, NULL); -} - -static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *, bool); - -/* Free an object that has no more pampds in it. */ -static void tmem_obj_free(struct tmem_obj *obj, struct tmem_hashbucket *hb) -{ - struct tmem_pool *pool; - - BUG_ON(obj == NULL); - ASSERT_SENTINEL(obj, OBJ); - BUG_ON(obj->pampd_count > 0); - pool = obj->pool; - BUG_ON(pool == NULL); - if (obj->objnode_tree_root != NULL) /* may be "stump" with no leaves */ - tmem_pampd_destroy_all_in_obj(obj, false); - BUG_ON(obj->objnode_tree_root != NULL); - BUG_ON((long)obj->objnode_count != 0); - atomic_dec(&pool->obj_count); - BUG_ON(atomic_read(&pool->obj_count) < 0); - INVERT_SENTINEL(obj, OBJ); - obj->pool = NULL; - tmem_oid_set_invalid(&obj->oid); - rb_erase(&obj->rb_tree_node, &hb->obj_rb_root); -} - -/* - * Initialize, and insert an tmem_object_root (called only if find failed). - */ -static void tmem_obj_init(struct tmem_obj *obj, struct tmem_hashbucket *hb, - struct tmem_pool *pool, - struct tmem_oid *oidp) -{ - struct rb_root *root = &hb->obj_rb_root; - struct rb_node **new = NULL, *parent = NULL; - - BUG_ON(pool == NULL); - atomic_inc(&pool->obj_count); - obj->objnode_tree_height = 0; - obj->objnode_tree_root = NULL; - obj->pool = pool; - obj->oid = *oidp; - obj->objnode_count = 0; - obj->pampd_count = 0; -#ifdef CONFIG_RAMSTER - if (tmem_pamops.new_obj != NULL) - (*tmem_pamops.new_obj)(obj); -#endif - SET_SENTINEL(obj, OBJ); - - if (__tmem_obj_find(hb, oidp, &parent, &new)) - BUG(); - - rb_link_node(&obj->rb_tree_node, parent, new); - rb_insert_color(&obj->rb_tree_node, root); -} - -/* - * Tmem is managed as a set of tmem_pools with certain attributes, such as - * "ephemeral" vs "persistent". These attributes apply to all tmem_objs - * and all pampds that belong to a tmem_pool. A tmem_pool is created - * or deleted relatively rarely (for example, when a filesystem is - * mounted or unmounted). - */ - -/* flush all data from a pool and, optionally, free it */ -static void tmem_pool_flush(struct tmem_pool *pool, bool destroy) -{ - struct rb_node *rbnode; - struct tmem_obj *obj; - struct tmem_hashbucket *hb = &pool->hashbucket[0]; - int i; - - BUG_ON(pool == NULL); - for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) { - spin_lock(&hb->lock); - rbnode = rb_first(&hb->obj_rb_root); - while (rbnode != NULL) { - obj = rb_entry(rbnode, struct tmem_obj, rb_tree_node); - rbnode = rb_next(rbnode); - tmem_pampd_destroy_all_in_obj(obj, true); - tmem_obj_free(obj, hb); - (*tmem_hostops.obj_free)(obj, pool); - } - spin_unlock(&hb->lock); - } - if (destroy) - list_del(&pool->pool_list); -} - -/* - * A tmem_obj contains a radix-tree-like tree in which the intermediate - * nodes are called tmem_objnodes. (The kernel lib/radix-tree.c implementation - * is very specialized and tuned for specific uses and is not particularly - * suited for use from this code, though some code from the core algorithms has - * been reused, thus the copyright notices below). Each tmem_objnode contains - * a set of pointers which point to either a set of intermediate tmem_objnodes - * or a set of of pampds. - * - * Portions Copyright (C) 2001 Momchil Velikov - * Portions Copyright (C) 2001 Christoph Hellwig - * Portions Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com> - */ - -struct tmem_objnode_tree_path { - struct tmem_objnode *objnode; - int offset; -}; - -/* objnode height_to_maxindex translation */ -static unsigned long tmem_objnode_tree_h2max[OBJNODE_TREE_MAX_PATH + 1]; - -static void tmem_objnode_tree_init(void) -{ - unsigned int ht, tmp; - - for (ht = 0; ht < ARRAY_SIZE(tmem_objnode_tree_h2max); ht++) { - tmp = ht * OBJNODE_TREE_MAP_SHIFT; - if (tmp >= OBJNODE_TREE_INDEX_BITS) - tmem_objnode_tree_h2max[ht] = ~0UL; - else - tmem_objnode_tree_h2max[ht] = - (~0UL >> (OBJNODE_TREE_INDEX_BITS - tmp - 1)) >> 1; - } -} - -static struct tmem_objnode *tmem_objnode_alloc(struct tmem_obj *obj) -{ - struct tmem_objnode *objnode; - - ASSERT_SENTINEL(obj, OBJ); - BUG_ON(obj->pool == NULL); - ASSERT_SENTINEL(obj->pool, POOL); - objnode = (*tmem_hostops.objnode_alloc)(obj->pool); - if (unlikely(objnode == NULL)) - goto out; - objnode->obj = obj; - SET_SENTINEL(objnode, OBJNODE); - memset(&objnode->slots, 0, sizeof(objnode->slots)); - objnode->slots_in_use = 0; - obj->objnode_count++; -out: - return objnode; -} - -static void tmem_objnode_free(struct tmem_objnode *objnode) -{ - struct tmem_pool *pool; - int i; - - BUG_ON(objnode == NULL); - for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++) - BUG_ON(objnode->slots[i] != NULL); - ASSERT_SENTINEL(objnode, OBJNODE); - INVERT_SENTINEL(objnode, OBJNODE); - BUG_ON(objnode->obj == NULL); - ASSERT_SENTINEL(objnode->obj, OBJ); - pool = objnode->obj->pool; - BUG_ON(pool == NULL); - ASSERT_SENTINEL(pool, POOL); - objnode->obj->objnode_count--; - objnode->obj = NULL; - (*tmem_hostops.objnode_free)(objnode, pool); -} - -/* - * Lookup index in object and return associated pampd (or NULL if not found). - */ -static void **__tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index) -{ - unsigned int height, shift; - struct tmem_objnode **slot = NULL; - - BUG_ON(obj == NULL); - ASSERT_SENTINEL(obj, OBJ); - BUG_ON(obj->pool == NULL); - ASSERT_SENTINEL(obj->pool, POOL); - - height = obj->objnode_tree_height; - if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height]) - goto out; - if (height == 0 && obj->objnode_tree_root) { - slot = &obj->objnode_tree_root; - goto out; - } - shift = (height-1) * OBJNODE_TREE_MAP_SHIFT; - slot = &obj->objnode_tree_root; - while (height > 0) { - if (*slot == NULL) - goto out; - slot = (struct tmem_objnode **) - ((*slot)->slots + - ((index >> shift) & OBJNODE_TREE_MAP_MASK)); - shift -= OBJNODE_TREE_MAP_SHIFT; - height--; - } -out: - return slot != NULL ? (void **)slot : NULL; -} - -static void *tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index) -{ - struct tmem_objnode **slot; - - slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index); - return slot != NULL ? *slot : NULL; -} - -#ifdef CONFIG_RAMSTER -static void *tmem_pampd_replace_in_obj(struct tmem_obj *obj, uint32_t index, - void *new_pampd, bool no_free) -{ - struct tmem_objnode **slot; - void *ret = NULL; - - slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index); - if ((slot != NULL) && (*slot != NULL)) { - void *old_pampd = *(void **)slot; - *(void **)slot = new_pampd; - if (!no_free) - (*tmem_pamops.free)(old_pampd, obj->pool, - NULL, 0, false); - ret = new_pampd; - } - return ret; -} -#endif - -static int tmem_pampd_add_to_obj(struct tmem_obj *obj, uint32_t index, - void *pampd) -{ - int ret = 0; - struct tmem_objnode *objnode = NULL, *newnode, *slot; - unsigned int height, shift; - int offset = 0; - - /* if necessary, extend the tree to be higher */ - if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height]) { - height = obj->objnode_tree_height + 1; - if (index > tmem_objnode_tree_h2max[height]) - while (index > tmem_objnode_tree_h2max[height]) - height++; - if (obj->objnode_tree_root == NULL) { - obj->objnode_tree_height = height; - goto insert; - } - do { - newnode = tmem_objnode_alloc(obj); - if (!newnode) { - ret = -ENOMEM; - goto out; - } - newnode->slots[0] = obj->objnode_tree_root; - newnode->slots_in_use = 1; - obj->objnode_tree_root = newnode; - obj->objnode_tree_height++; - } while (height > obj->objnode_tree_height); - } -insert: - slot = obj->objnode_tree_root; - height = obj->objnode_tree_height; - shift = (height-1) * OBJNODE_TREE_MAP_SHIFT; - while (height > 0) { - if (slot == NULL) { - /* add a child objnode. */ - slot = tmem_objnode_alloc(obj); - if (!slot) { - ret = -ENOMEM; - goto out; - } - if (objnode) { - - objnode->slots[offset] = slot; - objnode->slots_in_use++; - } else - obj->objnode_tree_root = slot; - } - /* go down a level */ - offset = (index >> shift) & OBJNODE_TREE_MAP_MASK; - objnode = slot; - slot = objnode->slots[offset]; - shift -= OBJNODE_TREE_MAP_SHIFT; - height--; - } - BUG_ON(slot != NULL); - if (objnode) { - objnode->slots_in_use++; - objnode->slots[offset] = pampd; - } else - obj->objnode_tree_root = pampd; - obj->pampd_count++; -out: - return ret; -} - -static void *tmem_pampd_delete_from_obj(struct tmem_obj *obj, uint32_t index) -{ - struct tmem_objnode_tree_path path[OBJNODE_TREE_MAX_PATH + 1]; - struct tmem_objnode_tree_path *pathp = path; - struct tmem_objnode *slot = NULL; - unsigned int height, shift; - int offset; - - BUG_ON(obj == NULL); - ASSERT_SENTINEL(obj, OBJ); - BUG_ON(obj->pool == NULL); - ASSERT_SENTINEL(obj->pool, POOL); - height = obj->objnode_tree_height; - if (index > tmem_objnode_tree_h2max[height]) - goto out; - slot = obj->objnode_tree_root; - if (height == 0 && obj->objnode_tree_root) { - obj->objnode_tree_root = NULL; - goto out; - } - shift = (height - 1) * OBJNODE_TREE_MAP_SHIFT; - pathp->objnode = NULL; - do { - if (slot == NULL) - goto out; - pathp++; - offset = (index >> shift) & OBJNODE_TREE_MAP_MASK; - pathp->offset = offset; - pathp->objnode = slot; - slot = slot->slots[offset]; - shift -= OBJNODE_TREE_MAP_SHIFT; - height--; - } while (height > 0); - if (slot == NULL) - goto out; - while (pathp->objnode) { - pathp->objnode->slots[pathp->offset] = NULL; - pathp->objnode->slots_in_use--; - if (pathp->objnode->slots_in_use) { - if (pathp->objnode == obj->objnode_tree_root) { - while (obj->objnode_tree_height > 0 && - obj->objnode_tree_root->slots_in_use == 1 && - obj->objnode_tree_root->slots[0]) { - struct tmem_objnode *to_free = - obj->objnode_tree_root; - - obj->objnode_tree_root = - to_free->slots[0]; - obj->objnode_tree_height--; - to_free->slots[0] = NULL; - to_free->slots_in_use = 0; - tmem_objnode_free(to_free); - } - } - goto out; - } - tmem_objnode_free(pathp->objnode); /* 0 slots used, free it */ - pathp--; - } - obj->objnode_tree_height = 0; - obj->objnode_tree_root = NULL; - -out: - if (slot != NULL) - obj->pampd_count--; - BUG_ON(obj->pampd_count < 0); - return slot; -} - -/* Recursively walk the objnode_tree destroying pampds and objnodes. */ -static void tmem_objnode_node_destroy(struct tmem_obj *obj, - struct tmem_objnode *objnode, - unsigned int ht) -{ - int i; - - if (ht == 0) - return; - for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++) { - if (objnode->slots[i]) { - if (ht == 1) { - obj->pampd_count--; - (*tmem_pamops.free)(objnode->slots[i], - obj->pool, NULL, 0, true); - objnode->slots[i] = NULL; - continue; - } - tmem_objnode_node_destroy(obj, objnode->slots[i], ht-1); - tmem_objnode_free(objnode->slots[i]); - objnode->slots[i] = NULL; - } - } -} - -static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj, - bool pool_destroy) -{ - if (obj->objnode_tree_root == NULL) - return; - if (obj->objnode_tree_height == 0) { - obj->pampd_count--; - (*tmem_pamops.free)(obj->objnode_tree_root, - obj->pool, NULL, 0, true); - } else { - tmem_objnode_node_destroy(obj, obj->objnode_tree_root, - obj->objnode_tree_height); - tmem_objnode_free(obj->objnode_tree_root); - obj->objnode_tree_height = 0; - } - obj->objnode_tree_root = NULL; -#ifdef CONFIG_RAMSTER - if (tmem_pamops.free_obj != NULL) - (*tmem_pamops.free_obj)(obj->pool, obj, pool_destroy); -#endif -} - -/* - * Tmem is operated on by a set of well-defined actions: - * "put", "get", "flush", "flush_object", "new pool" and "destroy pool". - * (The tmem ABI allows for subpages and exchanges but these operations - * are not included in this implementation.) - * - * These "tmem core" operations are implemented in the following functions. - */ - -/* - * "Put" a page, e.g. associate the passed pampd with the passed handle. - * Tmem_put is complicated by a corner case: What if a page with matching - * handle already exists in tmem? To guarantee coherency, one of two - * actions is necessary: Either the data for the page must be overwritten, - * or the page must be "flushed" so that the data is not accessible to a - * subsequent "get". Since these "duplicate puts" are relatively rare, - * this implementation always flushes for simplicity. - */ -int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index, - bool raw, void *pampd_to_use) -{ - struct tmem_obj *obj = NULL, *objfound = NULL, *objnew = NULL; - void *pampd = NULL, *pampd_del = NULL; - int ret = -ENOMEM; - struct tmem_hashbucket *hb; - - hb = &pool->hashbucket[tmem_oid_hash(oidp)]; - spin_lock(&hb->lock); - obj = objfound = tmem_obj_find(hb, oidp); - if (obj != NULL) { - pampd = tmem_pampd_lookup_in_obj(objfound, index); - if (pampd != NULL) { - /* if found, is a dup put, flush the old one */ - pampd_del = tmem_pampd_delete_from_obj(obj, index); - BUG_ON(pampd_del != pampd); - (*tmem_pamops.free)(pampd, pool, oidp, index, true); - if (obj->pampd_count == 0) { - objnew = obj; - objfound = NULL; - } - pampd = NULL; - } - } else { - obj = objnew = (*tmem_hostops.obj_alloc)(pool); - if (unlikely(obj == NULL)) { - ret = -ENOMEM; - goto out; - } - tmem_obj_init(obj, hb, pool, oidp); - } - BUG_ON(obj == NULL); - BUG_ON(((objnew != obj) && (objfound != obj)) || (objnew == objfound)); - pampd = pampd_to_use; - BUG_ON(pampd_to_use == NULL); - ret = tmem_pampd_add_to_obj(obj, index, pampd); - if (unlikely(ret == -ENOMEM)) - /* may have partially built objnode tree ("stump") */ - goto delete_and_free; - (*tmem_pamops.create_finish)(pampd, is_ephemeral(pool)); - goto out; - -delete_and_free: - (void)tmem_pampd_delete_from_obj(obj, index); - if (pampd) - (*tmem_pamops.free)(pampd, pool, NULL, 0, true); - if (objnew) { - tmem_obj_free(objnew, hb); - (*tmem_hostops.obj_free)(objnew, pool); - } -out: - spin_unlock(&hb->lock); - return ret; -} - -#ifdef CONFIG_RAMSTER -/* - * For ramster only: The following routines provide a two-step sequence - * to allow the caller to replace a pampd in the tmem data structures with - * another pampd. Here, we lookup the passed handle and, if found, return the - * associated pampd and object, leaving the hashbucket locked and returning - * a reference to it. The caller is expected to immediately call the - * matching tmem_localify_finish routine which will handles the replacement - * and unlocks the hashbucket. - */ -void *tmem_localify_get_pampd(struct tmem_pool *pool, struct tmem_oid *oidp, - uint32_t index, struct tmem_obj **ret_obj, - void **saved_hb) -{ - struct tmem_hashbucket *hb; - struct tmem_obj *obj = NULL; - void *pampd = NULL; - - hb = &pool->hashbucket[tmem_oid_hash(oidp)]; - spin_lock(&hb->lock); - obj = tmem_obj_find(hb, oidp); - if (likely(obj != NULL)) - pampd = tmem_pampd_lookup_in_obj(obj, index); - *ret_obj = obj; - *saved_hb = (void *)hb; - /* note, hashbucket remains locked */ - return pampd; -} -EXPORT_SYMBOL_GPL(tmem_localify_get_pampd); - -void tmem_localify_finish(struct tmem_obj *obj, uint32_t index, - void *pampd, void *saved_hb, bool delete) -{ - struct tmem_hashbucket *hb = (struct tmem_hashbucket *)saved_hb; - - BUG_ON(!spin_is_locked(&hb->lock)); - if (pampd != NULL) { - BUG_ON(obj == NULL); - (void)tmem_pampd_replace_in_obj(obj, index, pampd, 1); - (*tmem_pamops.create_finish)(pampd, is_ephemeral(obj->pool)); - } else if (delete) { - BUG_ON(obj == NULL); - (void)tmem_pampd_delete_from_obj(obj, index); - } - spin_unlock(&hb->lock); -} -EXPORT_SYMBOL_GPL(tmem_localify_finish); - -/* - * For ramster only. Helper function to support asynchronous tmem_get. - */ -static int tmem_repatriate(void **ppampd, struct tmem_hashbucket *hb, - struct tmem_pool *pool, struct tmem_oid *oidp, - uint32_t index, bool free, char *data) -{ - void *old_pampd = *ppampd, *new_pampd = NULL; - bool intransit = false; - int ret = 0; - - if (!is_ephemeral(pool)) - new_pampd = (*tmem_pamops.repatriate_preload)( - old_pampd, pool, oidp, index, &intransit); - if (intransit) - ret = -EAGAIN; - else if (new_pampd != NULL) - *ppampd = new_pampd; - /* must release the hb->lock else repatriate can't sleep */ - spin_unlock(&hb->lock); - if (!intransit) - ret = (*tmem_pamops.repatriate)(old_pampd, new_pampd, pool, - oidp, index, free, data); - if (ret == -EAGAIN) { - /* rare I think, but should cond_resched()??? */ - usleep_range(10, 1000); - } else if (ret == -ENOTCONN || ret == -EHOSTDOWN) { - ret = -1; - } else if (ret != 0 && ret != -ENOENT) { - ret = -1; - } - /* note hb->lock has now been unlocked */ - return ret; -} - -/* - * For ramster only. If a page in tmem matches the handle, replace the - * page so that any subsequent "get" gets the new page. Returns 0 if - * there was a page to replace, else returns -1. - */ -int tmem_replace(struct tmem_pool *pool, struct tmem_oid *oidp, - uint32_t index, void *new_pampd) -{ - struct tmem_obj *obj; - int ret = -1; - struct tmem_hashbucket *hb; - - hb = &pool->hashbucket[tmem_oid_hash(oidp)]; - spin_lock(&hb->lock); - obj = tmem_obj_find(hb, oidp); - if (obj == NULL) - goto out; - new_pampd = tmem_pampd_replace_in_obj(obj, index, new_pampd, 0); - /* if we bug here, pamops wasn't properly set up for ramster */ - BUG_ON(tmem_pamops.replace_in_obj == NULL); - ret = (*tmem_pamops.replace_in_obj)(new_pampd, obj); -out: - spin_unlock(&hb->lock); - return ret; -} -EXPORT_SYMBOL_GPL(tmem_replace); -#endif - -/* - * "Get" a page, e.g. if a pampd can be found matching the passed handle, - * use a pamops callback to recreated the page from the pampd with the - * matching handle. By tmem definition, when a "get" is successful on - * an ephemeral page, the page is "flushed", and when a "get" is successful - * on a persistent page, the page is retained in tmem. Note that to preserve - * coherency, "get" can never be skipped if tmem contains the data. - * That is, if a get is done with a certain handle and fails, any - * subsequent "get" must also fail (unless of course there is a - * "put" done with the same handle). - */ -int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index, - char *data, size_t *sizep, bool raw, int get_and_free) -{ - struct tmem_obj *obj; - void *pampd = NULL; - bool ephemeral = is_ephemeral(pool); - int ret = -1; - struct tmem_hashbucket *hb; - bool free = (get_and_free == 1) || ((get_and_free == 0) && ephemeral); - bool lock_held = false; - void **ppampd; - - do { - hb = &pool->hashbucket[tmem_oid_hash(oidp)]; - spin_lock(&hb->lock); - lock_held = true; - obj = tmem_obj_find(hb, oidp); - if (obj == NULL) - goto out; - ppampd = __tmem_pampd_lookup_in_obj(obj, index); - if (ppampd == NULL) - goto out; -#ifdef CONFIG_RAMSTER - if ((tmem_pamops.is_remote != NULL) && - tmem_pamops.is_remote(*ppampd)) { - ret = tmem_repatriate(ppampd, hb, pool, oidp, - index, free, data); - /* tmem_repatriate releases hb->lock */ - lock_held = false; - *sizep = PAGE_SIZE; - if (ret != -EAGAIN) - goto out; - } -#endif - } while (ret == -EAGAIN); - if (free) - pampd = tmem_pampd_delete_from_obj(obj, index); - else - pampd = tmem_pampd_lookup_in_obj(obj, index); - if (pampd == NULL) - goto out; - if (free) { - if (obj->pampd_count == 0) { - tmem_obj_free(obj, hb); - (*tmem_hostops.obj_free)(obj, pool); - obj = NULL; - } - } - if (free) - ret = (*tmem_pamops.get_data_and_free)( - data, sizep, raw, pampd, pool, oidp, index); - else - ret = (*tmem_pamops.get_data)( - data, sizep, raw, pampd, pool, oidp, index); - if (ret < 0) - goto out; - ret = 0; -out: - if (lock_held) - spin_unlock(&hb->lock); - return ret; -} - -/* - * If a page in tmem matches the handle, "flush" this page from tmem such - * that any subsequent "get" does not succeed (unless, of course, there - * was another "put" with the same handle). - */ -int tmem_flush_page(struct tmem_pool *pool, - struct tmem_oid *oidp, uint32_t index) -{ - struct tmem_obj *obj; - void *pampd; - int ret = -1; - struct tmem_hashbucket *hb; - - hb = &pool->hashbucket[tmem_oid_hash(oidp)]; - spin_lock(&hb->lock); - obj = tmem_obj_find(hb, oidp); - if (obj == NULL) - goto out; - pampd = tmem_pampd_delete_from_obj(obj, index); - if (pampd == NULL) - goto out; - (*tmem_pamops.free)(pampd, pool, oidp, index, true); - if (obj->pampd_count == 0) { - tmem_obj_free(obj, hb); - (*tmem_hostops.obj_free)(obj, pool); - } - ret = 0; - -out: - spin_unlock(&hb->lock); - return ret; -} - -/* - * "Flush" all pages in tmem matching this oid. - */ -int tmem_flush_object(struct tmem_pool *pool, struct tmem_oid *oidp) -{ - struct tmem_obj *obj; - struct tmem_hashbucket *hb; - int ret = -1; - - hb = &pool->hashbucket[tmem_oid_hash(oidp)]; - spin_lock(&hb->lock); - obj = tmem_obj_find(hb, oidp); - if (obj == NULL) - goto out; - tmem_pampd_destroy_all_in_obj(obj, false); - tmem_obj_free(obj, hb); - (*tmem_hostops.obj_free)(obj, pool); - ret = 0; - -out: - spin_unlock(&hb->lock); - return ret; -} - -/* - * "Flush" all pages (and tmem_objs) from this tmem_pool and disable - * all subsequent access to this tmem_pool. - */ -int tmem_destroy_pool(struct tmem_pool *pool) -{ - int ret = -1; - - if (pool == NULL) - goto out; - tmem_pool_flush(pool, 1); - ret = 0; -out: - return ret; -} - -static LIST_HEAD(tmem_global_pool_list); - -/* - * Create a new tmem_pool with the provided flag and return - * a pool id provided by the tmem host implementation. - */ -void tmem_new_pool(struct tmem_pool *pool, uint32_t flags) -{ - int persistent = flags & TMEM_POOL_PERSIST; - int shared = flags & TMEM_POOL_SHARED; - struct tmem_hashbucket *hb = &pool->hashbucket[0]; - int i; - - for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) { - hb->obj_rb_root = RB_ROOT; - spin_lock_init(&hb->lock); - } - INIT_LIST_HEAD(&pool->pool_list); - atomic_set(&pool->obj_count, 0); - SET_SENTINEL(pool, POOL); - list_add_tail(&pool->pool_list, &tmem_global_pool_list); - pool->persistent = persistent; - pool->shared = shared; -} |