diff options
Diffstat (limited to 'fs/afs/vlocation.c')
-rw-r--r-- | fs/afs/vlocation.c | 1153 |
1 files changed, 469 insertions, 684 deletions
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index e48728c9217..60cb2f408c7 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c @@ -1,6 +1,6 @@ -/* volume location management +/* AFS volume location management * - * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -12,130 +12,60 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> -#include <linux/slab.h> -#include <linux/fs.h> -#include <linux/pagemap.h> -#include "volume.h" -#include "cell.h" -#include "cmservice.h" -#include "fsclient.h" -#include "vlclient.h" -#include "kafstimod.h" -#include <rxrpc/connection.h> #include "internal.h" -#define AFS_VLDB_TIMEOUT HZ*1000 +unsigned afs_vlocation_timeout = 10; /* volume location timeout in seconds */ +unsigned afs_vlocation_update_timeout = 10 * 60; -static void afs_vlocation_update_timer(struct afs_timer *timer); -static void afs_vlocation_update_attend(struct afs_async_op *op); -static void afs_vlocation_update_discard(struct afs_async_op *op); -static void __afs_put_vlocation(struct afs_vlocation *vlocation); +static void afs_vlocation_reaper(struct work_struct *); +static void afs_vlocation_updater(struct work_struct *); -static void __afs_vlocation_timeout(struct afs_timer *timer) -{ - struct afs_vlocation *vlocation = - list_entry(timer, struct afs_vlocation, timeout); - - _debug("VL TIMEOUT [%s{u=%d}]", - vlocation->vldb.name, atomic_read(&vlocation->usage)); - - afs_vlocation_do_timeout(vlocation); -} - -static const struct afs_timer_ops afs_vlocation_timer_ops = { - .timed_out = __afs_vlocation_timeout, -}; - -static const struct afs_timer_ops afs_vlocation_update_timer_ops = { - .timed_out = afs_vlocation_update_timer, -}; - -static const struct afs_async_op_ops afs_vlocation_update_op_ops = { - .attend = afs_vlocation_update_attend, - .discard = afs_vlocation_update_discard, -}; - -static LIST_HEAD(afs_vlocation_update_pendq); /* queue of VLs awaiting update */ -static struct afs_vlocation *afs_vlocation_update; /* VL currently being updated */ -static DEFINE_SPINLOCK(afs_vlocation_update_lock); /* lock guarding update queue */ - -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_vlocation_cache_match(void *target, - const void *entry); -static void afs_vlocation_cache_update(void *source, void *entry); - -struct cachefs_index_def afs_vlocation_cache_index_def = { - .name = "vldb", - .data_size = sizeof(struct afs_cache_vlocation), - .keys[0] = { CACHEFS_INDEX_KEYS_ASCIIZ, 64 }, - .match = afs_vlocation_cache_match, - .update = afs_vlocation_cache_update, -}; -#endif +static LIST_HEAD(afs_vlocation_updates); +static LIST_HEAD(afs_vlocation_graveyard); +static DEFINE_SPINLOCK(afs_vlocation_updates_lock); +static DEFINE_SPINLOCK(afs_vlocation_graveyard_lock); +static DECLARE_DELAYED_WORK(afs_vlocation_reap, afs_vlocation_reaper); +static DECLARE_DELAYED_WORK(afs_vlocation_update, afs_vlocation_updater); +static struct workqueue_struct *afs_vlocation_update_worker; /* * iterate through the VL servers in a cell until one of them admits knowing * about the volume in question - * - caller must have cell->vl_sem write-locked */ -static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vlocation, - const char *name, - unsigned namesz, +static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl, struct afs_cache_vlocation *vldb) { - struct afs_server *server = NULL; - struct afs_cell *cell = vlocation->cell; + struct afs_cell *cell = vl->cell; + struct in_addr addr; int count, ret; - _enter("%s,%*.*s,%u", cell->name, namesz, namesz, name, namesz); + _enter("%s,%s", cell->name, vl->vldb.name); + down_write(&vl->cell->vl_sem); ret = -ENOMEDIUM; for (count = cell->vl_naddrs; count > 0; count--) { - _debug("CellServ[%hu]: %08x", - cell->vl_curr_svix, - cell->vl_addrs[cell->vl_curr_svix].s_addr); - - /* try and create a server */ - ret = afs_server_lookup(cell, - &cell->vl_addrs[cell->vl_curr_svix], - &server); - switch (ret) { - case 0: - break; - case -ENOMEM: - case -ENONET: - goto out; - default: - goto rotate; - } + addr = cell->vl_addrs[cell->vl_curr_svix]; + + _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr); /* attempt to access the VL server */ - ret = afs_rxvl_get_entry_by_name(server, name, namesz, vldb); + ret = afs_vl_get_entry_by_name(&addr, vl->vldb.name, vldb, + &afs_sync_call); switch (ret) { case 0: - afs_put_server(server); goto out; case -ENOMEM: case -ENONET: case -ENETUNREACH: case -EHOSTUNREACH: case -ECONNREFUSED: - down_write(&server->sem); - if (server->vlserver) { - rxrpc_put_connection(server->vlserver); - server->vlserver = NULL; - } - up_write(&server->sem); - afs_put_server(server); if (ret == -ENOMEM || ret == -ENONET) goto out; goto rotate; case -ENOMEDIUM: - afs_put_server(server); goto out; default: - afs_put_server(server); - ret = -ENOMEDIUM; + ret = -EIO; goto rotate; } @@ -146,6 +76,7 @@ static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vlocation, } out: + up_write(&vl->cell->vl_sem); _leave(" = %d", ret); return ret; } @@ -153,66 +84,56 @@ out: /* * iterate through the VL servers in a cell until one of them admits knowing * about the volume in question - * - caller must have cell->vl_sem write-locked */ -static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vlocation, +static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl, afs_volid_t volid, afs_voltype_t voltype, struct afs_cache_vlocation *vldb) { - struct afs_server *server = NULL; - struct afs_cell *cell = vlocation->cell; + struct afs_cell *cell = vl->cell; + struct in_addr addr; int count, ret; _enter("%s,%x,%d,", cell->name, volid, voltype); + down_write(&vl->cell->vl_sem); ret = -ENOMEDIUM; for (count = cell->vl_naddrs; count > 0; count--) { - _debug("CellServ[%hu]: %08x", - cell->vl_curr_svix, - cell->vl_addrs[cell->vl_curr_svix].s_addr); - - /* try and create a server */ - ret = afs_server_lookup(cell, - &cell->vl_addrs[cell->vl_curr_svix], - &server); - switch (ret) { - case 0: - break; - case -ENOMEM: - case -ENONET: - goto out; - default: - goto rotate; - } + addr = cell->vl_addrs[cell->vl_curr_svix]; + + _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr); /* attempt to access the VL server */ - ret = afs_rxvl_get_entry_by_id(server, volid, voltype, vldb); + ret = afs_vl_get_entry_by_id(&addr, volid, voltype, vldb, + &afs_sync_call); switch (ret) { case 0: - afs_put_server(server); goto out; case -ENOMEM: case -ENONET: case -ENETUNREACH: case -EHOSTUNREACH: case -ECONNREFUSED: - down_write(&server->sem); - if (server->vlserver) { - rxrpc_put_connection(server->vlserver); - server->vlserver = NULL; - } - up_write(&server->sem); - afs_put_server(server); if (ret == -ENOMEM || ret == -ENONET) goto out; goto rotate; + case -EBUSY: + vl->upd_busy_cnt++; + if (vl->upd_busy_cnt <= 3) { + if (vl->upd_busy_cnt > 1) { + /* second+ BUSY - sleep a little bit */ + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(1); + __set_current_state(TASK_RUNNING); + } + continue; + } + break; case -ENOMEDIUM: - afs_put_server(server); - goto out; + vl->upd_rej_cnt++; + goto rotate; default: - afs_put_server(server); - ret = -ENOMEDIUM; + ret = -EIO; goto rotate; } @@ -220,150 +141,83 @@ static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vlocation, rotate: cell->vl_curr_svix++; cell->vl_curr_svix %= cell->vl_naddrs; + vl->upd_busy_cnt = 0; } out: + if (ret < 0 && vl->upd_rej_cnt > 0) { + printk(KERN_NOTICE "kAFS:" + " Active volume no longer valid '%s'\n", + vl->vldb.name); + vl->valid = 0; + ret = -ENOMEDIUM; + } + + up_write(&vl->cell->vl_sem); _leave(" = %d", ret); return ret; } /* - * lookup volume location - * - caller must have cell->vol_sem write-locked - * - iterate through the VL servers in a cell until one of them admits knowing - * about the volume in question - * - lookup in the local cache if not able to find on the VL server - * - insert/update in the local cache if did get a VL response + * allocate a volume location record */ -int afs_vlocation_lookup(struct afs_cell *cell, - const char *name, - unsigned namesz, - struct afs_vlocation **_vlocation) +static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell, + const char *name, + size_t namesz) { - struct afs_cache_vlocation vldb; - struct afs_vlocation *vlocation; - afs_voltype_t voltype; - afs_volid_t vid; - int active = 0, ret; - - _enter("{%s},%*.*s,%u,", cell->name, namesz, namesz, name, namesz); - - if (namesz > sizeof(vlocation->vldb.name)) { - _leave(" = -ENAMETOOLONG"); - return -ENAMETOOLONG; - } - - /* search the cell's active list first */ - list_for_each_entry(vlocation, &cell->vl_list, link) { - if (namesz < sizeof(vlocation->vldb.name) && - vlocation->vldb.name[namesz] != '\0') - continue; - - if (memcmp(vlocation->vldb.name, name, namesz) == 0) - goto found_in_memory; - } - - /* search the cell's graveyard list second */ - spin_lock(&cell->vl_gylock); - list_for_each_entry(vlocation, &cell->vl_graveyard, link) { - if (namesz < sizeof(vlocation->vldb.name) && - vlocation->vldb.name[namesz] != '\0') - continue; - - if (memcmp(vlocation->vldb.name, name, namesz) == 0) - goto found_in_graveyard; - } - spin_unlock(&cell->vl_gylock); - - /* not in the cell's in-memory lists - create a new record */ - vlocation = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL); - if (!vlocation) - return -ENOMEM; - - atomic_set(&vlocation->usage, 1); - INIT_LIST_HEAD(&vlocation->link); - rwlock_init(&vlocation->lock); - memcpy(vlocation->vldb.name, name, namesz); - - afs_timer_init(&vlocation->timeout, &afs_vlocation_timer_ops); - afs_timer_init(&vlocation->upd_timer, &afs_vlocation_update_timer_ops); - afs_async_op_init(&vlocation->upd_op, &afs_vlocation_update_op_ops); - - afs_get_cell(cell); - vlocation->cell = cell; - - list_add_tail(&vlocation->link, &cell->vl_list); - -#ifdef AFS_CACHING_SUPPORT - /* we want to store it in the cache, plus it might already be - * encached */ - cachefs_acquire_cookie(cell->cache, - &afs_volume_cache_index_def, - vlocation, - &vlocation->cache); - - if (vlocation->valid) - goto found_in_cache; -#endif - - /* try to look up an unknown volume in the cell VL databases by name */ - ret = afs_vlocation_access_vl_by_name(vlocation, name, namesz, &vldb); - if (ret < 0) { - printk("kAFS: failed to locate '%*.*s' in cell '%s'\n", - namesz, namesz, name, cell->name); - goto error; + struct afs_vlocation *vl; + + vl = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL); + if (vl) { + vl->cell = cell; + vl->state = AFS_VL_NEW; + atomic_set(&vl->usage, 1); + INIT_LIST_HEAD(&vl->link); + INIT_LIST_HEAD(&vl->grave); + INIT_LIST_HEAD(&vl->update); + init_waitqueue_head(&vl->waitq); + rwlock_init(&vl->lock); + memcpy(vl->vldb.name, name, namesz); } - goto found_on_vlserver; - -found_in_graveyard: - /* found in the graveyard - resurrect */ - _debug("found in graveyard"); - atomic_inc(&vlocation->usage); - list_move_tail(&vlocation->link, &cell->vl_list); - spin_unlock(&cell->vl_gylock); - - afs_kafstimod_del_timer(&vlocation->timeout); - goto active; - -found_in_memory: - /* found in memory - check to see if it's active */ - _debug("found in memory"); - atomic_inc(&vlocation->usage); + _leave(" = %p", vl); + return vl; +} -active: - active = 1; +/* + * update record if we found it in the cache + */ +static int afs_vlocation_update_record(struct afs_vlocation *vl, + struct afs_cache_vlocation *vldb) +{ + afs_voltype_t voltype; + afs_volid_t vid; + int ret; -#ifdef AFS_CACHING_SUPPORT -found_in_cache: -#endif /* try to look up a cached volume in the cell VL databases by ID */ - _debug("found in cache"); - _debug("Locally Cached: %s %02x { %08x(%x) %08x(%x) %08x(%x) }", - vlocation->vldb.name, - vlocation->vldb.vidmask, - ntohl(vlocation->vldb.servers[0].s_addr), - vlocation->vldb.srvtmask[0], - ntohl(vlocation->vldb.servers[1].s_addr), - vlocation->vldb.srvtmask[1], - ntohl(vlocation->vldb.servers[2].s_addr), - vlocation->vldb.srvtmask[2] - ); + vl->vldb.name, + vl->vldb.vidmask, + ntohl(vl->vldb.servers[0].s_addr), + vl->vldb.srvtmask[0], + ntohl(vl->vldb.servers[1].s_addr), + vl->vldb.srvtmask[1], + ntohl(vl->vldb.servers[2].s_addr), + vl->vldb.srvtmask[2]); _debug("Vids: %08x %08x %08x", - vlocation->vldb.vid[0], - vlocation->vldb.vid[1], - vlocation->vldb.vid[2]); + vl->vldb.vid[0], + vl->vldb.vid[1], + vl->vldb.vid[2]); - if (vlocation->vldb.vidmask & AFS_VOL_VTM_RW) { - vid = vlocation->vldb.vid[0]; + if (vl->vldb.vidmask & AFS_VOL_VTM_RW) { + vid = vl->vldb.vid[0]; voltype = AFSVL_RWVOL; - } else if (vlocation->vldb.vidmask & AFS_VOL_VTM_RO) { - vid = vlocation->vldb.vid[1]; + } else if (vl->vldb.vidmask & AFS_VOL_VTM_RO) { + vid = vl->vldb.vid[1]; voltype = AFSVL_ROVOL; - } else if (vlocation->vldb.vidmask & AFS_VOL_VTM_BAK) { - vid = vlocation->vldb.vid[2]; + } else if (vl->vldb.vidmask & AFS_VOL_VTM_BAK) { + vid = vl->vldb.vid[2]; voltype = AFSVL_BACKVOL; } else { BUG(); @@ -371,551 +225,482 @@ found_in_cache: voltype = 0; } - ret = afs_vlocation_access_vl_by_id(vlocation, vid, voltype, &vldb); + /* contact the server to make sure the volume is still available + * - TODO: need to handle disconnected operation here + */ + ret = afs_vlocation_access_vl_by_id(vl, vid, voltype, vldb); switch (ret) { /* net error */ default: - printk("kAFS: failed to volume '%*.*s' (%x) up in '%s': %d\n", - namesz, namesz, name, vid, cell->name, ret); - goto error; + printk(KERN_WARNING "kAFS:" + " failed to update volume '%s' (%x) up in '%s': %d\n", + vl->vldb.name, vid, vl->cell->name, ret); + _leave(" = %d", ret); + return ret; /* pulled from local cache into memory */ case 0: - goto found_on_vlserver; + _leave(" = 0"); + return 0; /* uh oh... looks like the volume got deleted */ case -ENOMEDIUM: - printk("kAFS: volume '%*.*s' (%x) does not exist '%s'\n", - namesz, namesz, name, vid, cell->name); + printk(KERN_ERR "kAFS:" + " volume '%s' (%x) does not exist '%s'\n", + vl->vldb.name, vid, vl->cell->name); /* TODO: make existing record unavailable */ - goto error; + _leave(" = %d", ret); + return ret; } +} -found_on_vlserver: - _debug("Done VL Lookup: %*.*s %02x { %08x(%x) %08x(%x) %08x(%x) }", - namesz, namesz, name, - vldb.vidmask, - ntohl(vldb.servers[0].s_addr), vldb.srvtmask[0], - ntohl(vldb.servers[1].s_addr), vldb.srvtmask[1], - ntohl(vldb.servers[2].s_addr), vldb.srvtmask[2] - ); - - _debug("Vids: %08x %08x %08x", vldb.vid[0], vldb.vid[1], vldb.vid[2]); +/* + * apply the update to a VL record + */ +static void afs_vlocation_apply_update(struct afs_vlocation *vl, + struct afs_cache_vlocation *vldb) +{ + _debug("Done VL Lookup: %s %02x { %08x(%x) %08x(%x) %08x(%x) }", + vldb->name, vldb->vidmask, + ntohl(vldb->servers[0].s_addr), vldb->srvtmask[0], + ntohl(vldb->servers[1].s_addr), vldb->srvtmask[1], + ntohl(vldb->servers[2].s_addr), vldb->srvtmask[2]); - if ((namesz < sizeof(vlocation->vldb.name) && - vlocation->vldb.name[namesz] != '\0') || - memcmp(vldb.name, name, namesz) != 0) - printk("kAFS: name of volume '%*.*s' changed to '%s' on server\n", - namesz, namesz, name, vldb.name); + _debug("Vids: %08x %08x %08x", + vldb->vid[0], vldb->vid[1], vldb->vid[2]); - memcpy(&vlocation->vldb, &vldb, sizeof(vlocation->vldb)); + if (strcmp(vldb->name, vl->vldb.name) != 0) + printk(KERN_NOTICE "kAFS:" + " name of volume '%s' changed to '%s' on server\n", + vl->vldb.name, vldb->name); - afs_kafstimod_add_timer(&vlocation->upd_timer, 10 * HZ); + vl->vldb = *vldb; #ifdef AFS_CACHING_SUPPORT /* update volume entry in local cache */ - cachefs_update_cookie(vlocation->cache); -#endif - - *_vlocation = vlocation; - _leave(" = 0 (%p)",vlocation); - return 0; - -error: - if (vlocation) { - if (active) { - __afs_put_vlocation(vlocation); - } else { - list_del(&vlocation->link); -#ifdef AFS_CACHING_SUPPORT - cachefs_relinquish_cookie(vlocation->cache, 0); + cachefs_update_cookie(vl->cache); #endif - afs_put_cell(vlocation->cell); - kfree(vlocation); - } - } - - _leave(" = %d", ret); - return ret; } /* - * finish using a volume location record - * - caller must have cell->vol_sem write-locked + * fill in a volume location record, consulting the cache and the VL server + * both */ -static void __afs_put_vlocation(struct afs_vlocation *vlocation) +static int afs_vlocation_fill_in_record(struct afs_vlocation *vl) { - struct afs_cell *cell; + struct afs_cache_vlocation vldb; + int ret; - if (!vlocation) - return; + _enter(""); - _enter("%s", vlocation->vldb.name); + ASSERTCMP(vl->valid, ==, 0); - cell = vlocation->cell; + memset(&vldb, 0, sizeof(vldb)); - /* sanity check */ - BUG_ON(atomic_read(&vlocation->usage) <= 0); + /* see if we have an in-cache copy (will set vl->valid if there is) */ +#ifdef AFS_CACHING_SUPPORT + cachefs_acquire_cookie(cell->cache, + &afs_volume_cache_index_def, + vlocation, + &vl->cache); +#endif - spin_lock(&cell->vl_gylock); - if (likely(!atomic_dec_and_test(&vlocation->usage))) { - spin_unlock(&cell->vl_gylock); - _leave(""); - return; + if (vl->valid) { + /* try to update a known volume in the cell VL databases by + * ID as the name may have changed */ + _debug("found in cache"); + ret = afs_vlocation_update_record(vl, &vldb); + } else { + /* try to look up an unknown volume in the cell VL databases by + * name */ + ret = afs_vlocation_access_vl_by_name(vl, &vldb); + if (ret < 0) { + printk("kAFS: failed to locate '%s' in cell '%s'\n", + vl->vldb.name, vl->cell->name); + return ret; + } } - /* move to graveyard queue */ - list_move_tail(&vlocation->link,&cell->vl_graveyard); - - /* remove from pending timeout queue (refcounted if actually being - * updated) */ - list_del_init(&vlocation->upd_op.link); - - /* time out in 10 secs */ - afs_kafstimod_del_timer(&vlocation->upd_timer); - afs_kafstimod_add_timer(&vlocation->timeout, 10 * HZ); - - spin_unlock(&cell->vl_gylock); - - _leave(" [killed]"); + afs_vlocation_apply_update(vl, &vldb); + _leave(" = 0"); + return 0; } /* - * finish using a volume location record + * queue a vlocation record for updates */ -void afs_put_vlocation(struct afs_vlocation *vlocation) +void afs_vlocation_queue_for_updates(struct afs_vlocation *vl) { - if (vlocation) { - struct afs_cell *cell = vlocation->cell; + struct afs_vlocation *xvl; - down_write(&cell->vl_sem); - __afs_put_vlocation(vlocation); - up_write(&cell->vl_sem); + /* wait at least 10 minutes before updating... */ + vl->update_at = get_seconds() + afs_vlocation_update_timeout; + + spin_lock(&afs_vlocation_updates_lock); + + if (!list_empty(&afs_vlocation_updates)) { + /* ... but wait at least 1 second more than the newest record + * already queued so that we don't spam the VL server suddenly + * with lots of requests + */ + xvl = list_entry(afs_vlocation_updates.prev, + struct afs_vlocation, update); + if (vl->update_at <= xvl->update_at) + vl->update_at = xvl->update_at + 1; + } else { + queue_delayed_work(afs_vlocation_update_worker, + &afs_vlocation_update, + afs_vlocation_update_timeout * HZ); } + + list_add_tail(&vl->update, &afs_vlocation_updates); + spin_unlock(&afs_vlocation_updates_lock); } /* - * timeout vlocation record - * - removes from the cell's graveyard if the usage count is zero + * lookup volume location + * - iterate through the VL servers in a cell until one of them admits knowing + * about the volume in question + * - lookup in the local cache if not able to find on the VL server + * - insert/update in the local cache if did get a VL response */ -void afs_vlocation_do_timeout(struct afs_vlocation *vlocation) +struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *cell, + const char *name, + size_t namesz) { - struct afs_cell *cell; - - _enter("%s", vlocation->vldb.name); - - cell = vlocation->cell; + struct afs_vlocation *vl; + int ret; - BUG_ON(atomic_read(&vlocation->usage) < 0); + _enter("{%s},%*.*s,%zu", + cell->name, (int) namesz, (int) namesz, name, namesz); - /* remove from graveyard if still dead */ - spin_lock(&cell->vl_gylock); - if (atomic_read(&vlocation->usage) == 0) - list_del_init(&vlocation->link); - else - vlocation = NULL; - spin_unlock(&cell->vl_gylock); + if (namesz > sizeof(vl->vldb.name)) { + _leave(" = -ENAMETOOLONG"); + return ERR_PTR(-ENAMETOOLONG); + } - if (!vlocation) { - _leave(""); - return; /* resurrected */ + /* see if we have an in-memory copy first */ + down_write(&cell->vl_sem); + spin_lock(&cell->vl_lock); + list_for_each_entry(vl, &cell->vl_list, link) { + if (vl->vldb.name[namesz] != '\0') + continue; + if (memcmp(vl->vldb.name, name, namesz) == 0) + goto found_in_memory; } + spin_unlock(&cell->vl_lock); - /* we can now destroy it properly */ -#ifdef AFS_CACHING_SUPPORT - cachefs_relinquish_cookie(vlocation->cache, 0); -#endif - afs_put_cell(cell); + /* not in the cell's in-memory lists - create a new record */ + vl = afs_vlocation_alloc(cell, name, namesz); + if (!vl) { + up_write(&cell->vl_sem); + return ERR_PTR(-ENOMEM); + } - kfree(vlocation); + afs_get_cell(cell); - _leave(" [destroyed]"); -} + list_add_tail(&vl->link, &cell->vl_list); + vl->state = AFS_VL_CREATING; + up_write(&cell->vl_sem); -/* - * send an update operation to the currently selected server - */ -static int afs_vlocation_update_begin(struct afs_vlocation *vlocation) -{ - afs_voltype_t voltype; - afs_volid_t vid; - int ret; +fill_in_record: + ret = afs_vlocation_fill_in_record(vl); + if (ret < 0) + goto error_abandon; + vl->state = AFS_VL_VALID; + wake_up(&vl->waitq); - _enter("%s{ufs=%u ucs=%u}", - vlocation->vldb.name, - vlocation->upd_first_svix, - vlocation->upd_curr_svix); + /* schedule for regular updates */ + afs_vlocation_queue_for_updates(vl); + goto success; - /* try to look up a cached volume in the cell VL databases by ID */ - if (vlocation->vldb.vidmask & AFS_VOL_VTM_RW) { - vid = vlocation->vldb.vid[0]; - voltype = AFSVL_RWVOL; - } else if (vlocation->vldb.vidmask & AFS_VOL_VTM_RO) { - vid = vlocation->vldb.vid[1]; - voltype = AFSVL_ROVOL; - } else if (vlocation->vldb.vidmask & AFS_VOL_VTM_BAK) { - vid = vlocation->vldb.vid[2]; - voltype = AFSVL_BACKVOL; - } else { - BUG(); - vid = 0; - voltype = 0; +found_in_memory: + /* found in memory */ + _debug("found in memory"); + atomic_inc(&vl->usage); + spin_unlock(&cell->vl_lock); + if (!list_empty(&vl->grave)) { + spin_lock(&afs_vlocation_graveyard_lock); + list_del_init(&vl->grave); + spin_unlock(&afs_vlocation_graveyard_lock); } + up_write(&cell->vl_sem); - /* contact the chosen server */ - ret = afs_server_lookup( - vlocation->cell, - &vlocation->cell->vl_addrs[vlocation->upd_curr_svix], - &vlocation->upd_op.server); + /* see if it was an abandoned record that we might try filling in */ + while (vl->state != AFS_VL_VALID) { + afs_vlocation_state_t state = vl->state; - switch (ret) { - case 0: - break; - case -ENOMEM: - case -ENONET: - default: - _leave(" = %d", ret); - return ret; - } + _debug("invalid [state %d]", state); - /* initiate the update operation */ - ret = afs_rxvl_get_entry_by_id_async(&vlocation->upd_op, vid, voltype); - if (ret < 0) { - _leave(" = %d", ret); - return ret; + if ((state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME)) { + if (cmpxchg(&vl->state, state, AFS_VL_CREATING) == + state) + goto fill_in_record; + continue; + } + + /* must now wait for creation or update by someone else to + * complete */ + _debug("wait"); + + ret = wait_event_interruptible( + vl->waitq, + vl->state == AFS_VL_NEW || + vl->state == AFS_VL_VALID || + vl->state == AFS_VL_NO_VOLUME); + if (ret < 0) + goto error; } +success: + _leave(" = %p",vl); + return vl; + +error_abandon: + vl->state = AFS_VL_NEW; + wake_up(&vl->waitq); +error: + ASSERT(vl != NULL); + afs_put_vlocation(vl); _leave(" = %d", ret); - return ret; + return ERR_PTR(ret); } /* - * abandon updating a VL record - * - does not restart the update timer + * finish using a volume location record */ -static void afs_vlocation_update_abandon(struct afs_vlocation *vlocation, - afs_vlocation_upd_t state, - int ret) +void afs_put_vlocation(struct afs_vlocation *vl) { - _enter("%s,%u", vlocation->vldb.name, state); - - if (ret < 0) - printk("kAFS: Abandoning VL update '%s': %d\n", - vlocation->vldb.name, ret); - - /* discard the server record */ - afs_put_server(vlocation->upd_op.server); - vlocation->upd_op.server = NULL; + if (!vl) + return; - spin_lock(&afs_vlocation_update_lock); - afs_vlocation_update = NULL; - vlocation->upd_state = state; + _enter("%s", vl->vldb.name); - /* TODO: start updating next VL record on pending list */ + ASSERTCMP(atomic_read(&vl->usage), >, 0); - spin_unlock(&afs_vlocation_update_lock); + if (likely(!atomic_dec_and_test(&vl->usage))) { + _leave(""); + return; + } - _leave(""); + spin_lock(&afs_vlocation_graveyard_lock); + if (atomic_read(&vl->usage) == 0) { + _debug("buried"); + list_move_tail(&vl->grave, &afs_vlocation_graveyard); + vl->time_of_death = get_seconds(); + schedule_delayed_work(&afs_vlocation_reap, + afs_vlocation_timeout * HZ); + + /* suspend updates on this record */ + if (!list_empty(&vl->update)) { + spin_lock(&afs_vlocation_updates_lock); + list_del_init(&vl->update); + spin_unlock(&afs_vlocation_updates_lock); + } + } + spin_unlock(&afs_vlocation_graveyard_lock); + _leave(" [killed?]"); } /* - * handle periodic update timeouts and busy retry timeouts - * - called from kafstimod + * destroy a dead volume location record */ -static void afs_vlocation_update_timer(struct afs_timer *timer) +static void afs_vlocation_destroy(struct afs_vlocation *vl) { - struct afs_vlocation *vlocation = - list_entry(timer, struct afs_vlocation, upd_timer); - int ret; + _enter("%p", vl); - _enter("%s", vlocation->vldb.name); +#ifdef AFS_CACHING_SUPPORT + cachefs_relinquish_cookie(vl->cache, 0); +#endif - /* only update if not in the graveyard (defend against putting too) */ - spin_lock(&vlocation->cell->vl_gylock); + afs_put_cell(vl->cell); + kfree(vl); +} + +/* + * reap dead volume location records + */ +static void afs_vlocation_reaper(struct work_struct *work) +{ + LIST_HEAD(corpses); + struct afs_vlocation *vl; + unsigned long delay, expiry; + time_t now; - if (!atomic_read(&vlocation->usage)) - goto out_unlock1; + _enter(""); - spin_lock(&afs_vlocation_update_lock); + now = get_seconds(); + spin_lock(&afs_vlocation_graveyard_lock); + + while (!list_empty(&afs_vlocation_graveyard)) { + vl = list_entry(afs_vlocation_graveyard.next, + struct afs_vlocation, grave); + + _debug("check %p", vl); + + /* the queue is ordered most dead first */ + expiry = vl->time_of_death + afs_vlocation_timeout; + if (expiry > now) { + delay = (expiry - now) * HZ; + _debug("delay %lu", delay); + if (!schedule_delayed_work(&afs_vlocation_reap, + delay)) { + cancel_delayed_work(&afs_vlocation_reap); + schedule_delayed_work(&afs_vlocation_reap, + delay); + } + break; + } - /* if we were woken up due to EBUSY sleep then restart immediately if - * possible or else jump to front of pending queue */ - if (vlocation->upd_state == AFS_VLUPD_BUSYSLEEP) { - if (afs_vlocation_update) { - list_add(&vlocation->upd_op.link, - &afs_vlocation_update_pendq); + spin_lock(&vl->cell->vl_lock); + if (atomic_read(&vl->usage) > 0) { + _debug("no reap"); + list_del_init(&vl->grave); } else { - afs_get_vlocation(vlocation); - afs_vlocation_update = vlocation; - vlocation->upd_state = AFS_VLUPD_INPROGRESS; + _debug("reap"); + list_move_tail(&vl->grave, &corpses); + list_del_init(&vl->link); } - goto out_unlock2; + spin_unlock(&vl->cell->vl_lock); } - /* put on pending queue if there's already another update in progress */ - if (afs_vlocation_update) { - vlocation->upd_state = AFS_VLUPD_PENDING; - list_add_tail(&vlocation->upd_op.link, - &afs_vlocation_update_pendq); - goto out_unlock2; - } + spin_unlock(&afs_vlocation_graveyard_lock); - /* hold a ref on it while actually updating */ - afs_get_vlocation(vlocation); - afs_vlocation_update = vlocation; - vlocation->upd_state = AFS_VLUPD_INPROGRESS; - - spin_unlock(&afs_vlocation_update_lock); - spin_unlock(&vlocation->cell->vl_gylock); - - /* okay... we can start the update */ - _debug("BEGIN VL UPDATE [%s]", vlocation->vldb.name); - vlocation->upd_first_svix = vlocation->cell->vl_curr_svix; - vlocation->upd_curr_svix = vlocation->upd_first_svix; - vlocation->upd_rej_cnt = 0; - vlocation->upd_busy_cnt = 0; - - ret = afs_vlocation_update_begin(vlocation); - if (ret < 0) { - afs_vlocation_update_abandon(vlocation, AFS_VLUPD_SLEEP, ret); - afs_kafstimod_add_timer(&vlocation->upd_timer, - AFS_VLDB_TIMEOUT); - afs_put_vlocation(vlocation); + /* now reap the corpses we've extracted */ + while (!list_empty(&corpses)) { + vl = list_entry(corpses.next, struct afs_vlocation, grave); + list_del(&vl->grave); + afs_vlocation_destroy(vl); } _leave(""); - return; +} -out_unlock2: - spin_unlock(&afs_vlocation_update_lock); -out_unlock1: - spin_unlock(&vlocation->cell->vl_gylock); - _leave(""); +/* + * initialise the VL update process + */ +int __init afs_vlocation_update_init(void) +{ + afs_vlocation_update_worker = + create_singlethread_workqueue("kafs_vlupdated"); + return afs_vlocation_update_worker ? 0 : -ENOMEM; +} + +/* + * discard all the volume location records for rmmod + */ +void __exit afs_vlocation_purge(void) +{ + afs_vlocation_timeout = 0; + + spin_lock(&afs_vlocation_updates_lock); + list_del_init(&afs_vlocation_updates); + spin_unlock(&afs_vlocation_updates_lock); + cancel_delayed_work(&afs_vlocation_update); + queue_delayed_work(afs_vlocation_update_worker, + &afs_vlocation_update, 0); + destroy_workqueue(afs_vlocation_update_worker); + + cancel_delayed_work(&afs_vlocation_reap); + schedule_delayed_work(&afs_vlocation_reap, 0); } /* - * attend to an update operation upon which an event happened - * - called in kafsasyncd context + * update a volume location */ -static void afs_vlocation_update_attend(struct afs_async_op *op) +static void afs_vlocation_updater(struct work_struct *work) { struct afs_cache_vlocation vldb; - struct afs_vlocation *vlocation = - list_entry(op, struct afs_vlocation, upd_op); - unsigned tmp; + struct afs_vlocation *vl, *xvl; + time_t now; + long timeout; int ret; - _enter("%s", vlocation->vldb.name); - - ret = afs_rxvl_get_entry_by_id_async2(op, &vldb); - switch (ret) { - case -EAGAIN: - _leave(" [unfinished]"); - return; - - case 0: - _debug("END VL UPDATE: %d\n", ret); - vlocation->valid = 1; - - _debug("Done VL Lookup: %02x { %08x(%x) %08x(%x) %08x(%x) }", - vldb.vidmask, - ntohl(vldb.servers[0].s_addr), vldb.srvtmask[0], - ntohl(vldb.servers[1].s_addr), vldb.srvtmask[1], - ntohl(vldb.servers[2].s_addr), vldb.srvtmask[2] - ); - - _debug("Vids: %08x %08x %08x", - vldb.vid[0], vldb.vid[1], vldb.vid[2]); - - afs_vlocation_update_abandon(vlocation, AFS_VLUPD_SLEEP, 0); - - down_write(&vlocation->cell->vl_sem); - - /* actually update the cache */ - if (strncmp(vldb.name, vlocation->vldb.name, - sizeof(vlocation->vldb.name)) != 0) - printk("kAFS: name of volume '%s'" - " changed to '%s' on server\n", - vlocation->vldb.name, vldb.name); - - memcpy(&vlocation->vldb, &vldb, sizeof(vlocation->vldb)); - -#if 0 - /* TODO update volume entry in local cache */ -#endif - - up_write(&vlocation->cell->vl_sem); - - if (ret < 0) - printk("kAFS: failed to update local cache: %d\n", ret); - - afs_kafstimod_add_timer(&vlocation->upd_timer, - AFS_VLDB_TIMEOUT); - afs_put_vlocation(vlocation); - _leave(" [found]"); - return; - - case -ENOMEDIUM: - vlocation->upd_rej_cnt++; - goto try_next; - - /* the server is locked - retry in a very short while */ - case -EBUSY: - vlocation->upd_busy_cnt++; - if (vlocation->upd_busy_cnt > 3) - goto try_next; /* too many retries */ - - afs_vlocation_update_abandon(vlocation, - AFS_VLUPD_BUSYSLEEP, 0); - afs_kafstimod_add_timer(&vlocation->upd_timer, HZ / 2); - afs_put_vlocation(vlocation); - _leave(" [busy]"); - return; - - case -ENETUNREACH: - case -EHOSTUNREACH: - case -ECONNREFUSED: - case -EREMOTEIO: - /* record bad vlserver info in the cell too - * - TODO: use down_write_trylock() if available - */ - if (vlocation->upd_curr_svix == vlocation->cell->vl_curr_svix) - vlocation->cell->vl_curr_svix = - vlocation->cell->vl_curr_svix % - vlocation->cell->vl_naddrs; - - case -EBADRQC: - case -EINVAL: - case -EACCES: - case -EBADMSG: - goto try_next; - - default: - goto abandon; - } - - /* try contacting the next server */ -try_next: - vlocation->upd_busy_cnt = 0; - - /* discard the server record */ - afs_put_server(vlocation->upd_op.server); - vlocation->upd_op.server = NULL; + _enter(""); - tmp = vlocation->cell->vl_naddrs; - if (tmp == 0) - goto abandon; + now = get_seconds(); - vlocation->upd_curr_svix++; - if (vlocation->upd_curr_svix >= tmp) - vlocation->upd_curr_svix = 0; - if (vlocation->upd_first_svix >= tmp) - vlocation->upd_first_svix = tmp - 1; + /* find a record to update */ + spin_lock(&afs_vlocation_updates_lock); + for (;;) { + if (list_empty(&afs_vlocation_updates)) { + spin_unlock(&afs_vlocation_updates_lock); + _leave(" [nothing]"); + return; + } - /* move to the next server */ - if (vlocation->upd_curr_svix != vlocation->upd_first_svix) { - afs_vlocation_update_begin(vlocation); - _leave(" [next]"); - return; + vl = list_entry(afs_vlocation_updates.next, + struct afs_vlocation, update); + if (atomic_read(&vl->usage) > 0) + break; + list_del_init(&vl->update); } - /* run out of servers to try - was the volume rejected? */ - if (vlocation->upd_rej_cnt > 0) { - printk("kAFS: Active volume no longer valid '%s'\n", - vlocation->vldb.name); - vlocation->valid = 0; - afs_vlocation_update_abandon(vlocation, AFS_VLUPD_SLEEP, 0); - afs_kafstimod_add_timer(&vlocation->upd_timer, - AFS_VLDB_TIMEOUT); - afs_put_vlocation(vlocation); - _leave(" [invalidated]"); + timeout = vl->update_at - now; + if (timeout > 0) { + queue_delayed_work(afs_vlocation_update_worker, + &afs_vlocation_update, timeout * HZ); + spin_unlock(&afs_vlocation_updates_lock); + _leave(" [nothing]"); return; } - /* abandon the update */ -abandon: - afs_vlocation_update_abandon(vlocation, AFS_VLUPD_SLEEP, ret); - afs_kafstimod_add_timer(&vlocation->upd_timer, HZ * 10); - afs_put_vlocation(vlocation); - _leave(" [abandoned]"); -} + list_del_init(&vl->update); + atomic_inc(&vl->usage); + spin_unlock(&afs_vlocation_updates_lock); -/* - * deal with an update operation being discarded - * - called in kafsasyncd context when it's dying due to rmmod - * - the call has already been aborted and put()'d - */ -static void afs_vlocation_update_discard(struct afs_async_op *op) -{ - struct afs_vlocation *vlocation = - list_entry(op, struct afs_vlocation, upd_op); - - _enter("%s", vlocation->vldb.name); + /* we can now perform the update */ + _debug("update %s", vl->vldb.name); + vl->state = AFS_VL_UPDATING; + vl->upd_rej_cnt = 0; + vl->upd_busy_cnt = 0; - afs_put_server(op->server); - op->server = NULL; - - afs_put_vlocation(vlocation); + ret = afs_vlocation_update_record(vl, &vldb); + switch (ret) { + case 0: + afs_vlocation_apply_update(vl, &vldb); + vl->state = AFS_VL_VALID; + break; + case -ENOMEDIUM: + vl->state = AFS_VL_VOLUME_DELETED; + break; + default: + vl->state = AFS_VL_UNCERTAIN; + break; + } - _leave(""); -} + /* and then reschedule */ + _debug("reschedule"); + vl->update_at = get_seconds() + afs_vlocation_update_timeout; -/* - * match a VLDB record stored in the cache - * - may also load target from entry - */ -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_vlocation_cache_match(void *target, - const void *entry) -{ - const struct afs_cache_vlocation *vldb = entry; - struct afs_vlocation *vlocation = target; - - _enter("{%s},{%s}", vlocation->vldb.name, vldb->name); - - if (strncmp(vlocation->vldb.name, vldb->name, sizeof(vldb->name)) == 0 - ) { - if (!vlocation->valid || - vlocation->vldb.rtime == vldb->rtime - ) { - vlocation->vldb = *vldb; - vlocation->valid = 1; - _leave(" = SUCCESS [c->m]"); - return CACHEFS_MATCH_SUCCESS; - } else if (memcmp(&vlocation->vldb, vldb, sizeof(*vldb)) != 0) { - /* delete if VIDs for this name differ */ - if (memcmp(&vlocation->vldb.vid, - &vldb->vid, - sizeof(vldb->vid)) != 0) { - _leave(" = DELETE"); - return CACHEFS_MATCH_SUCCESS_DELETE; - } + spin_lock(&afs_vlocation_updates_lock); - _leave(" = UPDATE"); - return CACHEFS_MATCH_SUCCESS_UPDATE; - } else { - _leave(" = SUCCESS"); - return CACHEFS_MATCH_SUCCESS; - } + if (!list_empty(&afs_vlocation_updates)) { + /* next update in 10 minutes, but wait at least 1 second more + * than the newest record already queued so that we don't spam + * the VL server suddenly with lots of requests + */ + xvl = list_entry(afs_vlocation_updates.prev, + struct afs_vlocation, update); + if (vl->update_at <= xvl->update_at) + vl->update_at = xvl->update_at + 1; + xvl = list_entry(afs_vlocation_updates.next, + struct afs_vlocation, update); + timeout = xvl->update_at - now; + if (timeout < 0) + timeout = 0; + } else { + timeout = afs_vlocation_update_timeout; } - _leave(" = FAILED"); - return CACHEFS_MATCH_FAILED; -} -#endif + ASSERT(list_empty(&vl->update)); -/* - * update a VLDB record stored in the cache - */ -#ifdef AFS_CACHING_SUPPORT -static void afs_vlocation_cache_update(void *source, void *entry) -{ - struct afs_cache_vlocation *vldb = entry; - struct afs_vlocation *vlocation = source; - - _enter(""); + list_add_tail(&vl->update, &afs_vlocation_updates); - *vldb = vlocation->vldb; + _debug("timeout %ld", timeout); + queue_delayed_work(afs_vlocation_update_worker, + &afs_vlocation_update, timeout * HZ); + spin_unlock(&afs_vlocation_updates_lock); + afs_put_vlocation(vl); } -#endif |