summaryrefslogtreecommitdiffstats
path: root/fs/nfs/pnfs.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfs/pnfs.c')
-rw-r--r--fs/nfs/pnfs.c385
1 files changed, 353 insertions, 32 deletions
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 891a0c36f99..d1ad7df3479 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -140,6 +140,11 @@ pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
printk(KERN_ERR "%s id 0 is reserved\n", __func__);
return status;
}
+ if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
+ printk(KERN_ERR "%s Layout driver must provide "
+ "alloc_lseg and free_lseg.\n", __func__);
+ return status;
+ }
spin_lock(&pnfs_spinlock);
tmp = find_pnfs_driver_locked(ld_type->id);
@@ -168,6 +173,10 @@ pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
}
EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
+/*
+ * pNFS client layout cache
+ */
+
static void
get_layout_hdr_locked(struct pnfs_layout_hdr *lo)
{
@@ -190,7 +199,7 @@ put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
}
}
-static void
+void
put_layout_hdr(struct inode *inode)
{
spin_lock(&inode->i_lock);
@@ -215,7 +224,7 @@ destroy_lseg(struct kref *kref)
struct inode *ino = lseg->layout->inode;
dprintk("--> %s\n", __func__);
- kfree(lseg);
+ NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
/* Matched by get_layout_hdr_locked in pnfs_insert_layout */
put_layout_hdr(ino);
}
@@ -249,6 +258,9 @@ pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list)
/* List does not take a reference, so no need for put here */
list_del_init(&lo->layouts);
spin_unlock(&clp->cl_lock);
+ write_seqlock(&lo->seqlock);
+ clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
+ write_sequnlock(&lo->seqlock);
dprintk("%s:Return\n", __func__);
}
@@ -307,40 +319,135 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
}
}
-static void pnfs_insert_layout(struct pnfs_layout_hdr *lo,
- struct pnfs_layout_segment *lseg);
+/* update lo->stateid with new if is more recent
+ *
+ * lo->stateid could be the open stateid, in which case we just use what given.
+ */
+static void
+pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
+ const nfs4_stateid *new)
+{
+ nfs4_stateid *old = &lo->stateid;
+ bool overwrite = false;
+
+ write_seqlock(&lo->seqlock);
+ if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state) ||
+ memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other)))
+ overwrite = true;
+ else {
+ u32 oldseq, newseq;
+
+ oldseq = be32_to_cpu(old->stateid.seqid);
+ newseq = be32_to_cpu(new->stateid.seqid);
+ if ((int)(newseq - oldseq) > 0)
+ overwrite = true;
+ }
+ if (overwrite)
+ memcpy(&old->stateid, &new->stateid, sizeof(new->stateid));
+ write_sequnlock(&lo->seqlock);
+}
+
+static void
+pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
+ struct nfs4_state *state)
+{
+ int seq;
+
+ dprintk("--> %s\n", __func__);
+ write_seqlock(&lo->seqlock);
+ do {
+ seq = read_seqbegin(&state->seqlock);
+ memcpy(lo->stateid.data, state->stateid.data,
+ sizeof(state->stateid.data));
+ } while (read_seqretry(&state->seqlock, seq));
+ set_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
+ write_sequnlock(&lo->seqlock);
+ dprintk("<-- %s\n", __func__);
+}
+
+void
+pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
+ struct nfs4_state *open_state)
+{
+ int seq;
-/* Get layout from server. */
+ dprintk("--> %s\n", __func__);
+ do {
+ seq = read_seqbegin(&lo->seqlock);
+ if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) {
+ /* This will trigger retry of the read */
+ pnfs_layout_from_open_stateid(lo, open_state);
+ } else
+ memcpy(dst->data, lo->stateid.data,
+ sizeof(lo->stateid.data));
+ } while (read_seqretry(&lo->seqlock, seq));
+ dprintk("<-- %s\n", __func__);
+}
+
+/*
+* Get layout from server.
+* for now, assume that whole file layouts are requested.
+* arg->offset: 0
+* arg->length: all ones
+*/
static struct pnfs_layout_segment *
send_layoutget(struct pnfs_layout_hdr *lo,
struct nfs_open_context *ctx,
u32 iomode)
{
struct inode *ino = lo->inode;
- struct pnfs_layout_segment *lseg;
+ struct nfs_server *server = NFS_SERVER(ino);
+ struct nfs4_layoutget *lgp;
+ struct pnfs_layout_segment *lseg = NULL;
+
+ dprintk("--> %s\n", __func__);
- /* Lets pretend we sent LAYOUTGET and got a response */
- lseg = kzalloc(sizeof(*lseg), GFP_KERNEL);
+ BUG_ON(ctx == NULL);
+ lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
+ if (lgp == NULL) {
+ put_layout_hdr(lo->inode);
+ return NULL;
+ }
+ lgp->args.minlength = NFS4_MAX_UINT64;
+ lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
+ lgp->args.range.iomode = iomode;
+ lgp->args.range.offset = 0;
+ lgp->args.range.length = NFS4_MAX_UINT64;
+ lgp->args.type = server->pnfs_curr_ld->id;
+ lgp->args.inode = ino;
+ lgp->args.ctx = get_nfs_open_context(ctx);
+ lgp->lsegpp = &lseg;
+
+ /* Synchronously retrieve layout information from server and
+ * store in lseg.
+ */
+ nfs4_proc_layoutget(lgp);
if (!lseg) {
+ /* remember that LAYOUTGET failed and suspend trying */
set_bit(lo_fail_bit(iomode), &lo->state);
- spin_lock(&ino->i_lock);
- put_layout_hdr_locked(lo);
- spin_unlock(&ino->i_lock);
- return NULL;
}
- init_lseg(lo, lseg);
- lseg->iomode = IOMODE_RW;
- spin_lock(&ino->i_lock);
- pnfs_insert_layout(lo, lseg);
- put_layout_hdr_locked(lo);
- spin_unlock(&ino->i_lock);
return lseg;
}
+/*
+ * Compare two layout segments for sorting into layout cache.
+ * We want to preferentially return RW over RO layouts, so ensure those
+ * are seen first.
+ */
+static s64
+cmp_layout(u32 iomode1, u32 iomode2)
+{
+ /* read > read/write */
+ return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ);
+}
+
static void
pnfs_insert_layout(struct pnfs_layout_hdr *lo,
struct pnfs_layout_segment *lseg)
{
+ struct pnfs_layout_segment *lp;
+ int found = 0;
+
dprintk("%s:Begin\n", __func__);
assert_spin_locked(&lo->inode->i_lock);
@@ -352,19 +459,28 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
list_add_tail(&lo->layouts, &clp->cl_layouts);
spin_unlock(&clp->cl_lock);
}
- get_layout_hdr_locked(lo);
- /* STUB - add the constructed lseg if necessary */
- if (list_empty(&lo->segs)) {
+ list_for_each_entry(lp, &lo->segs, fi_list) {
+ if (cmp_layout(lp->range.iomode, lseg->range.iomode) > 0)
+ continue;
+ list_add_tail(&lseg->fi_list, &lp->fi_list);
+ dprintk("%s: inserted lseg %p "
+ "iomode %d offset %llu length %llu before "
+ "lp %p iomode %d offset %llu length %llu\n",
+ __func__, lseg, lseg->range.iomode,
+ lseg->range.offset, lseg->range.length,
+ lp, lp->range.iomode, lp->range.offset,
+ lp->range.length);
+ found = 1;
+ break;
+ }
+ if (!found) {
list_add_tail(&lseg->fi_list, &lo->segs);
- dprintk("%s: inserted lseg %p iomode %d at tail\n",
- __func__, lseg, lseg->iomode);
- } else {
- /* There is no harm for the moment in calling this
- * with the lock held, and the call will be removed
- * with the STUB.
- */
- put_lseg(lseg);
+ dprintk("%s: inserted lseg %p "
+ "iomode %d offset %llu length %llu at tail\n",
+ __func__, lseg, lseg->range.iomode,
+ lseg->range.offset, lseg->range.length);
}
+ get_layout_hdr_locked(lo);
dprintk("%s:Return\n", __func__);
}
@@ -380,6 +496,7 @@ alloc_init_layout_hdr(struct inode *ino)
lo->refcount = 1;
INIT_LIST_HEAD(&lo->layouts);
INIT_LIST_HEAD(&lo->segs);
+ seqlock_init(&lo->seqlock);
lo->inode = ino;
return lo;
}
@@ -407,11 +524,46 @@ pnfs_find_alloc_layout(struct inode *ino)
return nfsi->layout;
}
-/* STUB - LAYOUTGET never succeeds, so cache is empty */
+/*
+ * iomode matching rules:
+ * iomode lseg match
+ * ----- ----- -----
+ * ANY READ true
+ * ANY RW true
+ * RW READ false
+ * RW RW true
+ * READ READ true
+ * READ RW true
+ */
+static int
+is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
+{
+ return (iomode != IOMODE_RW || lseg->range.iomode == IOMODE_RW);
+}
+
+/*
+ * lookup range in layout
+ */
static struct pnfs_layout_segment *
pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode)
{
- return NULL;
+ struct pnfs_layout_segment *lseg, *ret = NULL;
+
+ dprintk("%s:Begin\n", __func__);
+
+ assert_spin_locked(&lo->inode->i_lock);
+ list_for_each_entry(lseg, &lo->segs, fi_list) {
+ if (is_matching_lseg(lseg, iomode)) {
+ ret = lseg;
+ break;
+ }
+ if (cmp_layout(iomode, lseg->range.iomode) > 0)
+ break;
+ }
+
+ dprintk("%s:Return lseg %p ref %d\n",
+ __func__, ret, ret ? atomic_read(&ret->kref.refcount) : 0);
+ return ret;
}
/*
@@ -448,7 +600,7 @@ pnfs_update_layout(struct inode *ino,
if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state))
goto out_unlock;
- get_layout_hdr_locked(lo);
+ get_layout_hdr_locked(lo); /* Matched in nfs4_layoutget_release */
spin_unlock(&ino->i_lock);
lseg = send_layoutget(lo, ctx, iomode);
@@ -460,3 +612,172 @@ out_unlock:
spin_unlock(&ino->i_lock);
goto out;
}
+
+int
+pnfs_layout_process(struct nfs4_layoutget *lgp)
+{
+ struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
+ struct nfs4_layoutget_res *res = &lgp->res;
+ struct pnfs_layout_segment *lseg;
+ struct inode *ino = lo->inode;
+ int status = 0;
+
+ /* Inject layout blob into I/O device driver */
+ lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res);
+ if (!lseg || IS_ERR(lseg)) {
+ if (!lseg)
+ status = -ENOMEM;
+ else
+ status = PTR_ERR(lseg);
+ dprintk("%s: Could not allocate layout: error %d\n",
+ __func__, status);
+ goto out;
+ }
+
+ spin_lock(&ino->i_lock);
+ init_lseg(lo, lseg);
+ lseg->range = res->range;
+ *lgp->lsegpp = lseg;
+ pnfs_insert_layout(lo, lseg);
+
+ /* Done processing layoutget. Set the layout stateid */
+ pnfs_set_layout_stateid(lo, &res->stateid);
+ spin_unlock(&ino->i_lock);
+out:
+ return status;
+}
+
+/*
+ * Device ID cache. Currently supports one layout type per struct nfs_client.
+ * Add layout type to the lookup key to expand to support multiple types.
+ */
+int
+pnfs_alloc_init_deviceid_cache(struct nfs_client *clp,
+ void (*free_callback)(struct pnfs_deviceid_node *))
+{
+ struct pnfs_deviceid_cache *c;
+
+ c = kzalloc(sizeof(struct pnfs_deviceid_cache), GFP_KERNEL);
+ if (!c)
+ return -ENOMEM;
+ spin_lock(&clp->cl_lock);
+ if (clp->cl_devid_cache != NULL) {
+ atomic_inc(&clp->cl_devid_cache->dc_ref);
+ dprintk("%s [kref [%d]]\n", __func__,
+ atomic_read(&clp->cl_devid_cache->dc_ref));
+ kfree(c);
+ } else {
+ /* kzalloc initializes hlists */
+ spin_lock_init(&c->dc_lock);
+ atomic_set(&c->dc_ref, 1);
+ c->dc_free_callback = free_callback;
+ clp->cl_devid_cache = c;
+ dprintk("%s [new]\n", __func__);
+ }
+ spin_unlock(&clp->cl_lock);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache);
+
+/*
+ * Called from pnfs_layoutdriver_type->free_lseg
+ * last layout segment reference frees deviceid
+ */
+void
+pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
+ struct pnfs_deviceid_node *devid)
+{
+ struct nfs4_deviceid *id = &devid->de_id;
+ struct pnfs_deviceid_node *d;
+ struct hlist_node *n;
+ long h = nfs4_deviceid_hash(id);
+
+ dprintk("%s [%d]\n", __func__, atomic_read(&devid->de_ref));
+ if (!atomic_dec_and_lock(&devid->de_ref, &c->dc_lock))
+ return;
+
+ hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[h], de_node)
+ if (!memcmp(&d->de_id, id, sizeof(*id))) {
+ hlist_del_rcu(&d->de_node);
+ spin_unlock(&c->dc_lock);
+ synchronize_rcu();
+ c->dc_free_callback(devid);
+ return;
+ }
+ spin_unlock(&c->dc_lock);
+ /* Why wasn't it found in the list? */
+ BUG();
+}
+EXPORT_SYMBOL_GPL(pnfs_put_deviceid);
+
+/* Find and reference a deviceid */
+struct pnfs_deviceid_node *
+pnfs_find_get_deviceid(struct pnfs_deviceid_cache *c, struct nfs4_deviceid *id)
+{
+ struct pnfs_deviceid_node *d;
+ struct hlist_node *n;
+ long hash = nfs4_deviceid_hash(id);
+
+ dprintk("--> %s hash %ld\n", __func__, hash);
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
+ if (!memcmp(&d->de_id, id, sizeof(*id))) {
+ if (!atomic_inc_not_zero(&d->de_ref)) {
+ goto fail;
+ } else {
+ rcu_read_unlock();
+ return d;
+ }
+ }
+ }
+fail:
+ rcu_read_unlock();
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid);
+
+/*
+ * Add a deviceid to the cache.
+ * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new
+ */
+struct pnfs_deviceid_node *
+pnfs_add_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *new)
+{
+ struct pnfs_deviceid_node *d;
+ long hash = nfs4_deviceid_hash(&new->de_id);
+
+ dprintk("--> %s hash %ld\n", __func__, hash);
+ spin_lock(&c->dc_lock);
+ d = pnfs_find_get_deviceid(c, &new->de_id);
+ if (d) {
+ spin_unlock(&c->dc_lock);
+ dprintk("%s [discard]\n", __func__);
+ c->dc_free_callback(new);
+ return d;
+ }
+ INIT_HLIST_NODE(&new->de_node);
+ atomic_set(&new->de_ref, 1);
+ hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
+ spin_unlock(&c->dc_lock);
+ dprintk("%s [new]\n", __func__);
+ return new;
+}
+EXPORT_SYMBOL_GPL(pnfs_add_deviceid);
+
+void
+pnfs_put_deviceid_cache(struct nfs_client *clp)
+{
+ struct pnfs_deviceid_cache *local = clp->cl_devid_cache;
+
+ dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache);
+ if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) {
+ int i;
+ /* Verify cache is empty */
+ for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++)
+ BUG_ON(!hlist_empty(&local->dc_deviceids[i]));
+ clp->cl_devid_cache = NULL;
+ spin_unlock(&clp->cl_lock);
+ kfree(local);
+ }
+}
+EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache);