From 92407e75ce45b41c46944891711fd8faf0714d84 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sun, 23 Oct 2011 20:21:17 -0700 Subject: nfs4: serialize layoutcommit Current pnfs_layoutcommit_inode cannot handle parallel layoutcommit. And as Trond suggested, there is no need for the client to optimize for parallel layoutcommit. So add the NFS_INO_LAYOUTCOMMITTING flag to mark an inflight layoutcommit and serialize layoutcommit with it. Also call mark_inode_dirty_sync if pnfs_layoutcommit_inode fails to issue layoutcommit. Reported-by: Vitaliy Gusev Signed-off-by: Peng Tao Signed-off-by: Jim Rees Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 6 ++++++ fs/nfs/pnfs.c | 25 ++++++++++++++++++++++--- 2 files changed, 28 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d2ae413c986..b60fddf606f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5950,6 +5950,7 @@ static void nfs4_layoutcommit_release(void *calldata) { struct nfs4_layoutcommit_data *data = calldata; struct pnfs_layout_segment *lseg, *tmp; + unsigned long *bitlock = &NFS_I(data->args.inode)->flags; pnfs_cleanup_layoutcommit(data); /* Matched by references in pnfs_set_layoutcommit */ @@ -5959,6 +5960,11 @@ static void nfs4_layoutcommit_release(void *calldata) &lseg->pls_flags)) put_lseg(lseg); } + + clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); + smp_mb__after_clear_bit(); + wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING); + put_rpccred(data->cred); kfree(data); } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ee73d9a4f70..a2478bc7444 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1443,17 +1443,31 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */ data = kzalloc(sizeof(*data), GFP_NOFS); if (!data) { - mark_inode_dirty_sync(inode); status = -ENOMEM; goto out; } + if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) + goto out_free; + + if 
(test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) { + if (!sync) { + status = -EAGAIN; + goto out_free; + } + status = wait_on_bit_lock(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING, + nfs_wait_bit_killable, TASK_KILLABLE); + if (status) + goto out_free; + } + INIT_LIST_HEAD(&data->lseg_list); spin_lock(&inode->i_lock); if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { + clear_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags); spin_unlock(&inode->i_lock); - kfree(data); - goto out; + wake_up_bit(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING); + goto out_free; } pnfs_list_write_lseg(inode, &data->lseg_list); @@ -1475,6 +1489,11 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) status = nfs4_proc_layoutcommit(data, sync); out: + if (status) + mark_inode_dirty_sync(inode); dprintk("<-- %s status %d\n", __func__, status); return status; +out_free: + kfree(data); + goto out; } -- cgit v1.2.3-70-g09d2 From d743c3c9c236cc61403a4f7d6283d59ddf68b2bd Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sun, 23 Oct 2011 20:22:38 -0700 Subject: NFS4: fix cb_recallany decode error craa_type_mask is bitmap4 per RFC5661. We need to expect a length before extracting bitmap value. 
Cc: Alexandros Batsakis Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/callback_xdr.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 918ad647afe..ee1a5b3cd48 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -488,17 +488,18 @@ static __be32 decode_recallany_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_recallanyargs *args) { - __be32 *p; + uint32_t bitmap[2]; + __be32 *p, status; args->craa_addr = svc_addr(rqstp); p = read_buf(xdr, 4); if (unlikely(p == NULL)) return htonl(NFS4ERR_BADXDR); args->craa_objs_to_keep = ntohl(*p++); - p = read_buf(xdr, 4); - if (unlikely(p == NULL)) - return htonl(NFS4ERR_BADXDR); - args->craa_type_mask = ntohl(*p); + status = decode_bitmap(xdr, bitmap); + if (unlikely(status)) + return status; + args->craa_type_mask = bitmap[0]; return 0; } -- cgit v1.2.3-70-g09d2 From c02f557dd0a026d7147da3b6f7daf52c6ff5580f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 25 Oct 2011 12:17:43 -0400 Subject: NFS: Fix documenting comment for nfs_create_request() Clean up: the first parameter of nfs_create_request() has been incorrectly documented since time immemorial (OK, since before 2.6.12). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index b60970cc7f1..0a5ff5c1951 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -41,7 +41,7 @@ nfs_page_free(struct nfs_page *p) /** * nfs_create_request - Create an NFS read/write request. 
- * @file: file descriptor to use + * @ctx: open context to use * @inode: inode to which the request is attached * @page: page to write * @offset: starting offset within the page for the write -- cgit v1.2.3-70-g09d2 From c6e696660213a89a5bfde8b49d539553904c808f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 25 Oct 2011 12:17:53 -0400 Subject: NFS: Clean up nfs4_xdr_dec_secinfo() Clean up: Remove superfluous logic at the tail of nfs4_xdr_dec_secinfo() . Introduced by commit 5a5ea0d4 "NFS: Add secinfo procedure" (March 24, 2011). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1dce12f41a4..e6161b213ed 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -6602,8 +6602,6 @@ static int nfs4_xdr_dec_secinfo(struct rpc_rqst *rqstp, if (status) goto out; status = decode_secinfo(xdr, res); - if (status) - goto out; out: return status; } -- cgit v1.2.3-70-g09d2 From e414966b81a74745ac8d6bfeda0d95fb721e6d91 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 25 Oct 2011 12:18:03 -0400 Subject: NFS: Remove no-op less-than-zero checks on unsigned variables. Introduced by commit 16b374ca "NFSv4.1: pnfs: filelayout: add driver's LAYOUTGET and GETDEVICEINFO infrastructure" (October 20, 2010). 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 09119418402..12185aadb34 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -449,9 +449,8 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, fl->dsaddr = dsaddr; - if (fl->first_stripe_index < 0 || - fl->first_stripe_index >= dsaddr->stripe_count) { - dprintk("%s Bad first_stripe_index %d\n", + if (fl->first_stripe_index >= dsaddr->stripe_count) { + dprintk("%s Bad first_stripe_index %u\n", __func__, fl->first_stripe_index); goto out_put; } @@ -552,7 +551,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, /* Note that a zero value for num_fh is legal for STRIPE_SPARSE. * Futher checking is done in filelayout_check_layout */ - if (fl->num_fh < 0 || fl->num_fh > + if (fl->num_fh > max(NFS4_PNFS_MAX_STRIPE_CNT, NFS4_PNFS_MAX_MULTI_CNT)) goto out_err; -- cgit v1.2.3-70-g09d2 From 6f276e49fd108362be3fd67154aaaacf872ea026 Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Tue, 1 Nov 2011 12:16:15 +0600 Subject: nfs: Fix unused variable warning from file.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following unused variable warning. 
fs/nfs/file.c: In function ‘nfs_file_release’: fs/nfs/file.c:140:17: warning: unused variable ‘dentry’ fs/nfs/file.c: In function ‘nfs_file_read’: fs/nfs/file.c:237:9: warning: unused variable ‘count’ Signed-off-by: Rakib Mullick Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 28b8c3f3cda..bd7dff00110 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -137,11 +137,9 @@ nfs_file_open(struct inode *inode, struct file *filp) static int nfs_file_release(struct inode *inode, struct file *filp) { - struct dentry *dentry = filp->f_path.dentry; - dprintk("NFS: release(%s/%s)\n", - dentry->d_parent->d_name.name, - dentry->d_name.name); + filp->f_path.dentry->d_parent->d_name.name, + filp->f_path.dentry->d_name.name); nfs_inc_stats(inode, NFSIOS_VFSRELEASE); return nfs_release(inode, filp); @@ -234,14 +232,13 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, struct dentry * dentry = iocb->ki_filp->f_path.dentry; struct inode * inode = dentry->d_inode; ssize_t result; - size_t count = iov_length(iov, nr_segs); if (iocb->ki_filp->f_flags & O_DIRECT) return nfs_file_direct_read(iocb, iov, nr_segs, pos); dprintk("NFS: read(%s/%s, %lu@%lu)\n", dentry->d_parent->d_name.name, dentry->d_name.name, - (unsigned long) count, (unsigned long) pos); + (unsigned long) iov_length(iov, nr_segs), (unsigned long) pos); result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); if (!result) { -- cgit v1.2.3-70-g09d2 From 2b72c9ccd22c4a3299e5a358dcd639fb253730f4 Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Tue, 1 Nov 2011 12:23:42 +0600 Subject: nfs: Remove unused variable from write.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_NFS=y and CONFIG_NFS_V3_{,V4}=n we get the following warning. 
fs/nfs/write.c: In function ‘nfs_writeback_done’: fs/nfs/write.c:1246:21: warning: unused variable ‘server’ Remove the variable 'server' to fix the above warning. Signed-off-by: Rakib Mullick Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2219c88d96b..b016b8a3639 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1243,7 +1243,6 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) { struct nfs_writeargs *argp = &data->args; struct nfs_writeres *resp = &data->res; - struct nfs_server *server = NFS_SERVER(data->inode); int status; dprintk("NFS: %5u nfs_writeback_done (status %d)\n", @@ -1277,7 +1276,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) if (time_before(complain, jiffies)) { dprintk("NFS: faulty NFS server %s:" " (committed = %d) != (stable = %d)\n", - server->nfs_client->cl_hostname, + NFS_SERVER(data->inode)->nfs_client->cl_hostname, resp->verf->committed, argp->stable); complain = jiffies + 300 * HZ; } -- cgit v1.2.3-70-g09d2 From 4cdc685c7d06f659ef6c336d4242005cdd8df401 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Mon, 31 Oct 2011 14:45:06 -0700 Subject: pnfs-obj: Remove redundant EOF from objlayout_io_state The EOF calculation was done on .read_pagelist(), cached in objlayout_io_state->eof, and set in objlayout_read_done() into nfs_read_data->res.eof. So set it directly into nfs_read_data->res.eof and avoid the extra member. 
Signed-off-by: Boaz Harrosh Signed-off-by: Trond Myklebust --- fs/nfs/objlayout/objlayout.c | 16 +++++++--------- fs/nfs/objlayout/objlayout.h | 1 - 2 files changed, 7 insertions(+), 10 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 1d06f8e2ade..1300736e0fb 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -287,17 +287,14 @@ static void _rpc_read_complete(struct work_struct *work) void objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync) { - int eof = state->eof; - struct nfs_read_data *rdata; + struct nfs_read_data *rdata = state->rpcdata; state->status = status; - dprintk("%s: Begin status=%zd eof=%d\n", __func__, status, eof); - rdata = state->rpcdata; + dprintk("%s: Begin status=%zd eof=%d\n", __func__, + status, rdata->res.eof); rdata->task.tk_status = status; - if (status >= 0) { + if (status >= 0) rdata->res.count = status; - rdata->res.eof = eof; - } objlayout_iodone(state); /* must not use state after this point */ @@ -330,11 +327,14 @@ objlayout_read_pagelist(struct nfs_read_data *rdata) status = 0; rdata->res.count = 0; rdata->res.eof = 1; + /*FIXME: do we need to call pnfs_ld_read_done() */ goto out; } count = eof - offset; } + rdata->res.eof = (offset + count) >= eof; + state = objlayout_alloc_io_state(NFS_I(rdata->inode)->layout, rdata->args.pages, rdata->args.pgbase, offset, count, @@ -345,8 +345,6 @@ objlayout_read_pagelist(struct nfs_read_data *rdata) goto out; } - state->eof = state->offset + state->count >= eof; - status = objio_read_pagelist(state); out: dprintk("%s: Return status %Zd\n", __func__, status); diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index a8244c8e042..ffb884c6fef 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h @@ -86,7 +86,6 @@ struct objlayout_io_state { void *rpcdata; int status; /* res */ - int eof; /* res */ int committed; /* res */ /* Error 
reporting (layout_return) */ -- cgit v1.2.3-70-g09d2 From e6c40fe3f4c4967f1cb486191ed4a5d5f55f3f7e Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Mon, 31 Oct 2011 14:45:46 -0700 Subject: pnfs-obj: Return PNFS_NOT_ATTEMPTED in case of read/write_pagelist objlayout driver was always returning PNFS_ATTEMPTED from its read/write_pagelist operations, even on error. Fix that. Start by establishing an error return API from io-engine, by not returning ssize_t (length-or-error) but returning "int": 0=OK, <0=Error. And clean up all return types in io-engine. Then, if io-engine returned an error, return PNFS_NOT_ATTEMPTED to the generic layer. (With a dprintk) Signed-off-by: Boaz Harrosh Signed-off-by: Trond Myklebust --- fs/nfs/objlayout/objio_osd.c | 32 ++++++++++++++++---------------- fs/nfs/objlayout/objlayout.c | 36 +++++++++++++++++++----------------- fs/nfs/objlayout/objlayout.h | 4 ++-- 3 files changed, 37 insertions(+), 35 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index d0cda12fddc..0c7c9ec24e6 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -142,7 +142,7 @@ OBJIO_LSEG(struct pnfs_layout_segment *lseg) } struct objio_state; -typedef ssize_t (*objio_done_fn)(struct objio_state *ios); +typedef int (*objio_done_fn)(struct objio_state *ios); struct objio_state { /* Generic layer */ @@ -720,7 +720,7 @@ out: return 0; } -static ssize_t _sync_done(struct objio_state *ios) +static int _sync_done(struct objio_state *ios) { struct completion *waiting = ios->private; @@ -742,10 +742,10 @@ static void _done_io(struct osd_request *or, void *p) kref_put(&ios->kref, _last_io); } -static ssize_t _io_exec(struct objio_state *ios) +static int _io_exec(struct objio_state *ios) { DECLARE_COMPLETION_ONSTACK(wait); - ssize_t status = 0; /* sync status */ + int ret = 0; unsigned i; objio_done_fn saved_done_fn = ios->done; bool sync = ios->ol_state.sync; @@ -771,16 +771,16 @@ static ssize_t 
_io_exec(struct objio_state *ios) if (sync) { wait_for_completion(&wait); - status = saved_done_fn(ios); + ret = saved_done_fn(ios); } - return status; + return ret; } /* * read */ -static ssize_t _read_done(struct objio_state *ios) +static int _read_done(struct objio_state *ios) { ssize_t status; int ret = _io_check(ios, false); @@ -793,7 +793,7 @@ static ssize_t _read_done(struct objio_state *ios) status = ret; objlayout_read_done(&ios->ol_state, status, ios->ol_state.sync); - return status; + return ret; } static int _read_mirrors(struct objio_state *ios, unsigned cur_comp) @@ -833,7 +833,7 @@ err: return ret; } -static ssize_t _read_exec(struct objio_state *ios) +static int _read_exec(struct objio_state *ios) { unsigned i; int ret; @@ -847,14 +847,14 @@ static ssize_t _read_exec(struct objio_state *ios) } ios->done = _read_done; - return _io_exec(ios); /* In sync mode exec returns the io status */ + return _io_exec(ios); err: _io_free(ios); return ret; } -ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state) +int objio_read_pagelist(struct objlayout_io_state *ol_state) { struct objio_state *ios = container_of(ol_state, struct objio_state, ol_state); @@ -870,7 +870,7 @@ ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state) /* * write */ -static ssize_t _write_done(struct objio_state *ios) +static int _write_done(struct objio_state *ios) { ssize_t status; int ret = _io_check(ios, true); @@ -887,7 +887,7 @@ static ssize_t _write_done(struct objio_state *ios) } objlayout_write_done(&ios->ol_state, status, ios->ol_state.sync); - return status; + return ret; } static int _write_mirrors(struct objio_state *ios, unsigned cur_comp) @@ -955,7 +955,7 @@ err: return ret; } -static ssize_t _write_exec(struct objio_state *ios) +static int _write_exec(struct objio_state *ios) { unsigned i; int ret; @@ -969,14 +969,14 @@ static ssize_t _write_exec(struct objio_state *ios) } ios->done = _write_done; - return _io_exec(ios); /* In sync mode exec returns the 
io->status */ + return _io_exec(ios); err: _io_free(ios); return ret; } -ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, bool stable) +int objio_write_pagelist(struct objlayout_io_state *ol_state, bool stable) { struct objio_state *ios = container_of(ol_state, struct objio_state, ol_state); diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 1300736e0fb..99c807df11d 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -315,16 +315,13 @@ objlayout_read_pagelist(struct nfs_read_data *rdata) loff_t offset = rdata->args.offset; size_t count = rdata->args.count; struct objlayout_io_state *state; - ssize_t status = 0; + int err; loff_t eof; - dprintk("%s: Begin inode %p offset %llu count %d\n", - __func__, rdata->inode, offset, (int)count); - eof = i_size_read(rdata->inode); if (unlikely(offset + count > eof)) { if (offset >= eof) { - status = 0; + err = 0; rdata->res.count = 0; rdata->res.eof = 1; /*FIXME: do we need to call pnfs_ld_read_done() */ @@ -341,14 +338,19 @@ objlayout_read_pagelist(struct nfs_read_data *rdata) rdata->lseg, rdata, GFP_KERNEL); if (unlikely(!state)) { - status = -ENOMEM; + err = -ENOMEM; goto out; } + dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n", + __func__, rdata->inode->i_ino, offset, count, rdata->res.eof); - status = objio_read_pagelist(state); + err = objio_read_pagelist(state); out: - dprintk("%s: Return status %Zd\n", __func__, status); - rdata->pnfs_error = status; + if (unlikely(err)) { + rdata->pnfs_error = err; + dprintk("%s: Returned Error %d\n", __func__, err); + return PNFS_NOT_ATTEMPTED; + } return PNFS_ATTEMPTED; } @@ -406,10 +408,7 @@ objlayout_write_pagelist(struct nfs_write_data *wdata, int how) { struct objlayout_io_state *state; - ssize_t status; - - dprintk("%s: Begin inode %p offset %llu count %u\n", - __func__, wdata->inode, wdata->args.offset, wdata->args.count); + int err; state = objlayout_alloc_io_state(NFS_I(wdata->inode)->layout, 
wdata->args.pages, @@ -419,16 +418,19 @@ objlayout_write_pagelist(struct nfs_write_data *wdata, wdata->lseg, wdata, GFP_NOFS); if (unlikely(!state)) { - status = -ENOMEM; + err = -ENOMEM; goto out; } state->sync = how & FLUSH_SYNC; - status = objio_write_pagelist(state, how & FLUSH_STABLE); + err = objio_write_pagelist(state, how & FLUSH_STABLE); out: - dprintk("%s: Return status %Zd\n", __func__, status); - wdata->pnfs_error = status; + if (unlikely(err)) { + wdata->pnfs_error = err; + dprintk("%s: Returned Error %d\n", __func__, err); + return PNFS_NOT_ATTEMPTED; + } return PNFS_ATTEMPTED; } diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index ffb884c6fef..4edac9b6ac0 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h @@ -115,8 +115,8 @@ extern int objio_alloc_io_state( gfp_t gfp_flags); extern void objio_free_io_state(struct objlayout_io_state *state); -extern ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state); -extern ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, +extern int objio_read_pagelist(struct objlayout_io_state *ol_state); +extern int objio_write_pagelist(struct objlayout_io_state *ol_state, bool stable); /* -- cgit v1.2.3-70-g09d2 From 96218556b03d3c6505e2880a097338bf277fd783 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Mon, 31 Oct 2011 14:47:32 -0700 Subject: pnfs-obj: Get rid of objlayout_{alloc,free}_io_state This is part of moving objio_osd to use the ORE. objlayout_io_state had two functions: 1. It was used in the error reporting mechanism at layout_return. This function is kept intact. (A later patch will rename objlayout_io_state => objlayout_io_res) 2. Carrier of rw io members into the objio_read/write_pagelist API. This is removed in this patch. The {r,w}data received from NFS are passed directly to the objio_{read,write}_pagelist API. The io_engine is now allocating its own IO state as part of the read/write. 
The minimal functionality that was part of the generic allocation is passed to the io_engine. So part of this patch is a rename of: ios->ol_state.foo => ios->foo At objlayout_{read,write}_done an objlayout_io_state is passed that denotes the result of the IO. (Hence the later name change). If the IO is successful objlayout calls an objio_free_result() API immediately (which for objio_osd causes the release of the io_state). If the IO ended in an error it is held onto until reported in layout_return and is released later through the objio_free_result() API. (All this is not new, just renamed and cleaned up) Signed-off-by: Boaz Harrosh Signed-off-by: Trond Myklebust --- fs/nfs/objlayout/objio_osd.c | 94 +++++++++++++++++++++----------- fs/nfs/objlayout/objlayout.c | 124 +++++++++++-------------------------------- fs/nfs/objlayout/objlayout.h | 36 ++++++------- 3 files changed, 112 insertions(+), 142 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 0c7c9ec24e6..48eb91aad55 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -148,6 +148,13 @@ struct objio_state { /* Generic layer */ struct objlayout_io_state ol_state; + struct page **pages; + unsigned pgbase; + unsigned nr_pages; + unsigned long count; + loff_t offset; + bool sync; + struct objio_segment *layout; struct kref kref; @@ -394,30 +401,43 @@ void objio_free_lseg(struct pnfs_layout_segment *lseg) kfree(objio_seg); } -int objio_alloc_io_state(struct pnfs_layout_segment *lseg, - struct objlayout_io_state **outp, - gfp_t gfp_flags) +static int +objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, + struct pnfs_layout_segment *lseg, struct page **pages, unsigned pgbase, + loff_t offset, size_t count, void *rpcdata, gfp_t gfp_flags, + struct objio_state **outp) { struct objio_segment *objio_seg = OBJIO_LSEG(lseg); struct objio_state *ios; - const unsigned first_size = sizeof(*ios) + - objio_seg->num_comps * 
sizeof(ios->per_dev[0]); - const unsigned sec_size = objio_seg->num_comps * - sizeof(ios->ol_state.ioerrs[0]); - - ios = kzalloc(first_size + sec_size, gfp_flags); - if (unlikely(!ios)) + struct __alloc_objio_state { + struct objio_state objios; + struct _objio_per_comp per_dev[objio_seg->num_comps]; + struct pnfs_osd_ioerr ioerrs[objio_seg->num_comps]; + } *aos; + + aos = kzalloc(sizeof(*aos), gfp_flags); + if (unlikely(!aos)) return -ENOMEM; - ios->layout = objio_seg; - ios->ol_state.ioerrs = ((void *)ios) + first_size; - ios->ol_state.num_comps = objio_seg->num_comps; + ios = &aos->objios; - *outp = &ios->ol_state; + ios->layout = objio_seg; + objlayout_init_ioerrs(&aos->objios.ol_state, objio_seg->num_comps, + aos->ioerrs, rpcdata, pnfs_layout_type); + + ios->pages = pages; + ios->pgbase = pgbase; + ios->nr_pages = (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT; + ios->offset = offset; + ios->count = count; + ios->sync = 0; + BUG_ON(ios->nr_pages > (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT); + + *outp = ios; return 0; } -void objio_free_io_state(struct objlayout_io_state *ol_state) +void objio_free_result(struct objlayout_io_state *ol_state) { struct objio_state *ios = container_of(ol_state, struct objio_state, ol_state); @@ -598,7 +618,7 @@ static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, if (per_dev->bio == NULL) { unsigned pages_in_stripe = ios->layout->group_width * (ios->layout->stripe_unit / PAGE_SIZE); - unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) / + unsigned bio_size = (ios->nr_pages + pages_in_stripe) / ios->layout->group_width; if (BIO_MAX_PAGES_KMALLOC < bio_size) @@ -615,11 +635,11 @@ static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len); unsigned added_len; - BUG_ON(ios->ol_state.nr_pages <= pg); + BUG_ON(ios->nr_pages <= pg); cur_len -= pglen; added_len = bio_add_pc_page(q, per_dev->bio, - ios->ol_state.pages[pg], pglen, 
pgbase); + ios->pages[pg], pglen, pgbase); if (unlikely(pglen != added_len)) return -ENOMEM; pgbase = 0; @@ -660,7 +680,7 @@ static int _prepare_one_group(struct objio_state *ios, u64 length, cur_len = stripe_unit - si->unit_off; page_off = si->unit_off & ~PAGE_MASK; BUG_ON(page_off && - (page_off != ios->ol_state.pgbase)); + (page_off != ios->pgbase)); } else { /* dev > si->dev */ per_dev->offset = si->obj_offset - si->unit_off; cur_len = stripe_unit; @@ -693,8 +713,8 @@ out: static int _io_rw_pagelist(struct objio_state *ios, gfp_t gfp_flags) { - u64 length = ios->ol_state.count; - u64 offset = ios->ol_state.offset; + u64 length = ios->count; + u64 offset = ios->offset; struct _striping_info si; unsigned last_pg = 0; int ret = 0; @@ -748,7 +768,7 @@ static int _io_exec(struct objio_state *ios) int ret = 0; unsigned i; objio_done_fn saved_done_fn = ios->done; - bool sync = ios->ol_state.sync; + bool sync = ios->sync; if (sync) { ios->done = _sync_done; @@ -792,7 +812,7 @@ static int _read_done(struct objio_state *ios) else status = ret; - objlayout_read_done(&ios->ol_state, status, ios->ol_state.sync); + objlayout_read_done(&ios->ol_state, status, ios->sync); return ret; } @@ -854,12 +874,18 @@ err: return ret; } -int objio_read_pagelist(struct objlayout_io_state *ol_state) +int objio_read_pagelist(struct nfs_read_data *rdata) { - struct objio_state *ios = container_of(ol_state, struct objio_state, - ol_state); + struct objio_state *ios; int ret; + ret = objio_alloc_io_state(NFS_I(rdata->inode)->layout, + rdata->lseg, rdata->args.pages, rdata->args.pgbase, + rdata->args.offset, rdata->args.count, rdata, + GFP_KERNEL, &ios); + if (unlikely(ret)) + return ret; + ret = _io_rw_pagelist(ios, GFP_KERNEL); if (unlikely(ret)) return ret; @@ -886,7 +912,7 @@ static int _write_done(struct objio_state *ios) status = ret; } - objlayout_write_done(&ios->ol_state, status, ios->ol_state.sync); + objlayout_write_done(&ios->ol_state, status, ios->sync); return ret; } @@ -976,12 
+1002,20 @@ err: return ret; } -int objio_write_pagelist(struct objlayout_io_state *ol_state, bool stable) +int objio_write_pagelist(struct nfs_write_data *wdata, int how) { - struct objio_state *ios = container_of(ol_state, struct objio_state, - ol_state); + struct objio_state *ios; int ret; + ret = objio_alloc_io_state(NFS_I(wdata->inode)->layout, + wdata->lseg, wdata->args.pages, wdata->args.pgbase, + wdata->args.offset, wdata->args.count, wdata, GFP_NOFS, + &ios); + if (unlikely(ret)) + return ret; + + ios->sync = 0 != (how & FLUSH_SYNC); + /* TODO: ios->stable = stable; */ ret = _io_rw_pagelist(ios, GFP_NOFS); if (unlikely(ret)) diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 99c807df11d..a82053ae559 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -156,59 +156,23 @@ last_byte_offset(u64 start, u64 len) return end > start ? end - 1 : NFS4_MAX_UINT64; } -static struct objlayout_io_state * -objlayout_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, - struct page **pages, - unsigned pgbase, - loff_t offset, - size_t count, - struct pnfs_layout_segment *lseg, - void *rpcdata, - gfp_t gfp_flags) +void _fix_verify_io_params(struct pnfs_layout_segment *lseg, + struct page ***p_pages, unsigned *p_pgbase, + u64 offset, unsigned long count) { - struct objlayout_io_state *state; u64 lseg_end_offset; - dprintk("%s: allocating io_state\n", __func__); - if (objio_alloc_io_state(lseg, &state, gfp_flags)) - return NULL; - BUG_ON(offset < lseg->pls_range.offset); lseg_end_offset = end_offset(lseg->pls_range.offset, lseg->pls_range.length); BUG_ON(offset >= lseg_end_offset); - if (offset + count > lseg_end_offset) { - count = lseg->pls_range.length - - (offset - lseg->pls_range.offset); - dprintk("%s: truncated count %Zd\n", __func__, count); - } + WARN_ON(offset + count > lseg_end_offset); - if (pgbase > PAGE_SIZE) { - pages += pgbase >> PAGE_SHIFT; - pgbase &= ~PAGE_MASK; + if (*p_pgbase > PAGE_SIZE) { + 
dprintk("%s: pgbase(0x%x) > PAGE_SIZE\n", __func__, *p_pgbase); + *p_pages += *p_pgbase >> PAGE_SHIFT; + *p_pgbase &= ~PAGE_MASK; } - - INIT_LIST_HEAD(&state->err_list); - state->lseg = lseg; - state->rpcdata = rpcdata; - state->pages = pages; - state->pgbase = pgbase; - state->nr_pages = (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT; - state->offset = offset; - state->count = count; - state->sync = 0; - - return state; -} - -static void -objlayout_free_io_state(struct objlayout_io_state *state) -{ - dprintk("%s: freeing io_state\n", __func__); - if (unlikely(!state)) - return; - - objio_free_io_state(state); } /* @@ -217,12 +181,10 @@ objlayout_free_io_state(struct objlayout_io_state *state) static void objlayout_iodone(struct objlayout_io_state *state) { - dprintk("%s: state %p status\n", __func__, state); - if (likely(state->status >= 0)) { - objlayout_free_io_state(state); + objio_free_result(state); } else { - struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout); + struct objlayout *objlay = state->objlay; spin_lock(&objlay->lock); objlay->delta_space_valid = OBJ_DSU_INVALID; @@ -289,15 +251,15 @@ objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync) { struct nfs_read_data *rdata = state->rpcdata; - state->status = status; - dprintk("%s: Begin status=%zd eof=%d\n", __func__, - status, rdata->res.eof); - rdata->task.tk_status = status; + state->status = rdata->task.tk_status = status; if (status >= 0) rdata->res.count = status; objlayout_iodone(state); /* must not use state after this point */ + dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__, + status, rdata->res.eof, sync); + if (sync) pnfs_ld_read_done(rdata); else { @@ -314,7 +276,6 @@ objlayout_read_pagelist(struct nfs_read_data *rdata) { loff_t offset = rdata->args.offset; size_t count = rdata->args.count; - struct objlayout_io_state *state; int err; loff_t eof; @@ -331,20 +292,14 @@ objlayout_read_pagelist(struct nfs_read_data *rdata) } rdata->res.eof = 
(offset + count) >= eof; + _fix_verify_io_params(rdata->lseg, &rdata->args.pages, + &rdata->args.pgbase, + rdata->args.offset, rdata->args.count); - state = objlayout_alloc_io_state(NFS_I(rdata->inode)->layout, - rdata->args.pages, rdata->args.pgbase, - offset, count, - rdata->lseg, rdata, - GFP_KERNEL); - if (unlikely(!state)) { - err = -ENOMEM; - goto out; - } dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n", __func__, rdata->inode->i_ino, offset, count, rdata->res.eof); - err = objio_read_pagelist(state); + err = objio_read_pagelist(rdata); out: if (unlikely(err)) { rdata->pnfs_error = err; @@ -374,23 +329,18 @@ void objlayout_write_done(struct objlayout_io_state *state, ssize_t status, bool sync) { - struct nfs_write_data *wdata; + struct nfs_write_data *wdata = state->rpcdata; - dprintk("%s: Begin\n", __func__); - wdata = state->rpcdata; - state->status = status; - wdata->task.tk_status = status; + state->status = wdata->task.tk_status = status; if (status >= 0) { wdata->res.count = status; wdata->verf.committed = state->committed; - dprintk("%s: Return status %d committed %d\n", - __func__, wdata->task.tk_status, - wdata->verf.committed); - } else - dprintk("%s: Return status %d\n", - __func__, wdata->task.tk_status); + } objlayout_iodone(state); - /* must not use state after this point */ + /* must not use oir after this point */ + + dprintk("%s: Return status %zd committed %d sync=%d\n", __func__, + status, wdata->verf.committed, sync); if (sync) pnfs_ld_write_done(wdata); @@ -407,25 +357,13 @@ enum pnfs_try_status objlayout_write_pagelist(struct nfs_write_data *wdata, int how) { - struct objlayout_io_state *state; int err; - state = objlayout_alloc_io_state(NFS_I(wdata->inode)->layout, - wdata->args.pages, - wdata->args.pgbase, - wdata->args.offset, - wdata->args.count, - wdata->lseg, wdata, - GFP_NOFS); - if (unlikely(!state)) { - err = -ENOMEM; - goto out; - } + _fix_verify_io_params(wdata->lseg, &wdata->args.pages, + &wdata->args.pgbase, + 
wdata->args.offset, wdata->args.count); - state->sync = how & FLUSH_SYNC; - - err = objio_write_pagelist(state, how & FLUSH_STABLE); - out: + err = objio_write_pagelist(wdata, how); if (unlikely(err)) { wdata->pnfs_error = err; dprintk("%s: Returned Error %d\n", __func__, err); @@ -564,7 +502,7 @@ encode_accumulated_error(struct objlayout *objlay, __be32 *p) merge_ioerr(&accumulated_err, ioerr); } list_del(&state->err_list); - objlayout_free_io_state(state); + objio_free_result(state); } pnfs_osd_xdr_encode_ioerr(p, &accumulated_err); @@ -632,7 +570,7 @@ objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay, goto loop_done; } list_del(&state->err_list); - objlayout_free_io_state(state); + objio_free_result(state); } loop_done: spin_unlock(&objlay->lock); diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index 4edac9b6ac0..d7b2ccfa213 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h @@ -75,14 +75,7 @@ OBJLAYOUT(struct pnfs_layout_hdr *lo) * embedded in objects provider io_state data structure */ struct objlayout_io_state { - struct pnfs_layout_segment *lseg; - - struct page **pages; - unsigned pgbase; - unsigned nr_pages; - unsigned long count; - loff_t offset; - bool sync; + struct objlayout *objlay; void *rpcdata; int status; /* res */ @@ -99,6 +92,18 @@ struct objlayout_io_state { struct pnfs_osd_ioerr *ioerrs; }; +static inline +void objlayout_init_ioerrs(struct objlayout_io_state *oir, unsigned num_comps, + struct pnfs_osd_ioerr *ioerrs, void *rpcdata, + struct pnfs_layout_hdr *pnfs_layout_type) +{ + oir->objlay = OBJLAYOUT(pnfs_layout_type); + oir->rpcdata = rpcdata; + INIT_LIST_HEAD(&oir->err_list); + oir->num_comps = num_comps; + oir->ioerrs = ioerrs; +} + /* * Raid engine I/O API */ @@ -109,15 +114,10 @@ extern int objio_alloc_lseg(struct pnfs_layout_segment **outp, gfp_t gfp_flags); extern void objio_free_lseg(struct pnfs_layout_segment *lseg); -extern int objio_alloc_io_state( - struct 
pnfs_layout_segment *lseg, - struct objlayout_io_state **outp, - gfp_t gfp_flags); -extern void objio_free_io_state(struct objlayout_io_state *state); +extern void objio_free_result(struct objlayout_io_state *state); -extern int objio_read_pagelist(struct objlayout_io_state *ol_state); -extern int objio_write_pagelist(struct objlayout_io_state *ol_state, - bool stable); +extern int objio_read_pagelist(struct nfs_read_data *rdata); +extern int objio_write_pagelist(struct nfs_write_data *wdata, int how); /* * callback API @@ -127,10 +127,8 @@ extern void objlayout_io_set_result(struct objlayout_io_state *state, int osd_error, u64 offset, u64 length, bool is_write); static inline void -objlayout_add_delta_space_used(struct objlayout_io_state *state, s64 space_used) +objlayout_add_delta_space_used(struct objlayout *objlay, s64 space_used) { - struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout); - /* If one of the I/Os errored out and the delta_space_used was * invalid we render the complete report as invalid. Protocol mandate * the DSU be accurate or not reported. 
-- cgit v1.2.3-70-g09d2 From e2e04355d9647305c666462a49223f2942a635f0 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Mon, 31 Oct 2011 15:03:35 -0700 Subject: pnfs-obj: Rename objlayout_io_state => objlayout_io_res * All instances of objlayout_io_state => objlayout_io_res * All instances of state => oir; * All instances of ol_state => oir; Big but nothing to it Signed-off-by: Boaz Harrosh Signed-off-by: Trond Myklebust --- fs/nfs/objlayout/objio_osd.c | 17 ++++++------ fs/nfs/objlayout/objlayout.c | 63 ++++++++++++++++++++++---------------------- fs/nfs/objlayout/objlayout.h | 15 ++++++----- 3 files changed, 48 insertions(+), 47 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 48eb91aad55..2347e0ac63e 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -146,7 +146,7 @@ typedef int (*objio_done_fn)(struct objio_state *ios); struct objio_state { /* Generic layer */ - struct objlayout_io_state ol_state; + struct objlayout_io_res oir; struct page **pages; unsigned pgbase; @@ -422,7 +422,7 @@ objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, ios = &aos->objios; ios->layout = objio_seg; - objlayout_init_ioerrs(&aos->objios.ol_state, objio_seg->num_comps, + objlayout_init_ioerrs(&aos->objios.oir, objio_seg->num_comps, aos->ioerrs, rpcdata, pnfs_layout_type); ios->pages = pages; @@ -437,10 +437,9 @@ objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, return 0; } -void objio_free_result(struct objlayout_io_state *ol_state) +void objio_free_result(struct objlayout_io_res *oir) { - struct objio_state *ios = container_of(ol_state, struct objio_state, - ol_state); + struct objio_state *ios = container_of(oir, struct objio_state, oir); kfree(ios); } @@ -519,7 +518,7 @@ static int _io_check(struct objio_state *ios, bool is_write) continue; /* we recovered */ } - objlayout_io_set_result(&ios->ol_state, i, + objlayout_io_set_result(&ios->oir, i, 
&ios->layout->comps[i].oc_object_id, osd_pri_2_pnfs_err(osi.osd_err_pri), ios->per_dev[i].offset, @@ -812,7 +811,7 @@ static int _read_done(struct objio_state *ios) else status = ret; - objlayout_read_done(&ios->ol_state, status, ios->sync); + objlayout_read_done(&ios->oir, status, ios->sync); return ret; } @@ -906,13 +905,13 @@ static int _write_done(struct objio_state *ios) if (likely(!ret)) { /* FIXME: should be based on the OSD's persistence model * See OSD2r05 Section 4.13 Data persistence model */ - ios->ol_state.committed = NFS_FILE_SYNC; + ios->oir.committed = NFS_FILE_SYNC; status = ios->length; } else { status = ret; } - objlayout_write_done(&ios->ol_state, status, ios->sync); + objlayout_write_done(&ios->oir, status, ios->sync); return ret; } diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index a82053ae559..72074e3a04f 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -179,16 +179,16 @@ void _fix_verify_io_params(struct pnfs_layout_segment *lseg, * I/O done common code */ static void -objlayout_iodone(struct objlayout_io_state *state) +objlayout_iodone(struct objlayout_io_res *oir) { - if (likely(state->status >= 0)) { - objio_free_result(state); + if (likely(oir->status >= 0)) { + objio_free_result(oir); } else { - struct objlayout *objlay = state->objlay; + struct objlayout *objlay = oir->objlay; spin_lock(&objlay->lock); objlay->delta_space_valid = OBJ_DSU_INVALID; - list_add(&objlay->err_list, &state->err_list); + list_add(&objlay->err_list, &oir->err_list); spin_unlock(&objlay->lock); } } @@ -200,13 +200,13 @@ objlayout_iodone(struct objlayout_io_state *state) * the error for later reporting at layout-return. 
*/ void -objlayout_io_set_result(struct objlayout_io_state *state, unsigned index, +objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index, struct pnfs_osd_objid *pooid, int osd_error, u64 offset, u64 length, bool is_write) { - struct pnfs_osd_ioerr *ioerr = &state->ioerrs[index]; + struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[index]; - BUG_ON(index >= state->num_comps); + BUG_ON(index >= oir->num_comps); if (osd_error) { ioerr->oer_component = *pooid; ioerr->oer_comp_offset = offset; @@ -247,15 +247,15 @@ static void _rpc_read_complete(struct work_struct *work) } void -objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync) +objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) { - struct nfs_read_data *rdata = state->rpcdata; + struct nfs_read_data *rdata = oir->rpcdata; - state->status = rdata->task.tk_status = status; + oir->status = rdata->task.tk_status = status; if (status >= 0) rdata->res.count = status; - objlayout_iodone(state); - /* must not use state after this point */ + objlayout_iodone(oir); + /* must not use oir after this point */ dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__, status, rdata->res.eof, sync); @@ -326,17 +326,16 @@ static void _rpc_write_complete(struct work_struct *work) } void -objlayout_write_done(struct objlayout_io_state *state, ssize_t status, - bool sync) +objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) { - struct nfs_write_data *wdata = state->rpcdata; + struct nfs_write_data *wdata = oir->rpcdata; - state->status = wdata->task.tk_status = status; + oir->status = wdata->task.tk_status = status; if (status >= 0) { wdata->res.count = status; - wdata->verf.committed = state->committed; + wdata->verf.committed = oir->committed; } - objlayout_iodone(state); + objlayout_iodone(oir); /* must not use oir after this point */ dprintk("%s: Return status %zd committed %d sync=%d\n", __func__, @@ -475,14 +474,14 @@ merge_ioerr(struct 
pnfs_osd_ioerr *dest_err, static void encode_accumulated_error(struct objlayout *objlay, __be32 *p) { - struct objlayout_io_state *state, *tmp; + struct objlayout_io_res *oir, *tmp; struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0}; - list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) { + list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) { unsigned i; - for (i = 0; i < state->num_comps; i++) { - struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i]; + for (i = 0; i < oir->num_comps; i++) { + struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i]; if (!ioerr->oer_errno) continue; @@ -501,8 +500,8 @@ encode_accumulated_error(struct objlayout *objlay, __be32 *p) merge_ioerr(&accumulated_err, ioerr); } - list_del(&state->err_list); - objio_free_result(state); + list_del(&oir->err_list); + objio_free_result(oir); } pnfs_osd_xdr_encode_ioerr(p, &accumulated_err); @@ -514,7 +513,7 @@ objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay, const struct nfs4_layoutreturn_args *args) { struct objlayout *objlay = OBJLAYOUT(pnfslay); - struct objlayout_io_state *state, *tmp; + struct objlayout_io_res *oir, *tmp; __be32 *start; dprintk("%s: Begin\n", __func__); @@ -523,13 +522,13 @@ objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay, spin_lock(&objlay->lock); - list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) { + list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) { __be32 *last_xdr = NULL, *p; unsigned i; int res = 0; - for (i = 0; i < state->num_comps; i++) { - struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i]; + for (i = 0; i < oir->num_comps; i++) { + struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i]; if (!ioerr->oer_errno) continue; @@ -553,7 +552,7 @@ objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay, } last_xdr = p; - pnfs_osd_xdr_encode_ioerr(p, &state->ioerrs[i]); + pnfs_osd_xdr_encode_ioerr(p, &oir->ioerrs[i]); } /* TODO: use xdr_write_pages */ @@ -569,8 +568,8 @@ 
objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay, encode_accumulated_error(objlay, last_xdr); goto loop_done; } - list_del(&state->err_list); - objio_free_result(state); + list_del(&oir->err_list); + objio_free_result(oir); } loop_done: spin_unlock(&objlay->lock); diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index d7b2ccfa213..8ec34727ed2 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h @@ -74,7 +74,7 @@ OBJLAYOUT(struct pnfs_layout_hdr *lo) * per-I/O operation state * embedded in objects provider io_state data structure */ -struct objlayout_io_state { +struct objlayout_io_res { struct objlayout *objlay; void *rpcdata; @@ -93,7 +93,7 @@ struct objlayout_io_state { }; static inline -void objlayout_init_ioerrs(struct objlayout_io_state *oir, unsigned num_comps, +void objlayout_init_ioerrs(struct objlayout_io_res *oir, unsigned num_comps, struct pnfs_osd_ioerr *ioerrs, void *rpcdata, struct pnfs_layout_hdr *pnfs_layout_type) { @@ -114,7 +114,10 @@ extern int objio_alloc_lseg(struct pnfs_layout_segment **outp, gfp_t gfp_flags); extern void objio_free_lseg(struct pnfs_layout_segment *lseg); -extern void objio_free_result(struct objlayout_io_state *state); +/* objio_free_result will free these @oir structs recieved from + * objlayout_{read,write}_done + */ +extern void objio_free_result(struct objlayout_io_res *oir); extern int objio_read_pagelist(struct nfs_read_data *rdata); extern int objio_write_pagelist(struct nfs_write_data *wdata, int how); @@ -122,7 +125,7 @@ extern int objio_write_pagelist(struct nfs_write_data *wdata, int how); /* * callback API */ -extern void objlayout_io_set_result(struct objlayout_io_state *state, +extern void objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index, struct pnfs_osd_objid *pooid, int osd_error, u64 offset, u64 length, bool is_write); @@ -141,9 +144,9 @@ objlayout_add_delta_space_used(struct objlayout *objlay, s64 space_used) 
spin_unlock(&objlay->lock); } -extern void objlayout_read_done(struct objlayout_io_state *state, +extern void objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync); -extern void objlayout_write_done(struct objlayout_io_state *state, +extern void objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync); extern int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, -- cgit v1.2.3-70-g09d2 From af4f5b54bcf0379089d01518e818f37258708fb7 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Mon, 31 Oct 2011 15:04:19 -0700 Subject: pnfs-obj: move to ore 01: ore_layout & ore_components For ease of reviewing, I split the move to ORE into 3 parts: move to ore 01: ore_layout & ore_components; move to ore 02: move to ORE; move to ore 03: Remove old raid engine. This patch modifies the objio_lseg, layout-segment level, and the devices and components arrays to use the ORE types. Although it will be removed soon, the raid engine is also modified so that it actually compiles, and possibly runs, with the new types. So it is the same old raid engine but with some new ORE types. For ease of reviewing, some of the old code is put under "#if 0" but is not removed, so the diff command works better. The old code will be removed in the 3rd patch. 
Signed-off-by: Boaz Harrosh Signed-off-by: Trond Myklebust --- fs/nfs/objlayout/objio_osd.c | 272 ++++++++++++++++++++----------------------- 1 file changed, 128 insertions(+), 144 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 2347e0ac63e..bd7ec26e284 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -38,7 +38,7 @@ */ #include -#include +#include #include "objlayout.h" @@ -52,7 +52,7 @@ enum { BIO_MAX_PAGES_KMALLOC = struct objio_dev_ent { struct nfs4_deviceid_node id_node; - struct osd_dev *od; + struct ore_dev od; }; static void @@ -60,8 +60,8 @@ objio_free_deviceid_node(struct nfs4_deviceid_node *d) { struct objio_dev_ent *de = container_of(d, struct objio_dev_ent, id_node); - dprintk("%s: free od=%p\n", __func__, de->od); - osduld_put_device(de->od); + dprintk("%s: free od=%p\n", __func__, de->od.od); + osduld_put_device(de->od.od); kfree(de); } @@ -98,12 +98,12 @@ _dev_list_add(const struct nfs_server *nfss, nfss->pnfs_curr_ld, nfss->nfs_client, d_id); - de->od = od; + de->od.od = od; d = nfs4_insert_deviceid_node(&de->id_node); n = container_of(d, struct objio_dev_ent, id_node); if (n != de) { - dprintk("%s: Race with other n->od=%p\n", __func__, n->od); + dprintk("%s: Race with other n->od=%p\n", __func__, n->od.od); objio_free_deviceid_node(&de->id_node); de = n; } @@ -111,28 +111,11 @@ _dev_list_add(const struct nfs_server *nfss, return de; } -struct caps_buffers { - u8 caps_key[OSD_CRYPTO_KEYID_SIZE]; - u8 creds[OSD_CAP_LEN]; -}; - struct objio_segment { struct pnfs_layout_segment lseg; - struct pnfs_osd_object_cred *comps; - - unsigned mirrors_p1; - unsigned stripe_unit; - unsigned group_width; /* Data stripe_units without integrity comps */ - u64 group_depth; - unsigned group_count; - - unsigned max_io_size; - - unsigned comps_index; - unsigned num_comps; - /* variable length */ - struct objio_dev_ent *ods[]; + struct ore_layout layout; + struct 
ore_components oc; }; static inline struct objio_segment * @@ -155,7 +138,8 @@ struct objio_state { loff_t offset; bool sync; - struct objio_segment *layout; + struct ore_layout *layout; + struct ore_components *oc; struct kref kref; objio_done_fn done; @@ -175,32 +159,33 @@ struct objio_state { /* Send and wait for a get_device_info of devices in the layout, then look them up with the osd_initiator library */ -static struct objio_dev_ent *_device_lookup(struct pnfs_layout_hdr *pnfslay, - struct objio_segment *objio_seg, unsigned comp, - gfp_t gfp_flags) +static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay, + struct objio_segment *objio_seg, unsigned c, struct nfs4_deviceid *d_id, + gfp_t gfp_flags) { struct pnfs_osd_deviceaddr *deviceaddr; - struct nfs4_deviceid *d_id; struct objio_dev_ent *ode; struct osd_dev *od; struct osd_dev_info odi; int err; - d_id = &objio_seg->comps[comp].oc_object_id.oid_device_id; - ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id); - if (ode) - return ode; + if (ode) { + objio_seg->oc.ods[c] = &ode->od; /* must use container_of */ + return 0; + } err = objlayout_get_deviceinfo(pnfslay, d_id, &deviceaddr, gfp_flags); if (unlikely(err)) { dprintk("%s: objlayout_get_deviceinfo dev(%llx:%llx) =>%d\n", __func__, _DEVID_LO(d_id), _DEVID_HI(d_id), err); - return ERR_PTR(err); + return err; } odi.systemid_len = deviceaddr->oda_systemid.len; if (odi.systemid_len > sizeof(odi.systemid)) { + dprintk("%s: odi.systemid_len > sizeof(systemid=%zd)\n", + __func__, sizeof(odi.systemid)); err = -EINVAL; goto out; } else if (odi.systemid_len) @@ -225,38 +210,15 @@ static struct objio_dev_ent *_device_lookup(struct pnfs_layout_hdr *pnfslay, ode = _dev_list_add(NFS_SERVER(pnfslay->plh_inode), d_id, od, gfp_flags); - + objio_seg->oc.ods[c] = &ode->od; /* must use container_of */ + dprintk("Adding new dev_id(%llx:%llx)\n", + _DEVID_LO(d_id), _DEVID_HI(d_id)); out: - dprintk("%s: return=%d\n", __func__, err); 
objlayout_put_deviceinfo(deviceaddr); - return err ? ERR_PTR(err) : ode; -} - -static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay, - struct objio_segment *objio_seg, - gfp_t gfp_flags) -{ - unsigned i; - int err; - - /* lookup all devices */ - for (i = 0; i < objio_seg->num_comps; i++) { - struct objio_dev_ent *ode; - - ode = _device_lookup(pnfslay, objio_seg, i, gfp_flags); - if (unlikely(IS_ERR(ode))) { - err = PTR_ERR(ode); - goto out; - } - objio_seg->ods[i] = ode; - } - err = 0; - -out: - dprintk("%s: return=%d\n", __func__, err); return err; } +#if 0 static int _verify_data_map(struct pnfs_osd_layout *layout) { struct pnfs_osd_data_map *data_map = &layout->olo_map; @@ -296,23 +258,45 @@ static int _verify_data_map(struct pnfs_osd_layout *layout) return 0; } +#endif -static void copy_single_comp(struct pnfs_osd_object_cred *cur_comp, - struct pnfs_osd_object_cred *src_comp, - struct caps_buffers *caps_p) +static void copy_single_comp(struct ore_components *oc, unsigned c, + struct pnfs_osd_object_cred *src_comp) { - WARN_ON(src_comp->oc_cap_key.cred_len > sizeof(caps_p->caps_key)); - WARN_ON(src_comp->oc_cap.cred_len > sizeof(caps_p->creds)); + struct ore_comp *ocomp = &oc->comps[c]; - *cur_comp = *src_comp; + WARN_ON(src_comp->oc_cap_key.cred_len > 0); /* libosd is NO_SEC only */ + WARN_ON(src_comp->oc_cap.cred_len > sizeof(ocomp->cred)); - memcpy(caps_p->caps_key, src_comp->oc_cap_key.cred, - sizeof(caps_p->caps_key)); - cur_comp->oc_cap_key.cred = caps_p->caps_key; + ocomp->obj.partition = src_comp->oc_object_id.oid_partition_id; + ocomp->obj.id = src_comp->oc_object_id.oid_object_id; - memcpy(caps_p->creds, src_comp->oc_cap.cred, - sizeof(caps_p->creds)); - cur_comp->oc_cap.cred = caps_p->creds; + memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred)); +} + +int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags, + struct objio_segment **pseg) +{ + struct __alloc_objio_segment { + struct objio_segment olseg; + struct ore_dev 
*ods[numdevs]; + struct ore_comp comps[numdevs]; + } *aolseg; + + aolseg = kzalloc(sizeof(*aolseg), gfp_flags); + if (unlikely(!aolseg)) { + dprintk("%s: Faild allocation numdevs=%d size=%zd\n", __func__, + numdevs, sizeof(*aolseg)); + return -ENOMEM; + } + + aolseg->olseg.oc.numdevs = numdevs; + aolseg->olseg.oc.single_comp = EC_MULTPLE_COMPS; + aolseg->olseg.oc.comps = aolseg->comps; + aolseg->olseg.oc.ods = aolseg->ods; + + *pseg = &aolseg->olseg; + return 0; } int objio_alloc_lseg(struct pnfs_layout_segment **outp, @@ -324,59 +308,43 @@ int objio_alloc_lseg(struct pnfs_layout_segment **outp, struct objio_segment *objio_seg; struct pnfs_osd_xdr_decode_layout_iter iter; struct pnfs_osd_layout layout; - struct pnfs_osd_object_cred *cur_comp, src_comp; - struct caps_buffers *caps_p; + struct pnfs_osd_object_cred src_comp; + unsigned cur_comp; int err; err = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr); if (unlikely(err)) return err; - err = _verify_data_map(&layout); + err = __alloc_objio_seg(layout.olo_num_comps, gfp_flags, &objio_seg); if (unlikely(err)) return err; - objio_seg = kzalloc(sizeof(*objio_seg) + - sizeof(objio_seg->ods[0]) * layout.olo_num_comps + - sizeof(*objio_seg->comps) * layout.olo_num_comps + - sizeof(struct caps_buffers) * layout.olo_num_comps, - gfp_flags); - if (!objio_seg) - return -ENOMEM; + objio_seg->layout.stripe_unit = layout.olo_map.odm_stripe_unit; + objio_seg->layout.group_width = layout.olo_map.odm_group_width; + objio_seg->layout.group_depth = layout.olo_map.odm_group_depth; + objio_seg->layout.mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1; + objio_seg->layout.raid_algorithm = layout.olo_map.odm_raid_algorithm; - objio_seg->comps = (void *)(objio_seg->ods + layout.olo_num_comps); - cur_comp = objio_seg->comps; - caps_p = (void *)(cur_comp + layout.olo_num_comps); - while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) - copy_single_comp(cur_comp++, &src_comp, caps_p++); + err = 
ore_verify_layout(layout.olo_map.odm_num_comps, + &objio_seg->layout); if (unlikely(err)) goto err; - objio_seg->num_comps = layout.olo_num_comps; - objio_seg->comps_index = layout.olo_comps_index; - err = objio_devices_lookup(pnfslay, objio_seg, gfp_flags); - if (err) - goto err; - - objio_seg->mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1; - objio_seg->stripe_unit = layout.olo_map.odm_stripe_unit; - if (layout.olo_map.odm_group_width) { - objio_seg->group_width = layout.olo_map.odm_group_width; - objio_seg->group_depth = layout.olo_map.odm_group_depth; - objio_seg->group_count = layout.olo_map.odm_num_comps / - objio_seg->mirrors_p1 / - objio_seg->group_width; - } else { - objio_seg->group_width = layout.olo_map.odm_num_comps / - objio_seg->mirrors_p1; - objio_seg->group_depth = -1; - objio_seg->group_count = 1; + objio_seg->oc.first_dev = layout.olo_comps_index; + cur_comp = 0; + while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) { + copy_single_comp(&objio_seg->oc, cur_comp, &src_comp); + err = objio_devices_lookup(pnfslay, objio_seg, cur_comp, + &src_comp.oc_object_id.oid_device_id, + gfp_flags); + if (err) + goto err; + ++cur_comp; } - - /* Cache this calculation it will hit for every page */ - objio_seg->max_io_size = (BIO_MAX_PAGES_KMALLOC * PAGE_SIZE - - objio_seg->stripe_unit) * - objio_seg->group_width; + /* pnfs_osd_xdr_decode_layout_comp returns false on error */ + if (unlikely(err)) + goto err; *outp = &objio_seg->lseg; return 0; @@ -393,10 +361,14 @@ void objio_free_lseg(struct pnfs_layout_segment *lseg) int i; struct objio_segment *objio_seg = OBJIO_LSEG(lseg); - for (i = 0; i < objio_seg->num_comps; i++) { - if (!objio_seg->ods[i]) + for (i = 0; i < objio_seg->oc.numdevs; i++) { + struct ore_dev *od = objio_seg->oc.ods[i]; + struct objio_dev_ent *ode; + + if (!od) break; - nfs4_put_deviceid_node(&objio_seg->ods[i]->id_node); + ode = container_of(od, typeof(*ode), od); + nfs4_put_deviceid_node(&ode->id_node); } kfree(objio_seg); } 
@@ -411,8 +383,8 @@ objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, struct objio_state *ios; struct __alloc_objio_state { struct objio_state objios; - struct _objio_per_comp per_dev[objio_seg->num_comps]; - struct pnfs_osd_ioerr ioerrs[objio_seg->num_comps]; + struct _objio_per_comp per_dev[objio_seg->oc.numdevs]; + struct pnfs_osd_ioerr ioerrs[objio_seg->oc.numdevs]; } *aos; aos = kzalloc(sizeof(*aos), gfp_flags); @@ -421,8 +393,9 @@ objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, ios = &aos->objios; - ios->layout = objio_seg; - objlayout_init_ioerrs(&aos->objios.oir, objio_seg->num_comps, + ios->layout = &objio_seg->layout; + ios->oc = &objio_seg->oc; + objlayout_init_ioerrs(&aos->objios.oir, objio_seg->oc.numdevs, aos->ioerrs, rpcdata, pnfs_layout_type); ios->pages = pages; @@ -474,6 +447,27 @@ enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep) } } +static void __on_dev_error(struct objio_state *ios, bool is_write, + struct ore_dev *od, unsigned dev_index, enum osd_err_priority oep, + u64 dev_offset, u64 dev_len) +{ + struct objio_state *objios = ios->private; + struct pnfs_osd_objid pooid; + struct objio_dev_ent *ode = container_of(od, typeof(*ode), od); + /* FIXME: what to do with more-then-one-group layouts. 
We need to + * translate from ore_io_state index to oc->comps index + */ + unsigned comp = dev_index; + + pooid.oid_device_id = ode->id_node.deviceid; + pooid.oid_partition_id = ios->oc->comps[comp].obj.partition; + pooid.oid_object_id = ios->oc->comps[comp].obj.id; + + objlayout_io_set_result(&objios->oir, comp, + &pooid, osd_pri_2_pnfs_err(oep), + dev_offset, dev_len, is_write); +} + static void _clear_bio(struct bio *bio) { struct bio_vec *bv; @@ -518,12 +512,9 @@ static int _io_check(struct objio_state *ios, bool is_write) continue; /* we recovered */ } - objlayout_io_set_result(&ios->oir, i, - &ios->layout->comps[i].oc_object_id, - osd_pri_2_pnfs_err(osi.osd_err_pri), - ios->per_dev[i].offset, - ios->per_dev[i].length, - is_write); + __on_dev_error(ios, is_write, ios->oc->ods[i], + ios->per_dev[i].dev, osi.osd_err_pri, + ios->per_dev[i].offset, ios->per_dev[i].length); if (osi.osd_err_pri >= oep) { oep = osi.osd_err_pri; @@ -558,11 +549,11 @@ static void _io_free(struct objio_state *ios) struct osd_dev *_io_od(struct objio_state *ios, unsigned dev) { - unsigned min_dev = ios->layout->comps_index; - unsigned max_dev = min_dev + ios->layout->num_comps; + unsigned min_dev = ios->oc->first_dev; + unsigned max_dev = min_dev + ios->oc->numdevs; BUG_ON(dev < min_dev || max_dev <= dev); - return ios->layout->ods[dev - min_dev]->od; + return ios->oc->ods[dev - min_dev]->od; } struct _striping_info { @@ -820,12 +811,9 @@ static int _read_mirrors(struct objio_state *ios, unsigned cur_comp) struct osd_request *or = NULL; struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; unsigned dev = per_dev->dev; - struct pnfs_osd_object_cred *cred = - &ios->layout->comps[cur_comp]; - struct osd_obj_id obj = { - .partition = cred->oc_object_id.oid_partition_id, - .id = cred->oc_object_id.oid_object_id, - }; + struct ore_comp *cred = + &ios->oc->comps[cur_comp]; + struct osd_obj_id obj = cred->obj; int ret; or = osd_start_request(_io_od(ios, dev), GFP_KERNEL); @@ -837,7 +825,7 
@@ static int _read_mirrors(struct objio_state *ios, unsigned cur_comp) osd_req_read(or, &obj, per_dev->offset, per_dev->bio, per_dev->length); - ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL); + ret = osd_finalize_request(or, 0, cred->cred, NULL); if (ret) { dprintk("%s: Faild to osd_finalize_request() => %d\n", __func__, ret); @@ -924,12 +912,8 @@ static int _write_mirrors(struct objio_state *ios, unsigned cur_comp) for (; cur_comp < last_comp; ++cur_comp, ++dev) { struct osd_request *or = NULL; - struct pnfs_osd_object_cred *cred = - &ios->layout->comps[cur_comp]; - struct osd_obj_id obj = { - .partition = cred->oc_object_id.oid_partition_id, - .id = cred->oc_object_id.oid_object_id, - }; + struct ore_comp *cred = &ios->oc->comps[cur_comp]; + struct osd_obj_id obj = cred->obj; struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; struct bio *bio; @@ -964,7 +948,7 @@ static int _write_mirrors(struct objio_state *ios, unsigned cur_comp) osd_req_write(or, &obj, per_dev->offset, bio, per_dev->length); - ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL); + ret = osd_finalize_request(or, 0, cred->cred, NULL); if (ret) { dprintk("%s: Faild to osd_finalize_request() => %d\n", __func__, ret); @@ -1030,7 +1014,7 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, return false; return pgio->pg_count + req->wb_bytes <= - OBJIO_LSEG(pgio->pg_lseg)->max_io_size; + OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; } static const struct nfs_pageio_ops objio_pg_read_ops = { -- cgit v1.2.3-70-g09d2 From eecfc6312a24e6d0d2883de0a9a6ccf8e993f472 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Mon, 31 Oct 2011 15:15:38 -0700 Subject: pnfs-obj: move to ore 02: move to ORE In this patch we are actually moving to the ORE. (Object Raid Engine). objio_state holds a pointer to an ore_io_state. Once we have an ore_io_state at hand we can call the ore for reading/writing. We register on the done path to kick off the nfs io_done mechanism. 
Again, for ease of reviewing, the old code is wrapped in "#if 0" but is not removed, so the diff command works better. The old code will be removed in the next patch. fs/exofs/Kconfig::ORE is modified to also be auto-included if PNFS_OBJLAYOUT is set, since we now depend on ORE. (See comments in fs/exofs/Kconfig) Signed-off-by: Boaz Harrosh Signed-off-by: Trond Myklebust --- fs/exofs/Kconfig | 2 +- fs/nfs/objlayout/objio_osd.c | 133 +++++++++++++++++++------------------------ 2 files changed, 60 insertions(+), 75 deletions(-) (limited to 'fs/nfs') diff --git a/fs/exofs/Kconfig b/fs/exofs/Kconfig index fa9a286c877..da42f32c49b 100644 --- a/fs/exofs/Kconfig +++ b/fs/exofs/Kconfig @@ -5,7 +5,7 @@ # selected by any of the users. config ORE tristate - depends on EXOFS_FS + depends on EXOFS_FS || PNFS_OBJLAYOUT select ASYNC_XOR default SCSI_OSD_ULD diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index bd7ec26e284..00b384934c3 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -44,12 +44,6 @@ #define NFSDBG_FACILITY NFSDBG_PNFS_LD -#define _LLU(x) ((unsigned long long)x) - -enum { BIO_MAX_PAGES_KMALLOC = - (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec), -}; - struct objio_dev_ent { struct nfs4_deviceid_node id_node; struct ore_dev od; @@ -124,37 +118,13 @@ OBJIO_LSEG(struct pnfs_layout_segment *lseg) return container_of(lseg, struct objio_segment, lseg); } -struct objio_state; -typedef int (*objio_done_fn)(struct objio_state *ios); - struct objio_state { /* Generic layer */ struct objlayout_io_res oir; - struct page **pages; - unsigned pgbase; - unsigned nr_pages; - unsigned long count; - loff_t offset; bool sync; - - struct ore_layout *layout; - struct ore_components *oc; - - struct kref kref; - objio_done_fn done; - void *private; - - unsigned long length; - unsigned numdevs; /* Actually used devs in this IO */ - /* A per-device variable array of size numdevs */ - struct _objio_per_comp { - struct bio *bio; - struct
osd_request *or; - unsigned long length; - u64 offset; - unsigned dev; - } per_dev[]; + /*FIXME: Support for extra_bytes at ore_get_rw_state() */ + struct ore_io_state *ios; }; /* Send and wait for a get_device_info of devices in the layout, @@ -374,16 +344,16 @@ void objio_free_lseg(struct pnfs_layout_segment *lseg) } static int -objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, +objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, bool is_reading, struct pnfs_layout_segment *lseg, struct page **pages, unsigned pgbase, loff_t offset, size_t count, void *rpcdata, gfp_t gfp_flags, struct objio_state **outp) { struct objio_segment *objio_seg = OBJIO_LSEG(lseg); - struct objio_state *ios; + struct ore_io_state *ios; + int ret; struct __alloc_objio_state { struct objio_state objios; - struct _objio_per_comp per_dev[objio_seg->oc.numdevs]; struct pnfs_osd_ioerr ioerrs[objio_seg->oc.numdevs]; } *aos; @@ -391,30 +361,33 @@ objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, if (unlikely(!aos)) return -ENOMEM; - ios = &aos->objios; - - ios->layout = &objio_seg->layout; - ios->oc = &objio_seg->oc; objlayout_init_ioerrs(&aos->objios.oir, objio_seg->oc.numdevs, aos->ioerrs, rpcdata, pnfs_layout_type); + ret = ore_get_rw_state(&objio_seg->layout, &objio_seg->oc, is_reading, + offset, count, &ios); + if (unlikely(ret)) { + kfree(aos); + return ret; + } + ios->pages = pages; ios->pgbase = pgbase; - ios->nr_pages = (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT; - ios->offset = offset; - ios->count = count; - ios->sync = 0; + ios->private = aos; BUG_ON(ios->nr_pages > (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT); - *outp = ios; + aos->objios.sync = 0; + aos->objios.ios = ios; + *outp = &aos->objios; return 0; } void objio_free_result(struct objlayout_io_res *oir) { - struct objio_state *ios = container_of(oir, struct objio_state, oir); + struct objio_state *objios = container_of(oir, struct objio_state, oir); - kfree(ios); + 
ore_put_io_state(objios->ios); + kfree(objios); } enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep) @@ -447,7 +420,7 @@ enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep) } } -static void __on_dev_error(struct objio_state *ios, bool is_write, +static void __on_dev_error(struct ore_io_state *ios, struct ore_dev *od, unsigned dev_index, enum osd_err_priority oep, u64 dev_offset, u64 dev_len) { @@ -465,9 +438,10 @@ static void __on_dev_error(struct objio_state *ios, bool is_write, objlayout_io_set_result(&objios->oir, comp, &pooid, osd_pri_2_pnfs_err(oep), - dev_offset, dev_len, is_write); + dev_offset, dev_len, !ios->reading); } +#if 0 static void _clear_bio(struct bio *bio) { struct bio_vec *bv; @@ -786,26 +760,28 @@ static int _io_exec(struct objio_state *ios) return ret; } +#endif /* * read */ -static int _read_done(struct objio_state *ios) +static void _read_done(struct ore_io_state *ios, void *private) { + struct objio_state *objios = private; ssize_t status; - int ret = _io_check(ios, false); + int ret = ore_check_io(ios, &__on_dev_error); - _io_free(ios); + /* FIXME: _io_free(ios) can we dealocate the libosd resources; */ if (likely(!ret)) status = ios->length; else status = ret; - objlayout_read_done(&ios->oir, status, ios->sync); - return ret; + objlayout_read_done(&objios->oir, status, objios->sync); } +#if 0 static int _read_mirrors(struct objio_state *ios, unsigned cur_comp) { struct osd_request *or = NULL; @@ -860,49 +836,50 @@ err: _io_free(ios); return ret; } +#endif int objio_read_pagelist(struct nfs_read_data *rdata) { - struct objio_state *ios; + struct objio_state *objios; int ret; - ret = objio_alloc_io_state(NFS_I(rdata->inode)->layout, + ret = objio_alloc_io_state(NFS_I(rdata->inode)->layout, true, rdata->lseg, rdata->args.pages, rdata->args.pgbase, rdata->args.offset, rdata->args.count, rdata, - GFP_KERNEL, &ios); - if (unlikely(ret)) - return ret; - - ret = _io_rw_pagelist(ios, GFP_KERNEL); + GFP_KERNEL, 
&objios); if (unlikely(ret)) return ret; - return _read_exec(ios); + objios->ios->done = _read_done; + dprintk("%s: offset=0x%llx length=0x%x\n", __func__, + rdata->args.offset, rdata->args.count); + return ore_read(objios->ios); } /* * write */ -static int _write_done(struct objio_state *ios) +static void _write_done(struct ore_io_state *ios, void *private) { + struct objio_state *objios = private; ssize_t status; - int ret = _io_check(ios, true); + int ret = ore_check_io(ios, &__on_dev_error); - _io_free(ios); + /* FIXME: _io_free(ios) can we dealocate the libosd resources; */ if (likely(!ret)) { /* FIXME: should be based on the OSD's persistence model * See OSD2r05 Section 4.13 Data persistence model */ - ios->oir.committed = NFS_FILE_SYNC; + objios->oir.committed = NFS_FILE_SYNC; status = ios->length; } else { status = ret; } - objlayout_write_done(&ios->oir, status, ios->sync); - return ret; + objlayout_write_done(&objios->oir, status, objios->sync); } +#if 0 static int _write_mirrors(struct objio_state *ios, unsigned cur_comp) { struct _objio_per_comp *master_dev = &ios->per_dev[cur_comp]; @@ -984,27 +961,35 @@ err: _io_free(ios); return ret; } +#endif int objio_write_pagelist(struct nfs_write_data *wdata, int how) { - struct objio_state *ios; + struct objio_state *objios; int ret; - ret = objio_alloc_io_state(NFS_I(wdata->inode)->layout, + ret = objio_alloc_io_state(NFS_I(wdata->inode)->layout, false, wdata->lseg, wdata->args.pages, wdata->args.pgbase, wdata->args.offset, wdata->args.count, wdata, GFP_NOFS, - &ios); + &objios); if (unlikely(ret)) return ret; - ios->sync = 0 != (how & FLUSH_SYNC); + objios->sync = 0 != (how & FLUSH_SYNC); - /* TODO: ios->stable = stable; */ - ret = _io_rw_pagelist(ios, GFP_NOFS); + if (!objios->sync) + objios->ios->done = _write_done; + + dprintk("%s: offset=0x%llx length=0x%x\n", __func__, + wdata->args.offset, wdata->args.count); + ret = ore_write(objios->ios); if (unlikely(ret)) return ret; - return _write_exec(ios); + if 
(objios->sync) + _write_done(objios->ios, objios); + + return 0; } static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, -- cgit v1.2.3-70-g09d2 From 04291b628c450ab6fdb606836585f16336662a4e Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Mon, 31 Oct 2011 15:16:15 -0700 Subject: pnfs-obj: move to ore 03: Remove old raid engine Finally remove all the old raid engine, which is by now dead code. Signed-off-by: Boaz Harrosh Signed-off-by: Trond Myklebust --- fs/nfs/objlayout/objio_osd.c | 504 ------------------------------------------- 1 file changed, 504 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 00b384934c3..3161da654a9 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -188,48 +188,6 @@ out: return err; } -#if 0 -static int _verify_data_map(struct pnfs_osd_layout *layout) -{ - struct pnfs_osd_data_map *data_map = &layout->olo_map; - u64 stripe_length; - u32 group_width; - -/* FIXME: Only raid0 for now. 
if not go through MDS */ - if (data_map->odm_raid_algorithm != PNFS_OSD_RAID_0) { - printk(KERN_ERR "Only RAID_0 for now\n"); - return -ENOTSUPP; - } - if (0 != (data_map->odm_num_comps % (data_map->odm_mirror_cnt + 1))) { - printk(KERN_ERR "Data Map wrong, num_comps=%u mirrors=%u\n", - data_map->odm_num_comps, data_map->odm_mirror_cnt); - return -EINVAL; - } - - if (data_map->odm_group_width) - group_width = data_map->odm_group_width; - else - group_width = data_map->odm_num_comps / - (data_map->odm_mirror_cnt + 1); - - stripe_length = (u64)data_map->odm_stripe_unit * group_width; - if (stripe_length >= (1ULL << 32)) { - printk(KERN_ERR "Total Stripe length(0x%llx)" - " >= 32bit is not supported\n", _LLU(stripe_length)); - return -ENOTSUPP; - } - - if (0 != (data_map->odm_stripe_unit & ~PAGE_MASK)) { - printk(KERN_ERR "Stripe Unit(0x%llx)" - " must be Multples of PAGE_SIZE(0x%lx)\n", - _LLU(data_map->odm_stripe_unit), PAGE_SIZE); - return -ENOTSUPP; - } - - return 0; -} -#endif - static void copy_single_comp(struct ore_components *oc, unsigned c, struct pnfs_osd_object_cred *src_comp) { @@ -441,327 +399,6 @@ static void __on_dev_error(struct ore_io_state *ios, dev_offset, dev_len, !ios->reading); } -#if 0 -static void _clear_bio(struct bio *bio) -{ - struct bio_vec *bv; - unsigned i; - - __bio_for_each_segment(bv, bio, i, 0) { - unsigned this_count = bv->bv_len; - - if (likely(PAGE_SIZE == this_count)) - clear_highpage(bv->bv_page); - else - zero_user(bv->bv_page, bv->bv_offset, this_count); - } -} - -static int _io_check(struct objio_state *ios, bool is_write) -{ - enum osd_err_priority oep = OSD_ERR_PRI_NO_ERROR; - int lin_ret = 0; - int i; - - for (i = 0; i < ios->numdevs; i++) { - struct osd_sense_info osi; - struct osd_request *or = ios->per_dev[i].or; - int ret; - - if (!or) - continue; - - ret = osd_req_decode_sense(or, &osi); - if (likely(!ret)) - continue; - - if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) { - /* start read offset passed endof file */ - 
BUG_ON(is_write); - _clear_bio(ios->per_dev[i].bio); - dprintk("%s: start read offset passed end of file " - "offset=0x%llx, length=0x%lx\n", __func__, - _LLU(ios->per_dev[i].offset), - ios->per_dev[i].length); - - continue; /* we recovered */ - } - __on_dev_error(ios, is_write, ios->oc->ods[i], - ios->per_dev[i].dev, osi.osd_err_pri, - ios->per_dev[i].offset, ios->per_dev[i].length); - - if (osi.osd_err_pri >= oep) { - oep = osi.osd_err_pri; - lin_ret = ret; - } - } - - return lin_ret; -} - -/* - * Common IO state helpers. - */ -static void _io_free(struct objio_state *ios) -{ - unsigned i; - - for (i = 0; i < ios->numdevs; i++) { - struct _objio_per_comp *per_dev = &ios->per_dev[i]; - - if (per_dev->or) { - osd_end_request(per_dev->or); - per_dev->or = NULL; - } - - if (per_dev->bio) { - bio_put(per_dev->bio); - per_dev->bio = NULL; - } - } -} - -struct osd_dev *_io_od(struct objio_state *ios, unsigned dev) -{ - unsigned min_dev = ios->oc->first_dev; - unsigned max_dev = min_dev + ios->oc->numdevs; - - BUG_ON(dev < min_dev || max_dev <= dev); - return ios->oc->ods[dev - min_dev]->od; -} - -struct _striping_info { - u64 obj_offset; - u64 group_length; - unsigned dev; - unsigned unit_off; -}; - -static void _calc_stripe_info(struct objio_state *ios, u64 file_offset, - struct _striping_info *si) -{ - u32 stripe_unit = ios->layout->stripe_unit; - u32 group_width = ios->layout->group_width; - u64 group_depth = ios->layout->group_depth; - u32 U = stripe_unit * group_width; - - u64 T = U * group_depth; - u64 S = T * ios->layout->group_count; - u64 M = div64_u64(file_offset, S); - - /* - G = (L - (M * S)) / T - H = (L - (M * S)) % T - */ - u64 LmodU = file_offset - M * S; - u32 G = div64_u64(LmodU, T); - u64 H = LmodU - G * T; - - u32 N = div_u64(H, U); - - div_u64_rem(file_offset, stripe_unit, &si->unit_off); - si->obj_offset = si->unit_off + (N * stripe_unit) + - (M * group_depth * stripe_unit); - - /* "H - (N * U)" is just "H % U" so it's bound to u32 */ - si->dev = 
(u32)(H - (N * U)) / stripe_unit + G * group_width; - si->dev *= ios->layout->mirrors_p1; - - si->group_length = T - H; -} - -static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, - unsigned pgbase, struct _objio_per_comp *per_dev, int len, - gfp_t gfp_flags) -{ - unsigned pg = *cur_pg; - int cur_len = len; - struct request_queue *q = - osd_request_queue(_io_od(ios, per_dev->dev)); - - if (per_dev->bio == NULL) { - unsigned pages_in_stripe = ios->layout->group_width * - (ios->layout->stripe_unit / PAGE_SIZE); - unsigned bio_size = (ios->nr_pages + pages_in_stripe) / - ios->layout->group_width; - - if (BIO_MAX_PAGES_KMALLOC < bio_size) - bio_size = BIO_MAX_PAGES_KMALLOC; - - per_dev->bio = bio_kmalloc(gfp_flags, bio_size); - if (unlikely(!per_dev->bio)) { - dprintk("Faild to allocate BIO size=%u\n", bio_size); - return -ENOMEM; - } - } - - while (cur_len > 0) { - unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len); - unsigned added_len; - - BUG_ON(ios->nr_pages <= pg); - cur_len -= pglen; - - added_len = bio_add_pc_page(q, per_dev->bio, - ios->pages[pg], pglen, pgbase); - if (unlikely(pglen != added_len)) - return -ENOMEM; - pgbase = 0; - ++pg; - } - BUG_ON(cur_len); - - per_dev->length += len; - *cur_pg = pg; - return 0; -} - -static int _prepare_one_group(struct objio_state *ios, u64 length, - struct _striping_info *si, unsigned *last_pg, - gfp_t gfp_flags) -{ - unsigned stripe_unit = ios->layout->stripe_unit; - unsigned mirrors_p1 = ios->layout->mirrors_p1; - unsigned devs_in_group = ios->layout->group_width * mirrors_p1; - unsigned dev = si->dev; - unsigned first_dev = dev - (dev % devs_in_group); - unsigned max_comp = ios->numdevs ? 
ios->numdevs - mirrors_p1 : 0; - unsigned cur_pg = *last_pg; - int ret = 0; - - while (length) { - struct _objio_per_comp *per_dev = &ios->per_dev[dev - first_dev]; - unsigned cur_len, page_off = 0; - - if (!per_dev->length) { - per_dev->dev = dev; - if (dev < si->dev) { - per_dev->offset = si->obj_offset + stripe_unit - - si->unit_off; - cur_len = stripe_unit; - } else if (dev == si->dev) { - per_dev->offset = si->obj_offset; - cur_len = stripe_unit - si->unit_off; - page_off = si->unit_off & ~PAGE_MASK; - BUG_ON(page_off && - (page_off != ios->pgbase)); - } else { /* dev > si->dev */ - per_dev->offset = si->obj_offset - si->unit_off; - cur_len = stripe_unit; - } - - if (max_comp < dev - first_dev) - max_comp = dev - first_dev; - } else { - cur_len = stripe_unit; - } - if (cur_len >= length) - cur_len = length; - - ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev, - cur_len, gfp_flags); - if (unlikely(ret)) - goto out; - - dev += mirrors_p1; - dev = (dev % devs_in_group) + first_dev; - - length -= cur_len; - ios->length += cur_len; - } -out: - ios->numdevs = max_comp + mirrors_p1; - *last_pg = cur_pg; - return ret; -} - -static int _io_rw_pagelist(struct objio_state *ios, gfp_t gfp_flags) -{ - u64 length = ios->count; - u64 offset = ios->offset; - struct _striping_info si; - unsigned last_pg = 0; - int ret = 0; - - while (length) { - _calc_stripe_info(ios, offset, &si); - - if (length < si.group_length) - si.group_length = length; - - ret = _prepare_one_group(ios, si.group_length, &si, &last_pg, gfp_flags); - if (unlikely(ret)) - goto out; - - offset += si.group_length; - length -= si.group_length; - } - -out: - if (!ios->length) - return ret; - - return 0; -} - -static int _sync_done(struct objio_state *ios) -{ - struct completion *waiting = ios->private; - - complete(waiting); - return 0; -} - -static void _last_io(struct kref *kref) -{ - struct objio_state *ios = container_of(kref, struct objio_state, kref); - - ios->done(ios); -} - -static void 
_done_io(struct osd_request *or, void *p) -{ - struct objio_state *ios = p; - - kref_put(&ios->kref, _last_io); -} - -static int _io_exec(struct objio_state *ios) -{ - DECLARE_COMPLETION_ONSTACK(wait); - int ret = 0; - unsigned i; - objio_done_fn saved_done_fn = ios->done; - bool sync = ios->sync; - - if (sync) { - ios->done = _sync_done; - ios->private = &wait; - } - - kref_init(&ios->kref); - - for (i = 0; i < ios->numdevs; i++) { - struct osd_request *or = ios->per_dev[i].or; - - if (!or) - continue; - - kref_get(&ios->kref); - osd_execute_request_async(or, _done_io, ios); - } - - kref_put(&ios->kref, _last_io); - - if (sync) { - wait_for_completion(&wait); - ret = saved_done_fn(ios); - } - - return ret; -} -#endif - /* * read */ @@ -781,63 +418,6 @@ static void _read_done(struct ore_io_state *ios, void *private) objlayout_read_done(&objios->oir, status, objios->sync); } -#if 0 -static int _read_mirrors(struct objio_state *ios, unsigned cur_comp) -{ - struct osd_request *or = NULL; - struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; - unsigned dev = per_dev->dev; - struct ore_comp *cred = - &ios->oc->comps[cur_comp]; - struct osd_obj_id obj = cred->obj; - int ret; - - or = osd_start_request(_io_od(ios, dev), GFP_KERNEL); - if (unlikely(!or)) { - ret = -ENOMEM; - goto err; - } - per_dev->or = or; - - osd_req_read(or, &obj, per_dev->offset, per_dev->bio, per_dev->length); - - ret = osd_finalize_request(or, 0, cred->cred, NULL); - if (ret) { - dprintk("%s: Faild to osd_finalize_request() => %d\n", - __func__, ret); - goto err; - } - - dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n", - __func__, cur_comp, dev, obj.id, _LLU(per_dev->offset), - per_dev->length); - -err: - return ret; -} - -static int _read_exec(struct objio_state *ios) -{ - unsigned i; - int ret; - - for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { - if (!ios->per_dev[i].length) - continue; - ret = _read_mirrors(ios, i); - if (unlikely(ret)) - goto err; - } - - 
ios->done = _read_done; - return _io_exec(ios); - -err: - _io_free(ios); - return ret; -} -#endif - int objio_read_pagelist(struct nfs_read_data *rdata) { struct objio_state *objios; @@ -879,90 +459,6 @@ static void _write_done(struct ore_io_state *ios, void *private) objlayout_write_done(&objios->oir, status, objios->sync); } -#if 0 -static int _write_mirrors(struct objio_state *ios, unsigned cur_comp) -{ - struct _objio_per_comp *master_dev = &ios->per_dev[cur_comp]; - unsigned dev = ios->per_dev[cur_comp].dev; - unsigned last_comp = cur_comp + ios->layout->mirrors_p1; - int ret; - - for (; cur_comp < last_comp; ++cur_comp, ++dev) { - struct osd_request *or = NULL; - struct ore_comp *cred = &ios->oc->comps[cur_comp]; - struct osd_obj_id obj = cred->obj; - struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; - struct bio *bio; - - or = osd_start_request(_io_od(ios, dev), GFP_NOFS); - if (unlikely(!or)) { - ret = -ENOMEM; - goto err; - } - per_dev->or = or; - - if (per_dev != master_dev) { - bio = bio_kmalloc(GFP_NOFS, - master_dev->bio->bi_max_vecs); - if (unlikely(!bio)) { - dprintk("Faild to allocate BIO size=%u\n", - master_dev->bio->bi_max_vecs); - ret = -ENOMEM; - goto err; - } - - __bio_clone(bio, master_dev->bio); - bio->bi_bdev = NULL; - bio->bi_next = NULL; - per_dev->bio = bio; - per_dev->dev = dev; - per_dev->length = master_dev->length; - per_dev->offset = master_dev->offset; - } else { - bio = master_dev->bio; - bio->bi_rw |= REQ_WRITE; - } - - osd_req_write(or, &obj, per_dev->offset, bio, per_dev->length); - - ret = osd_finalize_request(or, 0, cred->cred, NULL); - if (ret) { - dprintk("%s: Faild to osd_finalize_request() => %d\n", - __func__, ret); - goto err; - } - - dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n", - __func__, cur_comp, dev, obj.id, _LLU(per_dev->offset), - per_dev->length); - } - -err: - return ret; -} - -static int _write_exec(struct objio_state *ios) -{ - unsigned i; - int ret; - - for (i = 0; i < 
ios->numdevs; i += ios->layout->mirrors_p1) { - if (!ios->per_dev[i].length) - continue; - ret = _write_mirrors(ios, i); - if (unlikely(ret)) - goto err; - } - - ios->done = _write_done; - return _io_exec(ios); - -err: - _io_free(ios); - return ret; -} -#endif - int objio_write_pagelist(struct nfs_write_data *wdata, int how) { struct objio_state *objios; -- cgit v1.2.3-70-g09d2 From 278c023a99b0d6b471d0f4a79835c703482e29ac Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Mon, 31 Oct 2011 15:16:54 -0700 Subject: pnfs-obj: Support for RAID5 read-4-write interface. The ORE needs an r4w_get_page/r4w_put_page API supplied by the filesystem so it can get cache pages to read into when writing partial stripes. Signed-off-by: Boaz Harrosh Signed-off-by: Trond Myklebust --- fs/nfs/objlayout/objio_osd.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 3161da654a9..c807ab93140 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -459,6 +459,43 @@ static void _write_done(struct ore_io_state *ios, void *private) objlayout_write_done(&objios->oir, status, objios->sync); } +static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) +{ + struct objio_state *objios = priv; + struct nfs_write_data *wdata = objios->oir.rpcdata; + pgoff_t index = offset / PAGE_SIZE; + struct page *page = find_get_page(wdata->inode->i_mapping, index); + + if (!page) { + page = find_or_create_page(wdata->inode->i_mapping, + index, GFP_NOFS); + if (unlikely(!page)) { + dprintk("%s: grab_cache_page Failed index=0x%lx\n", + __func__, index); + return NULL; + } + unlock_page(page); + } + if (PageDirty(page) || PageWriteback(page)) + *uptodate = true; + else + *uptodate = PageUptodate(page); + dprintk("%s: index=0x%lx uptodate=%d\n", __func__, index, *uptodate); + return page; +} + +static void __r4w_put_page(void *priv, struct page *page) +{ +
dprintk("%s: index=0x%lx\n", __func__, page->index); + page_cache_release(page); + return; +} + +static const struct _ore_r4w_op _r4w_op = { + .get_page = &__r4w_get_page, + .put_page = &__r4w_put_page, +}; + int objio_write_pagelist(struct nfs_write_data *wdata, int how) { struct objio_state *objios; @@ -472,6 +509,7 @@ int objio_write_pagelist(struct nfs_write_data *wdata, int how) return ret; objios->sync = 0 != (how & FLUSH_SYNC); + objios->ios->r4w = &_r4w_op; if (!objios->sync) objios->ios->done = _write_done; -- cgit v1.2.3-70-g09d2 From 6070295efc90d1093b2031c43380bd7d9673c802 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 4 Nov 2011 07:04:10 -0400 Subject: nfs: set vs_hidden on nfs4_callback_version4 (try #2) This service should not be registered with or unregistered from rpcbind. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/callback_xdr.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index ee1a5b3cd48..726e59a9e50 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -987,4 +987,5 @@ struct svc_version nfs4_callback_version4 = { .vs_proc = nfs4_callback_procedures1, .vs_xdrsize = NFS4_CALLBACK_XDRSIZE, .vs_dispatch = NULL, + .vs_hidden = 1, }; -- cgit v1.2.3-70-g09d2