From 070ea60214c1894c9eec86ca9aa5dff57a5ab525 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 19 May 2007 17:22:52 -0400 Subject: NFS: Clean ups in fs/nfs/direct.c Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 00eee87510f..4c97e55e86e 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -763,10 +763,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, (unsigned long) count, (long long) pos); if (nr_segs != 1) - return -EINVAL; - - if (count < 0) goto out; + retval = -EFAULT; if (!access_ok(VERIFY_WRITE, buf, count)) goto out; @@ -814,7 +812,7 @@ out: ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - ssize_t retval; + ssize_t retval = -EINVAL; struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; /* XXX: temporary */ @@ -827,7 +825,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, (unsigned long) count, (long long) pos); if (nr_segs != 1) - return -EINVAL; + goto out; retval = generic_write_checks(file, &pos, &count, 0); if (retval) -- cgit v1.2.3-70-g09d2 From d9df8d6b38228afab519094048aa2c082b0b2cf4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 May 2007 10:22:20 -0400 Subject: NFS: Don't fail an O_DIRECT read/write if get_user_pages() returns fewer pages There is no need to fail the entire O_DIRECT read/write just because get_user_pages() returned fewer pages than we requested.
Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4c97e55e86e..f1b153ad645 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -295,9 +295,14 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo break; } if ((unsigned)result < data->npages) { - nfs_direct_release_pages(data->pagevec, result); - nfs_readdata_release(data); - break; + bytes = result * PAGE_SIZE; + if (bytes <= pgbase) { + nfs_direct_release_pages(data->pagevec, result); + nfs_readdata_release(data); + break; + } + bytes -= pgbase; + data->npages = result; } get_dreq(dreq); @@ -630,9 +635,14 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l break; } if ((unsigned)result < data->npages) { - nfs_direct_release_pages(data->pagevec, result); - nfs_writedata_release(data); - break; + bytes = result * PAGE_SIZE; + if (bytes <= pgbase) { + nfs_direct_release_pages(data->pagevec, result); + nfs_writedata_release(data); + break; + } + bytes -= pgbase; + data->npages = result; } get_dreq(dreq); -- cgit v1.2.3-70-g09d2 From 44dd151d5c21234cc534c47d7382f5c28c3143cd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 19 May 2007 11:58:03 -0400 Subject: NFS: Don't mark a written page as uptodate until it is on disk The write may fail, so we should not mark the page as uptodate until we are certain that the data has been accepted and written to disk by the server. 
Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index af344a158e0..b853959d964 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -191,8 +191,6 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, } /* Update file length */ nfs_grow_file(page, offset, count); - /* Set the PG_uptodate flag? */ - nfs_mark_uptodate(page, offset, count); nfs_unlock_request(req); return 0; } @@ -751,7 +749,12 @@ int nfs_updatepage(struct file *file, struct page *page, static void nfs_writepage_release(struct nfs_page *req) { - if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) { + if (PageError(req->wb_page)) { + nfs_end_page_writeback(req->wb_page); + nfs_inode_remove_request(req); + } else if (!nfs_reschedule_unstable_write(req)) { + /* Set the PG_uptodate flag */ + nfs_mark_uptodate(req->wb_page, req->wb_pgbase, req->wb_bytes); nfs_end_page_writeback(req->wb_page); nfs_inode_remove_request(req); } else @@ -1039,6 +1042,8 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) dprintk(" marked for commit\n"); goto next; } + /* Set the PG_uptodate flag? */ + nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); dprintk(" OK\n"); remove_request: nfs_end_page_writeback(page); @@ -1249,6 +1254,9 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) * returned by the server against all stored verfs. */ if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) { /* We have a match */ + /* Set the PG_uptodate flag */ + nfs_mark_uptodate(req->wb_page, req->wb_pgbase, + req->wb_bytes); nfs_inode_remove_request(req); dprintk(" OK\n"); goto next; -- cgit v1.2.3-70-g09d2 From de05a0cc2a2ae16eb8d8dbf88fe728ace45beb9a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 20 May 2007 13:05:05 -0400 Subject: NFS: Minor read optimisation... 
Since PG_uptodate may now end up getting set during the call to nfs_wb_page(), we can avoid putting a read request on the wire in those situations. Signed-off-by: Trond Myklebust --- fs/nfs/read.c | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 7bd7cb95c03..c07d0d10d9e 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -483,17 +483,19 @@ int nfs_readpage(struct file *file, struct page *page) */ error = nfs_wb_page(inode, page); if (error) - goto out_error; + goto out_unlock; + if (PageUptodate(page)) + goto out_unlock; error = -ESTALE; if (NFS_STALE(inode)) - goto out_error; + goto out_unlock; if (file == NULL) { error = -EBADF; ctx = nfs_find_open_context(inode, NULL, FMODE_READ); if (ctx == NULL) - goto out_error; + goto out_unlock; } else ctx = get_nfs_open_context((struct nfs_open_context *) file->private_data); @@ -502,8 +504,7 @@ int nfs_readpage(struct file *file, struct page *page) put_nfs_open_context(ctx); return error; - -out_error: +out_unlock: unlock_page(page); return error; } @@ -520,21 +521,32 @@ readpage_async_filler(void *data, struct page *page) struct inode *inode = page->mapping->host; struct nfs_page *new; unsigned int len; + int error; + + error = nfs_wb_page(inode, page); + if (error) + goto out_unlock; + if (PageUptodate(page)) + goto out_unlock; - nfs_wb_page(inode, page); len = nfs_page_length(page); if (len == 0) return nfs_return_empty_page(page); + new = nfs_create_request(desc->ctx, inode, page, 0, len); - if (IS_ERR(new)) { - SetPageError(page); - unlock_page(page); - return PTR_ERR(new); - } + if (IS_ERR(new)) + goto out_error; + if (len < PAGE_CACHE_SIZE) zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0); nfs_pageio_add_request(desc->pgio, new); return 0; +out_error: + error = PTR_ERR(new); + SetPageError(page); +out_unlock: + unlock_page(page); + return error; } int nfs_readpages(struct file *filp, struct 
address_space *mapping, -- cgit v1.2.3-70-g09d2 From 88be9f990fe70f0f177ef44a16a477599e91f825 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2007 10:42:27 -0400 Subject: NFS: Replace vfsmount and dentry in nfs_open_context with struct path Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 2 +- fs/nfs/direct.c | 4 ++-- fs/nfs/inode.c | 10 +++++----- fs/nfs/nfs4proc.c | 6 +++--- fs/nfs/pagelist.c | 6 +++--- fs/nfs/read.c | 6 +++--- fs/nfs/write.c | 20 ++++++++++---------- include/linux/nfs_fs.h | 4 ++-- 8 files changed, 29 insertions(+), 29 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 7f37d1bea83..b47c156a711 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -74,7 +74,7 @@ again: continue; get_nfs_open_context(ctx); spin_unlock(&inode->i_lock); - err = nfs4_open_delegation_recall(ctx->dentry, state); + err = nfs4_open_delegation_recall(ctx->path.dentry, state); if (err >= 0) err = nfs_delegation_claim_locks(ctx, state); put_nfs_open_context(ctx); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index f1b153ad645..a5c82b6f3b4 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -266,7 +266,7 @@ static const struct rpc_call_ops nfs_read_direct_ops = { static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos) { struct nfs_open_context *ctx = dreq->ctx; - struct inode *inode = ctx->dentry->d_inode; + struct inode *inode = ctx->path.dentry->d_inode; size_t rsize = NFS_SERVER(inode)->rsize; unsigned int pgbase; int result; @@ -606,7 +606,7 @@ static const struct rpc_call_ops nfs_write_direct_ops = { static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync) { struct nfs_open_context *ctx = dreq->ctx; - struct inode *inode = ctx->dentry->d_inode; + struct inode *inode = ctx->path.dentry->d_inode; size_t wsize = NFS_SERVER(inode)->wsize; unsigned int pgbase; int 
result; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index bd9f5a83659..cc7a9064be9 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -462,8 +462,8 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (ctx != NULL) { atomic_set(&ctx->count, 1); - ctx->dentry = dget(dentry); - ctx->vfsmnt = mntget(mnt); + ctx->path.dentry = dget(dentry); + ctx->path.mnt = mntget(mnt); ctx->cred = get_rpccred(cred); ctx->state = NULL; ctx->lockowner = current->files; @@ -484,7 +484,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx) { if (atomic_dec_and_test(&ctx->count)) { if (!list_empty(&ctx->list)) { - struct inode *inode = ctx->dentry->d_inode; + struct inode *inode = ctx->path.dentry->d_inode; spin_lock(&inode->i_lock); list_del(&ctx->list); spin_unlock(&inode->i_lock); @@ -493,8 +493,8 @@ void put_nfs_open_context(struct nfs_open_context *ctx) nfs4_close_state(ctx->state, ctx->mode); if (ctx->cred != NULL) put_rpccred(ctx->cred); - dput(ctx->dentry); - mntput(ctx->vfsmnt); + dput(ctx->path.dentry); + mntput(ctx->path.mnt); kfree(ctx); } } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 648e0ac0f90..4d641cbdbde 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -512,7 +512,7 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta ctx = nfs4_state_find_open_context(state); if (IS_ERR(ctx)) return PTR_ERR(ctx); - ret = nfs4_do_open_reclaim(sp, state, ctx->dentry); + ret = nfs4_do_open_reclaim(sp, state, ctx->path.dentry); put_nfs_open_context(ctx); return ret; } @@ -862,7 +862,7 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta ctx = nfs4_state_find_open_context(state); if (IS_ERR(ctx)) return PTR_ERR(ctx); - ret = nfs4_do_open_expired(sp, state, ctx->dentry); + ret = nfs4_do_open_expired(sp, state, ctx->path.dentry); put_nfs_open_context(ctx); return ret; } @@ -3285,7 +3285,7 @@ static void nfs4_lock_done(struct 
rpc_task *task, void *calldata) memcpy(data->lsp->ls_stateid.data, data->res.stateid.data, sizeof(data->lsp->ls_stateid.data)); data->lsp->ls_flags |= NFS_LOCK_INITIALIZED; - renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp); + renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp); } nfs_increment_lock_seqid(data->rpc_status, data->arg.lock_seqid); out: diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index c5bb51a29e8..f8a4ba53393 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -114,7 +114,7 @@ void nfs_unlock_request(struct nfs_page *req) */ int nfs_set_page_writeback_locked(struct nfs_page *req) { - struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); + struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode); if (!nfs_lock_request(req)) return 0; @@ -127,7 +127,7 @@ int nfs_set_page_writeback_locked(struct nfs_page *req) */ void nfs_clear_page_writeback(struct nfs_page *req) { - struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); + struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode); if (req->wb_page != NULL) { spin_lock(&nfsi->req_lock); @@ -193,7 +193,7 @@ static int nfs_wait_bit_interruptible(void *word) int nfs_wait_on_request(struct nfs_page *req) { - struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->dentry->d_inode); + struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->path.dentry->d_inode); sigset_t oldmask; int ret = 0; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index c07d0d10d9e..6ae2e58ed05 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -145,8 +145,8 @@ static void nfs_readpage_release(struct nfs_page *req) unlock_page(req->wb_page); dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", - req->wb_context->dentry->d_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_context->dentry->d_inode), + req->wb_context->path.dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), req->wb_bytes, (long 
long)req_offset(req)); nfs_clear_request(req); @@ -164,7 +164,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, int flags; data->req = req; - data->inode = inode = req->wb_context->dentry->d_inode; + data->inode = inode = req->wb_context->path.dentry->d_inode; data->cred = req->wb_context->cred; data->args.fh = NFS_FH(inode); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b853959d964..9e7c21da864 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -407,7 +407,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) */ static void nfs_inode_remove_request(struct nfs_page *req) { - struct inode *inode = req->wb_context->dentry->d_inode; + struct inode *inode = req->wb_context->path.dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); BUG_ON (!NFS_WBACK_BUSY(req)); @@ -455,7 +455,7 @@ nfs_dirty_request(struct nfs_page *req) static void nfs_mark_request_commit(struct nfs_page *req) { - struct inode *inode = req->wb_context->dentry->d_inode; + struct inode *inode = req->wb_context->path.dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&nfsi->req_lock); @@ -789,7 +789,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req, * NB: take care not to mess about with data->commit et al. 
*/ data->req = req; - data->inode = inode = req->wb_context->dentry->d_inode; + data->inode = inode = req->wb_context->path.dentry->d_inode; data->cred = req->wb_context->cred; data->args.fh = NFS_FH(inode); @@ -957,8 +957,8 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) struct page *page = req->wb_page; dprintk("NFS: write (%s/%Ld %d@%Ld)", - req->wb_context->dentry->d_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_context->dentry->d_inode), + req->wb_context->path.dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); @@ -1023,8 +1023,8 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) page = req->wb_page; dprintk("NFS: write (%s/%Ld %d@%Ld)", - req->wb_context->dentry->d_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_context->dentry->d_inode), + req->wb_context->path.dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); @@ -1162,7 +1162,7 @@ static void nfs_commit_rpcsetup(struct list_head *head, list_splice_init(head, &data->pages); first = nfs_list_entry(data->pages.next); - inode = first->wb_context->dentry->d_inode; + inode = first->wb_context->path.dentry->d_inode; data->inode = inode; data->cred = first->wb_context->cred; @@ -1239,8 +1239,8 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); dprintk("NFS: commit (%s/%Ld %d@%Ld)", - req->wb_context->dentry->d_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_context->dentry->d_inode), + req->wb_context->path.dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); if (task->tk_status < 0) { diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 0543439a97a..07eea8f64ec 100644 --- a/include/linux/nfs_fs.h +++ 
b/include/linux/nfs_fs.h @@ -31,6 +31,7 @@ #include #include +#include #include #include #include @@ -70,8 +71,7 @@ struct nfs_access_entry { struct nfs4_state; struct nfs_open_context { atomic_t count; - struct vfsmount *vfsmnt; - struct dentry *dentry; + struct path path; struct rpc_cred *cred; struct nfs4_state *state; fl_owner_t lockowner; -- cgit v1.2.3-70-g09d2 From 539cd03a5708c9861a3e738e6f363ad743c85ddf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2007 11:46:42 -0400 Subject: NFSv4: Cleanup: pass the nfs_open_context to open recovery code Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 2 +- fs/nfs/delegation.h | 2 +- fs/nfs/nfs4proc.c | 38 +++++++++++++++++++------------------- 3 files changed, 21 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index b47c156a711..9f17b91205c 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -74,7 +74,7 @@ again: continue; get_nfs_open_context(ctx); spin_unlock(&inode->i_lock); - err = nfs4_open_delegation_recall(ctx->path.dentry, state); + err = nfs4_open_delegation_recall(ctx, state); if (err >= 0) err = nfs_delegation_claim_locks(ctx, state); put_nfs_open_context(ctx); diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 2cfd4b24c7f..f6e42fb21af 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -39,7 +39,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp); /* NFSv4 delegation-related procedures */ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid); -int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state); +int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state); int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 
4d641cbdbde..c83db9def0f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -462,7 +462,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * * OPEN_RECLAIM: * reclaim state on the server after a reboot. */ -static int _nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) +static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state) { struct nfs_delegation *delegation = NFS_I(state->inode)->delegation; struct nfs4_opendata *opendata; @@ -478,7 +478,7 @@ static int _nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state } delegation_type = delegation->type; } - opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL); + opendata = nfs4_opendata_alloc(ctx->path.dentry, state->owner, 0, NULL); if (opendata == NULL) return -ENOMEM; opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS; @@ -490,13 +490,13 @@ static int _nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state return status; } -static int nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) +static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_exception exception = { }; int err; do { - err = _nfs4_do_open_reclaim(sp, state, dentry); + err = _nfs4_do_open_reclaim(ctx, state); if (err != -NFS4ERR_DELAY) break; nfs4_handle_exception(server, err, &exception); @@ -512,12 +512,12 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta ctx = nfs4_state_find_open_context(state); if (IS_ERR(ctx)) return PTR_ERR(ctx); - ret = nfs4_do_open_reclaim(sp, state, ctx->path.dentry); + ret = nfs4_do_open_reclaim(ctx, state); put_nfs_open_context(ctx); return ret; } -static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) +static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, 
struct nfs4_state *state) { struct nfs4_state_owner *sp = state->owner; struct nfs4_opendata *opendata; @@ -525,7 +525,7 @@ static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) return 0; - opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL); + opendata = nfs4_opendata_alloc(ctx->path.dentry, sp, 0, NULL); if (opendata == NULL) return -ENOMEM; opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR; @@ -536,13 +536,13 @@ static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state return ret; } -int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) +int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state) { struct nfs4_exception exception = { }; - struct nfs_server *server = NFS_SERVER(dentry->d_inode); + struct nfs_server *server = NFS_SERVER(state->inode); int err; do { - err = _nfs4_open_delegation_recall(dentry, state); + err = _nfs4_open_delegation_recall(ctx, state); switch (err) { case 0: return err; @@ -811,7 +811,7 @@ static int nfs4_recover_expired_lease(struct nfs_server *server) * reclaim state on the server after a network partition. 
* Assumes caller holds the appropriate lock */ -static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) +static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state) { struct inode *inode = state->inode; struct nfs_delegation *delegation = NFS_I(inode)->delegation; @@ -820,34 +820,34 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st int ret; if (delegation != NULL && !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) { - ret = _nfs4_do_access(inode, sp->so_cred, openflags); + ret = _nfs4_do_access(inode, ctx->cred, openflags); if (ret < 0) return ret; memcpy(&state->stateid, &delegation->stateid, sizeof(state->stateid)); set_bit(NFS_DELEGATED_STATE, &state->flags); return 0; } - opendata = nfs4_opendata_alloc(dentry, sp, openflags, NULL); + opendata = nfs4_opendata_alloc(ctx->path.dentry, state->owner, openflags, NULL); if (opendata == NULL) return -ENOMEM; ret = nfs4_open_recover(opendata, state); if (ret == -ESTALE) { /* Invalidate the state owner so we don't ever use it again */ - nfs4_drop_state_owner(sp); - d_drop(dentry); + nfs4_drop_state_owner(state->owner); + d_drop(ctx->path.dentry); } nfs4_opendata_free(opendata); return ret; } -static inline int nfs4_do_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) +static inline int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state) { - struct nfs_server *server = NFS_SERVER(dentry->d_inode); + struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_exception exception = { }; int err; do { - err = _nfs4_open_expired(sp, state, dentry); + err = _nfs4_open_expired(ctx, state); if (err == -NFS4ERR_DELAY) nfs4_handle_exception(server, err, &exception); } while (exception.retry); @@ -862,7 +862,7 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta ctx = nfs4_state_find_open_context(state); if 
(IS_ERR(ctx)) return PTR_ERR(ctx); - ret = nfs4_do_open_expired(sp, state, ctx->path.dentry); + ret = nfs4_do_open_expired(ctx, state); put_nfs_open_context(ctx); return ret; } -- cgit v1.2.3-70-g09d2 From ad389da79f7bf9dc12dbc79c9c2740f9ed2f13d1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2007 12:30:00 -0400 Subject: NFSv4: Ensure asynchronous open() calls always pin the mountpoint A number of race conditions may currently ensue if the user presses ^C and then unmounts the partition while an asynchronous open() is in progress. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 +- fs/nfs/nfs4proc.c | 48 +++++++++++++++++++++++++++++++----------------- 2 files changed, 32 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index c27258b5d3e..4948ec1dd9b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1244,7 +1244,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; - if (nd && (nd->flags & LOOKUP_CREATE)) + if ((nd->flags & LOOKUP_CREATE) != 0) open_flags = nd->intent.open.flags; lock_kernel(); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c83db9def0f..895e8e649c9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -221,7 +221,7 @@ struct nfs4_opendata { struct nfs_open_confirmres c_res; struct nfs_fattr f_attr; struct nfs_fattr dir_attr; - struct dentry *dentry; + struct path path; struct dentry *dir; struct nfs4_state_owner *owner; struct iattr attrs; @@ -230,11 +230,11 @@ struct nfs4_opendata { int cancelled; }; -static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, +static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, struct nfs4_state_owner *sp, int flags, const struct iattr *attrs) { - struct dentry *parent = dget_parent(dentry); + struct dentry *parent = dget_parent(path->dentry); struct inode *dir = parent->d_inode; struct nfs_server *server = NFS_SERVER(dir); struct 
nfs4_opendata *p; @@ -246,7 +246,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, if (p->o_arg.seqid == NULL) goto err_free; atomic_set(&p->count, 1); - p->dentry = dget(dentry); + p->path.mnt = mntget(path->mnt); + p->path.dentry = dget(path->dentry); p->dir = parent; p->owner = sp; atomic_inc(&sp->so_count); @@ -254,7 +255,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.open_flags = flags, p->o_arg.clientid = server->nfs_client->cl_clientid; p->o_arg.id = sp->so_id; - p->o_arg.name = &dentry->d_name; + p->o_arg.name = &p->path.dentry->d_name; p->o_arg.server = server; p->o_arg.bitmask = server->attr_bitmask; p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; @@ -288,7 +289,8 @@ static void nfs4_opendata_free(struct nfs4_opendata *p) nfs_free_seqid(p->o_arg.seqid); nfs4_put_state_owner(p->owner); dput(p->dir); - dput(p->dentry); + dput(p->path.dentry); + mntput(p->path.mnt); kfree(p); } } @@ -478,7 +480,7 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state } delegation_type = delegation->type; } - opendata = nfs4_opendata_alloc(ctx->path.dentry, state->owner, 0, NULL); + opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL); if (opendata == NULL) return -ENOMEM; opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS; @@ -525,7 +527,7 @@ static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) return 0; - opendata = nfs4_opendata_alloc(ctx->path.dentry, sp, 0, NULL); + opendata = nfs4_opendata_alloc(&ctx->path, sp, 0, NULL); if (opendata == NULL) return -ENOMEM; opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR; @@ -827,7 +829,7 @@ static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *s set_bit(NFS_DELEGATED_STATE, &state->flags); return 0; } - opendata = nfs4_opendata_alloc(ctx->path.dentry, state->owner, openflags, NULL); + opendata = nfs4_opendata_alloc(&ctx->path, 
state->owner, openflags, NULL); if (opendata == NULL) return -ENOMEM; ret = nfs4_open_recover(opendata, state); @@ -955,7 +957,7 @@ static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, st /* * Returns a referenced nfs4_state */ -static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) +static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) { struct nfs4_state_owner *sp; struct nfs4_state *state = NULL; @@ -975,7 +977,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st goto err_put_state_owner; down_read(&clp->cl_sem); status = -ENOMEM; - opendata = nfs4_opendata_alloc(dentry, sp, flags, sattr); + opendata = nfs4_opendata_alloc(path, sp, flags, sattr); if (opendata == NULL) goto err_release_rwsem; @@ -1006,14 +1008,14 @@ out_err: } -static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred) +static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred) { struct nfs4_exception exception = { }; struct nfs4_state *res; int status; do { - status = _nfs4_do_open(dir, dentry, flags, sattr, cred, &res); + status = _nfs4_do_open(dir, path, flags, sattr, cred, &res); if (status == 0) break; /* NOTE: BAD_SEQID means the server and client disagree about the @@ -1259,6 +1261,10 @@ static int nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, str struct dentry * nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { + struct path path = { + .mnt = nd->mnt, + .dentry = dentry, + }; struct iattr attr; struct rpc_cred *cred; struct nfs4_state *state; @@ -1277,7 +1283,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) cred = 
rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); if (IS_ERR(cred)) return (struct dentry *)cred; - state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred); + state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred); put_rpccred(cred); if (IS_ERR(state)) { if (PTR_ERR(state) == -ENOENT) @@ -1294,6 +1300,10 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) int nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd) { + struct path path = { + .mnt = nd->mnt, + .dentry = dentry, + }; struct rpc_cred *cred; struct nfs4_state *state; @@ -1302,7 +1312,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st return PTR_ERR(cred); state = nfs4_open_delegated(dentry->d_inode, openflags, cred); if (IS_ERR(state)) - state = nfs4_do_open(dir, dentry, openflags, NULL, cred); + state = nfs4_do_open(dir, &path, openflags, NULL, cred); put_rpccred(cred); if (IS_ERR(state)) { switch (PTR_ERR(state)) { @@ -1752,6 +1762,10 @@ static int nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, int flags, struct nameidata *nd) { + struct path path = { + .mnt = nd->mnt, + .dentry = dentry, + }; struct nfs4_state *state; struct rpc_cred *cred; int status = 0; @@ -1761,7 +1775,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, status = PTR_ERR(cred); goto out; } - state = nfs4_do_open(dir, dentry, flags, sattr, cred); + state = nfs4_do_open(dir, &path, flags, sattr, cred); put_rpccred(cred); if (IS_ERR(state)) { status = PTR_ERR(state); @@ -1774,7 +1788,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, if (status == 0) nfs_setattr_update_inode(state->inode, sattr); } - if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN)) + if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0) status = nfs4_intent_set_file(nd, dentry, state); else nfs4_close_state(state, flags); -- 
cgit v1.2.3-70-g09d2 From 4a35bd41aff5714deb41c8f14766df3871e2e8f7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2007 10:31:33 -0400 Subject: NFSv4: Ensure that nfs4_do_close() doesn't race with umount nfs4_do_close() does not currently have any way to ensure that the user won't attempt to unmount the partition while the asynchronous RPC call is completing. This again may cause Oopses in nfs_update_inode(). Add a vfsmount argument to nfs4_close_state to ensure that the partition remains mounted while we're closing the file. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 19 +------------------ fs/nfs/nfs4_fs.h | 6 +++--- fs/nfs/nfs4proc.c | 35 ++++++++++++++++++++--------------- fs/nfs/nfs4state.c | 4 ++-- 4 files changed, 26 insertions(+), 38 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index cc7a9064be9..23ecf0334a1 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -490,7 +490,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx) spin_unlock(&inode->i_lock); } if (ctx->state != NULL) - nfs4_close_state(ctx->state, ctx->mode); + nfs4_close_state(&ctx->path, ctx->state, ctx->mode); if (ctx->cred != NULL) put_rpccred(ctx->cred); dput(ctx->path.dentry); @@ -1103,27 +1103,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) */ void nfs4_clear_inode(struct inode *inode) { - struct nfs_inode *nfsi = NFS_I(inode); - /* If we are holding a delegation, return it! 
*/ nfs_inode_return_delegation(inode); /* First call standard NFS clear_inode() code */ nfs_clear_inode(inode); - /* Now clear out any remaining state */ - while (!list_empty(&nfsi->open_states)) { - struct nfs4_state *state; - - state = list_entry(nfsi->open_states.next, - struct nfs4_state, - inode_states); - dprintk("%s(%s/%Ld): found unclaimed NFSv4 state %p\n", - __FUNCTION__, - inode->i_sb->s_id, - (long long)NFS_FILEID(inode), - state); - BUG_ON(atomic_read(&state->count) != 1); - nfs4_close_state(state, state->state); - } } #endif diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index cf3a17eb5c0..c97a0ad8430 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -165,7 +165,7 @@ extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struc extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *); extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); -extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state); +extern int nfs4_do_close(struct path *path, struct nfs4_state *state); extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); @@ -196,7 +196,7 @@ extern void nfs4_put_state_owner(struct nfs4_state_owner *); extern void nfs4_drop_state_owner(struct nfs4_state_owner *); extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); extern void nfs4_put_open_state(struct nfs4_state *); -extern void nfs4_close_state(struct nfs4_state *, mode_t); +extern void nfs4_close_state(struct path *, struct nfs4_state *, mode_t); extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t); extern void nfs4_schedule_state_recovery(struct nfs_client *); extern void 
nfs4_put_lock_state(struct nfs4_lock_state *lsp); @@ -222,7 +222,7 @@ extern struct svc_version nfs4_callback_version1; #else -#define nfs4_close_state(a, b) do { } while (0) +#define nfs4_close_state(a, b, c) do { } while (0) #endif /* CONFIG_NFS_V4 */ #endif /* __LINUX_FS_NFS_NFS4_FS.H */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 895e8e649c9..8feaf232f2e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -453,7 +453,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * opendata->owner->so_cred, &opendata->o_res); } - nfs4_close_state(newstate, opendata->o_arg.open_flags); + nfs4_close_state(&opendata->path, newstate, opendata->o_arg.open_flags); } if (newstate != state) return -ESTALE; @@ -603,7 +603,7 @@ static void nfs4_open_confirm_release(void *calldata) nfs_confirm_seqid(&data->owner->so_seqid, 0); state = nfs4_opendata_to_nfs4_state(data); if (state != NULL) - nfs4_close_state(state, data->o_arg.open_flags); + nfs4_close_state(&data->path, state, data->o_arg.open_flags); out_free: nfs4_opendata_free(data); } @@ -706,7 +706,7 @@ static void nfs4_open_release(void *calldata) nfs_confirm_seqid(&data->owner->so_seqid, 0); state = nfs4_opendata_to_nfs4_state(data); if (state != NULL) - nfs4_close_state(state, data->o_arg.open_flags); + nfs4_close_state(&data->path, state, data->o_arg.open_flags); out_free: nfs4_opendata_free(data); } @@ -1103,6 +1103,7 @@ static int nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr, } struct nfs4_closedata { + struct path path; struct inode *inode; struct nfs4_state *state; struct nfs_closeargs arg; @@ -1119,6 +1120,8 @@ static void nfs4_free_closedata(void *data) nfs4_put_open_state(calldata->state); nfs_free_seqid(calldata->arg.seqid); nfs4_put_state_owner(sp); + dput(calldata->path.dentry); + mntput(calldata->path.mnt); kfree(calldata); } @@ -1211,18 +1214,18 @@ static const struct rpc_call_ops nfs4_close_ops = { * * NOTE: Caller must be holding the sp->so_owner 
semaphore! */ -int nfs4_do_close(struct inode *inode, struct nfs4_state *state) +int nfs4_do_close(struct path *path, struct nfs4_state *state) { - struct nfs_server *server = NFS_SERVER(inode); + struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_closedata *calldata; int status = -ENOMEM; calldata = kmalloc(sizeof(*calldata), GFP_KERNEL); if (calldata == NULL) goto out; - calldata->inode = inode; + calldata->inode = state->inode; calldata->state = state; - calldata->arg.fh = NFS_FH(inode); + calldata->arg.fh = NFS_FH(state->inode); calldata->arg.stateid = &state->stateid; /* Serialization for the sequence id */ calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); @@ -1231,6 +1234,8 @@ int nfs4_do_close(struct inode *inode, struct nfs4_state *state) calldata->arg.bitmask = server->attr_bitmask; calldata->res.fattr = &calldata->fattr; calldata->res.server = server; + calldata->path.mnt = mntget(path->mnt); + calldata->path.dentry = dget(path->dentry); status = nfs4_call_async(server->client, &nfs4_close_ops, calldata); if (status == 0) @@ -1243,18 +1248,18 @@ out: return status; } -static int nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state) +static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state) { struct file *filp; - filp = lookup_instantiate_filp(nd, dentry, NULL); + filp = lookup_instantiate_filp(nd, path->dentry, NULL); if (!IS_ERR(filp)) { struct nfs_open_context *ctx; ctx = (struct nfs_open_context *)filp->private_data; ctx->state = state; return 0; } - nfs4_close_state(state, nd->intent.open.flags); + nfs4_close_state(path, state, nd->intent.open.flags); return PTR_ERR(filp); } @@ -1293,7 +1298,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) res = d_add_unique(dentry, igrab(state->inode)); if (res != NULL) dentry = res; - nfs4_intent_set_file(nd, dentry, state); + nfs4_intent_set_file(nd, &path, state); return 
res; } @@ -1328,10 +1333,10 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st } } if (state->inode == dentry->d_inode) { - nfs4_intent_set_file(nd, dentry, state); + nfs4_intent_set_file(nd, &path, state); return 1; } - nfs4_close_state(state, openflags); + nfs4_close_state(&path, state, openflags); out_drop: d_drop(dentry); return 0; @@ -1789,9 +1794,9 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, nfs_setattr_update_inode(state->inode, sattr); } if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0) - status = nfs4_intent_set_file(nd, dentry, state); + status = nfs4_intent_set_file(nd, &path, state); else - nfs4_close_state(state, flags); + nfs4_close_state(&path, state, flags); out: return status; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 8ed79d5c54f..a85138ef67a 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -341,7 +341,7 @@ void nfs4_put_open_state(struct nfs4_state *state) /* * Close the current file. */ -void nfs4_close_state(struct nfs4_state *state, mode_t mode) +void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode) { struct inode *inode = state->inode; struct nfs4_state_owner *owner = state->owner; @@ -375,7 +375,7 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode) spin_unlock(&inode->i_lock); spin_unlock(&owner->so_lock); - if (oldstate != newstate && nfs4_do_close(inode, state) == 0) + if (oldstate != newstate && nfs4_do_close(path, state) == 0) return; nfs4_put_open_state(state); nfs4_put_state_owner(owner); -- cgit v1.2.3-70-g09d2 From b39e625b6e75aa70e26c13f9378756bb5f2af032 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 11 Jun 2007 23:05:07 -0400 Subject: NFSv4: Clean up nfs4_call_async() Use rpc_run_task() instead of doing it ourselves. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 26 +++++++++----------------- fs/nfs/nfs4state.c | 9 +++++---- net/sunrpc/sunrpc_syms.c | 1 - 3 files changed, 14 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8feaf232f2e..3cc75445a68 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -295,18 +295,6 @@ static void nfs4_opendata_free(struct nfs4_opendata *p) } } -/* Helper for asynchronous RPC calls */ -static int nfs4_call_async(struct rpc_clnt *clnt, - const struct rpc_call_ops *tk_ops, void *calldata) -{ - struct rpc_task *task; - - if (!(task = rpc_new_task(clnt, RPC_TASK_ASYNC, tk_ops, calldata))) - return -ENOMEM; - rpc_execute(task); - return 0; -} - static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task) { sigset_t oldset; @@ -1218,6 +1206,8 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_closedata *calldata; + struct nfs4_state_owner *sp = state->owner; + struct rpc_task *task; int status = -ENOMEM; calldata = kmalloc(sizeof(*calldata), GFP_KERNEL); @@ -1237,14 +1227,16 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state) calldata->path.mnt = mntget(path->mnt); calldata->path.dentry = dget(path->dentry); - status = nfs4_call_async(server->client, &nfs4_close_ops, calldata); - if (status == 0) - goto out; - - nfs_free_seqid(calldata->arg.seqid); + task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_close_ops, calldata); + if (IS_ERR(task)) + return PTR_ERR(task); + rpc_put_task(task); + return 0; out_free_calldata: kfree(calldata); out: + nfs4_put_open_state(state); + nfs4_put_state_owner(sp); return status; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index a85138ef67a..5d7ffbfc348 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -375,10 +375,11 @@ void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode) 
spin_unlock(&inode->i_lock); spin_unlock(&owner->so_lock); - if (oldstate != newstate && nfs4_do_close(path, state) == 0) - return; - nfs4_put_open_state(state); - nfs4_put_state_owner(owner); + if (oldstate == newstate) { + nfs4_put_open_state(state); + nfs4_put_state_owner(owner); + } else + nfs4_do_close(path, state); } /* diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 73075dec83c..c46d31ca307 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -30,7 +30,6 @@ EXPORT_SYMBOL(rpc_wake_up_next); EXPORT_SYMBOL(rpc_wake_up_task); EXPORT_SYMBOL(rpciod_down); EXPORT_SYMBOL(rpciod_up); -EXPORT_SYMBOL(rpc_new_task); EXPORT_SYMBOL(rpc_wake_up_status); /* RPC client functions */ -- cgit v1.2.3-70-g09d2 From a0356862bcbeb20acf64bc1a82d28a4c5bb957a7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2007 13:26:15 -0400 Subject: NFS: Fix nfs_reval_fsid() We don't need to revalidate the fsid on the root directory. It suffices to revalidate it on the current directory. 
Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 9 ++++----- fs/nfs/inode.c | 4 ++-- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 4948ec1dd9b..c02a7962e69 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -897,14 +897,13 @@ int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) return (nd->intent.open.flags & O_EXCL) != 0; } -static inline int nfs_reval_fsid(struct vfsmount *mnt, struct inode *dir, - struct nfs_fh *fh, struct nfs_fattr *fattr) +static inline int nfs_reval_fsid(struct inode *dir, const struct nfs_fattr *fattr) { struct nfs_server *server = NFS_SERVER(dir); if (!nfs_fsid_equal(&server->fsid, &fattr->fsid)) - /* Revalidate fsid on root dir */ - return __nfs_revalidate_inode(server, mnt->mnt_root->d_inode); + /* Revalidate fsid using the parent directory */ + return __nfs_revalidate_inode(server, dir); return 0; } @@ -946,7 +945,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru res = ERR_PTR(error); goto out_unlock; } - error = nfs_reval_fsid(nd->mnt, dir, &fhandle, &fattr); + error = nfs_reval_fsid(dir, &fattr); if (error < 0) { res = ERR_PTR(error); goto out_unlock; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 23ecf0334a1..7bcb3dfa617 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -961,8 +961,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) goto out_changed; server = NFS_SERVER(inode); - /* Update the fsid if and only if this is the root directory */ - if (inode == inode->i_sb->s_root->d_inode + /* Update the fsid? 
*/ + if (S_ISDIR(inode->i_mode) && !nfs_fsid_equal(&server->fsid, &fattr->fsid)) server->fsid = fattr->fsid; -- cgit v1.2.3-70-g09d2 From 83d93f2229348837bf988a1048f7f38789474471 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 7 Jun 2007 09:58:08 -0400 Subject: NFS: Use GFP_HIGHUSER for page allocation in nfs_symlink() nfs_symlink() allocates a GFP_KERNEL page for the pagecache. Most pagecache pages are allocated using GFP_HIGHUSER, and there's no reason not to do that in nfs_symlink() as well. Signed-off-by: Jeff Layton --- fs/nfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index c02a7962e69..0f41678fd7e 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1534,7 +1534,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym lock_kernel(); - page = alloc_page(GFP_KERNEL); + page = alloc_page(GFP_HIGHUSER); if (!page) { unlock_kernel(); return -ENOMEM; -- cgit v1.2.3-70-g09d2 From fc6ae3cf482c385a6fe87ba119d399bb85aa670b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2007 19:13:47 -0400 Subject: NFS: Re-enable forced umounts They disappeared some time around 2.6.18. 
Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ca20d3cc260..14c7923697d 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -430,7 +430,20 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) */ static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags) { + struct nfs_server *server = NFS_SB(vfsmnt->mnt_sb); + struct rpc_clnt *rpc; + shrink_submounts(vfsmnt, &nfs_automount_list); + + if (!(flags & MNT_FORCE)) + return; + /* -EIO all pending I/O */ + rpc = server->client_acl; + if (!IS_ERR(rpc)) + rpc_killall_tasks(rpc); + rpc = server->client; + if (!IS_ERR(rpc)) + rpc_killall_tasks(rpc); } /* -- cgit v1.2.3-70-g09d2 From aa53ed541a1fec78a78d02afc8b042d040cc080d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 5 Jun 2007 14:49:03 -0400 Subject: NFS4: on a O_EXCL OPEN make sure SETATTR sets the fields holding the verifier The Linux NFS4 client simply skips over the bitmask in an O_EXCL open call and so it doesn't bother to reset any fields that may be holding the verifier. This patch has us save the first two words of the bitmask (which is all the current client has #defines for). The client then later checks this bitmask and turns on the appropriate flags in the sattr->ia_verify field for the following SETATTR call. This patch only currently checks to see if the server used the atime and mtime slots for the verifier (which is what the Linux server uses for this). I'm not sure of what other fields the server could reasonably use, but adding checks for others should be trivial. 
Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 20 ++++++++++++++++++++ fs/nfs/nfs4xdr.c | 9 +++++++-- include/linux/nfs4.h | 1 + include/linux/nfs_xdr.h | 1 + 4 files changed, 29 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3cc75445a68..fee2d14b158 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -942,6 +942,22 @@ static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, st return res; } +/* + * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-* + * fields corresponding to attributes that were used to store the verifier. + * Make sure we clobber those fields in the later setattr call + */ +static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct iattr *sattr) +{ + if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_ACCESS) && + !(sattr->ia_valid & ATTR_ATIME_SET)) + sattr->ia_valid |= ATTR_ATIME; + + if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_MODIFY) && + !(sattr->ia_valid & ATTR_MTIME_SET)) + sattr->ia_valid |= ATTR_MTIME; +} + /* * Returns a referenced nfs4_state */ @@ -973,6 +989,9 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct if (status != 0) goto err_opendata_free; + if (opendata->o_arg.open_flags & O_EXCL) + nfs4_exclusive_attrset(opendata, sattr); + status = -ENOMEM; state = nfs4_opendata_to_nfs4_state(opendata); if (state == NULL) @@ -1784,6 +1803,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, status = nfs4_do_setattr(state->inode, &fattr, sattr, state); if (status == 0) nfs_setattr_update_inode(state->inode, sattr); + nfs_post_op_update_inode(state->inode, &fattr); } if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0) status = nfs4_intent_set_file(nd, &path, state); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 8003c91ccb9..1fcca516e6e 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ 
-3269,7 +3269,7 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) { __be32 *p; - uint32_t bmlen; + uint32_t savewords, bmlen, i; int status; status = decode_op_hdr(xdr, OP_OPEN); @@ -3287,7 +3287,12 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) goto xdr_error; READ_BUF(bmlen << 2); - p += bmlen; + savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE); + for (i = 0; i < savewords; ++i) + READ32(res->attrset[i]); + for (; i < NFS4_BITMAP_SIZE; i++) + res->attrset[i] = 0; + return decode_delegation(xdr, res); xdr_error: dprintk("%s: Bitmap too large! Length = %u\n", __FUNCTION__, bmlen); diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 7e7f33a38fc..8726491de15 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -15,6 +15,7 @@ #include +#define NFS4_BITMAP_SIZE 2 #define NFS4_VERIFIER_SIZE 8 #define NFS4_STATEID_SIZE 16 #define NFS4_FHSIZE 128 diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 10c26ed0db7..f7100df3a69 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -144,6 +144,7 @@ struct nfs_openres { nfs4_stateid delegation; __u32 do_recall; __u64 maxsize; + __u32 attrset[NFS4_BITMAP_SIZE]; }; /* -- cgit v1.2.3-70-g09d2 From e2f032e9ef66e33089d09452892696ea97d1dca1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2007 19:27:00 -0400 Subject: NFS: nfs3_proc_create() should use nfs_post_op_update_inode() Also get rid of a redundant call to nfs_setattr_update_inode(). The call to nfs3_proc_setattr() already takes care of that. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs3proc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 45268d6def2..814d886b6aa 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -335,9 +335,7 @@ again: * not sure this buys us anything (and I'd have * to revamp the NFSv3 XDR code) */ status = nfs3_proc_setattr(dentry, &fattr, sattr); - if (status == 0) - nfs_setattr_update_inode(dentry->d_inode, sattr); - nfs_refresh_inode(dentry->d_inode, &fattr); + nfs_post_op_update_inode(dentry->d_inode, &fattr); dprintk("NFS reply setattr (post-create): %d\n", status); } if (status != 0) -- cgit v1.2.3-70-g09d2 From a50f7951a31d3b976e829250853f89c9d2da32c0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2007 19:23:43 -0400 Subject: NFS: Fix an Oops in the nfs_access_cache_shrinker() The nfs_access_cache_shrinker may race with nfs_access_zap_cache(). Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 0f41678fd7e..322141f4ab4 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1743,8 +1743,8 @@ int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) struct nfs_inode *nfsi; struct nfs_access_entry *cache; - spin_lock(&nfs_access_lru_lock); restart: + spin_lock(&nfs_access_lru_lock); list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) { struct inode *inode; @@ -1769,6 +1769,7 @@ remove_lru_entry: clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags); } spin_unlock(&inode->i_lock); + spin_unlock(&nfs_access_lru_lock); iput(inode); goto restart; } -- cgit v1.2.3-70-g09d2 From c03b40246123b2ced79e2620d1d2c089bb12369a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Jun 2007 13:26:38 -0400 Subject: NFS: Convert struct nfs_page to use krefs Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 14 ++++++++------ fs/nfs/write.c | 6 +++--- 
include/linux/nfs_page.h | 10 +++++----- 3 files changed, 16 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index f8a4ba53393..257a7f8b236 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -85,9 +85,8 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, req->wb_offset = offset; req->wb_pgbase = offset; req->wb_bytes = count; - atomic_set(&req->wb_count, 1); req->wb_context = get_nfs_open_context(ctx); - + kref_init(&req->wb_kref); return req; } @@ -160,11 +159,9 @@ void nfs_clear_request(struct nfs_page *req) * * Note: Should never be called with the spinlock held! */ -void -nfs_release_request(struct nfs_page *req) +static void nfs_free_request(struct kref *kref) { - if (!atomic_dec_and_test(&req->wb_count)) - return; + struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); /* Release struct file or cached credential */ nfs_clear_request(req); @@ -172,6 +169,11 @@ nfs_release_request(struct nfs_page *req) nfs_page_free(req); } +void nfs_release_request(struct nfs_page *req) +{ + kref_put(&req->wb_kref, nfs_free_request); +} + static int nfs_wait_bit_interruptible(void *word) { int ret = 0; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9e7c21da864..e9404328ac0 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -117,7 +117,7 @@ static struct nfs_page *nfs_page_find_request_locked(struct page *page) if (PagePrivate(page)) { req = (struct nfs_page *)page_private(page); if (req != NULL) - atomic_inc(&req->wb_count); + kref_get(&req->wb_kref); } return req; } @@ -398,7 +398,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) if (PageDirty(req->wb_page)) set_bit(PG_NEED_FLUSH, &req->wb_flags); nfsi->npages++; - atomic_inc(&req->wb_count); + kref_get(&req->wb_kref); return 0; } @@ -531,7 +531,7 @@ static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, u next = req->wb_index + 1; BUG_ON(!NFS_WBACK_BUSY(req)); - 
atomic_inc(&req->wb_count); + kref_get(&req->wb_kref); spin_unlock(&nfsi->req_lock); error = nfs_wait_on_request(req); nfs_release_request(req); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index bd193af8016..c780e7e39f9 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -16,7 +16,7 @@ #include #include -#include +#include /* * Valid flags for the radix tree @@ -42,7 +42,7 @@ struct nfs_page { unsigned int wb_offset, /* Offset & ~PAGE_CACHE_MASK */ wb_pgbase, /* Start of page data */ wb_bytes; /* Length of request */ - atomic_t wb_count; /* reference count */ + struct kref wb_kref; /* reference count */ unsigned long wb_flags; struct nfs_writeverf wb_verf; /* Commit cookie */ }; @@ -89,7 +89,7 @@ extern void nfs_clear_page_writeback(struct nfs_page *req); /* - * Lock the page of an asynchronous request without incrementing the wb_count + * Lock the page of an asynchronous request without getting a new reference */ static inline int nfs_lock_request_dontget(struct nfs_page *req) @@ -98,14 +98,14 @@ nfs_lock_request_dontget(struct nfs_page *req) } /* - * Lock the page of an asynchronous request + * Lock the page of an asynchronous request and take a reference */ static inline int nfs_lock_request(struct nfs_page *req) { if (test_and_set_bit(PG_BUSY, &req->wb_flags)) return 0; - atomic_inc(&req->wb_count); + kref_get(&req->wb_kref); return 1; } -- cgit v1.2.3-70-g09d2 From 9fd367f0f376ccfb2592eed9be0eece70429894f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Jun 2007 15:10:24 -0400 Subject: NFS cleanup: Rename NFS_PAGE_TAG_WRITEBACK to NFS_PAGE_TAG_LOCKED Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 14 +++++++------- fs/nfs/write.c | 16 ++++++++-------- include/linux/nfs_page.h | 5 ++--- 3 files changed, 17 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 257a7f8b236..23e9dea2090 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -108,29 
+108,29 @@ void nfs_unlock_request(struct nfs_page *req) } /** - * nfs_set_page_writeback_locked - Lock a request for writeback + * nfs_set_page_tag_locked - Tag a request as locked * @req: */ -int nfs_set_page_writeback_locked(struct nfs_page *req) +static int nfs_set_page_tag_locked(struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode); if (!nfs_lock_request(req)) return 0; - radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); + radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); return 1; } /** - * nfs_clear_page_writeback - Unlock request and wake up sleepers + * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers */ -void nfs_clear_page_writeback(struct nfs_page *req) +void nfs_clear_page_tag_locked(struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode); if (req->wb_page != NULL) { spin_lock(&nfsi->req_lock); - radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); + radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); spin_unlock(&nfsi->req_lock); } nfs_unlock_request(req); @@ -421,7 +421,7 @@ int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, idx_start = req->wb_index + 1; if (req->wb_list_head != head) continue; - if (nfs_set_page_writeback_locked(req)) { + if (nfs_set_page_tag_locked(req)) { nfs_list_remove_request(req); nfs_list_add_request(req, dst); res++; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e9404328ac0..754066cc914 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -289,7 +289,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, BUG(); } radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, - NFS_PAGE_TAG_WRITEBACK); + NFS_PAGE_TAG_LOCKED); ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); spin_unlock(req_lock); nfs_pageio_add_request(pgio, req); @@ -524,7 +524,7 @@ static int 
nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, u idx_end = idx_start + npages - 1; next = idx_start; - while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) { + while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) { if (req->wb_index > idx_end) break; @@ -759,7 +759,7 @@ static void nfs_writepage_release(struct nfs_page *req) nfs_inode_remove_request(req); } else nfs_end_page_writeback(req->wb_page); - nfs_clear_page_writeback(req); + nfs_clear_page_tag_locked(req); } static inline int flush_task_priority(int how) @@ -888,7 +888,7 @@ out_bad: } nfs_redirty_request(req); nfs_end_page_writeback(req->wb_page); - nfs_clear_page_writeback(req); + nfs_clear_page_tag_locked(req); return -ENOMEM; } @@ -931,7 +931,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i nfs_list_remove_request(req); nfs_redirty_request(req); nfs_end_page_writeback(req->wb_page); - nfs_clear_page_writeback(req); + nfs_clear_page_tag_locked(req); } return -ENOMEM; } @@ -1049,7 +1049,7 @@ remove_request: nfs_end_page_writeback(page); nfs_inode_remove_request(req); next: - nfs_clear_page_writeback(req); + nfs_clear_page_tag_locked(req); } } @@ -1212,7 +1212,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) nfs_list_remove_request(req); nfs_mark_request_commit(req); dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - nfs_clear_page_writeback(req); + nfs_clear_page_tag_locked(req); } return -ENOMEM; } @@ -1265,7 +1265,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) dprintk(" mismatch\n"); nfs_redirty_request(req); next: - nfs_clear_page_writeback(req); + nfs_clear_page_tag_locked(req); } } diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index c780e7e39f9..042434c39b7 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -21,7 +21,7 @@ /* * Valid flags for the radix 
tree */ -#define NFS_PAGE_TAG_WRITEBACK 0 +#define NFS_PAGE_TAG_LOCKED 0 /* * Valid flags for a dirty buffer @@ -84,8 +84,7 @@ extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc); extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t); extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); -extern int nfs_set_page_writeback_locked(struct nfs_page *req); -extern void nfs_clear_page_writeback(struct nfs_page *req); +extern void nfs_clear_page_tag_locked(struct nfs_page *req); /* -- cgit v1.2.3-70-g09d2 From 5c36968343fcd013a3f7ae93f246c2e75596780b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Jun 2007 15:27:42 -0400 Subject: NFS cleanup: speed up nfs_scan_commit using radix tree tags Add a tag for requests that are waiting for a COMMIT Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 14 +++++++------- fs/nfs/write.c | 6 +++++- include/linux/nfs_page.h | 5 +++-- 3 files changed, 15 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 23e9dea2090..ad90cbe7670 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -381,10 +381,10 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) /** * nfs_scan_list - Scan a list for matching requests * @nfsi: NFS inode - * @head: One of the NFS inode request lists * @dst: Destination list * @idx_start: lower bound of page->index to scan * @npages: idx_start + npages sets the upper bound to scan. + * @tag: tag to scan for * * Moves elements from one of the inode request lists. * If the number of requests is set to 0, the entire address_space @@ -392,9 +392,9 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) * The requests are *not* checked to ensure that they form a contiguous set. 
* You must be holding the inode's req_lock when calling this function */ -int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, +int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst, pgoff_t idx_start, - unsigned int npages) + unsigned int npages, int tag) { struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; struct nfs_page *req; @@ -409,9 +409,9 @@ int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, idx_end = idx_start + npages - 1; for (;;) { - found = radix_tree_gang_lookup(&nfsi->nfs_page_tree, + found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&pgvec[0], idx_start, - NFS_SCAN_MAXENTRIES); + NFS_SCAN_MAXENTRIES, tag); if (found <= 0) break; for (i = 0; i < found; i++) { @@ -419,10 +419,10 @@ int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, if (req->wb_index > idx_end) goto out; idx_start = req->wb_index + 1; - if (req->wb_list_head != head) - continue; if (nfs_set_page_tag_locked(req)) { nfs_list_remove_request(req); + radix_tree_tag_clear(&nfsi->nfs_page_tree, + req->wb_index, tag); nfs_list_add_request(req, dst); res++; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 754066cc914..0f779ca12ec 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -462,6 +462,9 @@ nfs_mark_request_commit(struct nfs_page *req) nfs_list_add_request(req, &nfsi->commit); nfsi->ncommit++; set_bit(PG_NEED_COMMIT, &(req)->wb_flags); + radix_tree_tag_set(&nfsi->nfs_page_tree, + req->wb_index, + NFS_PAGE_TAG_COMMIT); spin_unlock(&nfsi->req_lock); inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); __mark_inode_dirty(inode, I_DIRTY_DATASYNC); @@ -575,7 +578,8 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, u int res = 0; if (nfsi->ncommit != 0) { - res = nfs_scan_list(nfsi, &nfsi->commit, dst, idx_start, npages); + res = nfs_scan_list(nfsi, dst, idx_start, npages, + NFS_PAGE_TAG_COMMIT); nfsi->ncommit -= res; if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit)) printk(KERN_ERR "NFS: 
desynchronized value of nfs_i.ncommit.\n"); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 042434c39b7..481a42105d6 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -22,6 +22,7 @@ * Valid flags for the radix tree */ #define NFS_PAGE_TAG_LOCKED 0 +#define NFS_PAGE_TAG_COMMIT 1 /* * Valid flags for a dirty buffer @@ -71,8 +72,8 @@ extern void nfs_clear_request(struct nfs_page *req); extern void nfs_release_request(struct nfs_page *req); -extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, struct list_head *dst, - pgoff_t idx_start, unsigned int npages); +extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst, + pgoff_t idx_start, unsigned int npages, int tag); extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int), -- cgit v1.2.3-70-g09d2 From 2aefa104313996d1a9582476cee53d1296c834bf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Jun 2007 15:40:59 -0400 Subject: NFS: Remove the redundant 'dirty' and 'commit' lists from nfs_inode Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 3 --- fs/nfs/write.c | 3 --- include/linux/nfs_fs.h | 5 +---- include/linux/nfs_page.h | 5 +---- 4 files changed, 2 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 7bcb3dfa617..e7d2bba900b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1149,14 +1149,11 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag inode_init_once(&nfsi->vfs_inode); spin_lock_init(&nfsi->req_lock); - INIT_LIST_HEAD(&nfsi->dirty); - INIT_LIST_HEAD(&nfsi->commit); INIT_LIST_HEAD(&nfsi->open_files); INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); atomic_set(&nfsi->data_updates, 0); - nfsi->ndirty = 0; nfsi->ncommit = 0; nfsi->npages = 0; 
nfs4_init_once(nfsi); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0f779ca12ec..9ef9ec746bf 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -459,7 +459,6 @@ nfs_mark_request_commit(struct nfs_page *req) struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&nfsi->req_lock); - nfs_list_add_request(req, &nfsi->commit); nfsi->ncommit++; set_bit(PG_NEED_COMMIT, &(req)->wb_flags); radix_tree_tag_set(&nfsi->nfs_page_tree, @@ -581,8 +580,6 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, u res = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); nfsi->ncommit -= res; - if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit)) - printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); } return res; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 07eea8f64ec..a9420547673 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -156,12 +156,9 @@ struct nfs_inode { * This is the list of dirty unwritten pages. 
*/ spinlock_t req_lock; - struct list_head dirty; - struct list_head commit; struct radix_tree_root nfs_page_tree; - unsigned int ndirty, - ncommit, + unsigned int ncommit, npages; /* Open contexts for shared mmap writes */ diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 481a42105d6..78e60798d10 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -34,8 +34,7 @@ struct nfs_inode; struct nfs_page { - struct list_head wb_list, /* Defines state of page: */ - *wb_list_head; /* read/write/commit */ + struct list_head wb_list; /* Defines state of page: */ struct page *wb_page; /* page to read in/write out */ struct nfs_open_context *wb_context; /* File state context info */ atomic_t wb_complete; /* i/os we're waiting for */ @@ -118,7 +117,6 @@ static inline void nfs_list_add_request(struct nfs_page *req, struct list_head *head) { list_add_tail(&req->wb_list, head); - req->wb_list_head = head; } @@ -132,7 +130,6 @@ nfs_list_remove_request(struct nfs_page *req) if (list_empty(&req->wb_list)) return; list_del_init(&req->wb_list); - req->wb_list_head = NULL; } static inline struct nfs_page * -- cgit v1.2.3-70-g09d2 From dce34ce298d85b81630401f4feb4bd7ac77fe9c7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Jun 2007 15:47:53 -0400 Subject: NFS: Prevent integer overflow in nfs_scan_list() Also ensure that nfs_inode ncommit and npages are large enough to represent all possible values for the number of pages. 
Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 2 ++ include/linux/nfs_fs.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ad90cbe7670..68f6bf12200 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -425,6 +425,8 @@ int nfs_scan_list(struct nfs_inode *nfsi, req->wb_index, tag); nfs_list_add_request(req, dst); res++; + if (res == INT_MAX) + goto out; } } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index a9420547673..750708ccd70 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -158,7 +158,7 @@ struct nfs_inode { spinlock_t req_lock; struct radix_tree_root nfs_page_tree; - unsigned int ncommit, + unsigned long ncommit, npages; /* Open contexts for shared mmap writes */ -- cgit v1.2.3-70-g09d2 From edc05fc1c24ba49dae585da1b2a22686f0b221f0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Jun 2007 16:02:34 -0400 Subject: NFS: reduce latency by using conditional rescheduling in nfs_scan_list Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 68f6bf12200..8d2642f24b8 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -429,7 +429,8 @@ int nfs_scan_list(struct nfs_inode *nfsi, goto out; } } - + /* for latency reduction */ + cond_resched_lock(&nfsi->req_lock); } out: return res; -- cgit v1.2.3-70-g09d2 From 3bec63db55463365110d00721ed60a31e4614cb6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Jun 2007 16:02:44 -0400 Subject: NFS: Convert struct nfs_open_context to use a kref Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 40 +++++++++++++++++++++++----------------- include/linux/nfs_fs.h | 3 ++- 2 files changed, 25 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e7d2bba900b..01fc8ab0c56 100644 --- a/fs/nfs/inode.c +++ 
b/fs/nfs/inode.c @@ -461,7 +461,6 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (ctx != NULL) { - atomic_set(&ctx->count, 1); ctx->path.dentry = dget(dentry); ctx->path.mnt = mntget(mnt); ctx->cred = get_rpccred(cred); @@ -469,6 +468,7 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str ctx->lockowner = current->files; ctx->error = 0; ctx->dir_cookie = 0; + kref_init(&ctx->kref); } return ctx; } @@ -476,27 +476,33 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) { if (ctx != NULL) - atomic_inc(&ctx->count); + kref_get(&ctx->kref); return ctx; } -void put_nfs_open_context(struct nfs_open_context *ctx) +static void nfs_free_open_context(struct kref *kref) { - if (atomic_dec_and_test(&ctx->count)) { - if (!list_empty(&ctx->list)) { - struct inode *inode = ctx->path.dentry->d_inode; - spin_lock(&inode->i_lock); - list_del(&ctx->list); - spin_unlock(&inode->i_lock); - } - if (ctx->state != NULL) - nfs4_close_state(&ctx->path, ctx->state, ctx->mode); - if (ctx->cred != NULL) - put_rpccred(ctx->cred); - dput(ctx->path.dentry); - mntput(ctx->path.mnt); - kfree(ctx); + struct nfs_open_context *ctx = container_of(kref, + struct nfs_open_context, kref); + + if (!list_empty(&ctx->list)) { + struct inode *inode = ctx->path.dentry->d_inode; + spin_lock(&inode->i_lock); + list_del(&ctx->list); + spin_unlock(&inode->i_lock); } + if (ctx->state != NULL) + nfs4_close_state(&ctx->path, ctx->state, ctx->mode); + if (ctx->cred != NULL) + put_rpccred(ctx->cred); + dput(ctx->path.dentry); + mntput(ctx->path.mnt); + kfree(ctx); +} + +void put_nfs_open_context(struct nfs_open_context *ctx) +{ + kref_put(&ctx->kref, nfs_free_open_context); } /* diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 750708ccd70..bf24151d63b 100644 --- a/include/linux/nfs_fs.h 
+++ b/include/linux/nfs_fs.h @@ -30,6 +30,7 @@ #ifdef __KERNEL__ #include +#include #include #include #include @@ -70,7 +71,7 @@ struct nfs_access_entry { struct nfs4_state; struct nfs_open_context { - atomic_t count; + struct kref kref; struct path path; struct rpc_cred *cred; struct nfs4_state *state; -- cgit v1.2.3-70-g09d2 From c6d00e639bdec5f33460bc95bae4efda7177a6ed Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Jun 2007 16:02:44 -0400 Subject: NFSv4: Convert struct nfs4_opendata to use struct kref Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 53 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index fee2d14b158..d90209e7958 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -214,7 +214,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) } struct nfs4_opendata { - atomic_t count; + struct kref kref; struct nfs_openargs o_arg; struct nfs_openres o_res; struct nfs_open_confirmargs c_arg; @@ -245,7 +245,6 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); if (p->o_arg.seqid == NULL) goto err_free; - atomic_set(&p->count, 1); p->path.mnt = mntget(path->mnt); p->path.dentry = dget(path->dentry); p->dir = parent; @@ -275,6 +274,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, p->c_arg.fh = &p->o_res.fh; p->c_arg.stateid = &p->o_res.stateid; p->c_arg.seqid = p->o_arg.seqid; + kref_init(&p->kref); return p; err_free: kfree(p); @@ -283,16 +283,23 @@ err: return NULL; } -static void nfs4_opendata_free(struct nfs4_opendata *p) +static void nfs4_opendata_free(struct kref *kref) { - if (p != NULL && atomic_dec_and_test(&p->count)) { - nfs_free_seqid(p->o_arg.seqid); - nfs4_put_state_owner(p->owner); - dput(p->dir); - dput(p->path.dentry); - mntput(p->path.mnt); - kfree(p); - } + struct 
nfs4_opendata *p = container_of(kref, + struct nfs4_opendata, kref); + + nfs_free_seqid(p->o_arg.seqid); + nfs4_put_state_owner(p->owner); + dput(p->dir); + dput(p->path.dentry); + mntput(p->path.mnt); + kfree(p); +} + +static void nfs4_opendata_put(struct nfs4_opendata *p) +{ + if (p != NULL) + kref_put(&p->kref, nfs4_opendata_free); } static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task) @@ -476,7 +483,7 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state nfs_copy_fh(&opendata->o_res.fh, opendata->o_arg.fh); opendata->o_arg.u.delegation_type = delegation_type; status = nfs4_open_recover(opendata, state); - nfs4_opendata_free(opendata); + nfs4_opendata_put(opendata); return status; } @@ -522,7 +529,7 @@ static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs memcpy(opendata->o_arg.u.delegation.data, state->stateid.data, sizeof(opendata->o_arg.u.delegation.data)); ret = nfs4_open_recover(opendata, state); - nfs4_opendata_free(opendata); + nfs4_opendata_put(opendata); return ret; } @@ -593,7 +600,7 @@ static void nfs4_open_confirm_release(void *calldata) if (state != NULL) nfs4_close_state(&data->path, state, data->o_arg.open_flags); out_free: - nfs4_opendata_free(data); + nfs4_opendata_put(data); } static const struct rpc_call_ops nfs4_open_confirm_ops = { @@ -611,7 +618,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data) struct rpc_task *task; int status; - atomic_inc(&data->count); + kref_get(&data->kref); /* * If rpc_run_task() ends up calling ->rpc_release(), we * want to ensure that it takes the 'error' code path. 
@@ -696,7 +703,7 @@ static void nfs4_open_release(void *calldata) if (state != NULL) nfs4_close_state(&data->path, state, data->o_arg.open_flags); out_free: - nfs4_opendata_free(data); + nfs4_opendata_put(data); } static const struct rpc_call_ops nfs4_open_ops = { @@ -717,7 +724,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) struct rpc_task *task; int status; - atomic_inc(&data->count); + kref_get(&data->kref); /* * If rpc_run_task() ends up calling ->rpc_release(), we * want to ensure that it takes the 'error' code path. @@ -826,7 +833,7 @@ static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *s nfs4_drop_state_owner(state->owner); d_drop(ctx->path.dentry); } - nfs4_opendata_free(opendata); + nfs4_opendata_put(opendata); return ret; } @@ -987,7 +994,7 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct status = _nfs4_proc_open(opendata); if (status != 0) - goto err_opendata_free; + goto err_opendata_put; if (opendata->o_arg.open_flags & O_EXCL) nfs4_exclusive_attrset(opendata, sattr); @@ -995,16 +1002,16 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct status = -ENOMEM; state = nfs4_opendata_to_nfs4_state(opendata); if (state == NULL) - goto err_opendata_free; + goto err_opendata_put; if (opendata->o_res.delegation_type != 0) nfs_inode_set_delegation(state->inode, cred, &opendata->o_res); - nfs4_opendata_free(opendata); + nfs4_opendata_put(opendata); nfs4_put_state_owner(sp); up_read(&clp->cl_sem); *res = state; return 0; -err_opendata_free: - nfs4_opendata_free(opendata); +err_opendata_put: + nfs4_opendata_put(opendata); err_release_rwsem: up_read(&clp->cl_sem); err_put_state_owner: -- cgit v1.2.3-70-g09d2 From 34f52e3591f241b825353ba27def956d8487c400 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 14 Jun 2007 16:40:31 -0400 Subject: SUNRPC: Convert rpc_clnt->cl_users to a kref Signed-off-by: Trond Myklebust --- fs/lockd/host.c | 12 +++------- 
include/linux/sunrpc/clnt.h | 2 +- net/sunrpc/clnt.c | 57 ++++++++++++++++++++++----------------------- net/sunrpc/rpc_pipe.c | 2 +- net/sunrpc/sched.c | 6 ++--- 5 files changed, 35 insertions(+), 44 deletions(-) (limited to 'fs') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 96070bff93f..c252a1c9585 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -161,15 +161,9 @@ nlm_destroy_host(struct nlm_host *host) */ nsm_unmonitor(host); - if ((clnt = host->h_rpcclnt) != NULL) { - if (atomic_read(&clnt->cl_users)) { - printk(KERN_WARNING - "lockd: active RPC handle\n"); - clnt->cl_dead = 1; - } else { - rpc_destroy_client(host->h_rpcclnt); - } - } + clnt = host->h_rpcclnt; + if (clnt != NULL) + rpc_shutdown_client(clnt); kfree(host); } diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 2f4b520a741..003d8ea70c1 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -24,8 +24,8 @@ struct rpc_inode; * The high-level client handle */ struct rpc_clnt { + struct kref cl_kref; /* Number of references */ atomic_t cl_count; /* Number of clones */ - atomic_t cl_users; /* number of references */ struct list_head cl_clients; /* Global list of clients */ struct list_head cl_tasks; /* List of tasks */ spinlock_t cl_lock; /* spinlock */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 424dfdc6862..254a6e1a577 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -121,7 +121,6 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s clnt = kzalloc(sizeof(*clnt), GFP_KERNEL); if (!clnt) goto out_err; - atomic_set(&clnt->cl_users, 0); atomic_set(&clnt->cl_count, 1); clnt->cl_parent = clnt; @@ -157,6 +156,8 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s clnt->cl_rtt = &clnt->cl_rtt_default; rpc_init_rtt(&clnt->cl_rtt_default, xprt->timeout.to_initval); + kref_init(&clnt->cl_kref); + err = rpc_setup_pipedir(clnt, program->pipe_dir_name); if (err < 0) goto 
out_no_path; @@ -272,10 +273,10 @@ rpc_clone_client(struct rpc_clnt *clnt) if (!new) goto out_no_clnt; atomic_set(&new->cl_count, 1); - atomic_set(&new->cl_users, 0); new->cl_metrics = rpc_alloc_iostats(clnt); if (new->cl_metrics == NULL) goto out_no_stats; + kref_init(&new->cl_kref); err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name); if (err != 0) goto out_no_path; @@ -311,40 +312,28 @@ out_no_clnt: int rpc_shutdown_client(struct rpc_clnt *clnt) { - dprintk("RPC: shutting down %s client for %s, tasks=%d\n", - clnt->cl_protname, clnt->cl_server, - atomic_read(&clnt->cl_users)); + dprintk("RPC: shutting down %s client for %s\n", + clnt->cl_protname, clnt->cl_server); - while (atomic_read(&clnt->cl_users) > 0) { + while (!list_empty(&clnt->cl_tasks)) { /* Don't let rpc_release_client destroy us */ clnt->cl_oneshot = 0; clnt->cl_dead = 0; rpc_killall_tasks(clnt); wait_event_timeout(destroy_wait, - !atomic_read(&clnt->cl_users), 1*HZ); - } - - if (atomic_read(&clnt->cl_users) < 0) { - printk(KERN_ERR "RPC: rpc_shutdown_client clnt %p tasks=%d\n", - clnt, atomic_read(&clnt->cl_users)); -#ifdef RPC_DEBUG - rpc_show_tasks(); -#endif - BUG(); + list_empty(&clnt->cl_tasks), 1*HZ); } return rpc_destroy_client(clnt); } /* - * Delete an RPC client + * Free an RPC client */ -int -rpc_destroy_client(struct rpc_clnt *clnt) +static void +rpc_free_client(struct kref *kref) { - if (!atomic_dec_and_test(&clnt->cl_count)) - return 1; - BUG_ON(atomic_read(&clnt->cl_users) != 0); + struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref); dprintk("RPC: destroying %s client for %s\n", clnt->cl_protname, clnt->cl_server); @@ -368,23 +357,33 @@ out_free: clnt->cl_metrics = NULL; xprt_put(clnt->cl_xprt); kfree(clnt); - return 0; } /* - * Release an RPC client + * Release reference to the RPC client */ void rpc_release_client(struct rpc_clnt *clnt) { - dprintk("RPC: rpc_release_client(%p, %d)\n", - clnt, atomic_read(&clnt->cl_users)); + dprintk("RPC: 
rpc_release_client(%p)\n", clnt); - if (!atomic_dec_and_test(&clnt->cl_users)) - return; - wake_up(&destroy_wait); + if (list_empty(&clnt->cl_tasks)) + wake_up(&destroy_wait); if (clnt->cl_oneshot || clnt->cl_dead) rpc_destroy_client(clnt); + kref_put(&clnt->cl_kref, rpc_free_client); +} + +/* + * Delete an RPC client + */ +int +rpc_destroy_client(struct rpc_clnt *clnt) +{ + if (!atomic_dec_and_test(&clnt->cl_count)) + return 1; + kref_put(&clnt->cl_kref, rpc_free_client); + return 0; } /** diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 5887457dc93..826190dacfc 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -344,7 +344,7 @@ rpc_info_open(struct inode *inode, struct file *file) mutex_lock(&inode->i_mutex); clnt = RPC_I(inode)->private; if (clnt) { - atomic_inc(&clnt->cl_users); + kref_get(&clnt->cl_kref); m->private = clnt; } else { single_release(inode, file); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 0e9fbbd4f98..bb12983580a 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -846,7 +846,7 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons task->tk_workqueue = rpciod_workqueue; if (clnt) { - atomic_inc(&clnt->cl_users); + kref_get(&clnt->cl_kref); if (clnt->cl_softrtry) task->tk_flags |= RPC_TASK_SOFT; if (!clnt->cl_intr) @@ -898,9 +898,7 @@ out: cleanup: /* Check whether to release the client */ if (clnt) { - printk("rpc_new_task: failed, users=%d, oneshot=%d\n", - atomic_read(&clnt->cl_users), clnt->cl_oneshot); - atomic_inc(&clnt->cl_users); /* pretend we were used ... */ + kref_get(&clnt->cl_kref); /* pretend we were used ... 
*/ rpc_release_client(clnt); } goto out; -- cgit v1.2.3-70-g09d2 From 90c5755ff5111ffdcca10a1e8a823dba29f37b6d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 9 Jun 2007 19:49:36 -0400 Subject: SUNRPC: Kill rpc_clnt->cl_oneshot Replace it with explicit calls to rpc_shutdown_client() or rpc_destroy_client() (for the case of asynchronous calls). Signed-off-by: Trond Myklebust --- fs/lockd/mon.c | 2 +- fs/nfs/mount_clnt.c | 4 ++-- include/linux/sunrpc/clnt.h | 10 ++++------ net/sunrpc/clnt.c | 10 +--------- net/sunrpc/rpcb_clnt.c | 6 ++++-- net/sunrpc/sched.c | 14 ++------------ 6 files changed, 14 insertions(+), 32 deletions(-) (limited to 'fs') diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 2102e2d0134..3353ed8421a 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -61,6 +61,7 @@ nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res) status); else status = 0; + rpc_shutdown_client(clnt); out: return status; } @@ -138,7 +139,6 @@ nsm_create(void) .program = &nsm_program, .version = SM_VERSION, .authflavor = RPC_AUTH_NULL, - .flags = (RPC_CLNT_CREATE_ONESHOT), }; return rpc_create(&args); diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index ca5a266a314..878d7a5cb6d 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -69,6 +69,7 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh, msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT]; status = rpc_call_sync(mnt_clnt, &msg, 0); + rpc_shutdown_client(mnt_clnt); return status < 0? status : (result.status? 
-EACCES : 0); } @@ -84,8 +85,7 @@ mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version, .program = &mnt_program, .version = version, .authflavor = RPC_AUTH_UNIX, - .flags = (RPC_CLNT_CREATE_ONESHOT | - RPC_CLNT_CREATE_INTR), + .flags = RPC_CLNT_CREATE_INTR, }; return rpc_create(&args); diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index ab3ef6d629a..fe7ea65ed0a 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -44,8 +44,7 @@ struct rpc_clnt { unsigned int cl_softrtry : 1,/* soft timeouts */ cl_intr : 1,/* interruptible */ cl_discrtry : 1,/* disconnect before retry */ - cl_autobind : 1,/* use getport() */ - cl_oneshot : 1;/* dispose after use */ + cl_autobind : 1;/* use getport() */ struct rpc_rtt * cl_rtt; /* RTO estimator data */ @@ -112,10 +111,9 @@ struct rpc_create_args { #define RPC_CLNT_CREATE_HARDRTRY (1UL << 0) #define RPC_CLNT_CREATE_INTR (1UL << 1) #define RPC_CLNT_CREATE_AUTOBIND (1UL << 2) -#define RPC_CLNT_CREATE_ONESHOT (1UL << 3) -#define RPC_CLNT_CREATE_NONPRIVPORT (1UL << 4) -#define RPC_CLNT_CREATE_NOPING (1UL << 5) -#define RPC_CLNT_CREATE_DISCRTRY (1UL << 6) +#define RPC_CLNT_CREATE_NONPRIVPORT (1UL << 3) +#define RPC_CLNT_CREATE_NOPING (1UL << 4) +#define RPC_CLNT_CREATE_DISCRTRY (1UL << 5) struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index fb65249538d..34662dfa9cc 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -249,8 +249,6 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) clnt->cl_intr = 1; if (args->flags & RPC_CLNT_CREATE_AUTOBIND) clnt->cl_autobind = 1; - if (args->flags & RPC_CLNT_CREATE_ONESHOT) - clnt->cl_oneshot = 1; if (args->flags & RPC_CLNT_CREATE_DISCRTRY) clnt->cl_discrtry = 1; @@ -285,7 +283,6 @@ rpc_clone_client(struct rpc_clnt *clnt) new->cl_xprt = xprt_get(clnt->cl_xprt); /* Turn off autobind on clones */ 
new->cl_autobind = 0; - new->cl_oneshot = 0; INIT_LIST_HEAD(&new->cl_tasks); spin_lock_init(&new->cl_lock); rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); @@ -304,8 +301,7 @@ out_no_clnt: /* * Properly shut down an RPC client, terminating all outstanding - * requests. Note that we must be certain that cl_oneshot is cleared, - * or else the client would be destroyed when the last task releases it. + * requests. */ int rpc_shutdown_client(struct rpc_clnt *clnt) @@ -314,8 +310,6 @@ rpc_shutdown_client(struct rpc_clnt *clnt) clnt->cl_protname, clnt->cl_server); while (!list_empty(&clnt->cl_tasks)) { - /* Don't let rpc_release_client destroy us */ - clnt->cl_oneshot = 0; rpc_killall_tasks(clnt); wait_event_timeout(destroy_wait, list_empty(&clnt->cl_tasks), 1*HZ); @@ -366,8 +360,6 @@ rpc_release_client(struct rpc_clnt *clnt) if (list_empty(&clnt->cl_tasks)) wake_up(&destroy_wait); - if (clnt->cl_oneshot) - rpc_destroy_client(clnt); kref_put(&clnt->cl_kref, rpc_free_client); } diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 6c7aa8a1f0c..00853a32649 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -184,8 +184,7 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, .program = &rpcb_program, .version = version, .authflavor = RPC_AUTH_UNIX, - .flags = (RPC_CLNT_CREATE_ONESHOT | - RPC_CLNT_CREATE_NOPING), + .flags = RPC_CLNT_CREATE_NOPING, }; ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT); @@ -238,6 +237,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) error = rpc_call_sync(rpcb_clnt, &msg, 0); + rpc_shutdown_client(rpcb_clnt); if (error < 0) printk(KERN_WARNING "RPC: failed to contact local rpcbind " "server (errno %d).\n", -error); @@ -286,6 +286,7 @@ int rpcb_getport_external(struct sockaddr_in *sin, __u32 prog, return PTR_ERR(rpcb_clnt); status = rpc_call_sync(rpcb_clnt, &msg, 0); + rpc_shutdown_client(rpcb_clnt); if (status >= 0) { 
if (map.r_port != 0) @@ -379,6 +380,7 @@ void rpcb_getport(struct rpc_task *task) } child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map); + rpc_destroy_client(rpcb_clnt); if (IS_ERR(child)) { status = -EIO; dprintk("RPC: %5u rpcb_getport rpc_run_task failed\n", diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index bb12983580a..d95fe4e40eb 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -876,9 +876,7 @@ static void rpc_free_task(struct rcu_head *rcu) } /* - * Create a new task for the specified client. We have to - * clean up after an allocation failure, as the client may - * have specified "oneshot". + * Create a new task for the specified client. */ struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata) { @@ -886,7 +884,7 @@ struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc task = rpc_alloc_task(); if (!task) - goto cleanup; + goto out; rpc_init_task(task, clnt, flags, tk_ops, calldata); @@ -894,14 +892,6 @@ struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc task->tk_flags |= RPC_TASK_DYNAMIC; out: return task; - -cleanup: - /* Check whether to release the client */ - if (clnt) { - kref_get(&clnt->cl_kref); /* pretend we were used ... 
*/ - rpc_release_client(clnt); - } - goto out; } -- cgit v1.2.3-70-g09d2 From f61534dfd38f895b203e2aadaba04f21a992ca8c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 14 Jun 2007 17:31:58 -0400 Subject: SUNRPC: Remove redundant calls to rpciod_up()/rpciod_down() Signed-off-by: Trond Myklebust --- fs/lockd/svc.c | 6 ------ fs/nfs/client.c | 15 --------------- fs/nfsd/nfs4callback.c | 12 +++--------- fs/nfsd/nfs4state.c | 1 - include/linux/nfs_fs_sb.h | 1 - net/sunrpc/sunrpc_syms.c | 2 -- 6 files changed, 3 insertions(+), 34 deletions(-) (limited to 'fs') diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 126b1bf02c0..26809325469 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -123,9 +123,6 @@ lockd(struct svc_rqst *rqstp) /* Process request with signals blocked, but allow SIGKILL. */ allow_signal(SIGKILL); - /* kick rpciod */ - rpciod_up(); - dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n"); if (!nlm_timeout) @@ -202,9 +199,6 @@ lockd(struct svc_rqst *rqstp) /* Exit the RPC thread */ svc_exit_thread(rqstp); - /* release rpciod */ - rpciod_down(); - /* Release module */ unlock_kernel(); module_put_and_exit(0); diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 881fa490092..71d4c4cdac5 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -102,19 +102,10 @@ static struct nfs_client *nfs_alloc_client(const char *hostname, int nfsversion) { struct nfs_client *clp; - int error; if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) goto error_0; - error = rpciod_up(); - if (error < 0) { - dprintk("%s: couldn't start rpciod! 
Error = %d\n", - __FUNCTION__, error); - goto error_1; - } - __set_bit(NFS_CS_RPCIOD, &clp->cl_res_state); - if (nfsversion == 4) { if (nfs_callback_up() < 0) goto error_2; @@ -154,9 +145,6 @@ error_3: if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) nfs_callback_down(); error_2: - rpciod_down(); - __clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state); -error_1: kfree(clp); error_0: return NULL; @@ -198,9 +186,6 @@ static void nfs_free_client(struct nfs_client *clp) if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) nfs_callback_down(); - if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state)) - rpciod_down(); - kfree(clp->cl_hostname); kfree(clp); diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 864090edc28..6b1b487db1e 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -429,29 +429,23 @@ nfsd4_probe_callback(struct nfs4_client *clp) goto out_err; } - /* Kick rpciod, put the call on the wire. */ - if (rpciod_up() != 0) - goto out_clnt; - /* the task holds a reference to the nfs4_client struct */ atomic_inc(&clp->cl_count); msg.rpc_cred = nfsd4_lookupcred(clp,0); if (IS_ERR(msg.rpc_cred)) - goto out_rpciod; + goto out_release_clp; status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL); put_rpccred(msg.rpc_cred); if (status != 0) { dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n"); - goto out_rpciod; + goto out_release_clp; } return; -out_rpciod: +out_release_clp: atomic_dec(&clp->cl_count); - rpciod_down(); -out_clnt: rpc_shutdown_client(cb->cb_client); out_err: cb->cb_client = NULL; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3cc8ce422ab..8c52913d7cb 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -378,7 +378,6 @@ shutdown_callback_client(struct nfs4_client *clp) if (clnt) { clp->cl_callback.cb_client = NULL; rpc_shutdown_client(clnt); - rpciod_down(); } } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 
52b4378311c..144d955dc46 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -16,7 +16,6 @@ struct nfs_client { #define NFS_CS_INITING 1 /* busy initialising */ int cl_nfsversion; /* NFS protocol version */ unsigned long cl_res_state; /* NFS resources state */ -#define NFS_CS_RPCIOD 0 /* - rpciod started */ #define NFS_CS_CALLBACK 1 /* - callback started */ #define NFS_CS_IDMAP 2 /* - idmap started */ #define NFS_CS_RENEWD 3 /* - renewd started */ diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 02e83e15fef..b99b11b1146 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -28,8 +28,6 @@ EXPORT_SYMBOL(rpc_init_task); EXPORT_SYMBOL(rpc_sleep_on); EXPORT_SYMBOL(rpc_wake_up_next); EXPORT_SYMBOL(rpc_wake_up_task); -EXPORT_SYMBOL(rpciod_down); -EXPORT_SYMBOL(rpciod_up); EXPORT_SYMBOL(rpc_wake_up_status); /* RPC client functions */ -- cgit v1.2.3-70-g09d2 From 1be27f36601973815171db684c711d30557cf50c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 27 Jun 2007 14:29:04 -0400 Subject: SUNRPC: Remove the tk_auth macro... We should almost always be dereferencing the rpc_auth struct by means of the credential's cr_auth field instead of the rpc_clnt->cl_auth anyway. Fix up that historical mistake, and remove the macro that propagated it. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs2xdr.c | 6 +++--- fs/nfs/nfs3xdr.c | 8 ++++---- fs/nfs/nfs4xdr.c | 10 +++++----- include/linux/sunrpc/sched.h | 1 - net/sunrpc/auth.c | 25 +++++++++++++------------ net/sunrpc/auth_gss/auth_gss.c | 4 ++-- net/sunrpc/auth_unix.c | 2 +- net/sunrpc/clnt.c | 2 +- 8 files changed, 29 insertions(+), 29 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index cd3ca7b5d3d..7fcc78f2aa7 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -223,7 +223,7 @@ nfs_xdr_diropargs(struct rpc_rqst *req, __be32 *p, struct nfs_diropargs *args) static int nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; u32 offset = (u32)args->offset; u32 count = args->count; @@ -380,7 +380,7 @@ static int nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args) { struct rpc_task *task = req->rq_task; - struct rpc_auth *auth = task->tk_auth; + struct rpc_auth *auth = task->tk_msg.rpc_cred->cr_auth; unsigned int replen; u32 count = args->count; @@ -541,7 +541,7 @@ nfs_xdr_diropres(struct rpc_rqst *req, __be32 *p, struct nfs_diropok *res) static int nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; p = xdr_encode_fhandle(p, args->fh); diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index b51df8eb9f0..b4647a22f34 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -319,7 +319,7 @@ nfs3_xdr_accessargs(struct rpc_rqst *req, __be32 *p, struct nfs3_accessargs *arg static int nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; 
unsigned int replen; u32 count = args->count; @@ -458,7 +458,7 @@ nfs3_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_linkargs *args) static int nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; u32 count = args->count; @@ -643,7 +643,7 @@ static int nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p, struct nfs3_getaclargs *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; p = xdr_encode_fhandle(p, args->fh); @@ -773,7 +773,7 @@ nfs3_xdr_accessres(struct rpc_rqst *req, __be32 *p, struct nfs3_accessres *res) static int nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; p = xdr_encode_fhandle(p, args->fh); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1fcca516e6e..859b1363325 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1071,7 +1071,7 @@ static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args) static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; uint32_t attrs[2] = { FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID, FATTR4_WORD1_MOUNTED_ON_FILEID, @@ -1117,7 +1117,7 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg static int encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; __be32 *p; @@ 
-1735,7 +1735,7 @@ out: */ static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; struct xdr_stream xdr; struct compound_hdr hdr = { .nops = 2, @@ -1795,7 +1795,7 @@ nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p, struct nfs_getaclargs *args) { struct xdr_stream xdr; - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; struct compound_hdr hdr = { .nops = 2, }; @@ -2030,7 +2030,7 @@ static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs struct compound_hdr hdr = { .nops = 3, }; - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; int replen; int status; diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 3387b008cdf..8ea077db009 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -98,7 +98,6 @@ struct rpc_task { unsigned short tk_pid; /* debugging aid */ #endif }; -#define tk_auth tk_client->cl_auth #define tk_xprt tk_client->cl_xprt /* support walking a list of tasks on a wait queue */ diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 81f4c776c55..74baf87ccff 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -371,7 +371,7 @@ EXPORT_SYMBOL(rpcauth_init_cred); struct rpc_cred * rpcauth_bindcred(struct rpc_task *task) { - struct rpc_auth *auth = task->tk_auth; + struct rpc_auth *auth = task->tk_client->cl_auth; struct auth_cred acred = { .uid = current->fsuid, .gid = current->fsgid, @@ -381,7 +381,7 @@ rpcauth_bindcred(struct rpc_task *task) int flags = 0; dprintk("RPC: %5u looking up %s cred\n", - task->tk_pid, task->tk_auth->au_ops->au_name); + task->tk_pid, task->tk_client->cl_auth->au_ops->au_name); get_group_info(acred.group_info); if (task->tk_flags & RPC_TASK_ROOTCREDS) flags |= 
RPCAUTH_LOOKUP_ROOTCREDS; @@ -397,11 +397,12 @@ rpcauth_bindcred(struct rpc_task *task) void rpcauth_holdcred(struct rpc_task *task) { - dprintk("RPC: %5u holding %s cred %p\n", - task->tk_pid, task->tk_auth->au_ops->au_name, - task->tk_msg.rpc_cred); - if (task->tk_msg.rpc_cred) - get_rpccred(task->tk_msg.rpc_cred); + struct rpc_cred *cred = task->tk_msg.rpc_cred; + if (cred != NULL) { + get_rpccred(cred); + dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid, + cred->cr_auth->au_ops->au_name, cred); + } } void @@ -441,7 +442,7 @@ rpcauth_unbindcred(struct rpc_task *task) struct rpc_cred *cred = task->tk_msg.rpc_cred; dprintk("RPC: %5u releasing %s cred %p\n", - task->tk_pid, task->tk_auth->au_ops->au_name, cred); + task->tk_pid, cred->cr_auth->au_ops->au_name, cred); put_rpccred(cred); task->tk_msg.rpc_cred = NULL; @@ -453,7 +454,7 @@ rpcauth_marshcred(struct rpc_task *task, __be32 *p) struct rpc_cred *cred = task->tk_msg.rpc_cred; dprintk("RPC: %5u marshaling %s cred %p\n", - task->tk_pid, task->tk_auth->au_ops->au_name, cred); + task->tk_pid, cred->cr_auth->au_ops->au_name, cred); return cred->cr_ops->crmarshal(task, p); } @@ -464,7 +465,7 @@ rpcauth_checkverf(struct rpc_task *task, __be32 *p) struct rpc_cred *cred = task->tk_msg.rpc_cred; dprintk("RPC: %5u validating %s cred %p\n", - task->tk_pid, task->tk_auth->au_ops->au_name, cred); + task->tk_pid, cred->cr_auth->au_ops->au_name, cred); return cred->cr_ops->crvalidate(task, p); } @@ -505,7 +506,7 @@ rpcauth_refreshcred(struct rpc_task *task) int err; dprintk("RPC: %5u refreshing %s cred %p\n", - task->tk_pid, task->tk_auth->au_ops->au_name, cred); + task->tk_pid, cred->cr_auth->au_ops->au_name, cred); err = cred->cr_ops->crrefresh(task); if (err < 0) @@ -519,7 +520,7 @@ rpcauth_invalcred(struct rpc_task *task) struct rpc_cred *cred = task->tk_msg.rpc_cred; dprintk("RPC: %5u invalidating %s cred %p\n", - task->tk_pid, task->tk_auth->au_ops->au_name, cred); + task->tk_pid, cred->cr_auth->au_ops->au_name, 
cred); if (cred) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); } diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 15da6f82db3..debcda86467 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -906,7 +906,7 @@ gss_validate(struct rpc_task *task, __be32 *p) goto out_bad; /* We leave it to unwrap to calculate au_rslack. For now we just * calculate the length of the verifier: */ - task->tk_auth->au_verfsize = XDR_QUADLEN(len) + 2; + cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2; gss_put_ctx(ctx); dprintk("RPC: %5u gss_validate: gss_verify_mic succeeded.\n", task->tk_pid); @@ -1206,7 +1206,7 @@ gss_unwrap_resp(struct rpc_task *task, break; } /* take into account extra slack for integrity and privacy cases: */ - task->tk_auth->au_rslack = task->tk_auth->au_verfsize + (p - savedp) + cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + (p - savedp) + (savedlen - head->iov_len); out_decode: status = decode(rqstp, p, obj); diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index d9c50d810d1..5ed91e5bcee 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -204,7 +204,7 @@ unx_validate(struct rpc_task *task, __be32 *p) printk("RPC: giant verf size: %u\n", size); return NULL; } - task->tk_auth->au_rslack = (size >> 2) + 2; + task->tk_msg.rpc_cred->cr_auth->au_rslack = (size >> 2) + 2; p += (size >> 2); return p; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 28a789419f6..50af8bbe7f2 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -787,7 +787,7 @@ call_reserveresult(struct rpc_task *task) static void call_allocate(struct rpc_task *task) { - unsigned int slack = task->tk_auth->au_cslack; + unsigned int slack = task->tk_msg.rpc_cred->cr_auth->au_cslack; struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = task->tk_xprt; struct rpc_procinfo *proc = task->tk_msg.rpc_proc; -- cgit v1.2.3-70-g09d2 From 4e56e082dd89266d320ccfbc7bd0102186a765ac Mon Sep 17 
00:00:00 2001 From: Trond Myklebust Date: Sun, 1 Jul 2007 18:13:52 -0400 Subject: NFSv4: Clean up _nfs4_proc_lookup() vs _nfs4_proc_lookupfh() They differ only slightly in the arguments they take. Why have they not been merged? Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 35 +++++++++-------------------------- 1 file changed, 9 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d90209e7958..84d0b7e0dd6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1592,8 +1592,6 @@ static int _nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh, dprintk("NFS call lookupfh %s\n", name->name); status = rpc_call_sync(server->client, &msg, 0); dprintk("NFS reply lookupfh: %d\n", status); - if (status == -NFS4ERR_MOVED) - status = -EREMOTE; return status; } @@ -1604,10 +1602,13 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh, struct nfs4_exception exception = { }; int err; do { - err = nfs4_handle_exception(server, - _nfs4_proc_lookupfh(server, dirfh, name, - fhandle, fattr), - &exception); + err = _nfs4_proc_lookupfh(server, dirfh, name, fhandle, fattr); + /* FIXME: !!!! 
*/ + if (err == -NFS4ERR_MOVED) { + err = -EREMOTE; + break; + } + err = nfs4_handle_exception(server, err, &exception); } while (exception.retry); return err; } @@ -1615,28 +1616,10 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh, static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { - int status; - struct nfs_server *server = NFS_SERVER(dir); - struct nfs4_lookup_arg args = { - .bitmask = server->attr_bitmask, - .dir_fh = NFS_FH(dir), - .name = name, - }; - struct nfs4_lookup_res res = { - .server = server, - .fattr = fattr, - .fh = fhandle, - }; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP], - .rpc_argp = &args, - .rpc_resp = &res, - }; - - nfs_fattr_init(fattr); + int status; dprintk("NFS call lookup %s\n", name->name); - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + status = _nfs4_proc_lookupfh(NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr); if (status == -NFS4ERR_MOVED) status = nfs4_get_referral(dir, name, fattr, fhandle); dprintk("NFS reply lookup: %d\n", status); -- cgit v1.2.3-70-g09d2 From 587142f85f796cf0b823dd3080e815f02ff6b952 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 2 Jul 2007 09:57:54 -0400 Subject: NFS: Replace NFS_I(inode)->req_lock with inode->i_lock There is no justification for keeping a special spinlock for the exclusive use of the NFS writeback code. 
Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 1 - fs/nfs/pagelist.c | 11 ++++--- fs/nfs/write.c | 84 ++++++++++++++++++++++++-------------------------- include/linux/nfs_fs.h | 1 - 4 files changed, 46 insertions(+), 51 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 01fc8ab0c56..9d5124166d2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1154,7 +1154,6 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag struct nfs_inode *nfsi = (struct nfs_inode *) foo; inode_init_once(&nfsi->vfs_inode); - spin_lock_init(&nfsi->req_lock); INIT_LIST_HEAD(&nfsi->open_files); INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 8d2642f24b8..f56dae5216f 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -126,12 +126,13 @@ static int nfs_set_page_tag_locked(struct nfs_page *req) */ void nfs_clear_page_tag_locked(struct nfs_page *req) { - struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode); + struct inode *inode = req->wb_context->path.dentry->d_inode; + struct nfs_inode *nfsi = NFS_I(inode); if (req->wb_page != NULL) { - spin_lock(&nfsi->req_lock); + spin_lock(&inode->i_lock); radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); } nfs_unlock_request(req); } @@ -390,7 +391,7 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) * If the number of requests is set to 0, the entire address_space * starting at index idx_start, is scanned. * The requests are *not* checked to ensure that they form a contiguous set. 
- * You must be holding the inode's req_lock when calling this function + * You must be holding the inode's i_lock when calling this function */ int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst, pgoff_t idx_start, @@ -430,7 +431,7 @@ int nfs_scan_list(struct nfs_inode *nfsi, } } /* for latency reduction */ - cond_resched_lock(&nfsi->req_lock); + cond_resched_lock(&nfsi->vfs_inode.i_lock); } out: return res; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9ef9ec746bf..73ac992ece8 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -124,12 +124,12 @@ static struct nfs_page *nfs_page_find_request_locked(struct page *page) static struct nfs_page *nfs_page_find_request(struct page *page) { + struct inode *inode = page->mapping->host; struct nfs_page *req = NULL; - spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock; - spin_lock(req_lock); + spin_lock(&inode->i_lock); req = nfs_page_find_request_locked(page); - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); return req; } @@ -251,16 +251,16 @@ static void nfs_end_page_writeback(struct page *page) static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, struct page *page) { + struct inode *inode = page->mapping->host; + struct nfs_inode *nfsi = NFS_I(inode); struct nfs_page *req; - struct nfs_inode *nfsi = NFS_I(page->mapping->host); - spinlock_t *req_lock = &nfsi->req_lock; int ret; - spin_lock(req_lock); + spin_lock(&inode->i_lock); for(;;) { req = nfs_page_find_request_locked(page); if (req == NULL) { - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); return 1; } if (nfs_lock_request_dontget(req)) @@ -270,28 +270,28 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, * succeed provided that someone hasn't already marked the * request as dirty (in which case we don't care). 
*/ - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); ret = nfs_wait_on_request(req); nfs_release_request(req); if (ret != 0) return ret; - spin_lock(req_lock); + spin_lock(&inode->i_lock); } if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { /* This request is marked for commit */ - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); nfs_unlock_request(req); nfs_pageio_complete(pgio); return 1; } if (nfs_set_page_writeback(page) != 0) { - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); BUG(); } radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); nfs_pageio_add_request(pgio, req); return ret; } @@ -412,7 +412,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) BUG_ON (!NFS_WBACK_BUSY(req)); - spin_lock(&nfsi->req_lock); + spin_lock(&inode->i_lock); set_page_private(req->wb_page, 0); ClearPagePrivate(req->wb_page); radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); @@ -420,11 +420,11 @@ static void nfs_inode_remove_request(struct nfs_page *req) __set_page_dirty_nobuffers(req->wb_page); nfsi->npages--; if (!nfsi->npages) { - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); nfs_end_data_update(inode); iput(inode); } else - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); nfs_clear_request(req); nfs_release_request(req); } @@ -458,13 +458,13 @@ nfs_mark_request_commit(struct nfs_page *req) struct inode *inode = req->wb_context->path.dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); - spin_lock(&nfsi->req_lock); + spin_lock(&inode->i_lock); nfsi->ncommit++; set_bit(PG_NEED_COMMIT, &(req)->wb_flags); radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_COMMIT); - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); __mark_inode_dirty(inode, I_DIRTY_DATASYNC); } @@ -534,10 +534,10 @@ static int 
nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, u BUG_ON(!NFS_WBACK_BUSY(req)); kref_get(&req->wb_kref); - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); error = nfs_wait_on_request(req); nfs_release_request(req); - spin_lock(&nfsi->req_lock); + spin_lock(&inode->i_lock); if (error < 0) return error; res++; @@ -602,7 +602,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, { struct address_space *mapping = page->mapping; struct inode *inode = mapping->host; - struct nfs_inode *nfsi = NFS_I(inode); struct nfs_page *req, *new = NULL; pgoff_t rqend, end; @@ -612,13 +611,13 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, /* Loop over all inode entries and see if we find * A request for the page we wish to update */ - spin_lock(&nfsi->req_lock); + spin_lock(&inode->i_lock); req = nfs_page_find_request_locked(page); if (req) { if (!nfs_lock_request_dontget(req)) { int error; - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); error = nfs_wait_on_request(req); nfs_release_request(req); if (error < 0) { @@ -628,7 +627,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, } continue; } - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); if (new) nfs_release_request(new); break; @@ -639,14 +638,14 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, nfs_lock_request_dontget(new); error = nfs_inode_add_request(inode, new); if (error) { - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); nfs_unlock_request(new); return ERR_PTR(error); } - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); return new; } - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); new = nfs_create_request(ctx, inode, page, offset, bytes); if (IS_ERR(new)) @@ -974,9 +973,9 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) } if (nfs_write_need_commit(data)) { - spinlock_t *req_lock = 
&NFS_I(page->mapping->host)->req_lock; + struct inode *inode = page->mapping->host; - spin_lock(req_lock); + spin_lock(&inode->i_lock); if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) { /* Do nothing we need to resend the writes */ } else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) { @@ -987,7 +986,7 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) clear_bit(PG_NEED_COMMIT, &req->wb_flags); dprintk(" server reboot detected\n"); } - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); } else dprintk(" OK\n"); @@ -1277,13 +1276,12 @@ static const struct rpc_call_ops nfs_commit_ops = { int nfs_commit_inode(struct inode *inode, int how) { - struct nfs_inode *nfsi = NFS_I(inode); LIST_HEAD(head); int res; - spin_lock(&nfsi->req_lock); + spin_lock(&inode->i_lock); res = nfs_scan_commit(inode, &head, 0, 0); - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); if (res) { int error = nfs_commit_list(inode, &head, how); if (error < 0) @@ -1301,7 +1299,6 @@ static inline int nfs_commit_list(struct inode *inode, struct list_head *head, i long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how) { struct inode *inode = mapping->host; - struct nfs_inode *nfsi = NFS_I(inode); pgoff_t idx_start, idx_end; unsigned int npages = 0; LIST_HEAD(head); @@ -1323,7 +1320,7 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr } } how &= ~FLUSH_NOCOMMIT; - spin_lock(&nfsi->req_lock); + spin_lock(&inode->i_lock); do { ret = nfs_wait_on_requests_locked(inode, idx_start, npages); if (ret != 0) @@ -1334,18 +1331,19 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr if (pages == 0) break; if (how & FLUSH_INVALIDATE) { - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); nfs_cancel_commit_list(&head); ret = pages; - spin_lock(&nfsi->req_lock); + spin_lock(&inode->i_lock); continue; } pages += nfs_scan_commit(inode, &head, 0, 0); - 
spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); ret = nfs_commit_list(inode, &head, how); - spin_lock(&nfsi->req_lock); + spin_lock(&inode->i_lock); + } while (ret >= 0); - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); return ret; } @@ -1439,7 +1437,6 @@ int nfs_set_page_dirty(struct page *page) { struct address_space *mapping = page->mapping; struct inode *inode; - spinlock_t *req_lock; struct nfs_page *req; int ret; @@ -1448,18 +1445,17 @@ int nfs_set_page_dirty(struct page *page) inode = mapping->host; if (!inode) goto out_raced; - req_lock = &NFS_I(inode)->req_lock; - spin_lock(req_lock); + spin_lock(&inode->i_lock); req = nfs_page_find_request_locked(page); if (req != NULL) { /* Mark any existing write requests for flushing */ ret = !test_and_set_bit(PG_NEED_FLUSH, &req->wb_flags); - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); nfs_release_request(req); return ret; } ret = __set_page_dirty_nobuffers(page); - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); return ret; out_raced: return !TestSetPageDirty(page); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index bf24151d63b..cf395351cdd 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -156,7 +156,6 @@ struct nfs_inode { /* * This is the list of dirty unwritten pages. 
*/ - spinlock_t req_lock; struct radix_tree_root nfs_page_tree; unsigned long ncommit, -- cgit v1.2.3-70-g09d2 From 27b3f949b769a208e2849d28e7ad64cadac5d0e3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 2 Jul 2007 10:24:56 -0400 Subject: NFSv4: Fix a credential reference leak in nfs4_get_state_owner() Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 5d7ffbfc348..0030248d63e 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -83,7 +83,7 @@ nfs4_client_grab_unused(struct nfs_client *clp, struct rpc_cred *cred) if (!list_empty(&clp->cl_unused)) { sp = list_entry(clp->cl_unused.next, struct nfs4_state_owner, so_list); atomic_inc(&sp->so_count); - sp->so_cred = cred; + sp->so_cred = get_rpccred(cred); list_move(&sp->so_list, &clp->cl_state_owners); clp->cl_nunused--; } @@ -175,7 +175,6 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct struct nfs_client *clp = server->nfs_client; struct nfs4_state_owner *sp, *new; - get_rpccred(cred); new = nfs4_alloc_state_owner(); spin_lock(&clp->cl_lock); sp = nfs4_find_state_owner(clp, cred); @@ -185,7 +184,7 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct list_add(&new->so_list, &clp->cl_state_owners); new->so_client = clp; new->so_id = nfs4_alloc_lockowner_id(clp); - new->so_cred = cred; + new->so_cred = get_rpccred(cred); sp = new; new = NULL; } @@ -193,7 +192,6 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct kfree(new); if (sp != NULL) return sp; - put_rpccred(cred); return NULL; } -- cgit v1.2.3-70-g09d2 From 7af654f8d1b7460415af5d1d326233478dd0f563 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 2 Jul 2007 12:49:23 -0400 Subject: NFSv4: Don't reuse expired nfs4_state_owner structs That just confuses certain NFSv4 servers. 
Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 10 ---------- fs/nfs/nfs4state.c | 28 ---------------------------- include/linux/nfs_fs_sb.h | 2 -- 3 files changed, 40 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 71d4c4cdac5..6b424407d63 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -131,7 +131,6 @@ static struct nfs_client *nfs_alloc_client(const char *hostname, init_rwsem(&clp->cl_sem); INIT_LIST_HEAD(&clp->cl_delegations); INIT_LIST_HEAD(&clp->cl_state_owners); - INIT_LIST_HEAD(&clp->cl_unused); spin_lock_init(&clp->cl_lock); INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); @@ -155,15 +154,6 @@ static void nfs4_shutdown_client(struct nfs_client *clp) #ifdef CONFIG_NFS_V4 if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state)) nfs4_kill_renewd(clp); - while (!list_empty(&clp->cl_unused)) { - struct nfs4_state_owner *sp; - - sp = list_entry(clp->cl_unused.next, - struct nfs4_state_owner, - so_list); - list_del(&sp->so_list); - kfree(sp); - } BUG_ON(!list_empty(&clp->cl_state_owners)); if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) nfs_idmap_delete(clp); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 0030248d63e..2b00c45aebe 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -75,21 +75,6 @@ nfs4_alloc_lockowner_id(struct nfs_client *clp) return clp->cl_lockowner_id ++; } -static struct nfs4_state_owner * -nfs4_client_grab_unused(struct nfs_client *clp, struct rpc_cred *cred) -{ - struct nfs4_state_owner *sp = NULL; - - if (!list_empty(&clp->cl_unused)) { - sp = list_entry(clp->cl_unused.next, struct nfs4_state_owner, so_list); - atomic_inc(&sp->so_count); - sp->so_cred = get_rpccred(cred); - list_move(&sp->so_list, &clp->cl_state_owners); - clp->cl_nunused--; - } - return sp; -} - struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp) { struct nfs4_state_owner *sp; @@ -178,8 +163,6 @@ struct nfs4_state_owner 
*nfs4_get_state_owner(struct nfs_server *server, struct new = nfs4_alloc_state_owner(); spin_lock(&clp->cl_lock); sp = nfs4_find_state_owner(clp, cred); - if (sp == NULL) - sp = nfs4_client_grab_unused(clp, cred); if (sp == NULL && new != NULL) { list_add(&new->so_list, &clp->cl_state_owners); new->so_client = clp; @@ -206,17 +189,6 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp) if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) return; - if (clp->cl_nunused >= OPENOWNER_POOL_SIZE) - goto out_free; - if (list_empty(&sp->so_list)) - goto out_free; - list_move(&sp->so_list, &clp->cl_unused); - clp->cl_nunused++; - spin_unlock(&clp->cl_lock); - put_rpccred(cred); - cred = NULL; - return; -out_free: list_del(&sp->so_list); spin_unlock(&clp->cl_lock); put_rpccred(cred); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 144d955dc46..2cef0a68aa7 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -44,8 +44,6 @@ struct nfs_client { struct list_head cl_delegations; struct list_head cl_state_owners; - struct list_head cl_unused; - int cl_nunused; spinlock_t cl_lock; unsigned long cl_lease_time; -- cgit v1.2.3-70-g09d2 From bd625ba80d84d9de003b8a4bf61fd937b82aca09 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 8 Jul 2007 18:38:23 -0400 Subject: NFSv4: Fix the NFSv4 owner and owner_group size estimates Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 859b1363325..932bc79a902 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -87,9 +87,11 @@ static int nfs4_stat_to_errno(int); #define encode_getattr_maxsz (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) #define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2)) #define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) +#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) +#define nfs4_group_maxsz (1 + 
XDR_QUADLEN(IDMAP_NAMESZ)) /* This is based on getfattr, which uses the most attributes: */ #define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \ - 3 + 3 + 3 + 2 * nfs4_name_maxsz)) + 3 + 3 + 3 + nfs4_owner_maxsz + nfs4_group_maxsz)) #define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \ nfs4_fattr_value_maxsz) #define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz) -- cgit v1.2.3-70-g09d2 From 2cebf82883f49fd26148da5d9a43d1b4363f1d59 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 2 Jul 2007 13:57:28 -0400 Subject: NFSv4: Fix the underestimate of NFSv4 open request size The maximum size depends on the filename size and a number of other elements which are currently not being counted. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 63 ++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 932bc79a902..f6068bf3823 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -70,7 +70,8 @@ static int nfs4_stat_to_errno(int); /* lock,open owner id: * we currently use size 1 (u32) out of (NFS4_OPAQUE_LIMIT >> 2) */ -#define owner_id_maxsz (1 + 1) +#define open_owner_id_maxsz (1 + 1) +#define lock_owner_id_maxsz (1 + 1) #define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) #define compound_decode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) #define op_encode_hdr_maxsz (1) @@ -120,6 +121,25 @@ static int nfs4_stat_to_errno(int); (op_decode_hdr_maxsz) #define encode_lookup_maxsz (op_encode_hdr_maxsz + \ 1 + ((3 + NFS4_FHSIZE) >> 2)) +#define encode_share_access_maxsz \ + (2) +#define encode_createmode_maxsz (1 + nfs4_fattr_maxsz) +#define encode_opentype_maxsz (1 + encode_createmode_maxsz) +#define encode_claim_null_maxsz (1 + nfs4_name_maxsz) +#define encode_open_maxsz (op_encode_hdr_maxsz + \ + 2 + encode_share_access_maxsz + 2 + \ + open_owner_id_maxsz + \ + encode_opentype_maxsz + \ + 
encode_claim_null_maxsz) +#define decode_ace_maxsz (3 + nfs4_owner_maxsz) +#define decode_delegation_maxsz (1 + XDR_QUADLEN(NFS4_STATEID_SIZE) + 1 + \ + decode_ace_maxsz) +#define decode_change_info_maxsz (5) +#define decode_open_maxsz (op_decode_hdr_maxsz + \ + XDR_QUADLEN(NFS4_STATEID_SIZE) + \ + decode_change_info_maxsz + 1 + \ + nfs4_fattr_bitmap_maxsz + \ + decode_delegation_maxsz) #define encode_remove_maxsz (op_encode_hdr_maxsz + \ nfs4_name_maxsz) #define encode_rename_maxsz (op_encode_hdr_maxsz + \ @@ -136,7 +156,9 @@ static int nfs4_stat_to_errno(int); #define encode_create_maxsz (op_encode_hdr_maxsz + \ 2 + nfs4_name_maxsz + \ nfs4_fattr_maxsz) -#define decode_create_maxsz (op_decode_hdr_maxsz + 8) +#define decode_create_maxsz (op_decode_hdr_maxsz + \ + decode_change_info_maxsz + \ + nfs4_fattr_bitmap_maxsz) #define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4) #define decode_delegreturn_maxsz (op_decode_hdr_maxsz) #define NFS4_enc_compound_sz (1024) /* XXX: large enough? 
*/ @@ -176,16 +198,21 @@ static int nfs4_stat_to_errno(int); op_decode_hdr_maxsz + 2 + \ decode_getattr_maxsz) #define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \ - encode_putfh_maxsz + \ - op_encode_hdr_maxsz + \ - 13 + 3 + 2 + 64 + \ - encode_getattr_maxsz + \ - encode_getfh_maxsz) + encode_putfh_maxsz + \ + encode_savefh_maxsz + \ + encode_open_maxsz + \ + encode_getfh_maxsz + \ + encode_getattr_maxsz + \ + encode_restorefh_maxsz + \ + encode_getattr_maxsz) #define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \ - decode_putfh_maxsz + \ - op_decode_hdr_maxsz + 4 + 5 + 2 + 3 + \ - decode_getattr_maxsz + \ - decode_getfh_maxsz) + decode_putfh_maxsz + \ + decode_savefh_maxsz + \ + decode_open_maxsz + \ + decode_getfh_maxsz + \ + decode_getattr_maxsz + \ + decode_restorefh_maxsz + \ + decode_getattr_maxsz) #define NFS4_enc_open_confirm_sz \ (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ @@ -195,12 +222,12 @@ static int nfs4_stat_to_errno(int); op_decode_hdr_maxsz + 4) #define NFS4_enc_open_noattr_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ - op_encode_hdr_maxsz + \ - 11) + encode_open_maxsz + \ + encode_getattr_maxsz) #define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ - op_decode_hdr_maxsz + \ - 4 + 5 + 2 + 3) + decode_open_maxsz + \ + decode_getattr_maxsz) #define NFS4_enc_open_downgrade_sz \ (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ @@ -258,19 +285,19 @@ static int nfs4_stat_to_errno(int); op_encode_hdr_maxsz + \ 1 + 1 + 2 + 2 + \ 1 + 4 + 1 + 2 + \ - owner_id_maxsz) + lock_owner_id_maxsz) #define NFS4_dec_lock_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ decode_getattr_maxsz + \ op_decode_hdr_maxsz + \ 2 + 2 + 1 + 2 + \ - owner_id_maxsz) + lock_owner_id_maxsz) #define NFS4_enc_lockt_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_getattr_maxsz + \ op_encode_hdr_maxsz + \ 1 + 2 + 2 + 2 + \ - owner_id_maxsz) + lock_owner_id_maxsz) #define NFS4_dec_lockt_sz 
(NFS4_dec_lock_sz) #define NFS4_enc_locku_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ -- cgit v1.2.3-70-g09d2 From e6889620e89525ebf41f0eed937edb3dc065cf1d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 2 Jul 2007 13:58:30 -0400 Subject: NFSv4: Fix underestimate of NFSv4 lookup request size Also fix up the underestimate of fs_locations Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index f6068bf3823..4c8f67d4752 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -119,8 +119,8 @@ static int nfs4_stat_to_errno(int); 3 + (NFS4_VERIFIER_SIZE >> 2)) #define decode_setclientid_confirm_maxsz \ (op_decode_hdr_maxsz) -#define encode_lookup_maxsz (op_encode_hdr_maxsz + \ - 1 + ((3 + NFS4_FHSIZE) >> 2)) +#define encode_lookup_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz) +#define decode_lookup_maxsz (op_decode_hdr_maxsz) #define encode_share_access_maxsz \ (2) #define encode_createmode_maxsz (1 + nfs4_fattr_maxsz) @@ -161,6 +161,10 @@ static int nfs4_stat_to_errno(int); nfs4_fattr_bitmap_maxsz) #define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4) #define decode_delegreturn_maxsz (op_decode_hdr_maxsz) +#define encode_fs_locations_maxsz \ + (encode_getattr_maxsz) +#define decode_fs_locations_maxsz \ + (0) #define NFS4_enc_compound_sz (1024) /* XXX: large enough? */ #define NFS4_dec_compound_sz (1024) /* XXX: large enough? 
*/ #define NFS4_enc_read_sz (compound_encode_hdr_maxsz + \ @@ -327,7 +331,7 @@ static int nfs4_stat_to_errno(int); encode_getfh_maxsz) #define NFS4_dec_lookup_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ - op_decode_hdr_maxsz + \ + decode_lookup_maxsz + \ decode_getattr_maxsz + \ decode_getfh_maxsz) #define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \ @@ -446,12 +450,13 @@ static int nfs4_stat_to_errno(int); #define NFS4_enc_fs_locations_sz \ (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ - encode_getattr_maxsz) + encode_lookup_maxsz + \ + encode_fs_locations_maxsz) #define NFS4_dec_fs_locations_sz \ (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ - op_decode_hdr_maxsz + \ - nfs4_fattr_bitmap_maxsz) + decode_lookup_maxsz + \ + decode_fs_locations_maxsz) static struct { unsigned int mode; -- cgit v1.2.3-70-g09d2 From 88d9093997e1c73ca98db41b5605dbde7783845f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 2 Jul 2007 14:03:03 -0400 Subject: NFSv4: nfs_increment_open_seqid should not return a value It is a void function... 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 2b00c45aebe..0f79d56e97f 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -528,7 +528,7 @@ void nfs_free_seqid(struct nfs_seqid *seqid) * failed with a seqid incrementing error - * see comments nfs_fs.h:seqid_mutating_error() */ -static inline void nfs_increment_seqid(int status, struct nfs_seqid *seqid) +static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) { switch (status) { case 0: @@ -557,7 +557,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) struct nfs4_state_owner, so_seqid); nfs4_drop_state_owner(sp); } - return nfs_increment_seqid(status, seqid); + nfs_increment_seqid(status, seqid); } /* @@ -567,7 +567,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) */ void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid) { - return nfs_increment_seqid(status, seqid); + nfs_increment_seqid(status, seqid); } int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) -- cgit v1.2.3-70-g09d2 From 9f958ab8858c75df800e0121b1920182820cbc39 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 2 Jul 2007 13:58:33 -0400 Subject: NFSv4: Reduce the chances of an open_owner identifier collision Currently we just use a 32-bit counter. 
Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 3 +- fs/nfs/nfs4_fs.h | 17 ++-- fs/nfs/nfs4proc.c | 8 +- fs/nfs/nfs4state.c | 193 ++++++++++++++++++++++++++++++++++++---------- fs/nfs/nfs4xdr.c | 27 ++++--- include/linux/nfs_fs_sb.h | 5 +- include/linux/nfs_xdr.h | 4 +- 7 files changed, 190 insertions(+), 67 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 6b424407d63..ccb455053ee 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -130,7 +130,6 @@ static struct nfs_client *nfs_alloc_client(const char *hostname, #ifdef CONFIG_NFS_V4 init_rwsem(&clp->cl_sem); INIT_LIST_HEAD(&clp->cl_delegations); - INIT_LIST_HEAD(&clp->cl_state_owners); spin_lock_init(&clp->cl_lock); INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); @@ -154,7 +153,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp) #ifdef CONFIG_NFS_V4 if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state)) nfs4_kill_renewd(clp); - BUG_ON(!list_empty(&clp->cl_state_owners)); + BUG_ON(!RB_EMPTY_ROOT(&clp->cl_state_owners)); if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) nfs_idmap_delete(clp); #endif diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index c97a0ad8430..44b56c915f7 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -70,19 +70,25 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status seqid->flags |= NFS_SEQID_CONFIRMED; } +struct nfs_unique_id { + struct rb_node rb_node; + __u64 id; +}; + /* * NFS4 state_owners and lock_owners are simply labels for ordered * sequences of RPC calls. Their sole purpose is to provide once-only * semantics by allowing the server to identify replayed requests. 
*/ struct nfs4_state_owner { - spinlock_t so_lock; - struct list_head so_list; /* per-clientid list of state_owners */ + struct nfs_unique_id so_owner_id; struct nfs_client *so_client; - u32 so_id; /* 32-bit identifier, unique */ - atomic_t so_count; + struct rb_node so_client_node; struct rpc_cred *so_cred; /* Associated cred */ + + spinlock_t so_lock; + atomic_t so_count; struct list_head so_states; struct list_head so_delegations; struct nfs_seqid_counter so_seqid; @@ -108,7 +114,7 @@ struct nfs4_lock_state { #define NFS_LOCK_INITIALIZED 1 int ls_flags; struct nfs_seqid_counter ls_seqid; - u32 ls_id; + struct nfs_unique_id ls_id; nfs4_stateid ls_stateid; atomic_t ls_count; }; @@ -189,7 +195,6 @@ extern void nfs4_renew_state(struct work_struct *); /* nfs4state.c */ struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp); -extern u32 nfs4_alloc_lockowner_id(struct nfs_client *); extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); extern void nfs4_put_state_owner(struct nfs4_state_owner *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 84d0b7e0dd6..1840ebc78fd 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -253,7 +253,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, p->o_arg.fh = NFS_FH(dir); p->o_arg.open_flags = flags, p->o_arg.clientid = server->nfs_client->cl_clientid; - p->o_arg.id = sp->so_id; + p->o_arg.id = sp->so_owner_id.id; p->o_arg.name = &p->path.dentry->d_name; p->o_arg.server = server; p->o_arg.bitmask = server->attr_bitmask; @@ -651,7 +651,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0) return; /* Update sequence id. 
*/ - data->o_arg.id = sp->so_id; + data->o_arg.id = sp->so_owner_id.id; data->o_arg.clientid = sp->so_client->cl_clientid; if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; @@ -3029,7 +3029,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock if (status != 0) goto out; lsp = request->fl_u.nfs4_fl.owner; - arg.lock_owner.id = lsp->ls_id; + arg.lock_owner.id = lsp->ls_id.id; status = rpc_call_sync(server->client, &msg, 0); switch (status) { case 0: @@ -3243,7 +3243,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, goto out_free; p->arg.lock_stateid = &lsp->ls_stateid; p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; - p->arg.lock_owner.id = lsp->ls_id; + p->arg.lock_owner.id = lsp->ls_id.id; p->lsp = lsp; atomic_inc(&lsp->ls_count); p->ctx = get_nfs_open_context(ctx); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 0f79d56e97f..ab0b5ab60e6 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -69,18 +70,14 @@ static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred) return status; } -u32 -nfs4_alloc_lockowner_id(struct nfs_client *clp) -{ - return clp->cl_lockowner_id ++; -} - struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp) { struct nfs4_state_owner *sp; + struct rb_node *pos; struct rpc_cred *cred = NULL; - list_for_each_entry(sp, &clp->cl_state_owners, so_list) { + for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { + sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); if (list_empty(&sp->so_states)) continue; cred = get_rpccred(sp->so_cred); @@ -92,32 +89,129 @@ struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp) static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) { struct nfs4_state_owner *sp; + struct rb_node *pos; - if 
(!list_empty(&clp->cl_state_owners)) { - sp = list_entry(clp->cl_state_owners.next, - struct nfs4_state_owner, so_list); + pos = rb_first(&clp->cl_state_owners); + if (pos != NULL) { + sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); return get_rpccred(sp->so_cred); } return NULL; } +static void nfs_alloc_unique_id(struct rb_root *root, struct nfs_unique_id *new, + __u64 minval, int maxbits) +{ + struct rb_node **p, *parent; + struct nfs_unique_id *pos; + __u64 mask = ~0ULL; + + if (maxbits < 64) + mask = (1ULL << maxbits) - 1ULL; + + /* Ensure distribution is more or less flat */ + get_random_bytes(&new->id, sizeof(new->id)); + new->id &= mask; + if (new->id < minval) + new->id += minval; +retry: + p = &root->rb_node; + parent = NULL; + + while (*p != NULL) { + parent = *p; + pos = rb_entry(parent, struct nfs_unique_id, rb_node); + + if (new->id < pos->id) + p = &(*p)->rb_left; + else if (new->id > pos->id) + p = &(*p)->rb_right; + else + goto id_exists; + } + rb_link_node(&new->rb_node, parent, p); + rb_insert_color(&new->rb_node, root); + return; +id_exists: + for (;;) { + new->id++; + if (new->id < minval || (new->id & mask) != new->id) { + new->id = minval; + break; + } + parent = rb_next(parent); + if (parent == NULL) + break; + pos = rb_entry(parent, struct nfs_unique_id, rb_node); + if (new->id < pos->id) + break; + } + goto retry; +} + +static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id) +{ + rb_erase(&id->rb_node, root); +} + static struct nfs4_state_owner * nfs4_find_state_owner(struct nfs_client *clp, struct rpc_cred *cred) { + struct rb_node **p = &clp->cl_state_owners.rb_node, + *parent = NULL; struct nfs4_state_owner *sp, *res = NULL; - list_for_each_entry(sp, &clp->cl_state_owners, so_list) { - if (sp->so_cred != cred) - continue; - atomic_inc(&sp->so_count); - /* Move to the head of the list */ - list_move(&sp->so_list, &clp->cl_state_owners); - res = sp; - break; + while (*p != NULL) { + parent = *p; + sp = 
rb_entry(parent, struct nfs4_state_owner, so_client_node); + + if (cred < sp->so_cred) + p = &parent->rb_left; + else if (cred > sp->so_cred) + p = &parent->rb_right; + else { + atomic_inc(&sp->so_count); + res = sp; + break; + } } return res; } +static struct nfs4_state_owner * +nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new) +{ + struct rb_node **p = &clp->cl_state_owners.rb_node, + *parent = NULL; + struct nfs4_state_owner *sp; + + while (*p != NULL) { + parent = *p; + sp = rb_entry(parent, struct nfs4_state_owner, so_client_node); + + if (new->so_cred < sp->so_cred) + p = &parent->rb_left; + else if (new->so_cred > sp->so_cred) + p = &parent->rb_right; + else { + atomic_inc(&sp->so_count); + return sp; + } + } + nfs_alloc_unique_id(&clp->cl_openowner_id, &new->so_owner_id, 1, 64); + rb_link_node(&new->so_client_node, parent, p); + rb_insert_color(&new->so_client_node, &clp->cl_state_owners); + return new; +} + +static void +nfs4_remove_state_owner(struct nfs_client *clp, struct nfs4_state_owner *sp) +{ + if (!RB_EMPTY_NODE(&sp->so_client_node)) + rb_erase(&sp->so_client_node, &clp->cl_state_owners); + nfs_free_unique_id(&clp->cl_openowner_id, &sp->so_owner_id); +} + /* * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to * create a new state_owner. 
@@ -145,10 +239,14 @@ nfs4_alloc_state_owner(void) void nfs4_drop_state_owner(struct nfs4_state_owner *sp) { - struct nfs_client *clp = sp->so_client; - spin_lock(&clp->cl_lock); - list_del_init(&sp->so_list); - spin_unlock(&clp->cl_lock); + if (!RB_EMPTY_NODE(&sp->so_client_node)) { + struct nfs_client *clp = sp->so_client; + + spin_lock(&clp->cl_lock); + rb_erase(&sp->so_client_node, &clp->cl_state_owners); + RB_CLEAR_NODE(&sp->so_client_node); + spin_unlock(&clp->cl_lock); + } } /* @@ -160,22 +258,24 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct struct nfs_client *clp = server->nfs_client; struct nfs4_state_owner *sp, *new; - new = nfs4_alloc_state_owner(); spin_lock(&clp->cl_lock); sp = nfs4_find_state_owner(clp, cred); - if (sp == NULL && new != NULL) { - list_add(&new->so_list, &clp->cl_state_owners); - new->so_client = clp; - new->so_id = nfs4_alloc_lockowner_id(clp); - new->so_cred = get_rpccred(cred); - sp = new; - new = NULL; - } spin_unlock(&clp->cl_lock); - kfree(new); if (sp != NULL) return sp; - return NULL; + new = nfs4_alloc_state_owner(); + if (new == NULL) + return NULL; + new->so_client = clp; + new->so_cred = cred; + spin_lock(&clp->cl_lock); + sp = nfs4_insert_state_owner(clp, new); + spin_unlock(&clp->cl_lock); + if (sp == new) + get_rpccred(cred); + else + kfree(new); + return sp; } /* @@ -189,7 +289,7 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp) if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) return; - list_del(&sp->so_list); + nfs4_remove_state_owner(clp, sp); spin_unlock(&clp->cl_lock); put_rpccred(cred); kfree(sp); @@ -386,12 +486,22 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f atomic_set(&lsp->ls_count, 1); lsp->ls_owner = fl_owner; spin_lock(&clp->cl_lock); - lsp->ls_id = nfs4_alloc_lockowner_id(clp); + nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64); spin_unlock(&clp->cl_lock); INIT_LIST_HEAD(&lsp->ls_locks); return lsp; } 
+static void nfs4_free_lock_state(struct nfs4_lock_state *lsp) +{ + struct nfs_client *clp = lsp->ls_state->owner->so_client; + + spin_lock(&clp->cl_lock); + nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id); + spin_unlock(&clp->cl_lock); + kfree(lsp); +} + /* * Return a compatible lock_state. If no initialized lock_state structure * exists, return an uninitialized one. @@ -421,7 +531,8 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_ return NULL; } spin_unlock(&state->state_lock); - kfree(new); + if (new != NULL) + nfs4_free_lock_state(new); return lsp; } @@ -442,7 +553,7 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp) if (list_empty(&state->lock_states)) clear_bit(LK_STATE_IN_USE, &state->flags); spin_unlock(&state->state_lock); - kfree(lsp); + nfs4_free_lock_state(lsp); } static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) @@ -719,11 +830,13 @@ out_err: static void nfs4_state_mark_reclaim(struct nfs_client *clp) { struct nfs4_state_owner *sp; + struct rb_node *pos; struct nfs4_state *state; struct nfs4_lock_state *lock; /* Reset all sequence ids to zero */ - list_for_each_entry(sp, &clp->cl_state_owners, so_list) { + for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { + sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); sp->so_seqid.counter = 0; sp->so_seqid.flags = 0; spin_lock(&sp->so_lock); @@ -742,6 +855,7 @@ static int reclaimer(void *ptr) { struct nfs_client *clp = ptr; struct nfs4_state_owner *sp; + struct rb_node *pos; struct nfs4_state_recovery_ops *ops; struct rpc_cred *cred; int status = 0; @@ -787,7 +901,8 @@ restart_loop: /* Mark all delegations for reclaim */ nfs_delegation_mark_reclaim(clp); /* Note: list is protected by exclusive lock on cl->cl_sem */ - list_for_each_entry(sp, &clp->cl_state_owners, so_list) { + for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { + sp = rb_entry(pos, struct nfs4_state_owner, 
so_client_node); status = nfs4_reclaim_open_state(ops, sp); if (status < 0) { if (status == -NFS4ERR_NO_GRACE) { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4c8f67d4752..c08738441f7 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -68,10 +68,10 @@ static int nfs4_stat_to_errno(int); #endif /* lock,open owner id: - * we currently use size 1 (u32) out of (NFS4_OPAQUE_LIMIT >> 2) + * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2) */ -#define open_owner_id_maxsz (1 + 1) -#define lock_owner_id_maxsz (1 + 1) +#define open_owner_id_maxsz (1 + 4) +#define lock_owner_id_maxsz (1 + 4) #define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) #define compound_decode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) #define op_encode_hdr_maxsz (1) @@ -827,13 +827,14 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args) WRITE64(nfs4_lock_length(args->fl)); WRITE32(args->new_lock_owner); if (args->new_lock_owner){ - RESERVE_SPACE(4+NFS4_STATEID_SIZE+20); + RESERVE_SPACE(4+NFS4_STATEID_SIZE+32); WRITE32(args->open_seqid->sequence->counter); WRITEMEM(args->open_stateid->data, NFS4_STATEID_SIZE); WRITE32(args->lock_seqid->sequence->counter); WRITE64(args->lock_owner.clientid); - WRITE32(4); - WRITE32(args->lock_owner.id); + WRITE32(16); + WRITEMEM("lock id:", 8); + WRITE64(args->lock_owner.id); } else { RESERVE_SPACE(NFS4_STATEID_SIZE+4); @@ -848,14 +849,15 @@ static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *arg { __be32 *p; - RESERVE_SPACE(40); + RESERVE_SPACE(52); WRITE32(OP_LOCKT); WRITE32(nfs4_lock_type(args->fl, 0)); WRITE64(args->fl->fl_start); WRITE64(nfs4_lock_length(args->fl)); WRITE64(args->lock_owner.clientid); - WRITE32(4); - WRITE32(args->lock_owner.id); + WRITE32(16); + WRITEMEM("lock id:", 8); + WRITE64(args->lock_owner.id); return 0; } @@ -920,10 +922,11 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena WRITE32(OP_OPEN); 
WRITE32(arg->seqid->sequence->counter); encode_share_access(xdr, arg->open_flags); - RESERVE_SPACE(16); + RESERVE_SPACE(28); WRITE64(arg->clientid); - WRITE32(4); - WRITE32(arg->id); + WRITE32(16); + WRITEMEM("open id:", 8); + WRITE64(arg->id); } static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg) diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 2cef0a68aa7..0cac49bc095 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -34,7 +34,8 @@ struct nfs_client { nfs4_verifier cl_confirm; unsigned long cl_state; - u32 cl_lockowner_id; + struct rb_root cl_openowner_id; + struct rb_root cl_lockowner_id; /* * The following rwsem ensures exclusive access to the server @@ -43,7 +44,7 @@ struct nfs_client { struct rw_semaphore cl_sem; struct list_head cl_delegations; - struct list_head cl_state_owners; + struct rb_root cl_state_owners; spinlock_t cl_lock; unsigned long cl_lease_time; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index f7100df3a69..38d77681cf2 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -119,7 +119,7 @@ struct nfs_openargs { struct nfs_seqid * seqid; int open_flags; __u64 clientid; - __u32 id; + __u64 id; union { struct iattr * attrs; /* UNCHECKED, GUARDED */ nfs4_verifier verifier; /* EXCLUSIVE */ @@ -181,7 +181,7 @@ struct nfs_closeres { * */ struct nfs_lowner { __u64 clientid; - u32 id; + __u64 id; }; struct nfs_lock_args { -- cgit v1.2.3-70-g09d2 From 1b45c46cf75d9c48eb611d5cc41607ac1f046606 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jul 2007 13:04:56 -0400 Subject: NFSv4: Fix atomic open for execute... Currently we do not check for the FMODE_EXEC flag as we should. For that particular case, we need to perform an ACCESS call to the server in order to check that the file is executable. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1840ebc78fd..69aab8db494 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -769,6 +769,8 @@ static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openf mask |= MAY_READ; if (openflags & FMODE_WRITE) mask |= MAY_WRITE; + if (openflags & FMODE_EXEC) + mask |= MAY_EXEC; status = nfs_access_get_cached(inode, cred, &cache); if (status == 0) goto out; @@ -1269,7 +1271,16 @@ out: static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state) { struct file *filp; + int ret; + /* If the open_intent is for execute, we have an extra check to make */ + if (nd->intent.open.flags & FMODE_EXEC) { + ret = _nfs4_do_access(state->inode, + state->owner->so_cred, + nd->intent.open.flags); + if (ret < 0) + goto out_close; + } filp = lookup_instantiate_filp(nd, path->dentry, NULL); if (!IS_ERR(filp)) { struct nfs_open_context *ctx; @@ -1277,8 +1288,10 @@ static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct ctx->state = state; return 0; } + ret = PTR_ERR(filp); +out_close: nfs4_close_state(path, state, nd->intent.open.flags); - return PTR_ERR(filp); + return ret; } struct dentry * -- cgit v1.2.3-70-g09d2 From 1c816efa245111c52858fbe55d99474f3c149dd3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jul 2007 14:41:19 -0400 Subject: NFSv4: Fix a bug in __nfs4_find_state_byowner The test for state->state == 0 does not tell you that the stateid is in the process of being freed. It really tells you that the stateid is not yet initialised... 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index ab0b5ab60e6..ac816b303f3 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -333,13 +333,10 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner) struct nfs4_state *state; list_for_each_entry(state, &nfsi->open_states, inode_states) { - /* Is this in the process of being freed? */ - if (state->state == 0) + if (state->owner != owner) continue; - if (state->owner == owner) { - atomic_inc(&state->count); + if (atomic_inc_not_zero(&state->count)) return state; - } } return NULL; } -- cgit v1.2.3-70-g09d2 From 549d6ed5e85003370fe858e70864a71882491d28 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jul 2007 16:42:45 -0400 Subject: NFSv4: set the delegation in nfs4_opendata_to_nfs4_state This ensures that nfs4_open_release() and nfs4_open_confirm_release() can now handle an eventual delegation that was returned with our open. As such, it fixes a delegation "leak" when the user breaks out of an open call.
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 69aab8db494..4f0b06d549f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -356,6 +356,21 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data if (state == NULL) goto put_inode; update_open_stateid(state, &data->o_res.stateid, data->o_arg.open_flags); + if (data->o_res.delegation_type != 0) { + struct nfs_inode *nfsi = NFS_I(inode); + int delegation_flags = 0; + + if (nfsi->delegation) + delegation_flags = nfsi->delegation->flags; + if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM)) + nfs_inode_set_delegation(state->inode, + data->owner->so_cred, + &data->o_res); + else + nfs_inode_reclaim_delegation(state->inode, + data->owner->so_cred, + &data->o_res); + } put_inode: iput(inode); out: @@ -433,23 +448,8 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * opendata->o_res.delegation_type = delegation; opendata->o_arg.open_flags |= mode; newstate = nfs4_opendata_to_nfs4_state(opendata); - if (newstate != NULL) { - if (opendata->o_res.delegation_type != 0) { - struct nfs_inode *nfsi = NFS_I(newstate->inode); - int delegation_flags = 0; - if (nfsi->delegation) - delegation_flags = nfsi->delegation->flags; - if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM)) - nfs_inode_set_delegation(newstate->inode, - opendata->owner->so_cred, - &opendata->o_res); - else - nfs_inode_reclaim_delegation(newstate->inode, - opendata->owner->so_cred, - &opendata->o_res); - } + if (newstate != NULL) nfs4_close_state(&opendata->path, newstate, opendata->o_arg.open_flags); - } if (newstate != state) return -ESTALE; return 0; @@ -1005,8 +1005,6 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct state = nfs4_opendata_to_nfs4_state(opendata); if (state == NULL) goto 
err_opendata_put; - if (opendata->o_res.delegation_type != 0) - nfs_inode_set_delegation(state->inode, cred, &opendata->o_res); nfs4_opendata_put(opendata); nfs4_put_state_owner(sp); up_read(&clp->cl_sem); -- cgit v1.2.3-70-g09d2 From 2ced46c27058710a6d731d6eca77f1dd14ccde75 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jul 2007 23:48:13 -0400 Subject: NFSv4: Fix up a bug in nfs4_open_recover() Don't clobber the delegation info... Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 65 ++++++++++++++++++++++++++----------------------------- 1 file changed, 31 insertions(+), 34 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4f0b06d549f..03b60c67ca7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -230,6 +230,16 @@ struct nfs4_opendata { int cancelled; }; + +static void nfs4_init_opendata_res(struct nfs4_opendata *p) +{ + p->o_res.f_attr = &p->f_attr; + p->o_res.dir_attr = &p->dir_attr; + p->o_res.server = p->o_arg.server; + nfs_fattr_init(&p->f_attr); + nfs_fattr_init(&p->dir_attr); +} + static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, struct nfs4_state_owner *sp, int flags, const struct iattr *attrs) @@ -258,11 +268,6 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, p->o_arg.server = server; p->o_arg.bitmask = server->attr_bitmask; p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; - p->o_res.f_attr = &p->f_attr; - p->o_res.dir_attr = &p->dir_attr; - p->o_res.server = server; - nfs_fattr_init(&p->f_attr); - nfs_fattr_init(&p->dir_attr); if (flags & O_EXCL) { u32 *s = (u32 *) p->o_arg.u.verifier.data; s[0] = jiffies; @@ -274,6 +279,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, p->c_arg.fh = &p->o_res.fh; p->c_arg.stateid = &p->o_res.stateid; p->c_arg.seqid = p->o_arg.seqid; + nfs4_init_opendata_res(p); kref_init(&p->kref); return p; err_free: @@ -394,64 +400,54 @@ static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state * 
return ERR_PTR(-ENOENT); } -static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, nfs4_stateid *stateid) +static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, struct nfs4_state **res) { + struct nfs4_state *newstate; int ret; opendata->o_arg.open_flags = openflags; + memset(&opendata->o_res, 0, sizeof(opendata->o_res)); + memset(&opendata->c_res, 0, sizeof(opendata->c_res)); + nfs4_init_opendata_res(opendata); ret = _nfs4_proc_open(opendata); if (ret != 0) return ret; - memcpy(stateid->data, opendata->o_res.stateid.data, - sizeof(stateid->data)); + newstate = nfs4_opendata_to_nfs4_state(opendata); + if (newstate != NULL) + nfs4_close_state(&opendata->path, newstate, openflags); + *res = newstate; return 0; } static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *state) { - nfs4_stateid stateid; struct nfs4_state *newstate; - int mode = 0; - int delegation = 0; int ret; /* memory barrier prior to reading state->n_* */ + clear_bit(NFS_DELEGATED_STATE, &state->flags); smp_rmb(); if (state->n_rdwr != 0) { - ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &stateid); + ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate); if (ret != 0) return ret; - mode |= FMODE_READ|FMODE_WRITE; - if (opendata->o_res.delegation_type != 0) - delegation = opendata->o_res.delegation_type; - smp_rmb(); + if (newstate != state) + return -ESTALE; } if (state->n_wronly != 0) { - ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &stateid); + ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate); if (ret != 0) return ret; - mode |= FMODE_WRITE; - if (opendata->o_res.delegation_type != 0) - delegation = opendata->o_res.delegation_type; - smp_rmb(); + if (newstate != state) + return -ESTALE; } if (state->n_rdonly != 0) { - ret = nfs4_open_recover_helper(opendata, FMODE_READ, &stateid); + ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate); if 
(ret != 0) return ret; - mode |= FMODE_READ; + if (newstate != state) + return -ESTALE; } - clear_bit(NFS_DELEGATED_STATE, &state->flags); - if (mode == 0) - return 0; - if (opendata->o_res.delegation_type == 0) - opendata->o_res.delegation_type = delegation; - opendata->o_arg.open_flags |= mode; - newstate = nfs4_opendata_to_nfs4_state(opendata); - if (newstate != NULL) - nfs4_close_state(&opendata->path, newstate, opendata->o_arg.open_flags); - if (newstate != state) - return -ESTALE; return 0; } @@ -730,6 +726,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) * want to ensure that it takes the 'error' code path. */ data->rpc_status = -ENOMEM; + data->cancelled = 0; task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data); if (IS_ERR(task)) return PTR_ERR(task); -- cgit v1.2.3-70-g09d2 From 901630278469c0d7610554227f39ed2d02d0d270 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 5 Jul 2007 14:55:18 -0400 Subject: NFSv4: Support recalling delegations by stateid There appear to be some rogue servers out there that issue multiple delegations with different stateids for the same file. Ensure that when we return delegations, we do so on a per-stateid basis rather than a per-file basis. 
Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 80 +++++++++++++++++++++++++++++++++-------------------- fs/nfs/delegation.h | 10 +------ 2 files changed, 51 insertions(+), 39 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 9f17b91205c..cee2ba42b68 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -57,7 +57,7 @@ out_err: return status; } -static void nfs_delegation_claim_opens(struct inode *inode) +static void nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *stateid) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_open_context *ctx; @@ -72,6 +72,8 @@ again: continue; if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) continue; + if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0) + continue; get_nfs_open_context(ctx); spin_unlock(&inode->i_lock); err = nfs4_open_delegation_recall(ctx, state); @@ -170,33 +172,55 @@ static void nfs_msync_inode(struct inode *inode) /* * Basic procedure for returning a delegation to the server */ -int __nfs_inode_return_delegation(struct inode *inode) +static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation) { struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_delegation *delegation; - int res = 0; nfs_msync_inode(inode); down_read(&clp->cl_sem); /* Guard against new delegated open calls */ down_write(&nfsi->rwsem); - spin_lock(&clp->cl_lock); - delegation = nfsi->delegation; - if (delegation != NULL) { - list_del_init(&delegation->super_list); - nfsi->delegation = NULL; - nfsi->delegation_state = 0; - } - spin_unlock(&clp->cl_lock); - nfs_delegation_claim_opens(inode); + nfs_delegation_claim_opens(inode, &delegation->stateid); up_write(&nfsi->rwsem); up_read(&clp->cl_sem); nfs_msync_inode(inode); - if (delegation != NULL) - res = nfs_do_return_delegation(inode, delegation); - return res; + return 
nfs_do_return_delegation(inode, delegation); +} + +static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid) +{ + struct nfs_delegation *delegation = nfsi->delegation; + + if (delegation == NULL) + goto nomatch; + if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data, + sizeof(delegation->stateid.data)) != 0) + goto nomatch; + list_del_init(&delegation->super_list); + nfsi->delegation = NULL; + nfsi->delegation_state = 0; + return delegation; +nomatch: + return NULL; +} + +int nfs_inode_return_delegation(struct inode *inode) +{ + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_delegation *delegation; + int err = 0; + + if (nfsi->delegation_state != 0) { + spin_lock(&clp->cl_lock); + delegation = nfs_detach_delegation_locked(nfsi, NULL); + spin_unlock(&clp->cl_lock); + if (delegation != NULL) + err = __nfs_inode_return_delegation(inode, delegation); + } + return err; } /* @@ -218,8 +242,9 @@ restart: inode = igrab(delegation->inode); if (inode == NULL) continue; + nfs_detach_delegation_locked(NFS_I(inode), NULL); spin_unlock(&clp->cl_lock); - nfs_inode_return_delegation(inode); + __nfs_inode_return_delegation(inode, delegation); iput(inode); goto restart; } @@ -243,8 +268,9 @@ restart: inode = igrab(delegation->inode); if (inode == NULL) continue; + nfs_detach_delegation_locked(NFS_I(inode), NULL); spin_unlock(&clp->cl_lock); - nfs_inode_return_delegation(inode); + __nfs_inode_return_delegation(inode, delegation); iput(inode); goto restart; } @@ -285,8 +311,9 @@ restart: inode = igrab(delegation->inode); if (inode == NULL) continue; + nfs_detach_delegation_locked(NFS_I(inode), NULL); spin_unlock(&clp->cl_lock); - nfs_inode_return_delegation(inode); + __nfs_inode_return_delegation(inode, delegation); iput(inode); goto restart; } @@ -316,21 +343,14 @@ static int recall_thread(void *data) down_read(&clp->cl_sem); down_write(&nfsi->rwsem); 
spin_lock(&clp->cl_lock); - delegation = nfsi->delegation; - if (delegation != NULL && memcmp(delegation->stateid.data, - args->stateid->data, - sizeof(delegation->stateid.data)) == 0) { - list_del_init(&delegation->super_list); - nfsi->delegation = NULL; - nfsi->delegation_state = 0; + delegation = nfs_detach_delegation_locked(nfsi, args->stateid); + if (delegation != NULL) args->result = 0; - } else { - delegation = NULL; + else args->result = -ENOENT; - } spin_unlock(&clp->cl_lock); complete(&args->started); - nfs_delegation_claim_opens(inode); + nfs_delegation_claim_opens(inode, args->stateid); up_write(&nfsi->rwsem); up_read(&clp->cl_sem); nfs_msync_inode(inode); diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index f6e42fb21af..7b22f174244 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -26,7 +26,7 @@ struct nfs_delegation { int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); -int __nfs_inode_return_delegation(struct inode *inode); +int nfs_inode_return_delegation(struct inode *inode); int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); @@ -52,14 +52,6 @@ static inline int nfs_have_delegation(struct inode *inode, int flags) return 0; } -static inline int nfs_inode_return_delegation(struct inode *inode) -{ - int err = 0; - - if (NFS_I(inode)->delegation != NULL) - err = __nfs_inode_return_delegation(inode); - return err; -} #else static inline int nfs_have_delegation(struct inode *inode, int flags) { -- cgit v1.2.3-70-g09d2 From 13437e12fb43cb7e285ff59248f781c91578eafe Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 6 Jul 2007 15:10:43 -0400 Subject: NFSv4: Support recalling delegations by stateid part 2 Signed-off-by: Trond Myklebust --- 
fs/nfs/delegation.c | 2 +- fs/nfs/delegation.h | 2 +- fs/nfs/nfs4proc.c | 10 ++++------ 3 files changed, 6 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index cee2ba42b68..93a9f4bd9bd 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -76,7 +76,7 @@ again: continue; get_nfs_open_context(ctx); spin_unlock(&inode->i_lock); - err = nfs4_open_delegation_recall(ctx, state); + err = nfs4_open_delegation_recall(ctx, state, stateid); if (err >= 0) err = nfs_delegation_claim_locks(ctx, state); put_nfs_open_context(ctx); diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 7b22f174244..8f79a313516 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -39,7 +39,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp); /* NFSv4 delegation-related procedures */ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid); -int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state); +int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid); int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 03b60c67ca7..10946415de7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -510,32 +510,30 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta return ret; } -static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state) +static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) { struct nfs4_state_owner *sp = state->owner; struct nfs4_opendata *opendata; int ret; - if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) - return 0; opendata = nfs4_opendata_alloc(&ctx->path, sp, 0, NULL); 
if (opendata == NULL) return -ENOMEM; opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR; - memcpy(opendata->o_arg.u.delegation.data, state->stateid.data, + memcpy(opendata->o_arg.u.delegation.data, stateid->data, sizeof(opendata->o_arg.u.delegation.data)); ret = nfs4_open_recover(opendata, state); nfs4_opendata_put(opendata); return ret; } -int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state) +int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) { struct nfs4_exception exception = { }; struct nfs_server *server = NFS_SERVER(state->inode); int err; do { - err = _nfs4_open_delegation_recall(ctx, state); + err = _nfs4_open_delegation_recall(ctx, state, stateid); switch (err) { case 0: return err; -- cgit v1.2.3-70-g09d2 From 8383e4602c89857ef926f29ca61ac0a83a614443 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 6 Jul 2007 15:12:04 -0400 Subject: NFSv4: Use RCU to protect delegations Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 114 +++++++++++++++++++++++++++++----------------------- fs/nfs/delegation.h | 14 +++++-- 2 files changed, 73 insertions(+), 55 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 93a9f4bd9bd..56f4f6a99d4 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -27,6 +27,13 @@ static void nfs_free_delegation(struct nfs_delegation *delegation) kfree(delegation); } +static void nfs_free_delegation_callback(struct rcu_head *head) +{ + struct nfs_delegation *delegation = container_of(head, struct nfs_delegation, rcu); + + nfs_free_delegation(delegation); +} + static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state) { struct inode *inode = state->inode; @@ -133,10 +140,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct delegation->inode = inode; spin_lock(&clp->cl_lock); - if (nfsi->delegation == NULL) { - 
list_add(&delegation->super_list, &clp->cl_delegations); - nfsi->delegation = delegation; + if (rcu_dereference(nfsi->delegation) == NULL) { + list_add_rcu(&delegation->super_list, &clp->cl_delegations); nfsi->delegation_state = delegation->type; + rcu_assign_pointer(nfsi->delegation, delegation); delegation = NULL; } else { if (memcmp(&delegation->stateid, &nfsi->delegation->stateid, @@ -157,7 +164,7 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation * int res = 0; res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid); - nfs_free_delegation(delegation); + call_rcu(&delegation->rcu, nfs_free_delegation_callback); return res; } @@ -191,16 +198,16 @@ static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegat static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid) { - struct nfs_delegation *delegation = nfsi->delegation; + struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation); if (delegation == NULL) goto nomatch; if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data, sizeof(delegation->stateid.data)) != 0) goto nomatch; - list_del_init(&delegation->super_list); - nfsi->delegation = NULL; + list_del_rcu(&delegation->super_list); nfsi->delegation_state = 0; + rcu_assign_pointer(nfsi->delegation, NULL); return delegation; nomatch: return NULL; @@ -213,7 +220,7 @@ int nfs_inode_return_delegation(struct inode *inode) struct nfs_delegation *delegation; int err = 0; - if (nfsi->delegation_state != 0) { + if (rcu_dereference(nfsi->delegation) != NULL) { spin_lock(&clp->cl_lock); delegation = nfs_detach_delegation_locked(nfsi, NULL); spin_unlock(&clp->cl_lock); @@ -235,20 +242,23 @@ void nfs_return_all_delegations(struct super_block *sb) if (clp == NULL) return; restart: - spin_lock(&clp->cl_lock); - list_for_each_entry(delegation, &clp->cl_delegations, super_list) { + rcu_read_lock(); + 
list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { if (delegation->inode->i_sb != sb) continue; inode = igrab(delegation->inode); if (inode == NULL) continue; - nfs_detach_delegation_locked(NFS_I(inode), NULL); + spin_lock(&clp->cl_lock); + delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); spin_unlock(&clp->cl_lock); - __nfs_inode_return_delegation(inode, delegation); + rcu_read_unlock(); + if (delegation != NULL) + __nfs_inode_return_delegation(inode, delegation); iput(inode); goto restart; } - spin_unlock(&clp->cl_lock); + rcu_read_unlock(); } static int nfs_do_expire_all_delegations(void *ptr) @@ -259,23 +269,26 @@ static int nfs_do_expire_all_delegations(void *ptr) allow_signal(SIGKILL); restart: - spin_lock(&clp->cl_lock); if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) != 0) goto out; if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) goto out; - list_for_each_entry(delegation, &clp->cl_delegations, super_list) { + rcu_read_lock(); + list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { inode = igrab(delegation->inode); if (inode == NULL) continue; - nfs_detach_delegation_locked(NFS_I(inode), NULL); + spin_lock(&clp->cl_lock); + delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); spin_unlock(&clp->cl_lock); - __nfs_inode_return_delegation(inode, delegation); + rcu_read_unlock(); + if (delegation) + __nfs_inode_return_delegation(inode, delegation); iput(inode); goto restart; } + rcu_read_unlock(); out: - spin_unlock(&clp->cl_lock); nfs_put_client(clp); module_put_and_exit(0); } @@ -306,18 +319,21 @@ void nfs_handle_cb_pathdown(struct nfs_client *clp) if (clp == NULL) return; restart: - spin_lock(&clp->cl_lock); - list_for_each_entry(delegation, &clp->cl_delegations, super_list) { + rcu_read_lock(); + list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { inode = igrab(delegation->inode); if (inode == NULL) continue; - nfs_detach_delegation_locked(NFS_I(inode), NULL); + 
spin_lock(&clp->cl_lock); + delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); spin_unlock(&clp->cl_lock); - __nfs_inode_return_delegation(inode, delegation); + rcu_read_unlock(); + if (delegation != NULL) + __nfs_inode_return_delegation(inode, delegation); iput(inode); goto restart; } - spin_unlock(&clp->cl_lock); + rcu_read_unlock(); } struct recall_threadargs { @@ -391,14 +407,14 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs { struct nfs_delegation *delegation; struct inode *res = NULL; - spin_lock(&clp->cl_lock); - list_for_each_entry(delegation, &clp->cl_delegations, super_list) { + rcu_read_lock(); + list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { if (nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { res = igrab(delegation->inode); break; } } - spin_unlock(&clp->cl_lock); + rcu_read_unlock(); return res; } @@ -408,10 +424,10 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs void nfs_delegation_mark_reclaim(struct nfs_client *clp) { struct nfs_delegation *delegation; - spin_lock(&clp->cl_lock); - list_for_each_entry(delegation, &clp->cl_delegations, super_list) + rcu_read_lock(); + list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) delegation->flags |= NFS_DELEGATION_NEED_RECLAIM; - spin_unlock(&clp->cl_lock); + rcu_read_unlock(); } /* @@ -419,39 +435,35 @@ void nfs_delegation_mark_reclaim(struct nfs_client *clp) */ void nfs_delegation_reap_unclaimed(struct nfs_client *clp) { - struct nfs_delegation *delegation, *n; - LIST_HEAD(head); - spin_lock(&clp->cl_lock); - list_for_each_entry_safe(delegation, n, &clp->cl_delegations, super_list) { + struct nfs_delegation *delegation; +restart: + rcu_read_lock(); + list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { if ((delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0) continue; - list_move(&delegation->super_list, &head); - 
NFS_I(delegation->inode)->delegation = NULL; - NFS_I(delegation->inode)->delegation_state = 0; - } - spin_unlock(&clp->cl_lock); - while(!list_empty(&head)) { - delegation = list_entry(head.next, struct nfs_delegation, super_list); - list_del(&delegation->super_list); - nfs_free_delegation(delegation); + spin_lock(&clp->cl_lock); + delegation = nfs_detach_delegation_locked(NFS_I(delegation->inode), NULL); + spin_unlock(&clp->cl_lock); + rcu_read_unlock(); + if (delegation != NULL) + call_rcu(&delegation->rcu, nfs_free_delegation_callback); + goto restart; } + rcu_read_unlock(); } int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode) { - struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; - int res = 0; + int ret = 0; - if (nfsi->delegation_state == 0) - return 0; - spin_lock(&clp->cl_lock); - delegation = nfsi->delegation; + rcu_read_lock(); + delegation = rcu_dereference(nfsi->delegation); if (delegation != NULL) { memcpy(dst->data, delegation->stateid.data, sizeof(dst->data)); - res = 1; + ret = 1; } - spin_unlock(&clp->cl_lock); - return res; + rcu_read_unlock(); + return ret; } diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 8f79a313516..5874ce7fdba 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -22,6 +22,7 @@ struct nfs_delegation { long flags; loff_t maxsize; __u64 change_attr; + struct rcu_head rcu; }; int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); @@ -45,11 +46,16 @@ int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode); static inline int nfs_have_delegation(struct inode *inode, int flags) { + struct nfs_delegation *delegation; + int ret = 0; + flags &= FMODE_READ|FMODE_WRITE; - smp_rmb(); - if ((NFS_I(inode)->delegation_state & flags) == flags) - return 1; - return 0; + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation != 
NULL && (delegation->type & flags) == flags) + ret = 1; + rcu_read_unlock(); + return ret; } #else -- cgit v1.2.3-70-g09d2 From 412c77cee6d6e73fbe1dc3d67f52163efed33fc4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jul 2007 16:10:55 -0400 Subject: NFSv4: Defer inode revalidation when setting up a delegation Currently we force a synchronous call to __nfs_revalidate_inode() in nfs_inode_set_delegation(). This not only ensures that we cannot call nfs_inode_set_delegation from an asynchronous context, but it also slows down any call to open(). Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 10 ++++++---- fs/nfs/inode.c | 4 +++- include/linux/nfs_fs.h | 1 + 3 files changed, 10 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 56f4f6a99d4..20ac403469a 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -124,10 +124,6 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct struct nfs_delegation *delegation; int status = 0; - /* Ensure we first revalidate the attributes and page cache! */ - if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR))) - __nfs_revalidate_inode(NFS_SERVER(inode), inode); - delegation = kmalloc(sizeof(*delegation), GFP_KERNEL); if (delegation == NULL) return -ENOMEM; @@ -154,6 +150,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct status = -EIO; } } + + /* Ensure we revalidate the attributes and page cache! 
*/ + spin_lock(&inode->i_lock); + nfsi->cache_validity |= NFS_INO_REVAL_FORCED; + spin_unlock(&inode->i_lock); + spin_unlock(&clp->cl_lock); kfree(delegation); return status; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 9d5124166d2..3d9fccf4ef9 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1072,8 +1072,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) invalid &= ~NFS_INO_INVALID_DATA; if (data_stable) invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE); - if (!nfs_have_delegation(inode, FMODE_READ)) + if (!nfs_have_delegation(inode, FMODE_READ) || + (nfsi->cache_validity & NFS_INO_REVAL_FORCED)) nfsi->cache_validity |= invalid; + nfsi->cache_validity &= ~NFS_INO_REVAL_FORCED; return 0; out_changed: diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index cf395351cdd..e94971040de 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -184,6 +184,7 @@ struct nfs_inode { #define NFS_INO_INVALID_ACCESS 0x0008 /* cached access cred invalid */ #define NFS_INO_INVALID_ACL 0x0010 /* cached acls are invalid */ #define NFS_INO_REVAL_PAGECACHE 0x0020 /* must revalidate pagecache */ +#define NFS_INO_REVAL_FORCED 0x0040 /* force revalidation ignoring a delegation */ /* * Bit offsets in flags field -- cgit v1.2.3-70-g09d2 From 0f9f95e0ad1f9d07d77832c5b60f7d30440602ee Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 8 Jul 2007 16:19:56 -0400 Subject: NFSv4: Clean up confirmation of sequence ids... 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 10946415de7..61ba32af4d2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -574,8 +574,8 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) sizeof(data->o_res.stateid.data)); renew_lease(data->o_res.server, data->timestamp); } - nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid); nfs_confirm_seqid(&data->owner->so_seqid, data->rpc_status); + nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid); } static void nfs4_open_confirm_release(void *calldata) @@ -674,6 +674,8 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata) data->rpc_status = -ENOTDIR; } renew_lease(data->o_res.server, data->timestamp); + if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)) + nfs_confirm_seqid(&data->owner->so_seqid, 0); } nfs_increment_open_seqid(data->rpc_status, data->o_arg.seqid); } @@ -748,7 +750,6 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) if (status != 0) return status; } - nfs_confirm_seqid(&data->owner->so_seqid, 0); if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) return server->nfs_client->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr); return 0; -- cgit v1.2.3-70-g09d2 From 003707c7225dbd4bf879b6c204743554de0a08d6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 5 Jul 2007 18:07:55 -0400 Subject: NFSv4: Always use the delegation if we have one Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 14 +++++---- fs/nfs/nfs4proc.c | 88 ++++++++++++++++++++++++++++++++++++++++-------------- fs/nfs/nfs4state.c | 28 +++++++++++------ 3 files changed, 94 insertions(+), 36 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 44b56c915f7..4a1c4d80a57 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -122,7 +122,10 @@ struct nfs4_lock_state { /* bits for 
nfs4_state->flags */ enum { LK_STATE_IN_USE, - NFS_DELEGATED_STATE, + NFS_DELEGATED_STATE, /* Current stateid is delegation */ + NFS_O_RDONLY_STATE, /* OPEN stateid has read-only state */ + NFS_O_WRONLY_STATE, /* OPEN stateid has write-only state */ + NFS_O_RDWR_STATE, /* OPEN stateid has read/write state */ }; struct nfs4_state { @@ -136,11 +139,12 @@ struct nfs4_state { unsigned long flags; /* Do we hold any locks? */ spinlock_t state_lock; /* Protects the lock_states list */ - nfs4_stateid stateid; + nfs4_stateid stateid; /* Current stateid: may be delegation */ + nfs4_stateid open_stateid; /* OPEN stateid */ - unsigned int n_rdonly; - unsigned int n_wronly; - unsigned int n_rdwr; + unsigned int n_rdonly; /* Number of read-only references */ + unsigned int n_wronly; /* Number of write-only references */ + unsigned int n_rdwr; /* Number of read/write references */ int state; /* State on the server (R,W, or RW) */ atomic_t count; }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 61ba32af4d2..128fe23d3f1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -319,7 +319,7 @@ static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task) return ret; } -static inline void update_open_stateflags(struct nfs4_state *state, mode_t open_flags) +static void update_open_stateflags(struct nfs4_state *state, mode_t open_flags) { switch (open_flags) { case FMODE_WRITE: @@ -331,9 +331,36 @@ static inline void update_open_stateflags(struct nfs4_state *state, mode_t open_ case FMODE_READ|FMODE_WRITE: state->n_rdwr++; } + nfs4_state_set_mode_locked(state, state->state | open_flags); +} + +static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) +{ + if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) + memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data)); + memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data)); + switch (open_flags) { + case FMODE_READ: + 
set_bit(NFS_O_RDONLY_STATE, &state->flags); + break; + case FMODE_WRITE: + set_bit(NFS_O_WRONLY_STATE, &state->flags); + break; + case FMODE_READ|FMODE_WRITE: + set_bit(NFS_O_RDWR_STATE, &state->flags); + } } -static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) +static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) +{ + spin_lock(&state->owner->so_lock); + spin_lock(&state->inode->i_lock); + nfs_set_open_stateid_locked(state, stateid, open_flags); + spin_unlock(&state->inode->i_lock); + spin_unlock(&state->owner->so_lock); +} + +static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *deleg_stateid, int open_flags) { struct inode *inode = state->inode; @@ -341,9 +368,13 @@ static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, /* Protect against nfs4_find_state_byowner() */ spin_lock(&state->owner->so_lock); spin_lock(&inode->i_lock); - memcpy(&state->stateid, stateid, sizeof(state->stateid)); + if (deleg_stateid != NULL) { + memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data)); + set_bit(NFS_DELEGATED_STATE, &state->flags); + } + if (open_stateid != NULL) + nfs_set_open_stateid_locked(state, open_stateid, open_flags); update_open_stateflags(state, open_flags); - nfs4_state_set_mode_locked(state, state->state | open_flags); spin_unlock(&inode->i_lock); spin_unlock(&state->owner->so_lock); } @@ -352,6 +383,8 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data { struct inode *inode; struct nfs4_state *state = NULL; + struct nfs_delegation *delegation; + nfs4_stateid *deleg_stateid = NULL; if (!(data->f_attr.valid & NFS_ATTR_FATTR)) goto out; @@ -361,13 +394,14 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data state = nfs4_get_open_state(inode, data->owner); if (state == NULL) goto put_inode; - update_open_stateid(state, 
&data->o_res.stateid, data->o_arg.open_flags); if (data->o_res.delegation_type != 0) { - struct nfs_inode *nfsi = NFS_I(inode); int delegation_flags = 0; - if (nfsi->delegation) - delegation_flags = nfsi->delegation->flags; + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation) + delegation_flags = delegation->flags; + rcu_read_unlock(); if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM)) nfs_inode_set_delegation(state->inode, data->owner->so_cred, @@ -377,6 +411,12 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data data->owner->so_cred, &data->o_res); } + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation != NULL) + deleg_stateid = &delegation->stateid; + update_open_stateid(state, &data->o_res.stateid, deleg_stateid, data->o_arg.open_flags); + rcu_read_unlock(); put_inode: iput(inode); out: @@ -911,8 +951,7 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred unlock_kernel(); if (err != 0) goto out_put_open_state; - set_bit(NFS_DELEGATED_STATE, &state->flags); - update_open_stateid(state, &delegation->stateid, open_flags); + update_open_stateid(state, NULL, &delegation->stateid, open_flags); out_ok: nfs4_put_state_owner(sp); up_read(&nfsi->rwsem); @@ -1149,8 +1188,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid); switch (task->tk_status) { case 0: - memcpy(&state->stateid, &calldata->res.stateid, - sizeof(state->stateid)); + nfs_set_open_stateid(state, &calldata->res.stateid, calldata->arg.open_flags); renew_lease(server, calldata->timestamp); break; case -NFS4ERR_STALE_STATEID: @@ -1175,26 +1213,32 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) .rpc_resp = &calldata->res, .rpc_cred = state->owner->so_cred, }; - int mode = 0, old_mode; + int clear_rd, clear_wr, clear_rdwr; + int mode; if 
(nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) return; - /* Recalculate the new open mode in case someone reopened the file - * while we were waiting in line to be scheduled. - */ + + mode = FMODE_READ|FMODE_WRITE; + clear_rd = clear_wr = clear_rdwr = 0; spin_lock(&state->owner->so_lock); spin_lock(&calldata->inode->i_lock); - mode = old_mode = state->state; + /* Calculate the change in open mode */ if (state->n_rdwr == 0) { - if (state->n_rdonly == 0) + if (state->n_rdonly == 0) { mode &= ~FMODE_READ; - if (state->n_wronly == 0) + clear_rd |= test_and_clear_bit(NFS_O_RDONLY_STATE, &state->flags); + clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags); + } + if (state->n_wronly == 0) { mode &= ~FMODE_WRITE; + clear_wr |= test_and_clear_bit(NFS_O_WRONLY_STATE, &state->flags); + clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags); + } } - nfs4_state_set_mode_locked(state, mode); spin_unlock(&calldata->inode->i_lock); spin_unlock(&state->owner->so_lock); - if (mode == old_mode || test_bit(NFS_DELEGATED_STATE, &state->flags)) { + if (!clear_rd && !clear_wr && !clear_rdwr) { /* Note: exit _without_ calling nfs4_close_done */ task->tk_action = NULL; return; @@ -1238,7 +1282,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state) calldata->inode = state->inode; calldata->state = state; calldata->arg.fh = NFS_FH(state->inode); - calldata->arg.stateid = &state->stateid; + calldata->arg.stateid = &state->open_stateid; /* Serialization for the sequence id */ calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); if (calldata->arg.seqid == NULL) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index ac816b303f3..4f78c0d1eab 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -412,7 +412,8 @@ void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode) { struct inode *inode = state->inode; struct nfs4_state_owner *owner = state->owner; - int oldstate, newstate = 0; + int call_close = 0; + int 
newstate; atomic_inc(&owner->so_count); /* Protect against nfs4_find_state() */ @@ -428,21 +429,26 @@ void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode) case FMODE_READ|FMODE_WRITE: state->n_rdwr--; } - oldstate = newstate = state->state; + newstate = FMODE_READ|FMODE_WRITE; if (state->n_rdwr == 0) { - if (state->n_rdonly == 0) + if (state->n_rdonly == 0) { newstate &= ~FMODE_READ; - if (state->n_wronly == 0) + call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags); + call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags); + } + if (state->n_wronly == 0) { newstate &= ~FMODE_WRITE; + call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags); + call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags); + } + if (newstate == 0) + clear_bit(NFS_DELEGATED_STATE, &state->flags); } - if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { - nfs4_state_set_mode_locked(state, newstate); - oldstate = newstate; - } + nfs4_state_set_mode_locked(state, newstate); spin_unlock(&inode->i_lock); spin_unlock(&owner->so_lock); - if (oldstate == newstate) { + if (!call_close) { nfs4_put_open_state(state); nfs4_put_state_owner(owner); } else @@ -838,6 +844,10 @@ static void nfs4_state_mark_reclaim(struct nfs_client *clp) sp->so_seqid.flags = 0; spin_lock(&sp->so_lock); list_for_each_entry(state, &sp->so_states, open_states) { + clear_bit(NFS_DELEGATED_STATE, &state->flags); + clear_bit(NFS_O_RDONLY_STATE, &state->flags); + clear_bit(NFS_O_WRONLY_STATE, &state->flags); + clear_bit(NFS_O_RDWR_STATE, &state->flags); list_for_each_entry(lock, &state->lock_states, ls_locks) { lock->ls_seqid.counter = 0; lock->ls_seqid.flags = 0; -- cgit v1.2.3-70-g09d2 From 6f43ddccb31b5bd2297878f6f3735d45fd4dfce3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 8 Jul 2007 16:49:11 -0400 Subject: NFSv4: Improve the debugging of bad sequence id errors... 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 4 +++- fs/nfs/nfs4state.c | 7 +++++++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 128fe23d3f1..3b59c5ded3f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1079,7 +1079,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int * the user though... */ if (status == -NFS4ERR_BAD_SEQID) { - printk(KERN_WARNING "NFS: v4 server returned a bad sequence-id error!\n"); + printk(KERN_WARNING "NFS: v4 server %s " + " returned a bad sequence-id error!\n", + NFS_SERVER(dir)->nfs_client->cl_hostname); exception.retry = 1; continue; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 4f78c0d1eab..4fa4054cdf3 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -38,6 +38,7 @@ * subsequent patch. */ +#include #include #include #include @@ -648,6 +649,12 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) case 0: break; case -NFS4ERR_BAD_SEQID: + if (seqid->sequence->flags & NFS_SEQID_CONFIRMED) + return; + printk(KERN_WARNING "NFS: v4 server returned a bad" + "sequence-id error on an" + "unconfirmed sequence %p!\n", + seqid->sequence); case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_BAD_STATEID: -- cgit v1.2.3-70-g09d2 From 1b370bc28f90955bccda8be5e7d7047ad1381da7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Jul 2007 08:04:47 -0400 Subject: NFSv4: Allow nfs4_opendata_to_nfs4_state to return errors. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3b59c5ded3f..52ba7630794 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -385,15 +385,19 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data struct nfs4_state *state = NULL; struct nfs_delegation *delegation; nfs4_stateid *deleg_stateid = NULL; + int ret; + ret = -EAGAIN; if (!(data->f_attr.valid & NFS_ATTR_FATTR)) - goto out; + goto err; inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr); + ret = PTR_ERR(inode); if (IS_ERR(inode)) - goto out; + goto err; + ret = -ENOMEM; state = nfs4_get_open_state(inode, data->owner); if (state == NULL) - goto put_inode; + goto err_put_inode; if (data->o_res.delegation_type != 0) { int delegation_flags = 0; @@ -417,10 +421,12 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data deleg_stateid = &delegation->stateid; update_open_stateid(state, &data->o_res.stateid, deleg_stateid, data->o_arg.open_flags); rcu_read_unlock(); -put_inode: iput(inode); -out: return state; +err_put_inode: + iput(inode); +err: + return ERR_PTR(ret); } static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state) @@ -453,8 +459,9 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openf if (ret != 0) return ret; newstate = nfs4_opendata_to_nfs4_state(opendata); - if (newstate != NULL) - nfs4_close_state(&opendata->path, newstate, openflags); + if (IS_ERR(newstate)) + return PTR_ERR(newstate); + nfs4_close_state(&opendata->path, newstate, openflags); *res = newstate; return 0; } @@ -631,7 +638,7 @@ static void nfs4_open_confirm_release(void *calldata) goto out_free; nfs_confirm_seqid(&data->owner->so_seqid, 0); state = nfs4_opendata_to_nfs4_state(data); - if (state != NULL) + if (!IS_ERR(state)) 
nfs4_close_state(&data->path, state, data->o_arg.open_flags); out_free: nfs4_opendata_put(data); @@ -736,7 +743,7 @@ static void nfs4_open_release(void *calldata) goto out_free; nfs_confirm_seqid(&data->owner->so_seqid, 0); state = nfs4_opendata_to_nfs4_state(data); - if (state != NULL) + if (!IS_ERR(state)) nfs4_close_state(&data->path, state, data->o_arg.open_flags); out_free: nfs4_opendata_put(data); @@ -1036,9 +1043,9 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct if (opendata->o_arg.open_flags & O_EXCL) nfs4_exclusive_attrset(opendata, sattr); - status = -ENOMEM; state = nfs4_opendata_to_nfs4_state(opendata); - if (state == NULL) + status = PTR_ERR(state); + if (IS_ERR(state)) goto err_opendata_put; nfs4_opendata_put(opendata); nfs4_put_state_owner(sp); -- cgit v1.2.3-70-g09d2 From 3e309914a15333a5493058e4927e979c7434ae44 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Jul 2007 13:19:59 -0400 Subject: NFSv4: Clean up _nfs4_proc_open() Use a flag instead of the 'data->rpc_status = -ENOMEM' hack.
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 52ba7630794..05afb7ba3bc 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -226,6 +226,7 @@ struct nfs4_opendata { struct nfs4_state_owner *owner; struct iattr attrs; unsigned long timestamp; + unsigned int rpc_done : 1; int rpc_status; int cancelled; }; @@ -620,6 +621,7 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) memcpy(data->o_res.stateid.data, data->c_res.stateid.data, sizeof(data->o_res.stateid.data)); renew_lease(data->o_res.server, data->timestamp); + data->rpc_done = 1; } nfs_confirm_seqid(&data->owner->so_seqid, data->rpc_status); nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid); @@ -634,7 +636,7 @@ static void nfs4_open_confirm_release(void *calldata) if (data->cancelled == 0) goto out_free; /* In case of error, no cleanup! */ - if (data->rpc_status != 0) + if (!data->rpc_done) goto out_free; nfs_confirm_seqid(&data->owner->so_seqid, 0); state = nfs4_opendata_to_nfs4_state(data); @@ -660,11 +662,8 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data) int status; kref_get(&data->kref); - /* - * If rpc_run_task() ends up calling ->rpc_release(), we - * want to ensure that it takes the 'error' code path. 
- */ - data->rpc_status = -ENOMEM; + data->rpc_done = 0; + data->rpc_status = 0; task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data); if (IS_ERR(task)) return PTR_ERR(task); @@ -725,6 +724,7 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata) nfs_confirm_seqid(&data->owner->so_seqid, 0); } nfs_increment_open_seqid(data->rpc_status, data->o_arg.seqid); + data->rpc_done = 1; } static void nfs4_open_release(void *calldata) @@ -736,7 +736,7 @@ static void nfs4_open_release(void *calldata) if (data->cancelled == 0) goto out_free; /* In case of error, no cleanup! */ - if (data->rpc_status != 0) + if (data->rpc_status != 0 || !data->rpc_done) goto out_free; /* In case we need an open_confirm, no cleanup! */ if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM) @@ -768,11 +768,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) int status; kref_get(&data->kref); - /* - * If rpc_run_task() ends up calling ->rpc_release(), we - * want to ensure that it takes the 'error' code path. - */ - data->rpc_status = -ENOMEM; + data->rpc_done = 0; + data->rpc_status = 0; data->cancelled = 0; task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data); if (IS_ERR(task)) @@ -784,7 +781,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) } else status = data->rpc_status; rpc_put_task(task); - if (status != 0) + if (status != 0 || !data->rpc_done) return status; if (o_arg->open_flags & O_CREAT) { -- cgit v1.2.3-70-g09d2 From aac00a8d0a53097063da532cbdf0b8775a4dcd53 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 5 Jul 2007 19:02:21 -0400 Subject: NFSv4: Check for the existence of a delegation in nfs4_open_prepare() We should not be calling open() on an inode that has a delegation unless we're doing a reclaim. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 195 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 107 insertions(+), 88 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 05afb7ba3bc..ea332e831d7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -65,6 +65,7 @@ static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *) static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp); +static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags); /* Prevent leaks of NFSv4 errors into userland */ int nfs4_map_errors(int err) @@ -224,6 +225,7 @@ struct nfs4_opendata { struct path path; struct dentry *dir; struct nfs4_state_owner *owner; + struct nfs4_state *state; struct iattr attrs; unsigned long timestamp; unsigned int rpc_done : 1; @@ -296,6 +298,8 @@ static void nfs4_opendata_free(struct kref *kref) struct nfs4_opendata, kref); nfs_free_seqid(p->o_arg.seqid); + if (p->state != NULL) + nfs4_put_open_state(p->state); nfs4_put_state_owner(p->owner); dput(p->dir); dput(p->path.dentry); @@ -320,6 +324,15 @@ static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task) return ret; } +static int can_open_delegated(struct nfs_delegation *delegation, mode_t open_flags) +{ + if ((delegation->type & open_flags) != open_flags) + return 0; + if (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) + return 0; + return 1; +} + static void update_open_stateflags(struct nfs4_state *state, mode_t open_flags) { switch (open_flags) { @@ -380,6 +393,65 @@ static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_sta spin_unlock(&state->owner->so_lock); } +static void nfs4_return_incompatible_delegation(struct inode *inode, mode_t 
open_flags) +{ + struct nfs_delegation *delegation; + + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation == NULL || (delegation->type & open_flags) == open_flags) { + rcu_read_unlock(); + return; + } + rcu_read_unlock(); + nfs_inode_return_delegation(inode); +} + +static struct nfs4_state *nfs4_try_open_delegated(struct nfs4_opendata *opendata) +{ + struct nfs4_state *state = opendata->state; + struct nfs_inode *nfsi = NFS_I(state->inode); + struct nfs_delegation *delegation; + int open_mode = opendata->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL); + nfs4_stateid stateid; + int ret = -EAGAIN; + + rcu_read_lock(); + delegation = rcu_dereference(nfsi->delegation); + if (delegation == NULL) + goto out_unlock; + for (;;) { + if (!can_open_delegated(delegation, open_mode)) + break; + /* Save the delegation */ + memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data)); + rcu_read_unlock(); + lock_kernel(); + ret = _nfs4_do_access(state->inode, state->owner->so_cred, open_mode); + unlock_kernel(); + if (ret != 0) + goto out; + ret = -EAGAIN; + rcu_read_lock(); + delegation = rcu_dereference(nfsi->delegation); + if (delegation == NULL) + break; + /* Is the delegation still valid? 
*/ + if (memcmp(stateid.data, delegation->stateid.data, sizeof(stateid.data)) != 0) + continue; + rcu_read_unlock(); + update_open_stateid(state, NULL, &stateid, open_mode); + goto out_return_state; + } +out_unlock: + rcu_read_unlock(); +out: + return ERR_PTR(ret); +out_return_state: + atomic_inc(&state->count); + return state; +} + static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) { struct inode *inode; @@ -388,6 +460,11 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data nfs4_stateid *deleg_stateid = NULL; int ret; + if (!data->rpc_done) { + state = nfs4_try_open_delegated(data); + goto out; + } + ret = -EAGAIN; if (!(data->f_attr.valid & NFS_ATTR_FATTR)) goto err; @@ -423,6 +500,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data update_open_stateid(state, &data->o_res.stateid, deleg_stateid, data->o_arg.open_flags); rcu_read_unlock(); iput(inode); +out: return state; err_put_inode: iput(inode); @@ -690,6 +768,23 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0) return; + /* + * Check if we still need to send an OPEN call, or if we can use + * a delegation instead. + */ + if (data->state != NULL) { + struct nfs_delegation *delegation; + + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(data->state->inode)->delegation); + if (delegation != NULL && + (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0) { + rcu_read_unlock(); + task->tk_action = NULL; + return; + } + rcu_read_unlock(); + } /* Update sequence id. 
*/ data->o_arg.id = sp->so_owner_id.id; data->o_arg.clientid = sp->so_client->cl_clientid; @@ -906,90 +1001,6 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta return ret; } -/* - * Returns a referenced nfs4_state if there is an open delegation on the file - */ -static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred, struct nfs4_state **res) -{ - struct nfs_delegation *delegation; - struct nfs_server *server = NFS_SERVER(inode); - struct nfs_client *clp = server->nfs_client; - struct nfs_inode *nfsi = NFS_I(inode); - struct nfs4_state_owner *sp = NULL; - struct nfs4_state *state = NULL; - int open_flags = flags & (FMODE_READ|FMODE_WRITE); - int err; - - err = -ENOMEM; - if (!(sp = nfs4_get_state_owner(server, cred))) { - dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__); - return err; - } - err = nfs4_recover_expired_lease(server); - if (err != 0) - goto out_put_state_owner; - /* Protect against reboot recovery - NOTE ORDER! 
*/ - down_read(&clp->cl_sem); - /* Protect against delegation recall */ - down_read(&nfsi->rwsem); - delegation = NFS_I(inode)->delegation; - err = -ENOENT; - if (delegation == NULL || (delegation->type & open_flags) != open_flags) - goto out_err; - err = -ENOMEM; - state = nfs4_get_open_state(inode, sp); - if (state == NULL) - goto out_err; - - err = -ENOENT; - if ((state->state & open_flags) == open_flags) { - spin_lock(&inode->i_lock); - update_open_stateflags(state, open_flags); - spin_unlock(&inode->i_lock); - goto out_ok; - } else if (state->state != 0) - goto out_put_open_state; - - lock_kernel(); - err = _nfs4_do_access(inode, cred, open_flags); - unlock_kernel(); - if (err != 0) - goto out_put_open_state; - update_open_stateid(state, NULL, &delegation->stateid, open_flags); -out_ok: - nfs4_put_state_owner(sp); - up_read(&nfsi->rwsem); - up_read(&clp->cl_sem); - *res = state; - return 0; -out_put_open_state: - nfs4_put_open_state(state); -out_err: - up_read(&nfsi->rwsem); - up_read(&clp->cl_sem); - if (err != -EACCES) - nfs_inode_return_delegation(inode); -out_put_state_owner: - nfs4_put_state_owner(sp); - return err; -} - -static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred) -{ - struct nfs4_exception exception = { }; - struct nfs4_state *res = ERR_PTR(-EIO); - int err; - - do { - err = _nfs4_open_delegated(inode, flags, cred, &res); - if (err == 0) - break; - res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(inode), - err, &exception)); - } while (exception.retry); - return res; -} - /* * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-* * fields corresponding to attributes that were used to store the verifier. 
@@ -1016,7 +1027,7 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct struct nfs_server *server = NFS_SERVER(dir); struct nfs_client *clp = server->nfs_client; struct nfs4_opendata *opendata; - int status; + int status; /* Protect against reboot recovery conflicts */ status = -ENOMEM; @@ -1027,12 +1038,17 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct status = nfs4_recover_expired_lease(server); if (status != 0) goto err_put_state_owner; + if (path->dentry->d_inode != NULL) + nfs4_return_incompatible_delegation(path->dentry->d_inode, flags & (FMODE_READ|FMODE_WRITE)); down_read(&clp->cl_sem); status = -ENOMEM; opendata = nfs4_opendata_alloc(path, sp, flags, sattr); if (opendata == NULL) goto err_release_rwsem; + if (path->dentry->d_inode != NULL) + opendata->state = nfs4_get_open_state(path->dentry->d_inode, sp); + status = _nfs4_proc_open(opendata); if (status != 0) goto err_opendata_put; @@ -1099,6 +1115,11 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int exception.retry = 1; continue; } + if (status == -EAGAIN) { + /* We must have found a delegation */ + exception.retry = 1; + continue; + } res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir), status, &exception)); } while (exception.retry); @@ -1390,9 +1411,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); if (IS_ERR(cred)) return PTR_ERR(cred); - state = nfs4_open_delegated(dentry->d_inode, openflags, cred); - if (IS_ERR(state)) - state = nfs4_do_open(dir, &path, openflags, NULL, cred); + state = nfs4_do_open(dir, &path, openflags, NULL, cred); put_rpccred(cred); if (IS_ERR(state)) { switch (PTR_ERR(state)) { -- cgit v1.2.3-70-g09d2 From 6ee412689027dc7954453aed392ab5c7599c0f73 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 8 Jul 2007 14:11:36 -0400 Subject: NFSv4: Don't call OPEN if we already have an open 
stateid for a file If we already have a stateid with the correct open mode for a given file, then we can reuse that stateid instead of re-issuing an OPEN call without violating the close-to-open caching semantics. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 49 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ea332e831d7..1de07661925 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -324,6 +324,24 @@ static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task) return ret; } +static int can_open_cached(struct nfs4_state *state, int mode) +{ + int ret = 0; + switch (mode & (FMODE_READ|FMODE_WRITE|O_EXCL)) { + case FMODE_READ: + ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0; + ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0; + break; + case FMODE_WRITE: + ret |= test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0; + ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0; + break; + case FMODE_READ|FMODE_WRITE: + ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0; + } + return ret; +} + static int can_open_delegated(struct nfs_delegation *delegation, mode_t open_flags) { if ((delegation->type & open_flags) != open_flags) @@ -407,7 +425,7 @@ static void nfs4_return_incompatible_delegation(struct inode *inode, mode_t open nfs_inode_return_delegation(inode); } -static struct nfs4_state *nfs4_try_open_delegated(struct nfs4_opendata *opendata) +static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) { struct nfs4_state *state = opendata->state; struct nfs_inode *nfsi = NFS_I(state->inode); @@ -418,9 +436,19 @@ static struct nfs4_state *nfs4_try_open_delegated(struct nfs4_opendata *opendata rcu_read_lock(); delegation = rcu_dereference(nfsi->delegation); - if (delegation == NULL) - goto out_unlock; for (;;) { + if (can_open_cached(state, open_mode)) { + spin_lock(&state->owner->so_lock); + 
if (can_open_cached(state, open_mode)) { + update_open_stateflags(state, open_mode); + spin_unlock(&state->owner->so_lock); + rcu_read_unlock(); + goto out_return_state; + } + spin_unlock(&state->owner->so_lock); + } + if (delegation == NULL) + break; if (!can_open_delegated(delegation, open_mode)) break; /* Save the delegation */ @@ -434,8 +462,9 @@ static struct nfs4_state *nfs4_try_open_delegated(struct nfs4_opendata *opendata ret = -EAGAIN; rcu_read_lock(); delegation = rcu_dereference(nfsi->delegation); + /* If no delegation, try a cached open */ if (delegation == NULL) - break; + continue; /* Is the delegation still valid? */ if (memcmp(stateid.data, delegation->stateid.data, sizeof(stateid.data)) != 0) continue; @@ -443,7 +472,6 @@ static struct nfs4_state *nfs4_try_open_delegated(struct nfs4_opendata *opendata update_open_stateid(state, NULL, &stateid, open_mode); goto out_return_state; } -out_unlock: rcu_read_unlock(); out: return ERR_PTR(ret); @@ -461,7 +489,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data int ret; if (!data->rpc_done) { - state = nfs4_try_open_delegated(data); + state = nfs4_try_open_cached(data); goto out; } @@ -775,13 +803,14 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) if (data->state != NULL) { struct nfs_delegation *delegation; + if (can_open_cached(data->state, data->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL))) + goto out_no_action; rcu_read_lock(); delegation = rcu_dereference(NFS_I(data->state->inode)->delegation); if (delegation != NULL && (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0) { rcu_read_unlock(); - task->tk_action = NULL; - return; + goto out_no_action; } rcu_read_unlock(); } @@ -792,6 +821,10 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; data->timestamp = jiffies; rpc_call_setup(task, &msg, 0); + return; +out_no_action: + task->tk_action = NULL; + } 
static void nfs4_open_done(struct rpc_task *task, void *calldata) -- cgit v1.2.3-70-g09d2 From 1ac7e2fd35905f3d44df06568bca5f9d140369b3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 8 Jul 2007 21:04:15 -0400 Subject: NFSv4: Clean up the callers of nfs4_open_recover_helper() Rely on nfs4_try_open_cached() when appropriate. Also fix an RCU violation in _nfs4_do_open_reclaim() Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 42 ++++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1de07661925..3a2af805376 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -602,6 +602,19 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * if (newstate != state) return -ESTALE; } + /* + * We may have performed cached opens for all three recoveries. + * Check if we need to update the current stateid. + */ + if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 && + memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) { + spin_lock(&state->owner->so_lock); + spin_lock(&state->inode->i_lock); + if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) + memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)); + spin_unlock(&state->inode->i_lock); + spin_unlock(&state->owner->so_lock); + } return 0; } @@ -611,26 +624,22 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * */ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state) { - struct nfs_delegation *delegation = NFS_I(state->inode)->delegation; + struct nfs_delegation *delegation; struct nfs4_opendata *opendata; int delegation_type = 0; int status; - if (delegation != NULL) { - if (!(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) { - memcpy(&state->stateid, &delegation->stateid, - sizeof(state->stateid)); - set_bit(NFS_DELEGATED_STATE, 
&state->flags); - return 0; - } - delegation_type = delegation->type; - } opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL); if (opendata == NULL) return -ENOMEM; opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS; opendata->o_arg.fh = NFS_FH(state->inode); nfs_copy_fh(&opendata->o_res.fh, opendata->o_arg.fh); + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(state->inode)->delegation); + if (delegation != NULL && (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) != 0) + delegation_type = delegation->flags; + rcu_read_unlock(); opendata->o_arg.u.delegation_type = delegation_type; status = nfs4_open_recover(opendata, state); nfs4_opendata_put(opendata); @@ -980,21 +989,10 @@ static int nfs4_recover_expired_lease(struct nfs_server *server) */ static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state) { - struct inode *inode = state->inode; - struct nfs_delegation *delegation = NFS_I(inode)->delegation; struct nfs4_opendata *opendata; - int openflags = state->state & (FMODE_READ|FMODE_WRITE); int ret; - if (delegation != NULL && !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) { - ret = _nfs4_do_access(inode, ctx->cred, openflags); - if (ret < 0) - return ret; - memcpy(&state->stateid, &delegation->stateid, sizeof(state->stateid)); - set_bit(NFS_DELEGATED_STATE, &state->flags); - return 0; - } - opendata = nfs4_opendata_alloc(&ctx->path, state->owner, openflags, NULL); + opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL); if (opendata == NULL) return -ENOMEM; ret = nfs4_open_recover(opendata, state); -- cgit v1.2.3-70-g09d2 From 8bda4e4c98d14566fc1a354c62fb59d70cc49b97 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 9 Jul 2007 10:45:42 -0400 Subject: NFSv4: Fix up stateid locking... We really don't need to grab both the state->owner->so_lock and the inode->i_lock.
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 2 ++ fs/nfs/nfs4proc.c | 27 +++++++++++---------------- fs/nfs/nfs4state.c | 10 ++++++---- 3 files changed, 19 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 4a1c4d80a57..dd1aa2b598c 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -139,9 +139,11 @@ struct nfs4_state { unsigned long flags; /* Do we hold any locks? */ spinlock_t state_lock; /* Protects the lock_states list */ + seqlock_t seqlock; /* Protects the stateid/open_stateid */ nfs4_stateid stateid; /* Current stateid: may be delegation */ nfs4_stateid open_stateid; /* OPEN stateid */ + /* The following 3 fields are protected by owner->so_lock */ unsigned int n_rdonly; /* Number of read-only references */ unsigned int n_wronly; /* Number of write-only references */ unsigned int n_rdwr; /* Number of read/write references */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3a2af805376..ba86ec654c2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -385,29 +385,28 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid * static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) { - spin_lock(&state->owner->so_lock); - spin_lock(&state->inode->i_lock); + write_seqlock(&state->seqlock); nfs_set_open_stateid_locked(state, stateid, open_flags); - spin_unlock(&state->inode->i_lock); - spin_unlock(&state->owner->so_lock); + write_sequnlock(&state->seqlock); } static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *deleg_stateid, int open_flags) { - struct inode *inode = state->inode; - open_flags &= (FMODE_READ|FMODE_WRITE); - /* Protect against nfs4_find_state_byowner() */ - spin_lock(&state->owner->so_lock); - spin_lock(&inode->i_lock); + /* + * Protect the call to nfs4_state_set_mode_locked and + * serialise the stateid update + */ + write_seqlock(&state->seqlock); if 
(deleg_stateid != NULL) { memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data)); set_bit(NFS_DELEGATED_STATE, &state->flags); } if (open_stateid != NULL) nfs_set_open_stateid_locked(state, open_stateid, open_flags); + write_sequnlock(&state->seqlock); + spin_lock(&state->owner->so_lock); update_open_stateflags(state, open_flags); - spin_unlock(&inode->i_lock); spin_unlock(&state->owner->so_lock); } @@ -608,12 +607,10 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * */ if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 && memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) { - spin_lock(&state->owner->so_lock); - spin_lock(&state->inode->i_lock); + write_seqlock(&state->seqlock); if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)); - spin_unlock(&state->inode->i_lock); - spin_unlock(&state->owner->so_lock); + write_sequnlock(&state->seqlock); } return 0; } @@ -1280,7 +1277,6 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) mode = FMODE_READ|FMODE_WRITE; clear_rd = clear_wr = clear_rdwr = 0; spin_lock(&state->owner->so_lock); - spin_lock(&calldata->inode->i_lock); /* Calculate the change in open mode */ if (state->n_rdwr == 0) { if (state->n_rdonly == 0) { @@ -1294,7 +1290,6 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags); } } - spin_unlock(&calldata->inode->i_lock); spin_unlock(&state->owner->so_lock); if (!clear_rd && !clear_wr && !clear_rdwr) { /* Note: exit _without_ calling nfs4_close_done */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 4fa4054cdf3..523cc2cbb5e 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -307,6 +307,7 @@ nfs4_alloc_open_state(void) atomic_set(&state->count, 1); INIT_LIST_HEAD(&state->lock_states); 
spin_lock_init(&state->state_lock); + seqlock_init(&state->seqlock); return state; } @@ -411,7 +412,6 @@ void nfs4_put_open_state(struct nfs4_state *state) */ void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode) { - struct inode *inode = state->inode; struct nfs4_state_owner *owner = state->owner; int call_close = 0; int newstate; @@ -419,7 +419,6 @@ void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode) atomic_inc(&owner->so_count); /* Protect against nfs4_find_state() */ spin_lock(&owner->so_lock); - spin_lock(&inode->i_lock); switch (mode & (FMODE_READ | FMODE_WRITE)) { case FMODE_READ: state->n_rdonly--; @@ -446,7 +445,6 @@ void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode) clear_bit(NFS_DELEGATED_STATE, &state->flags); } nfs4_state_set_mode_locked(state, newstate); - spin_unlock(&inode->i_lock); spin_unlock(&owner->so_lock); if (!call_close) { @@ -599,8 +597,12 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner) { struct nfs4_lock_state *lsp; + int seq; - memcpy(dst, &state->stateid, sizeof(*dst)); + do { + seq = read_seqbegin(&state->seqlock); + memcpy(dst, &state->stateid, sizeof(*dst)); + } while (read_seqretry(&state->seqlock, seq)); if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) return; -- cgit v1.2.3-70-g09d2 From 9eaa67c6a5b77f248c4703d81c4a6c6434e35385 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Jul 2007 12:12:19 -0400 Subject: NFS: Clean-up: use correct type when converting NFS blocks to local blocks inode->i_blocks is a blkcnt_t these days, which can be a u64 or unsigned long, depending on the setting of CONFIG_LSF. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index ad2b40db1e6..76cf55d5710 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -183,9 +183,9 @@ unsigned long nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp) /* * Calculate the number of 512byte blocks used. */ -static inline unsigned long nfs_calc_block_size(u64 tsize) +static inline blkcnt_t nfs_calc_block_size(u64 tsize) { - loff_t used = (tsize + 511) >> 9; + blkcnt_t used = (tsize + 511) >> 9; return (used > ULONG_MAX) ? ULONG_MAX : used; } -- cgit v1.2.3-70-g09d2 From 5680d48be88d12cd987e5579a6072a4ca34ca6ea Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Jul 2007 12:12:24 -0400 Subject: NFS: Clean-up: Define macros for maximum host and export path name lengths Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 4 ++-- include/linux/nfs_mount.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 14c7923697d..e7d19708583 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -867,12 +867,12 @@ static int nfs4_get_sb(struct file_system_type *fs_type, } } - p = nfs_copy_user_string(NULL, &data->hostname, 256); + p = nfs_copy_user_string(NULL, &data->hostname, NFS4_MAXNAMLEN); if (IS_ERR(p)) goto out_err; hostname = p; - p = nfs_copy_user_string(NULL, &data->mnt_path, 1024); + p = nfs_copy_user_string(NULL, &data->mnt_path, NFS4_MAXPATHLEN); if (IS_ERR(p)) goto out_err; mntpath = p; diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h index cc8b9c59acb..0b82a17c705 100644 --- a/include/linux/nfs_mount.h +++ b/include/linux/nfs_mount.h @@ -37,7 +37,7 @@ struct nfs_mount_data { int acdirmin; /* 1 */ int acdirmax; /* 1 */ struct sockaddr_in addr; /* 1 */ - char hostname[256]; /* 1 */ + char hostname[NFS_MAXNAMLEN + 1]; /* 1 
*/ int namlen; /* 2 */ unsigned int bsize; /* 3 */ struct nfs3_fh root; /* 4 */ -- cgit v1.2.3-70-g09d2 From 29eb981a3b8eb4e61cd5b9da835768045d0446cb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Jul 2007 12:12:30 -0400 Subject: NFS: Clean-up: Replace nfs_copy_user_string with strndup_user The new string utility function strndup_user can be used instead of nfs_copy_user_string, eliminating an unnecessary duplication of function. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 53 ++++++++++++++++------------------------------------- 1 file changed, 16 insertions(+), 37 deletions(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e7d19708583..04ad881eac7 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -785,27 +785,6 @@ static void nfs4_fill_super(struct super_block *sb) nfs_initialise_sb(sb); } -static void *nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen) -{ - void *p = NULL; - - if (!src->len) - return ERR_PTR(-EINVAL); - if (src->len < maxlen) - maxlen = src->len; - if (dst == NULL) { - p = dst = kmalloc(maxlen + 1, GFP_KERNEL); - if (p == NULL) - return ERR_PTR(-ENOMEM); - } - if (copy_from_user(dst, src->data, maxlen)) { - kfree(p); - return ERR_PTR(-EFAULT); - } - dst[maxlen] = '\0'; - return dst; -} - /* * Get the superblock for an NFS4 mountpoint */ @@ -819,8 +798,7 @@ static int nfs4_get_sb(struct file_system_type *fs_type, rpc_authflavor_t authflavour; struct nfs_fh mntfh; struct dentry *mntroot; - char *mntpath = NULL, *hostname = NULL, ip_addr[16]; - void *p; + char *p, *mntpath = NULL, *hostname = NULL, *ip_addr = NULL; int error; if (data == NULL) { @@ -857,39 +835,39 @@ static int nfs4_get_sb(struct file_system_type *fs_type, dprintk("%s: Invalid number of RPC auth flavours %d.\n", __FUNCTION__, data->auth_flavourlen); error = -EINVAL; - goto out_err_noserver; + goto out; } if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) { error = -EFAULT; - 
goto out_err_noserver; + goto out; } } - p = nfs_copy_user_string(NULL, &data->hostname, NFS4_MAXNAMLEN); + p = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); if (IS_ERR(p)) goto out_err; hostname = p; - p = nfs_copy_user_string(NULL, &data->mnt_path, NFS4_MAXPATHLEN); + p = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN); if (IS_ERR(p)) goto out_err; mntpath = p; dprintk("MNTPATH: %s\n", mntpath); - p = nfs_copy_user_string(ip_addr, &data->client_addr, - sizeof(ip_addr) - 1); + p = strndup_user(data->client_addr.data, 16); if (IS_ERR(p)) goto out_err; + ip_addr = p; /* Get a volume representation */ server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr, authflavour, &mntfh); if (IS_ERR(server)) { error = PTR_ERR(server); - goto out_err_noserver; + goto out; } /* Get a superblock - note that we may end up sharing one that already exists */ @@ -919,25 +897,26 @@ static int nfs4_get_sb(struct file_system_type *fs_type, s->s_flags |= MS_ACTIVE; mnt->mnt_sb = s; mnt->mnt_root = mntroot; + error = 0; + +out: + kfree(ip_addr); kfree(mntpath); kfree(hostname); - return 0; + return error; out_err: error = PTR_ERR(p); - goto out_err_noserver; + goto out; out_free: nfs_free_server(server); -out_err_noserver: - kfree(mntpath); - kfree(hostname); - return error; + goto out; error_splat_super: up_write(&s->s_umount); deactivate_super(s); - goto out_err_noserver; + goto out; } static void nfs4_kill_super(struct super_block *sb) -- cgit v1.2.3-70-g09d2 From 0655960f76922a720ad14a510ed91a51395e742b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Jul 2007 12:12:35 -0400 Subject: NFS: Clean up error handling in nfs_get_sb The error return logic in nfs_get_sb now matches nfs4_get_sb, and is more maintainable. A subsequent patch will take advantage of this simplification. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 04ad881eac7..aab5cd61725 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -633,13 +633,13 @@ static int nfs_get_sb(struct file_system_type *fs_type, /* Validate the mount data */ error = nfs_validate_mount_data(data, &mntfh); if (error < 0) - return error; + goto out; /* Get a volume representation */ server = nfs_create_server(data, &mntfh); if (IS_ERR(server)) { error = PTR_ERR(server); - goto out_err_noserver; + goto out; } /* Get a superblock - note that we may end up sharing one that already exists */ @@ -669,17 +669,19 @@ static int nfs_get_sb(struct file_system_type *fs_type, s->s_flags |= MS_ACTIVE; mnt->mnt_sb = s; mnt->mnt_root = mntroot; - return 0; + error = 0; + +out: + return error; out_err_nosb: nfs_free_server(server); -out_err_noserver: - return error; + goto out; error_splat_super: up_write(&s->s_umount); deactivate_super(s); - return error; + goto out; } /* -- cgit v1.2.3-70-g09d2 From 4d81cd16112f86dc279d90ef7a24f2b1be339c3c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Jul 2007 12:12:40 -0400 Subject: NFS: Clean-up: fix a compiler warning in fs/nfs/super.c /home/cel/linux/fs/nfs/super.c: In function 'nfs_pseudoflavour_to_name': /home/cel/linux/fs/nfs/super.c:270: warning: comparison between signed and unsigned Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index aab5cd61725..6eac5bf911e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -263,11 +263,11 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour) { RPC_AUTH_GSS_SPKM, "spkm" }, { RPC_AUTH_GSS_SPKMI, "spkmi" }, { RPC_AUTH_GSS_SPKMP, "spkmp" }, - { -1, "unknown" } + { UINT_MAX, "unknown" } }; int 
i; - for (i=0; sec_flavours[i].flavour != -1; i++) { + for (i = 0; sec_flavours[i].flavour != UINT_MAX; i++) { if (sec_flavours[i].flavour == flavour) break; } -- cgit v1.2.3-70-g09d2 From fc50d58fd053862d6bafcf92f1ef2961296f3a1c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Jul 2007 12:12:46 -0400 Subject: NFS: Clean-up: Refactor IP address sanity checks in NFS client NFS and NFSv4 mounts can now share server address sanity checking. And, it provides an easy mechanism for adding IPv6 address checking at some later point. Signed-off-by: Chuck Lever Cc: Aurelien Charbon Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 6eac5bf911e..7f5bc28ea8d 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -446,6 +446,23 @@ static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags) rpc_killall_tasks(rpc); } +/* + * Sanity-check a server address provided by the mount command + */ +static int nfs_verify_server_address(struct sockaddr *addr) +{ + switch (addr->sa_family) { + case AF_INET: { + struct sockaddr_in *sa = (struct sockaddr_in *) addr; + if (sa->sin_addr.s_addr != INADDR_ANY) + return 1; + break; + } + } + + return 0; +} + /* * Validate the NFS2/NFS3 mount data * - fills in the mount root filehandle @@ -501,7 +518,7 @@ static int nfs_validate_mount_data(struct nfs_mount_data *data, #endif /* CONFIG_NFS_V3 */ /* We now require that the mount process passes the remote address */ - if (data->addr.sin_addr.s_addr == INADDR_ANY) { + if (!nfs_verify_server_address((struct sockaddr *) &data->addr)) { dprintk("%s: mount program didn't pass remote address!\n", __FUNCTION__); return -EINVAL; @@ -819,13 +836,12 @@ static int nfs4_get_sb(struct file_system_type *fs_type, if (copy_from_user(&addr, data->host_addr, sizeof(addr))) return -EFAULT; - if (addr.sin_family != AF_INET || - addr.sin_addr.s_addr == INADDR_ANY - ) { 
+ if (!nfs_verify_server_address((struct sockaddr *) &addr)) { dprintk("%s: mount program didn't pass remote IP address!\n", __FUNCTION__); return -EINVAL; } + /* RFC3530: The default port for NFS is 2049 */ if (addr.sin_port == 0) addr.sin_port = htons(NFS_PORT); -- cgit v1.2.3-70-g09d2 From 5df36e78da9db1c5f02b429116ed98902bcc75e5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Jul 2007 12:12:56 -0400 Subject: NFS: Clean up nfs_validate_mount_data Move error handling code out of the main code path. The switch statement was also improperly indented, according to Documentation/CodingStyle. This prepares nfs_validate_mount_data for the addition of option string parsing. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 125 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 67 insertions(+), 58 deletions(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 7f5bc28ea8d..baf75e9bd3f 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -470,77 +470,86 @@ static int nfs_verify_server_address(struct sockaddr *addr) static int nfs_validate_mount_data(struct nfs_mount_data *data, struct nfs_fh *mntfh) { - if (data == NULL) { - dprintk("%s: missing data argument\n", __FUNCTION__); - return -EINVAL; - } - - if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) { - dprintk("%s: bad mount version\n", __FUNCTION__); - return -EINVAL; - } + if (data == NULL) + goto out_no_data; switch (data->version) { - case 1: - data->namlen = 0; - case 2: - data->bsize = 0; - case 3: - if (data->flags & NFS_MOUNT_VER3) { - dprintk("%s: mount structure version %d does not support NFSv3\n", - __FUNCTION__, - data->version); - return -EINVAL; - } - data->root.size = NFS2_FHSIZE; - memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); - case 4: - if (data->flags & NFS_MOUNT_SECFLAVOUR) { - dprintk("%s: mount structure version %d does not support strong security\n", - __FUNCTION__, - data->version); - return 
-EINVAL; - } - case 5: - memset(data->context, 0, sizeof(data->context)); + case 1: + data->namlen = 0; + case 2: + data->bsize = 0; + case 3: + if (data->flags & NFS_MOUNT_VER3) + goto out_no_v3; + data->root.size = NFS2_FHSIZE; + memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); + case 4: + if (data->flags & NFS_MOUNT_SECFLAVOUR) + goto out_no_sec; + case 5: + memset(data->context, 0, sizeof(data->context)); + case 6: + if (data->flags & NFS_MOUNT_VER3) + mntfh->size = data->root.size; + else + mntfh->size = NFS2_FHSIZE; + + if (mntfh->size > sizeof(mntfh->data)) + goto out_invalid_fh; + + memcpy(mntfh->data, data->root.data, mntfh->size); + if (mntfh->size < sizeof(mntfh->data)) + memset(mntfh->data + mntfh->size, 0, + sizeof(mntfh->data) - mntfh->size); + break; + default: + goto out_bad_version; } - /* Set the pseudoflavor */ if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) data->pseudoflavor = RPC_AUTH_UNIX; #ifndef CONFIG_NFS_V3 - /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */ - if (data->flags & NFS_MOUNT_VER3) { - dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__); - return -EPROTONOSUPPORT; - } -#endif /* CONFIG_NFS_V3 */ + if (data->flags & NFS_MOUNT_VER3) + goto out_v3_not_compiled; +#endif /* !CONFIG_NFS_V3 */ - /* We now require that the mount process passes the remote address */ - if (!nfs_verify_server_address((struct sockaddr *) &data->addr)) { - dprintk("%s: mount program didn't pass remote address!\n", - __FUNCTION__); - return -EINVAL; - } + if (!nfs_verify_server_address((struct sockaddr *) &data->addr)) + goto out_no_address; - /* Prepare the root filehandle */ - if (data->flags & NFS_MOUNT_VER3) - mntfh->size = data->root.size; - else - mntfh->size = NFS2_FHSIZE; + return 0; - if (mntfh->size > sizeof(mntfh->data)) { - dprintk("%s: invalid root filehandle\n", __FUNCTION__); - return -EINVAL; - } +out_no_data: + dfprintk(MOUNT, "NFS: mount program didn't pass any mount data\n"); + return -EINVAL; - memcpy(mntfh->data, 
data->root.data, mntfh->size); - if (mntfh->size < sizeof(mntfh->data)) - memset(mntfh->data + mntfh->size, 0, - sizeof(mntfh->data) - mntfh->size); +out_no_v3: + dfprintk(MOUNT, "NFS: nfs_mount_data version %d does not support v3\n", + data->version); + return -EINVAL; - return 0; +out_no_sec: + dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n"); + return -EINVAL; + +out_bad_version: + dfprintk(MOUNT, "NFS: bad nfs_mount_data version %d\n", + data->version); + return -EINVAL; + +#ifndef CONFIG_NFS_V3 +out_v3_not_compiled: + dfprintk(MOUNT, "NFS: NFSv3 is not compiled into kernel\n"); + return -EPROTONOSUPPORT; +#endif /* !CONFIG_NFS_V3 */ + +out_no_address: + dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n"); + return -EINVAL; + +out_invalid_fh: + dfprintk(MOUNT, "NFS: invalid root filehandle\n"); + return -EINVAL; } /* -- cgit v1.2.3-70-g09d2 From f0768ebd09385551277fcbc8b28c29eb491bf9e2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Jul 2007 12:13:01 -0400 Subject: NFS: Introduce nfs4_validate_mount_data Refactor NFSv4 mount processing to break out mount data validation in the same way it's broken out in the NFSv2/v3 mount path.
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 153 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 89 insertions(+), 64 deletions(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index baf75e9bd3f..ed3ec4477a0 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -813,6 +813,89 @@ static void nfs4_fill_super(struct super_block *sb) nfs_initialise_sb(sb); } +/* + * Validate NFSv4 mount options + */ +static int nfs4_validate_mount_data(struct nfs4_mount_data **options, + const char *dev_name, + struct sockaddr_in *addr, + rpc_authflavor_t *authflavour, + char **hostname, + char **mntpath, + char **ip_addr) +{ + struct nfs4_mount_data *data = *options; + char *c; + + if (data == NULL) + goto out_no_data; + + switch (data->version) { + case 1: + if (data->host_addrlen != sizeof(*addr)) + goto out_no_address; + if (copy_from_user(addr, data->host_addr, sizeof(*addr))) + return -EFAULT; + if (addr->sin_port == 0) + addr->sin_port = htons(NFS_PORT); + if (!nfs_verify_server_address((struct sockaddr *) addr)) + goto out_no_address; + + switch (data->auth_flavourlen) { + case 0: + *authflavour = RPC_AUTH_UNIX; + break; + case 1: + if (copy_from_user(authflavour, data->auth_flavours, + sizeof(*authflavour))) + return -EFAULT; + break; + default: + goto out_inval_auth; + } + + c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); + if (IS_ERR(c)) + return PTR_ERR(c); + *hostname = c; + + c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN); + if (IS_ERR(c)) + return PTR_ERR(c); + *mntpath = c; + dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *mntpath); + + c = strndup_user(data->client_addr.data, 16); + if (IS_ERR(c)) + return PTR_ERR(c); + *ip_addr = c; + + break; + default: + goto out_bad_version; + } + + return 0; + +out_no_data: + dfprintk(MOUNT, "NFS4: mount program didn't pass any mount data\n"); + return -EINVAL; + +out_inval_auth: + dfprintk(MOUNT, "NFS4: Invalid number of RPC auth flavours %d\n", + 
data->auth_flavourlen); + return -EINVAL; + +out_no_address: + dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n"); + return -EINVAL; + +out_bad_version: + dfprintk(MOUNT, "NFS4: bad nfs_mount_data version %d\n", + data->version); + return -EINVAL; +} + /* * Get the superblock for an NFS4 mountpoint */ @@ -826,68 +909,14 @@ static int nfs4_get_sb(struct file_system_type *fs_type, rpc_authflavor_t authflavour; struct nfs_fh mntfh; struct dentry *mntroot; - char *p, *mntpath = NULL, *hostname = NULL, *ip_addr = NULL; + char *mntpath = NULL, *hostname = NULL, *ip_addr = NULL; int error; - if (data == NULL) { - dprintk("%s: missing data argument\n", __FUNCTION__); - return -EINVAL; - } - if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) { - dprintk("%s: bad mount version\n", __FUNCTION__); - return -EINVAL; - } - - /* We now require that the mount process passes the remote address */ - if (data->host_addrlen != sizeof(addr)) - return -EINVAL; - - if (copy_from_user(&addr, data->host_addr, sizeof(addr))) - return -EFAULT; - - if (!nfs_verify_server_address((struct sockaddr *) &addr)) { - dprintk("%s: mount program didn't pass remote IP address!\n", - __FUNCTION__); - return -EINVAL; - } - - /* RFC3530: The default port for NFS is 2049 */ - if (addr.sin_port == 0) - addr.sin_port = htons(NFS_PORT); - - /* Grab the authentication type */ - authflavour = RPC_AUTH_UNIX; - if (data->auth_flavourlen != 0) { - if (data->auth_flavourlen != 1) { - dprintk("%s: Invalid number of RPC auth flavours %d.\n", - __FUNCTION__, data->auth_flavourlen); - error = -EINVAL; - goto out; - } - - if (copy_from_user(&authflavour, data->auth_flavours, - sizeof(authflavour))) { - error = -EFAULT; - goto out; - } - } - - p = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); - if (IS_ERR(p)) - goto out_err; - hostname = p; - - p = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN); - if (IS_ERR(p)) - goto out_err; - mntpath = p; - - dprintk("MNTPATH: %s\n", mntpath); - - 
p = strndup_user(data->client_addr.data, 16); - if (IS_ERR(p)) - goto out_err; - ip_addr = p; + /* Validate the mount data */ + error = nfs4_validate_mount_data(&data, dev_name, &addr, &authflavour, + &hostname, &mntpath, &ip_addr); + if (error < 0) + goto out; /* Get a volume representation */ server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr, @@ -932,10 +961,6 @@ out: kfree(hostname); return error; -out_err: - error = PTR_ERR(p); - goto out; - out_free: nfs_free_server(server); goto out; -- cgit v1.2.3-70-g09d2 From cce63cd6374e6f1b4ea897ece1454feb13993d7c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Jul 2007 12:13:12 -0400 Subject: SUNRPC: Rename rpcb_getport_external routine In preparation for handling NFS mount option parsing in the kernel, rename rpcb_getport_external as rpcb_getport_sync, and make it available always (instead of only when CONFIG_ROOT_NFS is enabled). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfsroot.c | 2 +- include/linux/sunrpc/clnt.h | 7 ++----- net/sunrpc/rpcb_clnt.c | 21 +++++++++++---------- 3 files changed, 14 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 49d1008ce1d..f0db4703b1c 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -428,7 +428,7 @@ static int __init root_nfs_getport(int program, int version, int proto) printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n", program, version, NIPQUAD(servaddr)); set_sockaddr(&sin, servaddr, 0); - return rpcb_getport_external(&sin, program, version, proto); + return rpcb_getport_sync(&sin, program, version, proto); } diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 097984b0385..b28d919c775 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -120,8 +120,10 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); void rpc_shutdown_client(struct rpc_clnt
*); void rpc_release_client(struct rpc_clnt *); + int rpcb_register(u32, u32, int, unsigned short, int *); void rpcb_getport(struct rpc_task *); +int rpcb_getport_sync(struct sockaddr_in *, __u32, __u32, int); void rpc_call_setup(struct rpc_task *, struct rpc_message *, int); @@ -141,10 +143,5 @@ void rpc_force_rebind(struct rpc_clnt *); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); char * rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); -/* - * Helper function for NFSroot support - */ -int rpcb_getport_external(struct sockaddr_in *, __u32, __u32, int); - #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 9a20f380ab0..fc881a675eb 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -12,6 +12,8 @@ * Copyright (C) 1996, Olaf Kirch */ +#include + #include #include #include @@ -247,21 +249,20 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) return error; } -#ifdef CONFIG_ROOT_NFS /** - * rpcb_getport_external - obtain the port for an RPC service on a given host + * rpcb_getport_sync - obtain the port for an RPC service on a given host * @sin: address of remote peer * @prog: RPC program number to bind * @vers: RPC version number to bind * @prot: transport protocol to use to make this request * * Called from outside the RPC client in a synchronous task context. + * Uses default timeout parameters specified by underlying transport. * - * For now, this supports only version 2 queries, but is used only by - * mount_clnt for NFS_ROOT. 
+ * XXX: Needs to support IPv6, and rpcbind versions 3 and 4 */ -int rpcb_getport_external(struct sockaddr_in *sin, __u32 prog, - __u32 vers, int prot) +int rpcb_getport_sync(struct sockaddr_in *sin, __u32 prog, + __u32 vers, int prot) { struct rpcbind_args map = { .r_prog = prog, @@ -278,10 +279,10 @@ int rpcb_getport_external(struct sockaddr_in *sin, __u32 prog, char hostname[40]; int status; - dprintk("RPC: rpcb_getport_external(%u.%u.%u.%u, %u, %u, %d)\n", - NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); + dprintk("RPC: %s(" NIPQUAD_FMT ", %u, %u, %d)\n", + __FUNCTION__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); - sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr)); + sprintf(hostname, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr)); rpcb_clnt = rpcb_create(hostname, (struct sockaddr *)sin, prot, 2, 0); if (IS_ERR(rpcb_clnt)) return PTR_ERR(rpcb_clnt); @@ -296,7 +297,7 @@ int rpcb_getport_external(struct sockaddr_in *sin, __u32 prog, } return status; } -#endif +EXPORT_SYMBOL_GPL(rpcb_getport_sync); /** * rpcb_getport - obtain the port for a given RPC service on a given host -- cgit v1.2.3-70-g09d2 From 43780b87fa799ae65df11d89d4539d8d6a7c67eb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Jul 2007 12:13:22 -0400 Subject: SUNRPC: Add a convenient default for the hostname when calling rpc_create() A couple of callers just use a stringified IP address for the rpc client's hostname. Move the logic for constructing this into rpc_create(), so it can be shared. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/mount_clnt.c | 13 ++++--------- fs/nfsd/nfs4callback.c | 6 ------ net/sunrpc/clnt.c | 13 +++++++++++++ 3 files changed, 17 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 878d7a5cb6d..2892ec84306 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -28,8 +28,7 @@ #define MOUNT_UMNT 3 */ -static struct rpc_clnt * mnt_create(char *, struct sockaddr_in *, - int, int); +static struct rpc_clnt * mnt_create(struct sockaddr_in *, int, int); static struct rpc_program mnt_program; struct mnt_fhstatus { @@ -52,14 +51,12 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh, .rpc_argp = path, .rpc_resp = &result, }; - char hostname[32]; int status; dprintk("NFS: nfs_mount(%08x:%s)\n", (unsigned)ntohl(addr->sin_addr.s_addr), path); - sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr->sin_addr.s_addr)); - mnt_clnt = mnt_create(hostname, addr, version, protocol); + mnt_clnt = mnt_create(addr, version, protocol); if (IS_ERR(mnt_clnt)) return PTR_ERR(mnt_clnt); @@ -73,15 +70,13 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh, return status < 0? status : (result.status? 
-EACCES : 0); } -static struct rpc_clnt * -mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version, - int protocol) +static struct rpc_clnt *mnt_create(struct sockaddr_in *srvaddr, int version, + int protocol) { struct rpc_create_args args = { .protocol = protocol, .address = (struct sockaddr *)srvaddr, .addrsize = sizeof(*srvaddr), - .servername = hostname, .program = &mnt_program, .version = version, .authflavor = RPC_AUTH_UNIX, diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 6b1b487db1e..5443c52b57a 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -394,7 +394,6 @@ nfsd4_probe_callback(struct nfs4_client *clp) .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], .rpc_argp = clp, }; - char clientname[16]; int status; if (atomic_read(&cb->cb_set)) @@ -417,11 +416,6 @@ nfsd4_probe_callback(struct nfs4_client *clp) memset(program->stats, 0, sizeof(cb->cb_stat)); program->stats->program = program; - /* Just here to make some printk's more useful: */ - snprintf(clientname, sizeof(clientname), - "%u.%u.%u.%u", NIPQUAD(addr.sin_addr)); - args.servername = clientname; - /* Create RPC client */ cb->cb_client = rpc_create(&args); if (IS_ERR(cb->cb_client)) { diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index e1553cf2a68..0d9b5275fac 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -234,12 +234,25 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) { struct rpc_xprt *xprt; struct rpc_clnt *clnt; + char servername[20]; xprt = xprt_create_transport(args->protocol, args->address, args->addrsize, args->timeout); if (IS_ERR(xprt)) return (struct rpc_clnt *)xprt; + /* + * If the caller chooses not to specify a hostname, whip + * up a string representation of the passed-in address. 
+ */ + if (args->servername == NULL) { + struct sockaddr_in *addr = + (struct sockaddr_in *) &args->address; + snprintf(servername, sizeof(servername), NIPQUAD_FMT, + NIPQUAD(addr->sin_addr.s_addr)); + args->servername = servername; + } + /* * By default, kernel RPC client connects from a reserved port. * CAP_NET_BIND_SERVICE will not be set for unprivileged requesters, -- cgit v1.2.3-70-g09d2 From 3ea97309e6b18bce200211b3f9188e8023321adc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Jul 2007 12:13:27 -0400 Subject: NFS: Remake nfsroot_mount as a permanent part of NFS client In preparation for supporting NFSv2 and NFSv3 mount option handling in the kernel NFS client, convert mount_clnt.c to be a permanent part of the NFS client, instead of built only when CONFIG_ROOT_NFS is enabled. In addition, we also replace the "struct sockaddr_in *" argument with something more generic, to help support IPv6 at some later point. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 4 ++-- fs/nfs/mount_clnt.c | 55 +++++++++++++++++++++++++------------------------- fs/nfs/nfsroot.c | 3 ++- include/linux/nfs_fs.h | 5 ++--- 4 files changed, 34 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index f4580b44eef..b55cb236cf7 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -6,8 +6,8 @@ obj-$(CONFIG_NFS_FS) += nfs.o nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \ pagelist.o proc.o read.o symlink.o unlink.o \ - write.o namespace.o -nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o + write.o namespace.o mount_clnt.o +nfs-$(CONFIG_ROOT_NFS) += nfsroot.o nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 2892ec84306..ee4899d9662 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -18,7 +18,7 @@ #include #ifdef 
RPC_DEBUG -# define NFSDBG_FACILITY NFSDBG_ROOT +# define NFSDBG_FACILITY NFSDBG_MOUNT #endif /* @@ -28,7 +28,6 @@ #define MOUNT_UMNT 3 */ -static struct rpc_clnt * mnt_create(struct sockaddr_in *, int, int); static struct rpc_program mnt_program; struct mnt_fhstatus { @@ -36,14 +35,21 @@ struct mnt_fhstatus { struct nfs_fh * fh; }; -/* - * Obtain an NFS file handle for the given host and path +/** + * nfs_mount - Obtain an NFS file handle for the given host and path + * @addr: pointer to server's address + * @len: size of server's address + * @hostname: name of server host, or NULL + * @path: pointer to string containing export path to mount + * @version: mount version to use for this request + * @protocol: transport protocol to use for thie request + * @fh: pointer to location to place returned file handle + * + * Uses default timeout parameters specified by underlying transport. */ -int -nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh, - int version, int protocol) +int nfs_mount(struct sockaddr *addr, size_t len, char *hostname, char *path, + int version, int protocol, struct nfs_fh *fh) { - struct rpc_clnt *mnt_clnt; struct mnt_fhstatus result = { .fh = fh }; @@ -51,12 +57,23 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh, .rpc_argp = path, .rpc_resp = &result, }; + struct rpc_create_args args = { + .protocol = protocol, + .address = addr, + .addrsize = len, + .servername = hostname, + .program = &mnt_program, + .version = version, + .authflavor = RPC_AUTH_UNIX, + .flags = RPC_CLNT_CREATE_INTR, + }; + struct rpc_clnt *mnt_clnt; int status; - dprintk("NFS: nfs_mount(%08x:%s)\n", - (unsigned)ntohl(addr->sin_addr.s_addr), path); + dprintk("NFS: sending MNT request for %s:%s\n", + (hostname ? 
hostname : "server"), path); - mnt_clnt = mnt_create(addr, version, protocol); + mnt_clnt = rpc_create(&args); if (IS_ERR(mnt_clnt)) return PTR_ERR(mnt_clnt); @@ -70,22 +87,6 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh, return status < 0? status : (result.status? -EACCES : 0); } -static struct rpc_clnt *mnt_create(struct sockaddr_in *srvaddr, int version, - int protocol) -{ - struct rpc_create_args args = { - .protocol = protocol, - .address = (struct sockaddr *)srvaddr, - .addrsize = sizeof(*srvaddr), - .program = &mnt_program, - .version = version, - .authflavor = RPC_AUTH_UNIX, - .flags = RPC_CLNT_CREATE_INTR, - }; - - return rpc_create(&args); -} - /* * XDR encode/decode functions for MOUNT */ diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index f0db4703b1c..3490322d114 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -496,7 +496,8 @@ static int __init root_nfs_get_handle(void) NFS_MNT3_VERSION : NFS_MNT_VERSION; set_sockaddr(&sin, servaddr, htons(mount_port)); - status = nfsroot_mount(&sin, nfs_path, &fh, version, protocol); + status = nfs_mount((struct sockaddr *) &sin, sizeof(sin), NULL, + nfs_path, version, protocol, &fh); if (status < 0) printk(KERN_ERR "Root-NFS: Server returned error %d " "while mounting %s\n", status, nfs_path); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 04f659f1e56..c098ae194f7 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -494,10 +494,9 @@ static inline void nfs3_forget_cached_acls(struct inode *inode) /* * linux/fs/mount_clnt.c - * (Used only by nfsroot module) */ -extern int nfsroot_mount(struct sockaddr_in *, char *, struct nfs_fh *, - int, int); +extern int nfs_mount(struct sockaddr *, size_t, char *, char *, + int, int, struct nfs_fh *); /* * inline functions -- cgit v1.2.3-70-g09d2 From 19207231c9874899e7511507ebb1b88d648a5743 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Jul 2007 12:13:33 -0400 Subject: NFS: Clean up in-kernel NFS 
mount Clean up white space and coding conventions. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/mount_clnt.c | 83 ++++++++++++++++++++++++----------------------------- 1 file changed, 37 insertions(+), 46 deletions(-) (limited to 'fs') diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index ee4899d9662..961dc524327 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -1,7 +1,5 @@ /* - * linux/fs/nfs/mount_clnt.c - * - * MOUNT client to support NFSroot. + * In-kernel MOUNT protocol client * * Copyright (C) 1997, Olaf Kirch */ @@ -21,18 +19,11 @@ # define NFSDBG_FACILITY NFSDBG_MOUNT #endif -/* -#define MOUNT_PROGRAM 100005 -#define MOUNT_VERSION 1 -#define MOUNT_MNT 1 -#define MOUNT_UMNT 3 - */ - static struct rpc_program mnt_program; struct mnt_fhstatus { - unsigned int status; - struct nfs_fh * fh; + u32 status; + struct nfs_fh *fh; }; /** @@ -90,8 +81,8 @@ int nfs_mount(struct sockaddr *addr, size_t len, char *hostname, char *path, /* * XDR encode/decode functions for MOUNT */ -static int -xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, const char *path) +static int xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, + const char *path) { p = xdr_encode_string(p, path); @@ -99,8 +90,8 @@ xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, const char *path) return 0; } -static int -xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res) +static int xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, + struct mnt_fhstatus *res) { struct nfs_fh *fh = res->fh; @@ -111,8 +102,8 @@ xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res) return 0; } -static int -xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res) +static int xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, + struct mnt_fhstatus *res) { struct nfs_fh *fh = res->fh; @@ -131,53 +122,53 @@ xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res) #define 
MNT_fhstatus_sz (1 + 8) #define MNT_fhstatus3_sz (1 + 16) -static struct rpc_procinfo mnt_procedures[] = { -[MNTPROC_MNT] = { - .p_proc = MNTPROC_MNT, - .p_encode = (kxdrproc_t) xdr_encode_dirpath, - .p_decode = (kxdrproc_t) xdr_decode_fhstatus, - .p_arglen = MNT_dirpath_sz, - .p_replen = MNT_fhstatus_sz, - .p_statidx = MNTPROC_MNT, - .p_name = "MOUNT", +static struct rpc_procinfo mnt_procedures[] = { + [MNTPROC_MNT] = { + .p_proc = MNTPROC_MNT, + .p_encode = (kxdrproc_t) xdr_encode_dirpath, + .p_decode = (kxdrproc_t) xdr_decode_fhstatus, + .p_arglen = MNT_dirpath_sz, + .p_replen = MNT_fhstatus_sz, + .p_statidx = MNTPROC_MNT, + .p_name = "MOUNT", }, }; static struct rpc_procinfo mnt3_procedures[] = { -[MOUNTPROC3_MNT] = { - .p_proc = MOUNTPROC3_MNT, - .p_encode = (kxdrproc_t) xdr_encode_dirpath, - .p_decode = (kxdrproc_t) xdr_decode_fhstatus3, - .p_arglen = MNT_dirpath_sz, - .p_replen = MNT_fhstatus3_sz, - .p_statidx = MOUNTPROC3_MNT, - .p_name = "MOUNT", + [MOUNTPROC3_MNT] = { + .p_proc = MOUNTPROC3_MNT, + .p_encode = (kxdrproc_t) xdr_encode_dirpath, + .p_decode = (kxdrproc_t) xdr_decode_fhstatus3, + .p_arglen = MNT_dirpath_sz, + .p_replen = MNT_fhstatus3_sz, + .p_statidx = MOUNTPROC3_MNT, + .p_name = "MOUNT", }, }; -static struct rpc_version mnt_version1 = { - .number = 1, - .nrprocs = 2, - .procs = mnt_procedures +static struct rpc_version mnt_version1 = { + .number = 1, + .nrprocs = 2, + .procs = mnt_procedures, }; -static struct rpc_version mnt_version3 = { - .number = 3, - .nrprocs = 2, - .procs = mnt3_procedures +static struct rpc_version mnt_version3 = { + .number = 3, + .nrprocs = 2, + .procs = mnt3_procedures, }; -static struct rpc_version * mnt_version[] = { +static struct rpc_version *mnt_version[] = { NULL, &mnt_version1, NULL, &mnt_version3, }; -static struct rpc_stat mnt_stats; +static struct rpc_stat mnt_stats; -static struct rpc_program mnt_program = { +static struct rpc_program mnt_program = { .name = "mount", .number = NFS_MNT_PROGRAM, .nrvers = 
ARRAY_SIZE(mnt_version), -- cgit v1.2.3-70-g09d2 From 013a8c1ab5a214c608e12b602770449fb6b15a81 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Jul 2007 12:13:38 -0400 Subject: NFS: Improve debugging output in NFS in-kernel mount client Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/mount_clnt.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 961dc524327..8afd9f7e7a9 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -66,7 +66,7 @@ int nfs_mount(struct sockaddr *addr, size_t len, char *hostname, char *path, mnt_clnt = rpc_create(&args); if (IS_ERR(mnt_clnt)) - return PTR_ERR(mnt_clnt); + goto out_clnt_err; if (version == NFS_MNT3_VERSION) msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT]; @@ -75,7 +75,31 @@ int nfs_mount(struct sockaddr *addr, size_t len, char *hostname, char *path, status = rpc_call_sync(mnt_clnt, &msg, 0); rpc_shutdown_client(mnt_clnt); - return status < 0? status : (result.status? -EACCES : 0); + + if (status < 0) + goto out_call_err; + if (result.status != 0) + goto out_mnt_err; + + dprintk("NFS: MNT request succeeded\n"); + status = 0; + +out: + return status; + +out_clnt_err: + status = PTR_ERR(mnt_clnt); + dprintk("NFS: failed to create RPC client, status=%d\n", status); + goto out; + +out_call_err: + dprintk("NFS: failed to start MNT request, status=%d\n", status); + goto out; + +out_mnt_err: + dprintk("NFS: MNT server returned result %d\n", result.status); + status = -EACCES; + goto out; } /* -- cgit v1.2.3-70-g09d2 From bf0fd7680f1cf31b9cbabcc037a204548e2c866d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Jul 2007 12:13:44 -0400 Subject: NFS: Add enums and match tables for mount option parsing This generic infrastructure works for both NFS and NFSv4 mounts. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 528 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 528 insertions(+) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ed3ec4477a0..7e56411e55f 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -57,6 +58,164 @@ #define NFSDBG_FACILITY NFSDBG_VFS + +struct nfs_parsed_mount_data { + int flags; + int rsize, wsize; + int timeo, retrans; + int acregmin, acregmax, + acdirmin, acdirmax; + int namlen; + unsigned int bsize; + unsigned int auth_flavor_len; + rpc_authflavor_t auth_flavors[1]; + char *client_address; + + struct { + struct sockaddr_in address; + unsigned int program; + unsigned int version; + unsigned short port; + int protocol; + } mount_server; + + struct { + struct sockaddr_in address; + char *hostname; + char *export_path; + unsigned int program; + int protocol; + } nfs_server; +}; + +enum { + /* Mount options that take no arguments */ + Opt_soft, Opt_hard, + Opt_intr, Opt_nointr, + Opt_posix, Opt_noposix, + Opt_cto, Opt_nocto, + Opt_ac, Opt_noac, + Opt_lock, Opt_nolock, + Opt_v2, Opt_v3, + Opt_udp, Opt_tcp, + Opt_acl, Opt_noacl, + Opt_rdirplus, Opt_nordirplus, + + /* Mount options that take integer arguments */ + Opt_port, + Opt_rsize, Opt_wsize, Opt_bsize, + Opt_timeo, Opt_retrans, + Opt_acregmin, Opt_acregmax, + Opt_acdirmin, Opt_acdirmax, + Opt_actimeo, + Opt_namelen, + Opt_mountport, + Opt_mountprog, Opt_mountvers, + Opt_nfsprog, Opt_nfsvers, + + /* Mount options that take string arguments */ + Opt_sec, Opt_proto, Opt_mountproto, + Opt_addr, Opt_mounthost, Opt_clientaddr, + + /* Mount options that are ignored */ + Opt_userspace, Opt_deprecated, + + Opt_err +}; + +static match_table_t nfs_mount_option_tokens = { + { Opt_userspace, "bg" }, + { Opt_userspace, "fg" }, + { Opt_soft, "soft" }, + { Opt_hard, "hard" }, + { Opt_intr, "intr" }, + { Opt_nointr, 
"nointr" }, + { Opt_posix, "posix" }, + { Opt_noposix, "noposix" }, + { Opt_cto, "cto" }, + { Opt_nocto, "nocto" }, + { Opt_ac, "ac" }, + { Opt_noac, "noac" }, + { Opt_lock, "lock" }, + { Opt_nolock, "nolock" }, + { Opt_v2, "v2" }, + { Opt_v3, "v3" }, + { Opt_udp, "udp" }, + { Opt_tcp, "tcp" }, + { Opt_acl, "acl" }, + { Opt_noacl, "noacl" }, + { Opt_rdirplus, "rdirplus" }, + { Opt_nordirplus, "nordirplus" }, + + { Opt_port, "port=%u" }, + { Opt_rsize, "rsize=%u" }, + { Opt_wsize, "wsize=%u" }, + { Opt_bsize, "bsize=%u" }, + { Opt_timeo, "timeo=%u" }, + { Opt_retrans, "retrans=%u" }, + { Opt_acregmin, "acregmin=%u" }, + { Opt_acregmax, "acregmax=%u" }, + { Opt_acdirmin, "acdirmin=%u" }, + { Opt_acdirmax, "acdirmax=%u" }, + { Opt_actimeo, "actimeo=%u" }, + { Opt_userspace, "retry=%u" }, + { Opt_namelen, "namlen=%u" }, + { Opt_mountport, "mountport=%u" }, + { Opt_mountprog, "mountprog=%u" }, + { Opt_mountvers, "mountvers=%u" }, + { Opt_nfsprog, "nfsprog=%u" }, + { Opt_nfsvers, "nfsvers=%u" }, + { Opt_nfsvers, "vers=%u" }, + + { Opt_sec, "sec=%s" }, + { Opt_proto, "proto=%s" }, + { Opt_mountproto, "mountproto=%s" }, + { Opt_addr, "addr=%s" }, + { Opt_clientaddr, "clientaddr=%s" }, + { Opt_mounthost, "mounthost=%s" }, + + { Opt_err, NULL } +}; + +enum { + Opt_xprt_udp, Opt_xprt_tcp, + + Opt_xprt_err +}; + +static match_table_t nfs_xprt_protocol_tokens = { + { Opt_xprt_udp, "udp" }, + { Opt_xprt_tcp, "tcp" }, + + { Opt_xprt_err, NULL } +}; + +enum { + Opt_sec_none, Opt_sec_sys, + Opt_sec_krb5, Opt_sec_krb5i, Opt_sec_krb5p, + Opt_sec_lkey, Opt_sec_lkeyi, Opt_sec_lkeyp, + Opt_sec_spkm, Opt_sec_spkmi, Opt_sec_spkmp, + + Opt_sec_err +}; + +static match_table_t nfs_secflavor_tokens = { + { Opt_sec_none, "none" }, + { Opt_sec_none, "null" }, + { Opt_sec_sys, "sys" }, + + { Opt_sec_krb5, "krb5" }, + { Opt_sec_krb5i, "krb5i" }, + { Opt_sec_krb5p, "krb5p" }, + + { Opt_sec_lkey, "lkey" }, + { Opt_sec_lkeyi, "lkeyi" }, + { Opt_sec_lkeyp, "lkeyp" }, + + { Opt_sec_err, NULL } +}; + + 
static void nfs_umount_begin(struct vfsmount *, int); static int nfs_statfs(struct dentry *, struct kstatfs *); static int nfs_show_options(struct seq_file *, struct vfsmount *); @@ -463,6 +622,375 @@ static int nfs_verify_server_address(struct sockaddr *addr) return 0; } +/* + * Error-check and convert a string of mount options from user space into + * a data structure + */ +static int nfs_parse_mount_options(char *raw, + struct nfs_parsed_mount_data *mnt) +{ + char *p, *string; + + if (!raw) { + dfprintk(MOUNT, "NFS: mount options string was NULL.\n"); + return 1; + } + dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw); + + while ((p = strsep(&raw, ",")) != NULL) { + substring_t args[MAX_OPT_ARGS]; + int option, token; + + if (!*p) + continue; + + dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", p); + + token = match_token(p, nfs_mount_option_tokens, args); + switch (token) { + case Opt_soft: + mnt->flags |= NFS_MOUNT_SOFT; + break; + case Opt_hard: + mnt->flags &= ~NFS_MOUNT_SOFT; + break; + case Opt_intr: + mnt->flags |= NFS_MOUNT_INTR; + break; + case Opt_nointr: + mnt->flags &= ~NFS_MOUNT_INTR; + break; + case Opt_posix: + mnt->flags |= NFS_MOUNT_POSIX; + break; + case Opt_noposix: + mnt->flags &= ~NFS_MOUNT_POSIX; + break; + case Opt_cto: + mnt->flags &= ~NFS_MOUNT_NOCTO; + break; + case Opt_nocto: + mnt->flags |= NFS_MOUNT_NOCTO; + break; + case Opt_ac: + mnt->flags &= ~NFS_MOUNT_NOAC; + break; + case Opt_noac: + mnt->flags |= NFS_MOUNT_NOAC; + break; + case Opt_lock: + mnt->flags &= ~NFS_MOUNT_NONLM; + break; + case Opt_nolock: + mnt->flags |= NFS_MOUNT_NONLM; + break; + case Opt_v2: + mnt->flags &= ~NFS_MOUNT_VER3; + break; + case Opt_v3: + mnt->flags |= NFS_MOUNT_VER3; + break; + case Opt_udp: + mnt->flags &= ~NFS_MOUNT_TCP; + mnt->nfs_server.protocol = IPPROTO_UDP; + mnt->timeo = 7; + mnt->retrans = 5; + break; + case Opt_tcp: + mnt->flags |= NFS_MOUNT_TCP; + mnt->nfs_server.protocol = IPPROTO_TCP; + mnt->timeo = 600; + mnt->retrans = 2; + 
break; + case Opt_acl: + mnt->flags &= ~NFS_MOUNT_NOACL; + break; + case Opt_noacl: + mnt->flags |= NFS_MOUNT_NOACL; + break; + case Opt_rdirplus: + mnt->flags &= ~NFS_MOUNT_NORDIRPLUS; + break; + case Opt_nordirplus: + mnt->flags |= NFS_MOUNT_NORDIRPLUS; + break; + + case Opt_port: + if (match_int(args, &option)) + return 0; + if (option < 0 || option > 65535) + return 0; + mnt->nfs_server.address.sin_port = htonl(option); + break; + case Opt_rsize: + if (match_int(args, &mnt->rsize)) + return 0; + break; + case Opt_wsize: + if (match_int(args, &mnt->wsize)) + return 0; + break; + case Opt_bsize: + if (match_int(args, &option)) + return 0; + if (option < 0) + return 0; + mnt->bsize = option; + break; + case Opt_timeo: + if (match_int(args, &mnt->timeo)) + return 0; + break; + case Opt_retrans: + if (match_int(args, &mnt->retrans)) + return 0; + break; + case Opt_acregmin: + if (match_int(args, &mnt->acregmin)) + return 0; + break; + case Opt_acregmax: + if (match_int(args, &mnt->acregmax)) + return 0; + break; + case Opt_acdirmin: + if (match_int(args, &mnt->acdirmin)) + return 0; + break; + case Opt_acdirmax: + if (match_int(args, &mnt->acdirmax)) + return 0; + break; + case Opt_actimeo: + if (match_int(args, &option)) + return 0; + if (option < 0) + return 0; + mnt->acregmin = + mnt->acregmax = + mnt->acdirmin = + mnt->acdirmax = option; + break; + case Opt_namelen: + if (match_int(args, &mnt->namlen)) + return 0; + break; + case Opt_mountport: + if (match_int(args, &option)) + return 0; + if (option < 0 || option > 65535) + return 0; + mnt->mount_server.port = option; + break; + case Opt_mountprog: + if (match_int(args, &option)) + return 0; + if (option < 0) + return 0; + mnt->mount_server.program = option; + break; + case Opt_mountvers: + if (match_int(args, &option)) + return 0; + if (option < 0) + return 0; + mnt->mount_server.version = option; + break; + case Opt_nfsprog: + if (match_int(args, &option)) + return 0; + if (option < 0) + return 0; + 
mnt->nfs_server.program = option; + break; + case Opt_nfsvers: + if (match_int(args, &option)) + return 0; + switch (option) { + case 2: + mnt->flags &= ~NFS_MOUNT_VER3; + break; + case 3: + mnt->flags |= NFS_MOUNT_VER3; + break; + default: + goto out_unrec_vers; + } + break; + + case Opt_sec: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + token = match_token(string, nfs_secflavor_tokens, args); + kfree(string); + + /* + * The flags setting is for v2/v3. The flavor_len + * setting is for v4. v2/v3 also need to know the + * difference between NULL and UNIX. + */ + switch (token) { + case Opt_sec_none: + mnt->flags &= ~NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 0; + mnt->auth_flavors[0] = RPC_AUTH_NULL; + break; + case Opt_sec_sys: + mnt->flags &= ~NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 0; + mnt->auth_flavors[0] = RPC_AUTH_UNIX; + break; + case Opt_sec_krb5: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5; + break; + case Opt_sec_krb5i: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I; + break; + case Opt_sec_krb5p: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P; + break; + case Opt_sec_lkey: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY; + break; + case Opt_sec_lkeyi: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI; + break; + case Opt_sec_lkeyp: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP; + break; + case Opt_sec_spkm: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM; + break; + case Opt_sec_spkmi: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = 
RPC_AUTH_GSS_SPKMI; + break; + case Opt_sec_spkmp: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP; + break; + default: + goto out_unrec_sec; + } + break; + case Opt_proto: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + token = match_token(string, + nfs_xprt_protocol_tokens, args); + kfree(string); + + switch (token) { + case Opt_udp: + mnt->flags &= ~NFS_MOUNT_TCP; + mnt->nfs_server.protocol = IPPROTO_UDP; + mnt->timeo = 7; + mnt->retrans = 5; + break; + case Opt_tcp: + mnt->flags |= NFS_MOUNT_TCP; + mnt->nfs_server.protocol = IPPROTO_TCP; + mnt->timeo = 600; + mnt->retrans = 2; + break; + default: + goto out_unrec_xprt; + } + break; + case Opt_mountproto: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + token = match_token(string, + nfs_xprt_protocol_tokens, args); + kfree(string); + + switch (token) { + case Opt_udp: + mnt->mount_server.protocol = IPPROTO_UDP; + break; + case Opt_tcp: + mnt->mount_server.protocol = IPPROTO_TCP; + break; + default: + goto out_unrec_xprt; + } + break; + case Opt_addr: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + mnt->nfs_server.address.sin_family = AF_INET; + mnt->nfs_server.address.sin_addr.s_addr = + in_aton(string); + kfree(string); + break; + case Opt_clientaddr: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + mnt->client_address = string; + break; + case Opt_mounthost: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + mnt->mount_server.address.sin_family = AF_INET; + mnt->mount_server.address.sin_addr.s_addr = + in_aton(string); + kfree(string); + break; + + case Opt_userspace: + case Opt_deprecated: + break; + + default: + goto out_unknown; + } + } + + return 1; + +out_nomem: + printk(KERN_INFO "NFS: not enough memory to parse option\n"); + return 0; + +out_unrec_vers: + printk(KERN_INFO "NFS: unrecognized NFS version number\n"); + return 
0; + +out_unrec_xprt: + printk(KERN_INFO "NFS: unrecognized transport protocol\n"); + return 0; + +out_unrec_sec: + printk(KERN_INFO "NFS: unrecognized security flavor\n"); + return 0; + +out_unknown: + printk(KERN_INFO "NFS: unknown mount option: %s\n", p); + return 0; +} + /* * Validate the NFS2/NFS3 mount data * - fills in the mount root filehandle -- cgit v1.2.3-70-g09d2 From 0076d7b7bab580ca2e94637d351fa7cd357743a8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Jul 2007 12:13:49 -0400 Subject: NFS: Introduce generic mount client API For NFSv2 and v3 mounts, the first step is to contact the server's MOUNTD and request the file handle for the root of the mounted share. Add a function to the NFS client that handles this operation. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 7e56411e55f..48db52a7067 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -991,6 +991,63 @@ out_unknown: return 0; } +/* + * Use the remote server's MOUNT service to request the NFS file handle + * corresponding to the provided path. + */ +static int nfs_try_mount(struct nfs_parsed_mount_data *args, + struct nfs_fh *root_fh) +{ + struct sockaddr_in sin; + int status; + + if (args->mount_server.version == 0) { + if (args->flags & NFS_MOUNT_VER3) + args->mount_server.version = NFS_MNT3_VERSION; + else + args->mount_server.version = NFS_MNT_VERSION; + } + + /* + * Construct the mount server's address. 
+ */ + if (args->mount_server.address.sin_addr.s_addr != INADDR_ANY) + sin = args->mount_server.address; + else + sin = args->nfs_server.address; + if (args->mount_server.port == 0) { + status = rpcb_getport_sync(&sin, + args->mount_server.program, + args->mount_server.version, + args->mount_server.protocol); + if (status < 0) + goto out_err; + sin.sin_port = htons(status); + } else + sin.sin_port = htons(args->mount_server.port); + + /* + * Now ask the mount server to map our export path + * to a file handle. + */ + status = nfs_mount((struct sockaddr *) &sin, + sizeof(sin), + args->nfs_server.hostname, + args->nfs_server.export_path, + args->mount_server.version, + args->mount_server.protocol, + root_fh); + if (status < 0) + goto out_err; + + return status; + +out_err: + dfprintk(MOUNT, "NFS: unable to contact server on host " + NIPQUAD_FMT "\n", NIPQUAD(sin.sin_addr.s_addr)); + return status; +} + /* * Validate the NFS2/NFS3 mount data * - fills in the mount root filehandle -- cgit v1.2.3-70-g09d2 From 136d558ce766967fe3cbf54c3351aba261b5d53b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Jul 2007 12:13:54 -0400 Subject: NFS: Add final pieces to support in-kernel mount option parsing Hook in final components required for supporting in-kernel mount option parsing for NFSv2 and NFSv3 mounts. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 95 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 48db52a7067..757aa3b7e64 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1051,10 +1051,28 @@ out_err: /* * Validate the NFS2/NFS3 mount data * - fills in the mount root filehandle + * + * For option strings, user space handles the following behaviors: + * + * + DNS: mapping server host name to IP address ("addr=" option) + * + * + failure mode: how to behave if a mount request can't be handled + * immediately ("fg/bg" option) + * + * + retry: how often to retry a mount request ("retry=" option) + * + * + breaking back: trying proto=udp after proto=tcp, v2 after v3, + * mountproto=tcp after mountproto=udp, and so on + * + * XXX: as far as I can tell, changing the NFS program number is not + * supported in the NFS client. */ -static int nfs_validate_mount_data(struct nfs_mount_data *data, - struct nfs_fh *mntfh) +static int nfs_validate_mount_data(struct nfs_mount_data **options, + struct nfs_fh *mntfh, + const char *dev_name) { + struct nfs_mount_data *data = *options; + if (data == NULL) goto out_no_data; @@ -1087,8 +1105,78 @@ static int nfs_validate_mount_data(struct nfs_mount_data *data, memset(mntfh->data + mntfh->size, 0, sizeof(mntfh->data) - mntfh->size); break; - default: - goto out_bad_version; + default: { + unsigned int len; + char *c; + int status; + struct nfs_parsed_mount_data args = { + .flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP), + .rsize = NFS_MAX_FILE_IO_SIZE, + .wsize = NFS_MAX_FILE_IO_SIZE, + .timeo = 600, + .retrans = 2, + .acregmin = 3, + .acregmax = 60, + .acdirmin = 30, + .acdirmax = 60, + .mount_server.protocol = IPPROTO_UDP, + .mount_server.program = NFS_MNT_PROGRAM, + .nfs_server.protocol = IPPROTO_TCP, + .nfs_server.program = NFS_PROGRAM, + }; + + if 
(nfs_parse_mount_options((char *) *options, &args) == 0) + return -EINVAL; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) + return -ENOMEM; + + /* + * NB: after this point, caller will free "data" + * if we return an error + */ + *options = data; + + c = strchr(dev_name, ':'); + if (c == NULL) + return -EINVAL; + len = c - dev_name - 1; + if (len > sizeof(data->hostname)) + return -EINVAL; + strncpy(data->hostname, dev_name, len); + args.nfs_server.hostname = data->hostname; + + c++; + if (strlen(c) > NFS_MAXPATHLEN) + return -EINVAL; + args.nfs_server.export_path = c; + + status = nfs_try_mount(&args, mntfh); + if (status) + return -EINVAL; + + /* + * Translate to nfs_mount_data, which nfs_fill_super + * can deal with. + */ + data->version = 6; + data->flags = args.flags; + data->rsize = args.rsize; + data->wsize = args.wsize; + data->timeo = args.timeo; + data->retrans = args.retrans; + data->acregmin = args.acregmin; + data->acregmax = args.acregmax; + data->acdirmin = args.acdirmin; + data->acdirmax = args.acdirmax; + data->addr = args.nfs_server.address; + data->namlen = args.namlen; + data->bsize = args.bsize; + data->pseudoflavor = args.auth_flavors[0]; + + break; + } } if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) @@ -1117,11 +1205,6 @@ out_no_sec: dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n"); return -EINVAL; -out_bad_version: - dfprintk(MOUNT, "NFS: bad nfs_mount_data version %d\n", - data->version); - return -EINVAL; - #ifndef CONFIG_NFS_V3 out_v3_not_compiled: dfprintk(MOUNT, "NFS: NFSv3 is not compiled into kernel\n"); @@ -1242,7 +1325,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, int error; /* Validate the mount data */ - error = nfs_validate_mount_data(data, &mntfh); + error = nfs_validate_mount_data(&data, &mntfh, dev_name); if (error < 0) goto out; @@ -1283,6 +1366,8 @@ static int nfs_get_sb(struct file_system_type *fs_type, error = 0; out: + if (data != raw_data) + kfree(data); return 
error; out_err_nosb: -- cgit v1.2.3-70-g09d2 From 8007122520f0a3599bdc4df47358a5d83b2574aa Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Jul 2007 12:13:59 -0400 Subject: NFS: Add support for mounting NFSv4 file systems with string options Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 91 ++++++++++++++++++++++++++++++++++++++++++---- include/linux/nfs4_mount.h | 2 +- 2 files changed, 85 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 757aa3b7e64..064e69d2fdd 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1541,8 +1541,90 @@ static int nfs4_validate_mount_data(struct nfs4_mount_data **options, *ip_addr = c; break; - default: - goto out_bad_version; + default: { + unsigned int len; + struct nfs_parsed_mount_data args = { + .rsize = NFS_MAX_FILE_IO_SIZE, + .wsize = NFS_MAX_FILE_IO_SIZE, + .timeo = 600, + .retrans = 2, + .acregmin = 3, + .acregmax = 60, + .acdirmin = 30, + .acdirmax = 60, + .nfs_server.protocol = IPPROTO_TCP, + }; + + if (nfs_parse_mount_options((char *) *options, &args) == 0) + return -EINVAL; + + if (!nfs_verify_server_address((struct sockaddr *) + &args.nfs_server.address)) + return -EINVAL; + *addr = args.nfs_server.address; + + switch (args.auth_flavor_len) { + case 0: + *authflavour = RPC_AUTH_UNIX; + break; + case 1: + *authflavour = (rpc_authflavor_t) args.auth_flavors[0]; + break; + default: + goto out_inval_auth; + } + + /* + * Translate to nfs4_mount_data, which nfs4_fill_super + * can deal with. 
+ */ + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) + return -ENOMEM; + *options = data; + + data->version = 1; + data->flags = args.flags & NFS4_MOUNT_FLAGMASK; + data->rsize = args.rsize; + data->wsize = args.wsize; + data->timeo = args.timeo; + data->retrans = args.retrans; + data->acregmin = args.acregmin; + data->acregmax = args.acregmax; + data->acdirmin = args.acdirmin; + data->acdirmax = args.acdirmax; + data->proto = args.nfs_server.protocol; + + /* + * Split "dev_name" into "hostname:mntpath". + */ + c = strchr(dev_name, ':'); + if (c == NULL) + return -EINVAL; + /* while calculating len, pretend ':' is '\0' */ + len = c - dev_name; + if (len > NFS4_MAXNAMLEN) + return -EINVAL; + *hostname = kzalloc(len, GFP_KERNEL); + if (*hostname == NULL) + return -ENOMEM; + strncpy(*hostname, dev_name, len - 1); + + c++; /* step over the ':' */ + len = strlen(c); + if (len > NFS4_MAXPATHLEN) + return -EINVAL; + *mntpath = kzalloc(len + 1, GFP_KERNEL); + if (*mntpath == NULL) + return -ENOMEM; + strncpy(*mntpath, c, len); + + dprintk("MNTPATH: %s\n", *mntpath); + + *ip_addr = args.client_address; + + break; + } } return 0; @@ -1559,11 +1641,6 @@ out_inval_auth: out_no_address: dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n"); return -EINVAL; - -out_bad_version: - dfprintk(MOUNT, "NFS4: bad nfs_mount_data version %d\n", - data->version); - return -EINVAL; } /* diff --git a/include/linux/nfs4_mount.h b/include/linux/nfs4_mount.h index 26b4c83f831..d8d7480e5a4 100644 --- a/include/linux/nfs4_mount.h +++ b/include/linux/nfs4_mount.h @@ -65,6 +65,6 @@ struct nfs4_mount_data { #define NFS4_MOUNT_NOCTO 0x0010 /* 1 */ #define NFS4_MOUNT_NOAC 0x0020 /* 1 */ #define NFS4_MOUNT_STRICTLOCK 0x1000 /* 1 */ -#define NFS4_MOUNT_FLAGMASK 0xFFFF +#define NFS4_MOUNT_FLAGMASK 0x1033 #endif -- cgit v1.2.3-70-g09d2 From 75180df2ed467866ada839fe73cf7cc7d75c0a22 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 16 May 2007 16:53:28 -0400 Subject: 
NFS: Add the mount option "nosharecache" Prior to David Howells' mount changes in 2.6.18, users who mounted different directories which happened to be from the same filesystem on the server would get different super blocks, and hence could choose different mount options. As long as there were no hard linked files that crossed from one subtree to another, this was quite safe. After those changes, if the two directories are on the same filesystem (have the same 'fsid'), they will share the same super block, and hence the same mount options. Add a flag to allow users to elect not to share the NFS super block with another mount point, even if the fsids are the same. This will allow users to set different mount options for the two different super blocks, as was previously possible. It is still up to the user to ensure that there are no cache coherency issues when doing this, however the default behaviour will be to share super blocks whenever two paths result in the same fsid. Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 43 ++++++++++++++++++++++++++++++++++++++----- include/linux/nfs4_mount.h | 3 ++- include/linux/nfs_mount.h | 1 + 3 files changed, 41 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 064e69d2fdd..1b555cd41e3 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -100,6 +100,7 @@ enum { Opt_udp, Opt_tcp, Opt_acl, Opt_noacl, Opt_rdirplus, Opt_nordirplus, + Opt_sharecache, Opt_nosharecache, /* Mount options that take integer arguments */ Opt_port, @@ -146,6 +147,8 @@ static match_table_t nfs_mount_option_tokens = { { Opt_noacl, "noacl" }, { Opt_rdirplus, "rdirplus" }, { Opt_nordirplus, "nordirplus" }, + { Opt_sharecache, "sharecache" }, + { Opt_nosharecache, "nosharecache" }, { Opt_port, "port=%u" }, { Opt_rsize, "rsize=%u" }, @@ -450,6 +453,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, { NFS_MOUNT_NONLM, ",nolock", "" }, { NFS_MOUNT_NOACL, ",noacl", "" }, {
NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" }, + { NFS_MOUNT_UNSHARED, ",nosharecache", ""}, { 0, NULL, NULL } }; const struct proc_nfs_info *nfs_infop; @@ -714,6 +718,12 @@ static int nfs_parse_mount_options(char *raw, case Opt_nordirplus: mnt->flags |= NFS_MOUNT_NORDIRPLUS; break; + case Opt_sharecache: + mnt->flags &= ~NFS_MOUNT_UNSHARED; + break; + case Opt_nosharecache: + mnt->flags |= NFS_MOUNT_UNSHARED; + break; case Opt_port: if (match_int(args, &option)) @@ -1309,6 +1319,9 @@ static int nfs_compare_super(struct super_block *sb, void *data) if (old->nfs_client != server->nfs_client) return 0; + /* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */ + if (old->flags & NFS_MOUNT_UNSHARED) + return 0; if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0) return 0; return 1; @@ -1322,6 +1335,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, struct nfs_fh mntfh; struct nfs_mount_data *data = raw_data; struct dentry *mntroot; + int (*compare_super)(struct super_block *, void *) = nfs_compare_super; int error; /* Validate the mount data */ @@ -1336,8 +1350,11 @@ static int nfs_get_sb(struct file_system_type *fs_type, goto out; } + if (server->flags & NFS_MOUNT_UNSHARED) + compare_super = NULL; + /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(fs_type, nfs_compare_super, nfs_set_super, server); + s = sget(fs_type, compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_err_nosb; @@ -1402,6 +1419,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, struct super_block *s; struct nfs_server *server; struct dentry *mntroot; + int (*compare_super)(struct super_block *, void *) = nfs_compare_super; int error; dprintk("--> nfs_xdev_get_sb()\n"); @@ -1413,8 +1431,11 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, goto out_err_noserver; } + if (server->flags & NFS_MOUNT_UNSHARED) + compare_super = NULL; + /* Get a superblock - note that 
we may end up sharing one that already exists */ - s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + s = sget(&nfs_fs_type, compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_err_nosb; @@ -1657,6 +1678,7 @@ static int nfs4_get_sb(struct file_system_type *fs_type, struct nfs_fh mntfh; struct dentry *mntroot; char *mntpath = NULL, *hostname = NULL, *ip_addr = NULL; + int (*compare_super)(struct super_block *, void *) = nfs_compare_super; int error; /* Validate the mount data */ @@ -1673,8 +1695,11 @@ static int nfs4_get_sb(struct file_system_type *fs_type, goto out; } + if (server->flags & NFS4_MOUNT_UNSHARED) + compare_super = NULL; + /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(fs_type, nfs_compare_super, nfs_set_super, server); + s = sget(fs_type, compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_free; @@ -1740,6 +1765,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, struct super_block *s; struct nfs_server *server; struct dentry *mntroot; + int (*compare_super)(struct super_block *, void *) = nfs_compare_super; int error; dprintk("--> nfs4_xdev_get_sb()\n"); @@ -1751,8 +1777,11 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, goto out_err_noserver; } + if (server->flags & NFS4_MOUNT_UNSHARED) + compare_super = NULL; + /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + s = sget(&nfs_fs_type, compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_err_nosb; @@ -1807,6 +1836,7 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags, struct nfs_server *server; struct dentry *mntroot; struct nfs_fh mntfh; + int (*compare_super)(struct super_block *, void *) = nfs_compare_super; int error; dprintk("--> nfs4_referral_get_sb()\n"); @@ 
-1818,8 +1848,11 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags, goto out_err_noserver; } + if (server->flags & NFS4_MOUNT_UNSHARED) + compare_super = NULL; + /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + s = sget(&nfs_fs_type, compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_err_nosb; diff --git a/include/linux/nfs4_mount.h b/include/linux/nfs4_mount.h index d8d7480e5a4..a0dcf665565 100644 --- a/include/linux/nfs4_mount.h +++ b/include/linux/nfs4_mount.h @@ -65,6 +65,7 @@ struct nfs4_mount_data { #define NFS4_MOUNT_NOCTO 0x0010 /* 1 */ #define NFS4_MOUNT_NOAC 0x0020 /* 1 */ #define NFS4_MOUNT_STRICTLOCK 0x1000 /* 1 */ -#define NFS4_MOUNT_FLAGMASK 0x1033 +#define NFS4_MOUNT_UNSHARED 0x8000 /* 1 */ +#define NFS4_MOUNT_FLAGMASK 0x9033 #endif diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h index 0b82a17c705..a3ade89a64d 100644 --- a/include/linux/nfs_mount.h +++ b/include/linux/nfs_mount.h @@ -62,6 +62,7 @@ struct nfs_mount_data { #define NFS_MOUNT_STRICTLOCK 0x1000 /* reserved for NFSv4 */ #define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 */ #define NFS_MOUNT_NORDIRPLUS 0x4000 /* 5 */ +#define NFS_MOUNT_UNSHARED 0x8000 /* 5 */ #define NFS_MOUNT_FLAGMASK 0xFFFF #endif -- cgit v1.2.3-70-g09d2 From 275a5d24bf56b2d9dd4644c54a56366b89a028f1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 16 May 2007 16:53:28 -0400 Subject: NFS: Error when mounting the same filesystem with different options Unless the user sets the NFS_MOUNT_UNSHARED mount flag (the "nosharecache" mount option), we should return EBUSY if the filesystem is already mounted on a superblock that has set conflicting mount options.
Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 1b555cd41e3..a2b1af89ca1 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1317,7 +1317,9 @@ static int nfs_compare_super(struct super_block *sb, void *data) { struct nfs_server *server = data, *old = NFS_SB(sb); - if (old->nfs_client != server->nfs_client) + if (memcmp(&old->nfs_client->cl_addr, + &server->nfs_client->cl_addr, + sizeof(old->nfs_client->cl_addr)) != 0) return 0; /* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */ if (old->flags & NFS_MOUNT_UNSHARED) @@ -1327,6 +1329,39 @@ static int nfs_compare_super(struct super_block *sb, void *data) return 1; } +#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS) + +static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags) +{ + const struct nfs_server *a = s->s_fs_info; + const struct rpc_clnt *clnt_a = a->client; + const struct rpc_clnt *clnt_b = b->client; + + if ((s->s_flags & NFS_MS_MASK) != (flags & NFS_MS_MASK)) + goto Ebusy; + if (a->nfs_client != b->nfs_client) + goto Ebusy; + if (a->flags != b->flags) + goto Ebusy; + if (a->wsize != b->wsize) + goto Ebusy; + if (a->rsize != b->rsize) + goto Ebusy; + if (a->acregmin != b->acregmin) + goto Ebusy; + if (a->acregmax != b->acregmax) + goto Ebusy; + if (a->acdirmin != b->acdirmin) + goto Ebusy; + if (a->acdirmax != b->acdirmax) + goto Ebusy; + if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor) + goto Ebusy; + return 0; +Ebusy: + return -EBUSY; +} + static int nfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) { @@ -1361,8 +1396,11 @@ static int nfs_get_sb(struct file_system_type *fs_type, } if (s->s_fs_info != server) { + error = nfs_compare_mount_options(s, server, flags); nfs_free_server(server); 
server = NULL; + if (error < 0) + goto error_splat_super; } if (!s->s_root) { @@ -1442,8 +1480,11 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, } if (s->s_fs_info != server) { + error = nfs_compare_mount_options(s, server, flags); nfs_free_server(server); server = NULL; + if (error < 0) + goto error_splat_super; } if (!s->s_root) { -- cgit v1.2.3-70-g09d2 From 6f2e64d3e1f661095e274c9d9d47e3f39a6cf1c0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 6 Jul 2007 10:53:21 -0400 Subject: NFSv4: Make the NFS state model work with the nosharedcache mount option Consider the case where the user has mounted the remote filesystem server:/foo on the two local directories /bar and /baz using the nosharedcache mount option. The files /bar/file and /baz/file are represented by different inodes in the local namespace, but refer to the same file /foo/file on the server. Consider the case where a process opens both /bar/file and /baz/file, then closes /bar/file: because the nfs4_state is not shared between /bar/file and /baz/file, the kernel will see that the nfs4_state for /bar/file is no longer referenced, so it will send off a CLOSE rpc call. Unless the open_owners differ, then that CLOSE call will invalidate the open state on /baz/file too. Conclusion: we cannot share open state owners between two different non-shared mount instances of the same filesystem. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4state.c | 22 ++++++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index dd1aa2b598c..6c028e734fe 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -83,6 +83,7 @@ struct nfs_unique_id { struct nfs4_state_owner { struct nfs_unique_id so_owner_id; struct nfs_client *so_client; + struct nfs_server *so_server; struct rb_node so_client_node; struct rpc_cred *so_cred; /* Associated cred */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 523cc2cbb5e..e9662ba81d8 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -156,8 +156,9 @@ static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id) } static struct nfs4_state_owner * -nfs4_find_state_owner(struct nfs_client *clp, struct rpc_cred *cred) +nfs4_find_state_owner(struct nfs_server *server, struct rpc_cred *cred) { + struct nfs_client *clp = server->nfs_client; struct rb_node **p = &clp->cl_state_owners.rb_node, *parent = NULL; struct nfs4_state_owner *sp, *res = NULL; @@ -166,6 +167,14 @@ nfs4_find_state_owner(struct nfs_client *clp, struct rpc_cred *cred) parent = *p; sp = rb_entry(parent, struct nfs4_state_owner, so_client_node); + if (server < sp->so_server) { + p = &parent->rb_left; + continue; + } + if (server > sp->so_server) { + p = &parent->rb_right; + continue; + } if (cred < sp->so_cred) p = &parent->rb_left; else if (cred > sp->so_cred) @@ -190,6 +199,14 @@ nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new) parent = *p; sp = rb_entry(parent, struct nfs4_state_owner, so_client_node); + if (new->so_server < sp->so_server) { + p = &parent->rb_left; + continue; + } + if (new->so_server > sp->so_server) { + p = &parent->rb_right; + continue; + } if (new->so_cred < sp->so_cred) p = &parent->rb_left; else if (new->so_cred > sp->so_cred) @@ -260,7 +277,7 @@ struct nfs4_state_owner 
*nfs4_get_state_owner(struct nfs_server *server, struct struct nfs4_state_owner *sp, *new; spin_lock(&clp->cl_lock); - sp = nfs4_find_state_owner(clp, cred); + sp = nfs4_find_state_owner(server, cred); spin_unlock(&clp->cl_lock); if (sp != NULL) return sp; @@ -268,6 +285,7 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct if (new == NULL) return NULL; new->so_client = clp; + new->so_server = server; new->so_cred = cred; spin_lock(&clp->cl_lock); sp = nfs4_insert_state_owner(clp, new); -- cgit v1.2.3-70-g09d2 From c98451bdb2f3e6d6cc1e03adad641e9497512b49 Mon Sep 17 00:00:00 2001 From: Frank van Maarseveen Date: Mon, 9 Jul 2007 22:25:29 +0200 Subject: NLM: fix source address of callback to client Use the destination address of the original NLM request as the source address in callbacks to the client. Signed-off-by: Frank van Maarseveen Signed-off-by: Trond Myklebust --- fs/lockd/host.c | 27 +++++++++++++++++++-------- include/linux/lockd/lockd.h | 1 + 2 files changed, 20 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index c252a1c9585..572601e98dc 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -44,9 +44,8 @@ static struct nsm_handle * nsm_find(const struct sockaddr_in *sin, */ static struct nlm_host * nlm_lookup_host(int server, const struct sockaddr_in *sin, - int proto, int version, - const char *hostname, - int hostname_len) + int proto, int version, const char *hostname, + int hostname_len, const struct sockaddr_in *ssin) { struct hlist_head *chain; struct hlist_node *pos; @@ -54,7 +53,9 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin, struct nsm_handle *nsm = NULL; int hash; - dprintk("lockd: nlm_lookup_host(%u.%u.%u.%u, p=%d, v=%d, my role=%s, name=%.*s)\n", + dprintk("lockd: nlm_lookup_host("NIPQUAD_FMT"->"NIPQUAD_FMT + ", p=%d, v=%d, my role=%s, name=%.*s)\n", + NIPQUAD(ssin->sin_addr.s_addr), NIPQUAD(sin->sin_addr.s_addr), proto, version, server? 
"server" : "client", hostname_len, @@ -91,6 +92,8 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin, continue; if (host->h_server != server) continue; + if (!nlm_cmp_addr(&host->h_saddr, ssin)) + continue; /* Move to head of hash chain. */ hlist_del(&host->h_hash); @@ -118,6 +121,7 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin, host->h_name = nsm->sm_name; host->h_addr = *sin; host->h_addr.sin_port = 0; /* ouch! */ + host->h_saddr = *ssin; host->h_version = version; host->h_proto = proto; host->h_rpcclnt = NULL; @@ -174,8 +178,10 @@ struct nlm_host * nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version, const char *hostname, int hostname_len) { + struct sockaddr_in ssin = {0}; + return nlm_lookup_host(0, sin, proto, version, - hostname, hostname_len); + hostname, hostname_len, &ssin); } /* @@ -185,9 +191,12 @@ struct nlm_host * nlmsvc_lookup_host(struct svc_rqst *rqstp, const char *hostname, int hostname_len) { + struct sockaddr_in ssin = {0}; + + ssin.sin_addr = rqstp->rq_daddr.addr; return nlm_lookup_host(1, svc_addr_in(rqstp), rqstp->rq_prot, rqstp->rq_vers, - hostname, hostname_len); + hostname, hostname_len, &ssin); } /* @@ -198,8 +207,9 @@ nlm_bind_host(struct nlm_host *host) { struct rpc_clnt *clnt; - dprintk("lockd: nlm_bind_host(%08x)\n", - (unsigned)ntohl(host->h_addr.sin_addr.s_addr)); + dprintk("lockd: nlm_bind_host("NIPQUAD_FMT"->"NIPQUAD_FMT")\n", + NIPQUAD(host->h_saddr.sin_addr), + NIPQUAD(host->h_addr.sin_addr)); /* Lock host handle */ mutex_lock(&host->h_mutex); @@ -226,6 +236,7 @@ nlm_bind_host(struct nlm_host *host) .protocol = host->h_proto, .address = (struct sockaddr *)&host->h_addr, .addrsize = sizeof(host->h_addr), + .saddress = (struct sockaddr *)&host->h_saddr, .timeout = &timeparms, .servername = host->h_name, .program = &nlm_program, diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 05707e2fcca..e2d1ce36b36 100644 --- a/include/linux/lockd/lockd.h +++ 
b/include/linux/lockd/lockd.h @@ -39,6 +39,7 @@ struct nlm_host { struct hlist_node h_hash; /* doubly linked list */ struct sockaddr_in h_addr; /* peer address */ + struct sockaddr_in h_saddr; /* our address (optional) */ struct rpc_clnt * h_rpcclnt; /* RPC client to talk to peer */ char * h_name; /* remote hostname */ u32 h_version; /* interface version */ -- cgit v1.2.3-70-g09d2 From 137d6acaa64afa4cf3d977417424e731ea04705a Mon Sep 17 00:00:00 2001 From: Frank Filz Date: Mon, 9 Jul 2007 15:32:29 -0700 Subject: NFSv4: Make sure unlock is really an unlock when cancelling a lock I ran into a curious issue when a lock is being canceled. The cancellation results in a lock request to the vfs layer instead of an unlock request. This is particularly insidious when the process that owns the lock is exiting. In that case, sometimes the erroneous lock is applied AFTER the process has entered zombie state, preventing the lock from ever being released. Eventually other processes block on the lock causing a slow degradation of the system. In the 2.6.16 kernel this was investigated on, the problem is compounded by the fact that the cl_sem is held while blocking on the vfs lock, which results in most processes accessing the nfs file system in question hanging. In more detail, here is how the situation occurs: first _nfs4_do_setlk(): static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *fl, int reclaim) ... ret = nfs4_wait_for_completion_rpc_task(task); if (ret == 0) { ... } else data->cancelled = 1; then nfs4_lock_release(): static void nfs4_lock_release(void *calldata) ... if (data->cancelled != 0) { struct rpc_task *task; task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp, data->arg.lock_seqid); The problem is the same file_lock that was passed in to _nfs4_do_setlk() gets passed to nfs4_do_unlck() from nfs4_lock_release(). So the type is still F_RDLCK or F_WRLCK, not F_UNLCK.
At some point, when cancelling the lock, the type needs to be changed to F_UNLCK. It seemed easiest to do that in nfs4_do_unlck(), but it could be done in nfs4_lock_release(). The concern I had with doing it there was if something still needed the original file_lock, though it turns out the original file_lock still needs to be modified by nfs4_do_unlck() because nfs4_do_unlck() uses the original file_lock to pass to the vfs layer, and a copy of the original file_lock for the RPC request. It seems like the simplest solution is to force all situations where nfs4_do_unlck() is being used to result in an unlock, so with that in mind, I made the following change: Signed-off-by: Frank Filz Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ba86ec654c2..fee2da856c9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3275,6 +3275,11 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, { struct nfs4_unlockdata *data; + /* Ensure this is an unlock - when canceling a lock, the + * canceled lock is passed in, and it won't be an unlock. + */ + fl->fl_type = F_UNLCK; + data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid); if (data == NULL) { nfs_free_seqid(seqid); -- cgit v1.2.3-70-g09d2