diff options
Diffstat (limited to 'fs/nfs')
-rw-r--r-- | fs/nfs/Kconfig | 1 | ||||
-rw-r--r-- | fs/nfs/cache_lib.c | 9 | ||||
-rw-r--r-- | fs/nfs/cache_lib.h | 2 | ||||
-rw-r--r-- | fs/nfs/callback_proc.c | 57 | ||||
-rw-r--r-- | fs/nfs/client.c | 7 | ||||
-rw-r--r-- | fs/nfs/delegation.c | 16 | ||||
-rw-r--r-- | fs/nfs/dir.c | 87 | ||||
-rw-r--r-- | fs/nfs/direct.c | 6 | ||||
-rw-r--r-- | fs/nfs/file.c | 18 | ||||
-rw-r--r-- | fs/nfs/fscache.c | 8 | ||||
-rw-r--r-- | fs/nfs/idmap.c | 7 | ||||
-rw-r--r-- | fs/nfs/inode.c | 20 | ||||
-rw-r--r-- | fs/nfs/internal.h | 13 | ||||
-rw-r--r-- | fs/nfs/namespace.c | 2 | ||||
-rw-r--r-- | fs/nfs/nfs3acl.c | 10 | ||||
-rw-r--r-- | fs/nfs/nfs4_fs.h | 15 | ||||
-rw-r--r-- | fs/nfs/nfs4filelayout.c | 81 | ||||
-rw-r--r-- | fs/nfs/nfs4filelayout.h | 17 | ||||
-rw-r--r-- | fs/nfs/nfs4filelayoutdev.c | 452 | ||||
-rw-r--r-- | fs/nfs/nfs4proc.c | 285 | ||||
-rw-r--r-- | fs/nfs/nfs4state.c | 21 | ||||
-rw-r--r-- | fs/nfs/nfs4xdr.c | 249 | ||||
-rw-r--r-- | fs/nfs/objlayout/objio_osd.c | 20 | ||||
-rw-r--r-- | fs/nfs/pagelist.c | 73 | ||||
-rw-r--r-- | fs/nfs/pnfs.c | 221 | ||||
-rw-r--r-- | fs/nfs/pnfs.h | 74 | ||||
-rw-r--r-- | fs/nfs/pnfs_dev.c | 64 | ||||
-rw-r--r-- | fs/nfs/read.c | 174 | ||||
-rw-r--r-- | fs/nfs/super.c | 16 | ||||
-rw-r--r-- | fs/nfs/unlink.c | 37 | ||||
-rw-r--r-- | fs/nfs/write.c | 183 |
31 files changed, 1653 insertions, 592 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 81515545ba7..2cde5d95475 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -77,6 +77,7 @@ config NFS_V4 config NFS_V4_1 bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" depends on NFS_FS && NFS_V4 && EXPERIMENTAL + select SUNRPC_BACKCHANNEL select PNFS_FILE_LAYOUT help This option enables support for minor version 1 of the NFSv4 protocol diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c index 84690319e62..c98b439332f 100644 --- a/fs/nfs/cache_lib.c +++ b/fs/nfs/cache_lib.c @@ -113,19 +113,18 @@ int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq) int nfs_cache_register(struct cache_detail *cd) { - struct nameidata nd; struct vfsmount *mnt; + struct path path; int ret; mnt = rpc_get_mount(); if (IS_ERR(mnt)) return PTR_ERR(mnt); - ret = vfs_path_lookup(mnt->mnt_root, mnt, "/cache", 0, &nd); + ret = vfs_path_lookup(mnt->mnt_root, mnt, "/cache", 0, &path); if (ret) goto err; - ret = sunrpc_cache_register_pipefs(nd.path.dentry, - cd->name, 0600, cd); - path_put(&nd.path); + ret = sunrpc_cache_register_pipefs(path.dentry, cd->name, 0600, cd); + path_put(&path); if (!ret) return ret; err: diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h index 76f856e284e..7cf6cafcc00 100644 --- a/fs/nfs/cache_lib.h +++ b/fs/nfs/cache_lib.h @@ -6,7 +6,7 @@ #include <linux/completion.h> #include <linux/sunrpc/cache.h> -#include <asm/atomic.h> +#include <linux/atomic.h> /* * Deferred request handling diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index d4d1954e9bb..74780f9f852 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -111,6 +111,7 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf static u32 initiate_file_draining(struct nfs_client *clp, struct cb_layoutrecallargs *args) { + struct nfs_server *server; struct pnfs_layout_hdr *lo; struct inode *ino; bool found = false; @@ -118,21 +119,28 @@ static u32 initiate_file_draining(struct nfs_client 
*clp, LIST_HEAD(free_me_list); spin_lock(&clp->cl_lock); - list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) { - if (nfs_compare_fh(&args->cbl_fh, - &NFS_I(lo->plh_inode)->fh)) - continue; - ino = igrab(lo->plh_inode); - if (!ino) - continue; - found = true; - /* Without this, layout can be freed as soon - * as we release cl_lock. - */ - get_layout_hdr(lo); - break; + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + list_for_each_entry(lo, &server->layouts, plh_layouts) { + if (nfs_compare_fh(&args->cbl_fh, + &NFS_I(lo->plh_inode)->fh)) + continue; + ino = igrab(lo->plh_inode); + if (!ino) + continue; + found = true; + /* Without this, layout can be freed as soon + * as we release cl_lock. + */ + get_layout_hdr(lo); + break; + } + if (found) + break; } + rcu_read_unlock(); spin_unlock(&clp->cl_lock); + if (!found) return NFS4ERR_NOMATCHING_LAYOUT; @@ -154,6 +162,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, static u32 initiate_bulk_draining(struct nfs_client *clp, struct cb_layoutrecallargs *args) { + struct nfs_server *server; struct pnfs_layout_hdr *lo; struct inode *ino; u32 rv = NFS4ERR_NOMATCHING_LAYOUT; @@ -167,18 +176,24 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, }; spin_lock(&clp->cl_lock); - list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) { + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { if ((args->cbl_recall_type == RETURN_FSID) && - memcmp(&NFS_SERVER(lo->plh_inode)->fsid, - &args->cbl_fsid, sizeof(struct nfs_fsid))) - continue; - if (!igrab(lo->plh_inode)) + memcmp(&server->fsid, &args->cbl_fsid, + sizeof(struct nfs_fsid))) continue; - get_layout_hdr(lo); - BUG_ON(!list_empty(&lo->plh_bulk_recall)); - list_add(&lo->plh_bulk_recall, &recall_list); + + list_for_each_entry(lo, &server->layouts, plh_layouts) { + if (!igrab(lo->plh_inode)) + continue; + get_layout_hdr(lo); + BUG_ON(!list_empty(&lo->plh_bulk_recall)); + 
list_add(&lo->plh_bulk_recall, &recall_list); + } } + rcu_read_unlock(); spin_unlock(&clp->cl_lock); + list_for_each_entry_safe(lo, tmp, &recall_list, plh_bulk_recall) { ino = lo->plh_inode; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index b3dc2b88b65..19ea7d9c75e 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -188,9 +188,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ cred = rpc_lookup_machine_cred(); if (!IS_ERR(cred)) clp->cl_machine_cred = cred; -#if defined(CONFIG_NFS_V4_1) - INIT_LIST_HEAD(&clp->cl_layouts); -#endif nfs_fscache_get_client_cookie(clp); return clp; @@ -293,6 +290,7 @@ static void nfs_free_client(struct nfs_client *clp) nfs4_deviceid_purge_client(clp); kfree(clp->cl_hostname); + kfree(clp->server_scope); kfree(clp); dprintk("<-- nfs_free_client()\n"); @@ -1062,6 +1060,7 @@ static struct nfs_server *nfs_alloc_server(void) INIT_LIST_HEAD(&server->client_link); INIT_LIST_HEAD(&server->master_link); INIT_LIST_HEAD(&server->delegations); + INIT_LIST_HEAD(&server->layouts); atomic_set(&server->active, 0); @@ -1464,7 +1463,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, dprintk("<-- %s %p\n", __func__, clp); return clp; } -EXPORT_SYMBOL(nfs4_set_ds_client); +EXPORT_SYMBOL_GPL(nfs4_set_ds_client); /* * Session has been established, and the client marked ready. 
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index dd25c2aec37..321a66bc384 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -398,12 +398,11 @@ int nfs_inode_return_delegation(struct inode *inode) return err; } -static void nfs_mark_return_delegation(struct nfs_delegation *delegation) +static void nfs_mark_return_delegation(struct nfs_server *server, + struct nfs_delegation *delegation) { - struct nfs_client *clp = NFS_SERVER(delegation->inode)->nfs_client; - set_bit(NFS_DELEGATION_RETURN, &delegation->flags); - set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); + set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state); } /** @@ -441,7 +440,7 @@ static void nfs_mark_return_all_delegation_types(struct nfs_server *server, if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE)) continue; if (delegation->type & flags) - nfs_mark_return_delegation(delegation); + nfs_mark_return_delegation(server, delegation); } } @@ -508,7 +507,7 @@ static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server) list_for_each_entry_rcu(delegation, &server->delegations, super_list) { if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags)) continue; - nfs_mark_return_delegation(delegation); + nfs_mark_return_delegation(server, delegation); } } @@ -539,7 +538,8 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp) int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid) { - struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_client *clp = server->nfs_client; struct nfs_delegation *delegation; rcu_read_lock(); @@ -549,7 +549,7 @@ int nfs_async_inode_return_delegation(struct inode *inode, rcu_read_unlock(); return -ENOENT; } - nfs_mark_return_delegation(delegation); + nfs_mark_return_delegation(server, delegation); rcu_read_unlock(); nfs_delegation_run_state_manager(clp); diff --git a/fs/nfs/dir.c 
b/fs/nfs/dir.c index ededdbd0db3..57f578e2560 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -56,7 +56,7 @@ static int nfs_link(struct dentry *, struct inode *, struct dentry *); static int nfs_mknod(struct inode *, struct dentry *, int, dev_t); static int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); -static int nfs_fsync_dir(struct file *, int); +static int nfs_fsync_dir(struct file *, loff_t, loff_t, int); static loff_t nfs_llseek_dir(struct file *, loff_t, int); static void nfs_readdir_clear_array(struct page*); @@ -945,15 +945,19 @@ out: * All directory operations under NFS are synchronous, so fsync() * is a dummy operation. */ -static int nfs_fsync_dir(struct file *filp, int datasync) +static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end, + int datasync) { struct dentry *dentry = filp->f_path.dentry; + struct inode *inode = dentry->d_inode; dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n", dentry->d_parent->d_name.name, dentry->d_name.name, datasync); + mutex_lock(&inode->i_mutex); nfs_inc_stats(dentry->d_inode, NFSIOS_VFSFSYNC); + mutex_unlock(&inode->i_mutex); return 0; } @@ -997,14 +1001,12 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) * Return the intent data that applies to this particular path component * * Note that the current set of intents only apply to the very last - * component of the path. - * We check for this using LOOKUP_CONTINUE and LOOKUP_PARENT. + * component of the path and none of them is set before that last + * component. */ static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, unsigned int mask) { - if (nd->flags & (LOOKUP_CONTINUE|LOOKUP_PARENT)) - return 0; return nd->flags & mask; } @@ -1338,25 +1340,31 @@ static int is_atomic_open(struct nameidata *nd) return 0; /* Are we trying to write to a read only partition? 
*/ if (__mnt_is_readonly(nd->path.mnt) && - (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) + (nd->intent.open.flags & (O_CREAT|O_TRUNC|O_ACCMODE))) return 0; return 1; } -static struct nfs_open_context *nameidata_to_nfs_open_context(struct dentry *dentry, struct nameidata *nd) +static fmode_t flags_to_mode(int flags) +{ + fmode_t res = (__force fmode_t)flags & FMODE_EXEC; + if ((flags & O_ACCMODE) != O_WRONLY) + res |= FMODE_READ; + if ((flags & O_ACCMODE) != O_RDONLY) + res |= FMODE_WRITE; + return res; +} + +static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags) { - struct path path = { - .mnt = nd->path.mnt, - .dentry = dentry, - }; struct nfs_open_context *ctx; struct rpc_cred *cred; - fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); + fmode_t fmode = flags_to_mode(open_flags); cred = rpc_lookup_cred(); if (IS_ERR(cred)) return ERR_CAST(cred); - ctx = alloc_nfs_open_context(&path, cred, fmode); + ctx = alloc_nfs_open_context(dentry, cred, fmode); put_rpccred(cred); if (ctx == NULL) return ERR_PTR(-ENOMEM); @@ -1376,13 +1384,13 @@ static int nfs_intent_set_file(struct nameidata *nd, struct nfs_open_context *ct /* If the open_intent is for execute, we have an extra check to make */ if (ctx->mode & FMODE_EXEC) { - ret = nfs_may_open(ctx->path.dentry->d_inode, + ret = nfs_may_open(ctx->dentry->d_inode, ctx->cred, nd->intent.open.flags); if (ret < 0) goto out; } - filp = lookup_instantiate_filp(nd, ctx->path.dentry, do_open); + filp = lookup_instantiate_filp(nd, ctx->dentry, do_open); if (IS_ERR(filp)) ret = PTR_ERR(filp); else @@ -1420,12 +1428,13 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry goto out; } - ctx = nameidata_to_nfs_open_context(dentry, nd); + open_flags = nd->intent.open.flags; + + ctx = create_nfs_open_context(dentry, open_flags); res = ERR_CAST(ctx); if (IS_ERR(ctx)) goto out; - open_flags = nd->intent.open.flags; if (nd->flags & 
LOOKUP_CREATE) { attr.ia_mode = nd->intent.open.create_mode; attr.ia_valid = ATTR_MODE; @@ -1463,8 +1472,8 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry res = d_add_unique(dentry, inode); nfs_unblock_sillyrename(dentry->d_parent); if (res != NULL) { - dput(ctx->path.dentry); - ctx->path.dentry = dget(res); + dput(ctx->dentry); + ctx->dentry = dget(res); dentry = res; } err = nfs_intent_set_file(nd, ctx); @@ -1517,7 +1526,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) /* We can't create new files, or truncate existing ones here */ openflags &= ~(O_CREAT|O_EXCL|O_TRUNC); - ctx = nameidata_to_nfs_open_context(dentry, nd); + ctx = create_nfs_open_context(dentry, openflags); ret = PTR_ERR(ctx); if (IS_ERR(ctx)) goto out; @@ -1570,7 +1579,7 @@ static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode, struct nfs_open_context *ctx = NULL; struct iattr attr; int error; - int open_flags = 0; + int open_flags = O_CREAT|O_EXCL; dfprintk(VFS, "NFS: create(%s/%ld), %s\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); @@ -1578,27 +1587,27 @@ static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode, attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; - if ((nd->flags & LOOKUP_CREATE) != 0) { + if (nd) open_flags = nd->intent.open.flags; - ctx = nameidata_to_nfs_open_context(dentry, nd); - error = PTR_ERR(ctx); - if (IS_ERR(ctx)) - goto out_err_drop; - } + ctx = create_nfs_open_context(dentry, open_flags); + error = PTR_ERR(ctx); + if (IS_ERR(ctx)) + goto out_err_drop; error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx); if (error != 0) goto out_put_ctx; - if (ctx != NULL) { + if (nd) { error = nfs_intent_set_file(nd, ctx); if (error < 0) goto out_err; + } else { + put_nfs_open_context(ctx); } return 0; out_put_ctx: - if (ctx != NULL) - put_nfs_open_context(ctx); + put_nfs_open_context(ctx); out_err_drop: d_drop(dentry); out_err: @@ -1660,7 +1669,7 @@ 
static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, { struct iattr attr; int error; - int open_flags = 0; + int open_flags = O_CREAT|O_EXCL; dfprintk(VFS, "NFS: create(%s/%ld), %s\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); @@ -1668,7 +1677,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; - if ((nd->flags & LOOKUP_CREATE) != 0) + if (nd) open_flags = nd->intent.open.flags; error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, NULL); @@ -2259,11 +2268,11 @@ static int nfs_open_permission_mask(int openflags) { int mask = 0; - if (openflags & FMODE_READ) + if ((openflags & O_ACCMODE) != O_WRONLY) mask |= MAY_READ; - if (openflags & FMODE_WRITE) + if ((openflags & O_ACCMODE) != O_RDONLY) mask |= MAY_WRITE; - if (openflags & FMODE_EXEC) + if (openflags & __FMODE_EXEC) mask |= MAY_EXEC; return mask; } @@ -2273,12 +2282,12 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags) return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); } -int nfs_permission(struct inode *inode, int mask, unsigned int flags) +int nfs_permission(struct inode *inode, int mask) { struct rpc_cred *cred; int res = 0; - if (flags & IPERM_FLAG_RCU) + if (mask & MAY_NOT_BLOCK) return -ECHILD; nfs_inc_stats(inode, NFSIOS_VFSACCESS); @@ -2328,7 +2337,7 @@ out: out_notsup: res = nfs_revalidate_inode(NFS_SERVER(inode), inode); if (res == 0) - res = generic_permission(inode, mask, flags, NULL); + res = generic_permission(inode, mask); goto out; } diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 8eea2536671..1940f1a56a5 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -53,7 +53,7 @@ #include <asm/system.h> #include <asm/uaccess.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include "internal.h" #include "iostat.h" @@ -284,7 +284,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, loff_t pos) { struct 
nfs_open_context *ctx = dreq->ctx; - struct inode *inode = ctx->path.dentry->d_inode; + struct inode *inode = ctx->dentry->d_inode; unsigned long user_addr = (unsigned long)iov->iov_base; size_t count = iov->iov_len; size_t rsize = NFS_SERVER(inode)->rsize; @@ -715,7 +715,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, loff_t pos, int sync) { struct nfs_open_context *ctx = dreq->ctx; - struct inode *inode = ctx->path.dentry->d_inode; + struct inode *inode = ctx->dentry->d_inode; unsigned long user_addr = (unsigned long)iov->iov_base; size_t count = iov->iov_len; struct rpc_task *task; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 2f093ed1698..28b8c3f3cda 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -55,7 +55,7 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov, unsigned long nr_segs, loff_t pos); static int nfs_file_flush(struct file *, fl_owner_t id); -static int nfs_file_fsync(struct file *, int datasync); +static int nfs_file_fsync(struct file *, loff_t, loff_t, int datasync); static int nfs_check_flags(int flags); static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); @@ -187,8 +187,11 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) filp->f_path.dentry->d_name.name, offset, origin); - /* origin == SEEK_END => we must revalidate the cached file length */ - if (origin == SEEK_END) { + /* + * origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate + * the cached file length + */ + if (origin != SEEK_SET && origin != SEEK_CUR) { struct inode *inode = filp->f_mapping->host; int retval = nfs_revalidate_file_size(inode, filp); @@ -305,7 +308,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) * fall back to doing a synchronous write. 
*/ static int -nfs_file_fsync(struct file *file, int datasync) +nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct dentry *dentry = file->f_path.dentry; struct nfs_open_context *ctx = nfs_file_open_context(file); @@ -313,11 +316,15 @@ nfs_file_fsync(struct file *file, int datasync) int have_error, status; int ret = 0; - dprintk("NFS: fsync file(%s/%s) datasync %d\n", dentry->d_parent->d_name.name, dentry->d_name.name, datasync); + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (ret) + return ret; + mutex_lock(&inode->i_mutex); + nfs_inc_stats(inode, NFSIOS_VFSFSYNC); have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); status = nfs_commit_inode(inode, FLUSH_SYNC); @@ -329,6 +336,7 @@ nfs_file_fsync(struct file *file, int datasync) if (!ret && !datasync) /* application has asked for meta-data sync */ ret = pnfs_layoutcommit_inode(inode, true); + mutex_unlock(&inode->i_mutex); return ret; } diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index ce153a6b3ae..419119c371b 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -259,12 +259,10 @@ static void nfs_fscache_disable_inode_cookie(struct inode *inode) dfprintk(FSCACHE, "NFS: nfsi 0x%p turning cache off\n", NFS_I(inode)); - /* Need to invalidate any mapped pages that were read in before - * turning off the cache. + /* Need to uncache any pages attached to this inode that + * fscache knows about before turning off the cache. 
*/ - if (inode->i_mapping && inode->i_mapping->nrpages) - invalidate_inode_pages2(inode->i_mapping); - + fscache_uncache_all_inode_pages(NFS_I(inode)->fscache, inode); nfs_fscache_zap_inode_cookie(inode); } } diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 79664a1025a..f20801ae0a1 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -36,6 +36,8 @@ #include <linux/types.h> #include <linux/string.h> #include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/nfs_idmap.h> static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res) { @@ -59,12 +61,10 @@ static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen) #ifdef CONFIG_NFS_USE_NEW_IDMAPPER -#include <linux/slab.h> #include <linux/cred.h> #include <linux/sunrpc/sched.h> #include <linux/nfs4.h> #include <linux/nfs_fs_sb.h> -#include <linux/nfs_idmap.h> #include <linux/keyctl.h> #include <linux/key-type.h> #include <linux/rcupdate.h> @@ -284,18 +284,15 @@ int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, #include <linux/module.h> #include <linux/mutex.h> #include <linux/init.h> -#include <linux/slab.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/sched.h> - #include <linux/sunrpc/clnt.h> #include <linux/workqueue.h> #include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/nfs_fs.h> -#include <linux/nfs_idmap.h> #include "nfs4_fs.h" #define IDMAP_HASH_SZ 128 diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6f4850deb27..fe1203797b2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -567,7 +567,7 @@ static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx) { struct nfs_lock_context *res, *new = NULL; - struct inode *inode = ctx->path.dentry->d_inode; + struct inode *inode = ctx->dentry->d_inode; spin_lock(&inode->i_lock); res = __nfs_find_lock_context(ctx); @@ -594,7 +594,7 @@ struct nfs_lock_context *nfs_get_lock_context(struct 
nfs_open_context *ctx) void nfs_put_lock_context(struct nfs_lock_context *l_ctx) { struct nfs_open_context *ctx = l_ctx->open_context; - struct inode *inode = ctx->path.dentry->d_inode; + struct inode *inode = ctx->dentry->d_inode; if (!atomic_dec_and_lock(&l_ctx->count, &inode->i_lock)) return; @@ -620,7 +620,7 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) return; if (!is_sync) return; - inode = ctx->path.dentry->d_inode; + inode = ctx->dentry->d_inode; if (!list_empty(&NFS_I(inode)->open_files)) return; server = NFS_SERVER(inode); @@ -629,14 +629,14 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) nfs_revalidate_inode(server, inode); } -struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred, fmode_t f_mode) +struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred, fmode_t f_mode) { struct nfs_open_context *ctx; ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (ctx != NULL) { - ctx->path = *path; - path_get(&ctx->path); + nfs_sb_active(dentry->d_sb); + ctx->dentry = dget(dentry); ctx->cred = get_rpccred(cred); ctx->state = NULL; ctx->mode = f_mode; @@ -658,7 +658,8 @@ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) { - struct inode *inode = ctx->path.dentry->d_inode; + struct inode *inode = ctx->dentry->d_inode; + struct super_block *sb = ctx->dentry->d_sb; if (!list_empty(&ctx->list)) { if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock)) @@ -671,7 +672,8 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) NFS_PROTO(inode)->close_context(ctx, is_sync); if (ctx->cred != NULL) put_rpccred(ctx->cred); - path_put(&ctx->path); + dput(ctx->dentry); + nfs_sb_deactive(sb); kfree(ctx); } @@ -741,7 +743,7 @@ int nfs_open(struct inode *inode, struct file *filp) cred = rpc_lookup_cred(); if (IS_ERR(cred)) return 
PTR_ERR(cred); - ctx = alloc_nfs_open_context(&filp->f_path, cred, filp->f_mode); + ctx = alloc_nfs_open_context(filp->f_path.dentry, cred, filp->f_mode); put_rpccred(cred); if (ctx == NULL) return -ENOMEM; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 2a55347a2da..ab12913dd47 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -277,6 +277,9 @@ extern void nfs_sb_deactive(struct super_block *sb); extern char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen); extern struct vfsmount *nfs_d_automount(struct path *path); +#ifdef CONFIG_NFS_V4 +rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); +#endif /* getroot.c */ extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, @@ -288,12 +291,22 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *, extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); #endif +struct nfs_pageio_descriptor; /* read.c */ extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, const struct rpc_call_ops *call_ops); extern void nfs_read_prepare(struct rpc_task *task, void *calldata); +extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, + struct list_head *head); + +extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); +extern void nfs_readdata_release(struct nfs_read_data *rdata); /* write.c */ +extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, + struct list_head *head); +extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); +extern void nfs_writedata_release(struct nfs_write_data *wdata); extern void nfs_commit_free(struct nfs_write_data *p); extern int nfs_initiate_write(struct nfs_write_data *data, struct rpc_clnt *clnt, diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 1f063bacd28..8102391bb37 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -119,7 +119,7 @@ Elong: } #ifdef CONFIG_NFS_V4 -static 
rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) +rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) { struct gss_api_mech *mech; struct xdr_netobj oid; diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 27434277165..e49e73107e6 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -427,16 +427,12 @@ int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, } if (!dfacl) return 0; - acl = posix_acl_clone(dfacl, GFP_KERNEL); - error = -ENOMEM; - if (!acl) - goto out_release_dfacl; - error = posix_acl_create_masq(acl, &mode); + acl = posix_acl_dup(dfacl); + error = posix_acl_create(&acl, GFP_KERNEL, &mode); if (error < 0) - goto out_release_acl; + goto out_release_dfacl; error = nfs3_proc_setacls(inode, acl, S_ISDIR(inode->i_mode) ? dfacl : NULL); -out_release_acl: posix_acl_release(acl); out_release_dfacl: posix_acl_release(dfacl); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index c4a69833dd0..1909ee8be35 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -48,6 +48,7 @@ enum nfs4_client_state { NFS4CLNT_SESSION_RESET, NFS4CLNT_RECALL_SLOT, NFS4CLNT_LEASE_CONFIRM, + NFS4CLNT_SERVER_SCOPE_MISMATCH, }; enum nfs4_session_state { @@ -66,6 +67,8 @@ struct nfs4_minor_version_ops { int cache_reply); int (*validate_stateid)(struct nfs_delegation *, const nfs4_stateid *); + int (*find_root_sec)(struct nfs_server *, struct nfs_fh *, + struct nfs_fsinfo *); const struct nfs4_state_recovery_ops *reboot_recovery_ops; const struct nfs4_state_recovery_ops *nograce_recovery_ops; const struct nfs4_state_maintenance_ops *state_renewal_ops; @@ -238,7 +241,7 @@ extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); -extern int nfs4_do_close(struct path *path, struct nfs4_state *state, 
gfp_t gfp_mask, int wait, bool roc); +extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc); extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, struct nfs4_fs_locations *fs_locations, struct page *page); @@ -341,14 +344,16 @@ extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struc extern void nfs4_put_state_owner(struct nfs4_state_owner *); extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); extern void nfs4_put_open_state(struct nfs4_state *); -extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t); -extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t); +extern void nfs4_close_state(struct nfs4_state *, fmode_t); +extern void nfs4_close_sync(struct nfs4_state *, fmode_t); extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); extern void nfs4_schedule_lease_recovery(struct nfs_client *); extern void nfs4_schedule_state_manager(struct nfs_client *); extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); extern void nfs41_handle_recall_slot(struct nfs_client *clp); +extern void nfs41_handle_server_scope(struct nfs_client *, + struct server_scope **); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t); @@ -373,8 +378,8 @@ extern struct svc_version nfs4_callback_version4; #else -#define nfs4_close_state(a, b, c) do { } while (0) -#define nfs4_close_sync(a, b, c) do { } while (0) +#define nfs4_close_state(a, b) do { } while (0) +#define nfs4_close_sync(a, b) do { } while (0) #endif /* CONFIG_NFS_V4 */ #endif /* 
__LINUX_FS_NFS_NFS4_FS.H */ diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 0bafcc91c27..be93a622872 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -334,6 +334,9 @@ filelayout_read_pagelist(struct nfs_read_data *data) __func__, data->inode->i_ino, data->args.pgbase, (size_t)data->args.count, offset); + if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags)) + return PNFS_NOT_ATTEMPTED; + /* Retrieve the correct rpc_client for the byte range */ j = nfs4_fl_calc_j_index(lseg, offset); idx = nfs4_fl_calc_ds_index(lseg, j); @@ -344,8 +347,7 @@ filelayout_read_pagelist(struct nfs_read_data *data) set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); return PNFS_NOT_ATTEMPTED; } - dprintk("%s USE DS:ip %x %hu\n", __func__, - ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); + dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr); /* No multipath support. Use first DS */ data->ds_clp = ds->ds_clp; @@ -374,6 +376,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) struct nfs_fh *fh; int status; + if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags)) + return PNFS_NOT_ATTEMPTED; + /* Retrieve the correct rpc_client for the byte range */ j = nfs4_fl_calc_j_index(lseg, offset); idx = nfs4_fl_calc_ds_index(lseg, j); @@ -384,9 +389,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); return PNFS_NOT_ATTEMPTED; } - dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__, + dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__, data->inode->i_ino, sync, (size_t) data->args.count, offset, - ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); + ds->ds_remotestr); data->write_done_cb = filelayout_write_done_cb; data->ds_clp = ds->ds_clp; @@ -398,7 +403,6 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) * this offset and save the original offset. 
*/ data->args.offset = filelayout_get_dserver_offset(lseg, offset); - data->mds_offset = offset; /* Perform an asynchronous write */ status = nfs_initiate_write(data, ds->ds_clp->cl_rpcclient, @@ -429,6 +433,14 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, dprintk("--> %s\n", __func__); + /* FIXME: remove this check when layout segment support is added */ + if (lgr->range.offset != 0 || + lgr->range.length != NFS4_MAX_UINT64) { + dprintk("%s Only whole file layouts supported. Use MDS i/o\n", + __func__); + goto out; + } + if (fl->pattern_offset > lgr->range.offset) { dprintk("%s pattern_offset %lld too large\n", __func__, fl->pattern_offset); @@ -450,6 +462,10 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, goto out; } else dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); + /* Found deviceid is being reaped */ + if (test_bit(NFS_DEVICEID_INVALID, &dsaddr->id_node.flags)) + goto out_put; + fl->dsaddr = dsaddr; if (fl->first_stripe_index < 0 || @@ -660,7 +676,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, * return true : coalesce page * return false : don't coalesce page */ -bool +static bool filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { @@ -671,8 +687,6 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, !nfs_generic_pg_test(pgio, prev, req)) return false; - if (!pgio->pg_lseg) - return 1; p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT; r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT; stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; @@ -683,6 +697,52 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, return (p_stripe == r_stripe); } +void +filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, + struct nfs_page *req) +{ + BUG_ON(pgio->pg_lseg != NULL); + + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + req->wb_context, + 0, + NFS4_MAX_UINT64, + IOMODE_READ, + GFP_KERNEL); + /* If 
no lseg, fall back to read through mds */ + if (pgio->pg_lseg == NULL) + nfs_pageio_reset_read_mds(pgio); +} + +void +filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, + struct nfs_page *req) +{ + BUG_ON(pgio->pg_lseg != NULL); + + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + req->wb_context, + 0, + NFS4_MAX_UINT64, + IOMODE_RW, + GFP_NOFS); + /* If no lseg, fall back to write through mds */ + if (pgio->pg_lseg == NULL) + nfs_pageio_reset_write_mds(pgio); +} + +static const struct nfs_pageio_ops filelayout_pg_read_ops = { + .pg_init = filelayout_pg_init_read, + .pg_test = filelayout_pg_test, + .pg_doio = pnfs_generic_pg_readpages, +}; + +static const struct nfs_pageio_ops filelayout_pg_write_ops = { + .pg_init = filelayout_pg_init_write, + .pg_test = filelayout_pg_test, + .pg_doio = pnfs_generic_pg_writepages, +}; + static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg) { return !FILELAYOUT_LSEG(lseg)->commit_through_mds; @@ -880,7 +940,8 @@ static struct pnfs_layoutdriver_type filelayout_type = { .owner = THIS_MODULE, .alloc_lseg = filelayout_alloc_lseg, .free_lseg = filelayout_free_lseg, - .pg_test = filelayout_pg_test, + .pg_read_ops = &filelayout_pg_read_ops, + .pg_write_ops = &filelayout_pg_write_ops, .mark_pnfs_commit = filelayout_mark_pnfs_commit, .choose_commit_list = filelayout_choose_commit_list, .commit_pagelist = filelayout_commit_pagelist, @@ -903,5 +964,7 @@ static void __exit nfs4filelayout_exit(void) pnfs_unregister_layoutdriver(&filelayout_type); } +MODULE_ALIAS("nfs-layouttype4-1"); + module_init(nfs4filelayout_init); module_exit(nfs4filelayout_exit); diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index cebe01e3795..2e42284253f 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -47,10 +47,17 @@ enum stripetype4 { }; /* Individual ip address */ +struct nfs4_pnfs_ds_addr { + struct sockaddr_storage da_addr; + size_t da_addrlen; + struct list_head da_node; /* nfs4_pnfs_dev_hlist 
dev_dslist */ + char *da_remotestr; /* human readable addr+port */ +}; + struct nfs4_pnfs_ds { struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ - u32 ds_ip_addr; - u32 ds_port; + char *ds_remotestr; /* comma sep list of addrs */ + struct list_head ds_addrs; struct nfs_client *ds_clp; atomic_t ds_count; }; @@ -89,6 +96,12 @@ FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg) generic_hdr); } +static inline struct nfs4_deviceid_node * +FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg) +{ + return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node; +} + extern struct nfs_fh * nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 3b7bf137726..ed388aae968 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -56,54 +56,139 @@ print_ds(struct nfs4_pnfs_ds *ds) printk("%s NULL device\n", __func__); return; } - printk(" ip_addr %x port %hu\n" + printk(" ds %s\n" " ref count %d\n" " client %p\n" " cl_exchange_flags %x\n", - ntohl(ds->ds_ip_addr), ntohs(ds->ds_port), + ds->ds_remotestr, atomic_read(&ds->ds_count), ds->ds_clp, ds->ds_clp ? 
ds->ds_clp->cl_exchange_flags : 0); } -/* nfs4_ds_cache_lock is held */ -static struct nfs4_pnfs_ds * -_data_server_lookup_locked(u32 ip_addr, u32 port) +static bool +same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) { - struct nfs4_pnfs_ds *ds; + struct sockaddr_in *a, *b; + struct sockaddr_in6 *a6, *b6; + + if (addr1->sa_family != addr2->sa_family) + return false; + + switch (addr1->sa_family) { + case AF_INET: + a = (struct sockaddr_in *)addr1; + b = (struct sockaddr_in *)addr2; + + if (a->sin_addr.s_addr == b->sin_addr.s_addr && + a->sin_port == b->sin_port) + return true; + break; + + case AF_INET6: + a6 = (struct sockaddr_in6 *)addr1; + b6 = (struct sockaddr_in6 *)addr2; + + /* LINKLOCAL addresses must have matching scope_id */ + if (ipv6_addr_scope(&a6->sin6_addr) == + IPV6_ADDR_SCOPE_LINKLOCAL && + a6->sin6_scope_id != b6->sin6_scope_id) + return false; + + if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) && + a6->sin6_port == b6->sin6_port) + return true; + break; + + default: + dprintk("%s: unhandled address family: %u\n", + __func__, addr1->sa_family); + return false; + } - dprintk("_data_server_lookup: ip_addr=%x port=%hu\n", - ntohl(ip_addr), ntohs(port)); + return false; +} - list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { - if (ds->ds_ip_addr == ip_addr && - ds->ds_port == port) { - return ds; +/* + * Lookup DS by addresses. The first matching address returns true. + * nfs4_ds_cache_lock is held + */ +static struct nfs4_pnfs_ds * +_data_server_lookup_locked(struct list_head *dsaddrs) +{ + struct nfs4_pnfs_ds *ds; + struct nfs4_pnfs_ds_addr *da1, *da2; + + list_for_each_entry(da1, dsaddrs, da_node) { + list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { + list_for_each_entry(da2, &ds->ds_addrs, da_node) { + if (same_sockaddr( + (struct sockaddr *)&da1->da_addr, + (struct sockaddr *)&da2->da_addr)) + return ds; + } } } return NULL; } /* + * Compare two lists of addresses. 
+ */ +static bool +_data_server_match_all_addrs_locked(struct list_head *dsaddrs1, + struct list_head *dsaddrs2) +{ + struct nfs4_pnfs_ds_addr *da1, *da2; + size_t count1 = 0, + count2 = 0; + + list_for_each_entry(da1, dsaddrs1, da_node) + count1++; + + list_for_each_entry(da2, dsaddrs2, da_node) { + bool found = false; + count2++; + list_for_each_entry(da1, dsaddrs1, da_node) { + if (same_sockaddr((struct sockaddr *)&da1->da_addr, + (struct sockaddr *)&da2->da_addr)) { + found = true; + break; + } + } + if (!found) + return false; + } + + return (count1 == count2); +} + +/* * Create an rpc connection to the nfs4_pnfs_ds data server - * Currently only support IPv4 + * Currently only supports IPv4 and IPv6 addresses */ static int nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) { - struct nfs_client *clp; - struct sockaddr_in sin; + struct nfs_client *clp = ERR_PTR(-EIO); + struct nfs4_pnfs_ds_addr *da; int status = 0; - dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__, - ntohl(ds->ds_ip_addr), ntohs(ds->ds_port), + dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr, mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = ds->ds_ip_addr; - sin.sin_port = ds->ds_port; + BUG_ON(list_empty(&ds->ds_addrs)); + + list_for_each_entry(da, &ds->ds_addrs, da_node) { + dprintk("%s: DS %s: trying address %s\n", + __func__, ds->ds_remotestr, da->da_remotestr); + + clp = nfs4_set_ds_client(mds_srv->nfs_client, + (struct sockaddr *)&da->da_addr, + da->da_addrlen, IPPROTO_TCP); + if (!IS_ERR(clp)) + break; + } - clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin, - sizeof(sin), IPPROTO_TCP); if (IS_ERR(clp)) { status = PTR_ERR(clp); goto out; @@ -115,8 +200,8 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) goto out_put; } ds->ds_clp = clp; - dprintk("%s [existing] ip=%x, port=%hu\n", __func__, - ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); + 
dprintk("%s [existing] server=%s\n", __func__, + ds->ds_remotestr); goto out; } @@ -135,8 +220,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) goto out_put; ds->ds_clp = clp; - dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr), - ntohs(ds->ds_port)); + dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); out: return status; out_put: @@ -147,12 +231,25 @@ out_put: static void destroy_ds(struct nfs4_pnfs_ds *ds) { + struct nfs4_pnfs_ds_addr *da; + dprintk("--> %s\n", __func__); ifdebug(FACILITY) print_ds(ds); if (ds->ds_clp) nfs_put_client(ds->ds_clp); + + while (!list_empty(&ds->ds_addrs)) { + da = list_first_entry(&ds->ds_addrs, + struct nfs4_pnfs_ds_addr, + da_node); + list_del_init(&da->da_node); + kfree(da->da_remotestr); + kfree(da); + } + + kfree(ds->ds_remotestr); kfree(ds); } @@ -179,31 +276,96 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) kfree(dsaddr); } +/* + * Create a string with a human readable address and port to avoid + * complicated setup around many dprinks. 
+ */ +static char * +nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags) +{ + struct nfs4_pnfs_ds_addr *da; + char *remotestr; + size_t len; + char *p; + + len = 3; /* '{', '}' and eol */ + list_for_each_entry(da, dsaddrs, da_node) { + len += strlen(da->da_remotestr) + 1; /* string plus comma */ + } + + remotestr = kzalloc(len, gfp_flags); + if (!remotestr) + return NULL; + + p = remotestr; + *(p++) = '{'; + len--; + list_for_each_entry(da, dsaddrs, da_node) { + size_t ll = strlen(da->da_remotestr); + + if (ll > len) + goto out_err; + + memcpy(p, da->da_remotestr, ll); + p += ll; + len -= ll; + + if (len < 1) + goto out_err; + (*p++) = ','; + len--; + } + if (len < 2) + goto out_err; + *(p++) = '}'; + *p = '\0'; + return remotestr; +out_err: + kfree(remotestr); + return NULL; +} + static struct nfs4_pnfs_ds * -nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags) +nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) { - struct nfs4_pnfs_ds *tmp_ds, *ds; + struct nfs4_pnfs_ds *tmp_ds, *ds = NULL; + char *remotestr; - ds = kzalloc(sizeof(*tmp_ds), gfp_flags); + if (list_empty(dsaddrs)) { + dprintk("%s: no addresses defined\n", __func__); + goto out; + } + + ds = kzalloc(sizeof(*ds), gfp_flags); if (!ds) goto out; + /* this is only used for debugging, so it's ok if its NULL */ + remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags); + spin_lock(&nfs4_ds_cache_lock); - tmp_ds = _data_server_lookup_locked(ip_addr, port); + tmp_ds = _data_server_lookup_locked(dsaddrs); if (tmp_ds == NULL) { - ds->ds_ip_addr = ip_addr; - ds->ds_port = port; + INIT_LIST_HEAD(&ds->ds_addrs); + list_splice_init(dsaddrs, &ds->ds_addrs); + ds->ds_remotestr = remotestr; atomic_set(&ds->ds_count, 1); INIT_LIST_HEAD(&ds->ds_node); ds->ds_clp = NULL; list_add(&ds->ds_node, &nfs4_data_server_cache); - dprintk("%s add new data server ip 0x%x\n", __func__, - ds->ds_ip_addr); + dprintk("%s add new data server %s\n", __func__, + ds->ds_remotestr); } else { + if 
(!_data_server_match_all_addrs_locked(&tmp_ds->ds_addrs, + dsaddrs)) { + dprintk("%s: multipath address mismatch: %s != %s", + __func__, tmp_ds->ds_remotestr, remotestr); + } + kfree(remotestr); kfree(ds); atomic_inc(&tmp_ds->ds_count); - dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n", - __func__, tmp_ds->ds_ip_addr, + dprintk("%s data server %s found, inc'ed ds_count to %d\n", + __func__, tmp_ds->ds_remotestr, atomic_read(&tmp_ds->ds_count)); ds = tmp_ds; } @@ -213,18 +375,22 @@ out: } /* - * Currently only support ipv4, and one multi-path address. + * Currently only supports ipv4, ipv6 and one multi-path address. */ -static struct nfs4_pnfs_ds * -decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags) +static struct nfs4_pnfs_ds_addr * +decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags) { - struct nfs4_pnfs_ds *ds = NULL; - char *buf; - const char *ipend, *pstr; - u32 ip_addr, port; - int nlen, rlen, i; + struct nfs4_pnfs_ds_addr *da = NULL; + char *buf, *portstr; + u32 port; + int nlen, rlen; int tmp[2]; __be32 *p; + char *netid, *match_netid; + size_t len, match_netid_len; + char *startsep = ""; + char *endsep = ""; + /* r_netid */ p = xdr_inline_decode(streamp, 4); @@ -236,64 +402,123 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_fla if (unlikely(!p)) goto out_err; - /* Check that netid is "tcp" */ - if (nlen != 3 || memcmp((char *)p, "tcp", 3)) { - dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__); + netid = kmalloc(nlen+1, gfp_flags); + if (unlikely(!netid)) goto out_err; - } - /* r_addr */ + netid[nlen] = '\0'; + memcpy(netid, p, nlen); + + /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */ p = xdr_inline_decode(streamp, 4); if (unlikely(!p)) - goto out_err; + goto out_free_netid; rlen = be32_to_cpup(p); p = xdr_inline_decode(streamp, rlen); if (unlikely(!p)) - goto out_err; + goto out_free_netid; - /* ipv6 length plus port is legal */ - if (rlen > 
INET6_ADDRSTRLEN + 8) { + /* port is ".ABC.DEF", 8 chars max */ + if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) { dprintk("%s: Invalid address, length %d\n", __func__, rlen); - goto out_err; + goto out_free_netid; } buf = kmalloc(rlen + 1, gfp_flags); if (!buf) { dprintk("%s: Not enough memory\n", __func__); - goto out_err; + goto out_free_netid; } buf[rlen] = '\0'; memcpy(buf, p, rlen); - /* replace the port dots with dashes for the in4_pton() delimiter*/ - for (i = 0; i < 2; i++) { - char *res = strrchr(buf, '.'); - if (!res) { - dprintk("%s: Failed finding expected dots in port\n", - __func__); - goto out_free; - } - *res = '-'; + /* replace port '.' with '-' */ + portstr = strrchr(buf, '.'); + if (!portstr) { + dprintk("%s: Failed finding expected dot in port\n", + __func__); + goto out_free_buf; + } + *portstr = '-'; + + /* find '.' between address and port */ + portstr = strrchr(buf, '.'); + if (!portstr) { + dprintk("%s: Failed finding expected dot between address and " + "port\n", __func__); + goto out_free_buf; } + *portstr = '\0'; - /* Currently only support ipv4 address */ - if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) { - dprintk("%s: Only ipv4 addresses supported\n", __func__); - goto out_free; + da = kzalloc(sizeof(*da), gfp_flags); + if (unlikely(!da)) + goto out_free_buf; + + INIT_LIST_HEAD(&da->da_node); + + if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&da->da_addr, + sizeof(da->da_addr))) { + dprintk("%s: error parsing address %s\n", __func__, buf); + goto out_free_da; } - /* port */ - pstr = ipend; - sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]); + portstr++; + sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]); port = htons((tmp[0] << 8) | (tmp[1])); - ds = nfs4_pnfs_ds_add(inode, ip_addr, port, gfp_flags); - dprintk("%s: Decoded address and port %s\n", __func__, buf); -out_free: + switch (da->da_addr.ss_family) { + case AF_INET: + ((struct sockaddr_in *)&da->da_addr)->sin_port = port; + da->da_addrlen = sizeof(struct sockaddr_in); 
+ match_netid = "tcp"; + match_netid_len = 3; + break; + + case AF_INET6: + ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port; + da->da_addrlen = sizeof(struct sockaddr_in6); + match_netid = "tcp6"; + match_netid_len = 4; + startsep = "["; + endsep = "]"; + break; + + default: + dprintk("%s: unsupported address family: %u\n", + __func__, da->da_addr.ss_family); + goto out_free_da; + } + + if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) { + dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n", + __func__, netid, match_netid); + goto out_free_da; + } + + /* save human readable address */ + len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7; + da->da_remotestr = kzalloc(len, gfp_flags); + + /* NULL is ok, only used for dprintk */ + if (da->da_remotestr) + snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep, + buf, endsep, ntohs(port)); + + dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr); kfree(buf); + kfree(netid); + return da; + +out_free_da: + kfree(da); +out_free_buf: + dprintk("%s: Error parsing DS addr: %s\n", __func__, buf); + kfree(buf); +out_free_netid: + kfree(netid); out_err: - return ds; + return NULL; } /* Decode opaque device data and return the result */ @@ -310,6 +535,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) struct xdr_stream stream; struct xdr_buf buf; struct page *scratch; + struct list_head dsaddrs; + struct nfs4_pnfs_ds_addr *da; /* set up xdr stream */ scratch = alloc_page(gfp_flags); @@ -386,6 +613,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) NFS_SERVER(ino)->nfs_client, &pdev->dev_id); + INIT_LIST_HEAD(&dsaddrs); + for (i = 0; i < dsaddr->ds_num; i++) { int j; u32 mp_count; @@ -395,48 +624,43 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) goto out_err_free_deviceid; mp_count = be32_to_cpup(p); /* multipath count */ - if (mp_count > 1) { - printk(KERN_WARNING - "%s: Multipath count %d not supported, 
" - "skipping all greater than 1\n", __func__, - mp_count); - } for (j = 0; j < mp_count; j++) { - if (j == 0) { - dsaddr->ds_list[i] = decode_and_add_ds(&stream, - ino, gfp_flags); - if (dsaddr->ds_list[i] == NULL) - goto out_err_free_deviceid; - } else { - u32 len; - /* skip extra multipath */ - - /* read len, skip */ - p = xdr_inline_decode(&stream, 4); - if (unlikely(!p)) - goto out_err_free_deviceid; - len = be32_to_cpup(p); - - p = xdr_inline_decode(&stream, len); - if (unlikely(!p)) - goto out_err_free_deviceid; - - /* read len, skip */ - p = xdr_inline_decode(&stream, 4); - if (unlikely(!p)) - goto out_err_free_deviceid; - len = be32_to_cpup(p); - - p = xdr_inline_decode(&stream, len); - if (unlikely(!p)) - goto out_err_free_deviceid; - } + da = decode_ds_addr(&stream, gfp_flags); + if (da) + list_add_tail(&da->da_node, &dsaddrs); + } + if (list_empty(&dsaddrs)) { + dprintk("%s: no suitable DS addresses found\n", + __func__); + goto out_err_free_deviceid; + } + + dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); + if (!dsaddr->ds_list[i]) + goto out_err_drain_dsaddrs; + + /* If DS was already in cache, free ds addrs */ + while (!list_empty(&dsaddrs)) { + da = list_first_entry(&dsaddrs, + struct nfs4_pnfs_ds_addr, + da_node); + list_del_init(&da->da_node); + kfree(da->da_remotestr); + kfree(da); } } __free_page(scratch); return dsaddr; +out_err_drain_dsaddrs: + while (!list_empty(&dsaddrs)) { + da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr, + da_node); + list_del_init(&da->da_node); + kfree(da->da_remotestr); + kfree(da); + } out_err_free_deviceid: nfs4_fl_free_deviceid(dsaddr); /* stripe_indicies was part of dsaddr */ @@ -591,13 +815,13 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j) static void filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr, - int err, u32 ds_addr) + int err, const char *ds_remotestr) { u32 *p = (u32 *)&dsaddr->id_node.deviceid; - printk(KERN_ERR "NFS: data server %x connection 
error %d." + printk(KERN_ERR "NFS: data server %s connection error %d." " Deviceid [%x%x%x%x] marked out of use.\n", - ds_addr, err, p[0], p[1], p[2], p[3]); + ds_remotestr, err, p[0], p[1], p[2], p[3]); spin_lock(&nfs4_ds_cache_lock); dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY; @@ -628,7 +852,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) err = nfs4_ds_connect(s, ds); if (err) { filelayout_mark_devid_negative(dsaddr, err, - ntohl(ds->ds_ip_addr)); + ds->ds_remotestr); return NULL; } } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5879b23e0c9..079614deca3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -80,7 +80,10 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, struct nfs4_state *state); - +#ifdef CONFIG_NFS_V4_1 +static int nfs41_test_stateid(struct nfs_server *, struct nfs4_state *); +static int nfs41_free_stateid(struct nfs_server *, struct nfs4_state *); +#endif /* Prevent leaks of NFSv4 errors into userland */ static int nfs4_map_errors(int err) { @@ -763,8 +766,8 @@ struct nfs4_opendata { struct nfs_open_confirmres c_res; struct nfs_fattr f_attr; struct nfs_fattr dir_attr; - struct path path; struct dentry *dir; + struct dentry *dentry; struct nfs4_state_owner *owner; struct nfs4_state *state; struct iattr attrs; @@ -786,12 +789,12 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p) nfs_fattr_init(&p->dir_attr); } -static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, +static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, struct nfs4_state_owner *sp, fmode_t fmode, int flags, const struct iattr *attrs, gfp_t gfp_mask) { - struct dentry *parent = dget_parent(path->dentry); + struct dentry *parent = dget_parent(dentry); struct inode *dir = parent->d_inode; struct nfs_server *server = NFS_SERVER(dir); struct nfs4_opendata *p; 
@@ -802,8 +805,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid, gfp_mask); if (p->o_arg.seqid == NULL) goto err_free; - path_get(path); - p->path = *path; + nfs_sb_active(dentry->d_sb); + p->dentry = dget(dentry); p->dir = parent; p->owner = sp; atomic_inc(&sp->so_count); @@ -812,7 +815,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); p->o_arg.clientid = server->nfs_client->cl_clientid; p->o_arg.id = sp->so_owner_id.id; - p->o_arg.name = &p->path.dentry->d_name; + p->o_arg.name = &dentry->d_name; p->o_arg.server = server; p->o_arg.bitmask = server->attr_bitmask; p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; @@ -842,13 +845,15 @@ static void nfs4_opendata_free(struct kref *kref) { struct nfs4_opendata *p = container_of(kref, struct nfs4_opendata, kref); + struct super_block *sb = p->dentry->d_sb; nfs_free_seqid(p->o_arg.seqid); if (p->state != NULL) nfs4_put_open_state(p->state); nfs4_put_state_owner(p->owner); dput(p->dir); - path_put(&p->path); + dput(p->dentry); + nfs_sb_deactive(sb); kfree(p); } @@ -1130,7 +1135,7 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context { struct nfs4_opendata *opendata; - opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, 0, NULL, GFP_NOFS); + opendata = nfs4_opendata_alloc(ctx->dentry, state->owner, 0, 0, NULL, GFP_NOFS); if (opendata == NULL) return ERR_PTR(-ENOMEM); opendata->state = state; @@ -1154,7 +1159,7 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod newstate = nfs4_opendata_to_nfs4_state(opendata); if (IS_ERR(newstate)) return PTR_ERR(newstate); - nfs4_close_state(&opendata->path, newstate, fmode); + nfs4_close_state(newstate, fmode); *res = newstate; return 0; } @@ -1352,7 +1357,7 @@ static void nfs4_open_confirm_release(void *calldata) goto out_free; state = nfs4_opendata_to_nfs4_state(data); if 
(!IS_ERR(state)) - nfs4_close_state(&data->path, state, data->o_arg.fmode); + nfs4_close_state(state, data->o_arg.fmode); out_free: nfs4_opendata_put(data); } @@ -1497,7 +1502,7 @@ static void nfs4_open_release(void *calldata) goto out_free; state = nfs4_opendata_to_nfs4_state(data); if (!IS_ERR(state)) - nfs4_close_state(&data->path, state, data->o_arg.fmode); + nfs4_close_state(state, data->o_arg.fmode); out_free: nfs4_opendata_put(data); } @@ -1648,7 +1653,7 @@ static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *s return PTR_ERR(opendata); ret = nfs4_open_recover(opendata, state); if (ret == -ESTALE) - d_drop(ctx->path.dentry); + d_drop(ctx->dentry); nfs4_opendata_put(opendata); return ret; } @@ -1687,6 +1692,20 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta return ret; } +#if defined(CONFIG_NFS_V4_1) +static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + int status; + struct nfs_server *server = NFS_SERVER(state->inode); + + status = nfs41_test_stateid(server, state); + if (status == NFS_OK) + return 0; + nfs41_free_stateid(server, state); + return nfs4_open_expired(sp, state); +} +#endif + /* * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-* * fields corresponding to attributes that were used to store the verifier. 
@@ -1706,7 +1725,7 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct /* * Returns a referenced nfs4_state */ -static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) +static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) { struct nfs4_state_owner *sp; struct nfs4_state *state = NULL; @@ -1723,15 +1742,15 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in status = nfs4_recover_expired_lease(server); if (status != 0) goto err_put_state_owner; - if (path->dentry->d_inode != NULL) - nfs4_return_incompatible_delegation(path->dentry->d_inode, fmode); + if (dentry->d_inode != NULL) + nfs4_return_incompatible_delegation(dentry->d_inode, fmode); status = -ENOMEM; - opendata = nfs4_opendata_alloc(path, sp, fmode, flags, sattr, GFP_KERNEL); + opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr, GFP_KERNEL); if (opendata == NULL) goto err_put_state_owner; - if (path->dentry->d_inode != NULL) - opendata->state = nfs4_get_open_state(path->dentry->d_inode, sp); + if (dentry->d_inode != NULL) + opendata->state = nfs4_get_open_state(dentry->d_inode, sp); status = _nfs4_proc_open(opendata); if (status != 0) @@ -1769,14 +1788,14 @@ out_err: } -static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred) +static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred) { struct nfs4_exception exception = { }; struct nfs4_state *res; int status; do { - status = _nfs4_do_open(dir, path, fmode, flags, sattr, cred, &res); + status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, &res); if (status == 0) break; /* NOTE: BAD_SEQID 
means the server and client disagree about the @@ -1873,7 +1892,6 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, } struct nfs4_closedata { - struct path path; struct inode *inode; struct nfs4_state *state; struct nfs_closeargs arg; @@ -1888,13 +1906,14 @@ static void nfs4_free_closedata(void *data) { struct nfs4_closedata *calldata = data; struct nfs4_state_owner *sp = calldata->state->owner; + struct super_block *sb = calldata->state->inode->i_sb; if (calldata->roc) pnfs_roc_release(calldata->state->inode); nfs4_put_open_state(calldata->state); nfs_free_seqid(calldata->arg.seqid); nfs4_put_state_owner(sp); - path_put(&calldata->path); + nfs_sb_deactive(sb); kfree(calldata); } @@ -2014,7 +2033,7 @@ static const struct rpc_call_ops nfs4_close_ops = { * * NOTE: Caller must be holding the sp->so_owner semaphore! */ -int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc) +int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc) { struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_closedata *calldata; @@ -2050,8 +2069,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i calldata->res.seqid = calldata->arg.seqid; calldata->res.server = server; calldata->roc = roc; - path_get(path); - calldata->path = *path; + nfs_sb_active(calldata->inode->i_sb); msg.rpc_argp = &calldata->arg; msg.rpc_resp = &calldata->res; @@ -2080,7 +2098,7 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags struct nfs4_state *state; /* Protect against concurrent sillydeletes */ - state = nfs4_do_open(dir, &ctx->path, ctx->mode, open_flags, attr, ctx->cred); + state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr, ctx->cred); if (IS_ERR(state)) return ERR_CAST(state); ctx->state = state; @@ -2092,9 +2110,9 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) if (ctx->state == NULL) return; if 
(is_sync) - nfs4_close_sync(&ctx->path, ctx->state, ctx->mode); + nfs4_close_sync(ctx->state, ctx->mode); else - nfs4_close_state(&ctx->path, ctx->state, ctx->mode); + nfs4_close_state(ctx->state, ctx->mode); } static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) @@ -2251,13 +2269,14 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *info) { + int minor_version = server->nfs_client->cl_minorversion; int status = nfs4_lookup_root(server, fhandle, info); if ((status == -NFS4ERR_WRONGSEC) && !(server->flags & NFS_MOUNT_SECFLAVOUR)) /* * A status of -NFS4ERR_WRONGSEC will be mapped to -EPERM * by nfs4_map_errors() as this function exits. */ - status = nfs4_find_root_sec(server, fhandle, info); + status = nfs_v4_minor_ops[minor_version]->find_root_sec(server, fhandle, info); if (status == 0) status = nfs4_server_capabilities(server, fhandle); if (status == 0) @@ -2616,10 +2635,7 @@ static int nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, int flags, struct nfs_open_context *ctx) { - struct path my_path = { - .dentry = dentry, - }; - struct path *path = &my_path; + struct dentry *de = dentry; struct nfs4_state *state; struct rpc_cred *cred = NULL; fmode_t fmode = 0; @@ -2627,11 +2643,11 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, if (ctx != NULL) { cred = ctx->cred; - path = &ctx->path; + de = ctx->dentry; fmode = ctx->mode; } sattr->ia_mode &= ~current_umask(); - state = nfs4_do_open(dir, path, fmode, flags, sattr, cred); + state = nfs4_do_open(dir, de, fmode, flags, sattr, cred); d_drop(dentry); if (IS_ERR(state)) { status = PTR_ERR(state); @@ -2642,7 +2658,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, if (ctx != NULL) ctx->state = state; else - nfs4_close_sync(path, state, fmode); + 
nfs4_close_sync(state, fmode); out: return status; } @@ -4294,7 +4310,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) memcpy(data->lsp->ls_stateid.data, data->res.stateid.data, sizeof(data->lsp->ls_stateid.data)); data->lsp->ls_flags |= NFS_LOCK_INITIALIZED; - renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp); + renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp); } out: dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status); @@ -4443,6 +4459,20 @@ out: return err; } +#if defined(CONFIG_NFS_V4_1) +static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request) +{ + int status; + struct nfs_server *server = NFS_SERVER(state->inode); + + status = nfs41_test_stateid(server, state); + if (status == NFS_OK) + return 0; + nfs41_free_stateid(server, state); + return nfs4_lock_expired(state, request); +} +#endif + static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) { struct nfs_inode *nfsi = NFS_I(state->inode); @@ -4781,6 +4811,16 @@ out_inval: return -NFS4ERR_INVAL; } +static bool +nfs41_same_server_scope(struct server_scope *a, struct server_scope *b) +{ + if (a->server_scope_sz == b->server_scope_sz && + memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0) + return true; + + return false; +} + /* * nfs4_proc_exchange_id() * @@ -4823,9 +4863,31 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) init_utsname()->domainname, clp->cl_rpcclient->cl_auth->au_flavor); + res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL); + if (unlikely(!res.server_scope)) + return -ENOMEM; + status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); if (!status) status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); + + if (!status) { + if (clp->server_scope && + !nfs41_same_server_scope(clp->server_scope, + res.server_scope)) { + dprintk("%s: server_scope mismatch detected\n", + __func__); + 
set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state); + kfree(clp->server_scope); + clp->server_scope = NULL; + } + + if (!clp->server_scope) + clp->server_scope = res.server_scope; + else + kfree(res.server_scope); + } + dprintk("<-- %s status= %d\n", __func__, status); return status; } @@ -5706,7 +5768,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) { struct nfs4_layoutreturn *lrp = calldata; struct nfs_server *server; - struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout; + struct pnfs_layout_hdr *lo = lrp->args.layout; dprintk("--> %s\n", __func__); @@ -5735,7 +5797,7 @@ static void nfs4_layoutreturn_release(void *calldata) struct nfs4_layoutreturn *lrp = calldata; dprintk("--> %s\n", __func__); - put_layout_hdr(NFS_I(lrp->args.inode)->layout); + put_layout_hdr(lrp->args.layout); kfree(calldata); dprintk("<-- %s\n", __func__); } @@ -5903,6 +5965,143 @@ out: rpc_put_task(task); return status; } + +static int +_nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors) +{ + struct nfs41_secinfo_no_name_args args = { + .style = SECINFO_STYLE_CURRENT_FH, + }; + struct nfs4_secinfo_res res = { + .flavors = flavors, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SECINFO_NO_NAME], + .rpc_argp = &args, + .rpc_resp = &res, + }; + return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); +} + +static int +nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = _nfs41_proc_secinfo_no_name(server, fhandle, info, flavors); + switch (err) { + case 0: + case -NFS4ERR_WRONGSEC: + case -NFS4ERR_NOTSUPP: + break; + default: + err = nfs4_handle_exception(server, err, &exception); + } + } while (exception.retry); + return err; +} + +static int 
+nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + int err; + struct page *page; + rpc_authflavor_t flavor; + struct nfs4_secinfo_flavors *flavors; + + page = alloc_page(GFP_KERNEL); + if (!page) { + err = -ENOMEM; + goto out; + } + + flavors = page_address(page); + err = nfs41_proc_secinfo_no_name(server, fhandle, info, flavors); + + /* + * Fall back on "guess and check" method if + * the server doesn't support SECINFO_NO_NAME + */ + if (err == -NFS4ERR_WRONGSEC || err == -NFS4ERR_NOTSUPP) { + err = nfs4_find_root_sec(server, fhandle, info); + goto out_freepage; + } + if (err) + goto out_freepage; + + flavor = nfs_find_best_sec(flavors); + if (err == 0) + err = nfs4_lookup_root_sec(server, fhandle, info, flavor); + +out_freepage: + put_page(page); + if (err == -EACCES) + return -EPERM; +out: + return err; +} +static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state) +{ + int status; + struct nfs41_test_stateid_args args = { + .stateid = &state->stateid, + }; + struct nfs41_test_stateid_res res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_TEST_STATEID], + .rpc_argp = &args, + .rpc_resp = &res, + }; + args.seq_args.sa_session = res.seq_res.sr_session = NULL; + status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1); + return status; +} + +static int nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs41_test_stateid(server, state), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *state) +{ + int status; + struct nfs41_free_stateid_args args = { + .stateid = &state->stateid, + }; + struct nfs41_free_stateid_res res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FREE_STATEID], + 
.rpc_argp = &args, + .rpc_resp = &res, + }; + + args.seq_args.sa_session = res.seq_res.sr_session = NULL; + status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1); + return status; +} + +static int nfs41_free_stateid(struct nfs_server *server, struct nfs4_state *state) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs4_free_stateid(server, state), + &exception); + } while (exception.retry); + return err; +} #endif /* CONFIG_NFS_V4_1 */ struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { @@ -5939,8 +6138,8 @@ struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, - .recover_open = nfs4_open_expired, - .recover_lock = nfs4_lock_expired, + .recover_open = nfs41_open_expired, + .recover_lock = nfs41_lock_expired, .establish_clid = nfs41_init_clientid, .get_clid_cred = nfs4_get_exchange_id_cred, }; @@ -5964,6 +6163,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { .minor_version = 0, .call_sync = _nfs4_call_sync, .validate_stateid = nfs4_validate_delegation_stateid, + .find_root_sec = nfs4_find_root_sec, .reboot_recovery_ops = &nfs40_reboot_recovery_ops, .nograce_recovery_ops = &nfs40_nograce_recovery_ops, .state_renewal_ops = &nfs40_state_renewal_ops, @@ -5974,6 +6174,7 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .minor_version = 1, .call_sync = _nfs4_call_sync_session, .validate_stateid = nfs41_validate_delegation_stateid, + .find_root_sec = nfs41_find_root_sec, .reboot_recovery_ops = &nfs41_reboot_recovery_ops, .nograce_recovery_ops = &nfs41_nograce_recovery_ops, .state_renewal_ops = &nfs41_state_renewal_ops, diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index e97dd219f84..72ab97ef3d6 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -641,7 
+641,7 @@ void nfs4_put_open_state(struct nfs4_state *state) /* * Close the current file. */ -static void __nfs4_close(struct path *path, struct nfs4_state *state, +static void __nfs4_close(struct nfs4_state *state, fmode_t fmode, gfp_t gfp_mask, int wait) { struct nfs4_state_owner *owner = state->owner; @@ -685,18 +685,18 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state, } else { bool roc = pnfs_roc(state->inode); - nfs4_do_close(path, state, gfp_mask, wait, roc); + nfs4_do_close(state, gfp_mask, wait, roc); } } -void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode) +void nfs4_close_state(struct nfs4_state *state, fmode_t fmode) { - __nfs4_close(path, state, fmode, GFP_NOFS, 0); + __nfs4_close(state, fmode, GFP_NOFS, 0); } -void nfs4_close_sync(struct path *path, struct nfs4_state *state, fmode_t fmode) +void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode) { - __nfs4_close(path, state, fmode, GFP_KERNEL, 1); + __nfs4_close(state, fmode, GFP_KERNEL, 1); } /* @@ -1643,7 +1643,14 @@ static void nfs4_state_manager(struct nfs_client *clp) goto out_error; } clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); - set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); + + if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, + &clp->cl_state)) + nfs4_state_start_reclaim_nograce(clp); + else + set_bit(NFS4CLNT_RECLAIM_REBOOT, + &clp->cl_state); + pnfs_destroy_all_layouts(clp); } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 6870bc61cee..c191a9baa42 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -91,7 +91,7 @@ static int nfs4_stat_to_errno(int); #define encode_getfh_maxsz (op_encode_hdr_maxsz) #define decode_getfh_maxsz (op_decode_hdr_maxsz + 1 + \ ((3+NFS4_FHSIZE) >> 2)) -#define nfs4_fattr_bitmap_maxsz 3 +#define nfs4_fattr_bitmap_maxsz 4 #define encode_getattr_maxsz (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) #define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2)) #define nfs4_path_maxsz (1 + ((3 
+ NFS4_MAXPATHLEN) >> 2)) @@ -343,6 +343,14 @@ static int nfs4_stat_to_errno(int); 1 /* FIXME: opaque lrf_body always empty at the moment */) #define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \ 1 + decode_stateid_maxsz) +#define encode_secinfo_no_name_maxsz (op_encode_hdr_maxsz + 1) +#define decode_secinfo_no_name_maxsz decode_secinfo_maxsz +#define encode_test_stateid_maxsz (op_encode_hdr_maxsz + 2 + \ + XDR_QUADLEN(NFS4_STATEID_SIZE)) +#define decode_test_stateid_maxsz (op_decode_hdr_maxsz + 2 + 1) +#define encode_free_stateid_maxsz (op_encode_hdr_maxsz + 1 + \ + XDR_QUADLEN(NFS4_STATEID_SIZE)) +#define decode_free_stateid_maxsz (op_decode_hdr_maxsz + 1) #else /* CONFIG_NFS_V4_1 */ #define encode_sequence_maxsz 0 #define decode_sequence_maxsz 0 @@ -772,6 +780,26 @@ static int nfs4_stat_to_errno(int); decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_layoutreturn_maxsz) +#define NFS4_enc_secinfo_no_name_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putrootfh_maxsz +\ + encode_secinfo_no_name_maxsz) +#define NFS4_dec_secinfo_no_name_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putrootfh_maxsz + \ + decode_secinfo_no_name_maxsz) +#define NFS4_enc_test_stateid_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_test_stateid_maxsz) +#define NFS4_dec_test_stateid_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_test_stateid_maxsz) +#define NFS4_enc_free_stateid_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_free_stateid_maxsz) +#define NFS4_dec_free_stateid_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_free_stateid_maxsz) const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + compound_encode_hdr_maxsz + @@ -1938,6 +1966,46 @@ encode_layoutreturn(struct xdr_stream *xdr, hdr->nops++; hdr->replen += decode_layoutreturn_maxsz; } + +static int +encode_secinfo_no_name(struct xdr_stream *xdr, + const struct 
nfs41_secinfo_no_name_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + p = reserve_space(xdr, 8); + *p++ = cpu_to_be32(OP_SECINFO_NO_NAME); + *p++ = cpu_to_be32(args->style); + hdr->nops++; + hdr->replen += decode_secinfo_no_name_maxsz; + return 0; +} + +static void encode_test_stateid(struct xdr_stream *xdr, + struct nfs41_test_stateid_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 8 + NFS4_STATEID_SIZE); + *p++ = cpu_to_be32(OP_TEST_STATEID); + *p++ = cpu_to_be32(1); + xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); + hdr->nops++; + hdr->replen += decode_test_stateid_maxsz; +} + +static void encode_free_stateid(struct xdr_stream *xdr, + struct nfs41_free_stateid_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + p = reserve_space(xdr, 4 + NFS4_STATEID_SIZE); + *p++ = cpu_to_be32(OP_FREE_STATEID); + xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); + hdr->nops++; + hdr->replen += decode_free_stateid_maxsz; +} #endif /* CONFIG_NFS_V4_1 */ /* @@ -2790,6 +2858,59 @@ static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req, encode_layoutreturn(xdr, args, &hdr); encode_nops(&hdr); } + +/* + * Encode SECINFO_NO_NAME request + */ +static int nfs4_xdr_enc_secinfo_no_name(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs41_secinfo_no_name_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putrootfh(xdr, &hdr); + encode_secinfo_no_name(xdr, args, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * Encode TEST_STATEID request + */ +static void nfs4_xdr_enc_test_stateid(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs41_test_stateid_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, 
&args->seq_args, &hdr); + encode_test_stateid(xdr, args, &hdr); + encode_nops(&hdr); +} + +/* + * Encode FREE_STATEID request + */ +static void nfs4_xdr_enc_free_stateid(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs41_free_stateid_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_free_stateid(xdr, args, &hdr); + encode_nops(&hdr); +} #endif /* CONFIG_NFS_V4_1 */ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) @@ -4977,11 +5098,17 @@ static int decode_exchange_id(struct xdr_stream *xdr, if (unlikely(status)) return status; - /* Throw away server_scope */ + /* Save server_scope */ status = decode_opaque_inline(xdr, &dummy, &dummy_str); if (unlikely(status)) return status; + if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) + return -EIO; + + memcpy(res->server_scope->server_scope, dummy_str, dummy); + res->server_scope->server_scope_sz = dummy; + /* Throw away Implementation id array */ status = decode_opaque_inline(xdr, &dummy, &dummy_str); if (unlikely(status)) @@ -5322,6 +5449,55 @@ out_overflow: print_overflow_msg(__func__, xdr); return -EIO; } + +static int decode_test_stateid(struct xdr_stream *xdr, + struct nfs41_test_stateid_res *res) +{ + __be32 *p; + int status; + int num_res; + + status = decode_op_hdr(xdr, OP_TEST_STATEID); + if (status) + return status; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + num_res = be32_to_cpup(p++); + if (num_res != 1) + goto out; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + res->status = be32_to_cpup(p++); + return res->status; +out_overflow: + print_overflow_msg(__func__, xdr); +out: + return -EIO; +} + +static int decode_free_stateid(struct xdr_stream *xdr, + struct nfs41_free_stateid_res *res) +{ + __be32 *p; + int status; + + status = decode_op_hdr(xdr, 
OP_FREE_STATEID); + if (status) + return status; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + res->status = be32_to_cpup(p++); + return res->status; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} #endif /* CONFIG_NFS_V4_1 */ /* @@ -6461,6 +6637,72 @@ static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp, out: return status; } + +/* + * Decode SECINFO_NO_NAME response + */ +static int nfs4_xdr_dec_secinfo_no_name(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs4_secinfo_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putrootfh(xdr); + if (status) + goto out; + status = decode_secinfo(xdr, res); +out: + return status; +} + +/* + * Decode TEST_STATEID response + */ +static int nfs4_xdr_dec_test_stateid(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs41_test_stateid_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_test_stateid(xdr, res); +out: + return status; +} + +/* + * Decode FREE_STATEID response + */ +static int nfs4_xdr_dec_free_stateid(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs41_free_stateid_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_free_stateid(xdr, res); +out: + return status; +} #endif /* CONFIG_NFS_V4_1 */ /** @@ -6663,6 +6905,9 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(LAYOUTGET, enc_layoutget, dec_layoutget), PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), PROC(LAYOUTRETURN, enc_layoutreturn, 
dec_layoutreturn), + PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name), + PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid), + PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid), #endif /* CONFIG_NFS_V4_1 */ }; diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 8ff2ea3f10e..9383ca7245b 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -1000,13 +1000,22 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, if (!pnfs_generic_pg_test(pgio, prev, req)) return false; - if (pgio->pg_lseg == NULL) - return true; - return pgio->pg_count + req->wb_bytes <= OBJIO_LSEG(pgio->pg_lseg)->max_io_size; } +static const struct nfs_pageio_ops objio_pg_read_ops = { + .pg_init = pnfs_generic_pg_init_read, + .pg_test = objio_pg_test, + .pg_doio = pnfs_generic_pg_readpages, +}; + +static const struct nfs_pageio_ops objio_pg_write_ops = { + .pg_init = pnfs_generic_pg_init_write, + .pg_test = objio_pg_test, + .pg_doio = pnfs_generic_pg_writepages, +}; + static struct pnfs_layoutdriver_type objlayout_type = { .id = LAYOUT_OSD2_OBJECTS, .name = "LAYOUT_OSD2_OBJECTS", @@ -1020,7 +1029,8 @@ static struct pnfs_layoutdriver_type objlayout_type = { .read_pagelist = objlayout_read_pagelist, .write_pagelist = objlayout_write_pagelist, - .pg_test = objio_pg_test, + .pg_read_ops = &objio_pg_read_ops, + .pg_write_ops = &objio_pg_write_ops, .free_deviceid_node = objio_free_deviceid_node, @@ -1055,5 +1065,7 @@ objlayout_exit(void) __func__); } +MODULE_ALIAS("nfs-layouttype4-2"); + module_init(objlayout_init); module_exit(objlayout_exit); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 00985571628..b60970cc7f1 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -114,7 +114,7 @@ int nfs_set_page_tag_locked(struct nfs_page *req) if (!nfs_lock_request_dontget(req)) return 0; if (test_bit(PG_MAPPED, &req->wb_flags)) - radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, 
req->wb_index, NFS_PAGE_TAG_LOCKED); + radix_tree_tag_set(&NFS_I(req->wb_context->dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); return 1; } @@ -124,7 +124,7 @@ int nfs_set_page_tag_locked(struct nfs_page *req) void nfs_clear_page_tag_locked(struct nfs_page *req) { if (test_bit(PG_MAPPED, &req->wb_flags)) { - struct inode *inode = req->wb_context->path.dentry->d_inode; + struct inode *inode = req->wb_context->dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&inode->i_lock); @@ -230,7 +230,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_pg_test); */ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, - int (*doio)(struct nfs_pageio_descriptor *), + const struct nfs_pageio_ops *pg_ops, size_t bsize, int io_flags) { @@ -240,13 +240,12 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_bsize = bsize; desc->pg_base = 0; desc->pg_moreio = 0; + desc->pg_recoalesce = 0; desc->pg_inode = inode; - desc->pg_doio = doio; + desc->pg_ops = pg_ops; desc->pg_ioflags = io_flags; desc->pg_error = 0; desc->pg_lseg = NULL; - desc->pg_test = nfs_generic_pg_test; - pnfs_pageio_init(desc, inode); } /** @@ -276,7 +275,7 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, return false; if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) return false; - return pgio->pg_test(pgio, prev, req); + return pgio->pg_ops->pg_test(pgio, prev, req); } /** @@ -297,6 +296,8 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, if (!nfs_can_coalesce_requests(prev, req, desc)) return 0; } else { + if (desc->pg_ops->pg_init) + desc->pg_ops->pg_init(desc, req); desc->pg_base = req->wb_pgbase; } nfs_list_remove_request(req); @@ -311,7 +312,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) { if (!list_empty(&desc->pg_list)) { - int error = desc->pg_doio(desc); + int error = desc->pg_ops->pg_doio(desc); if 
(error < 0) desc->pg_error = error; else @@ -331,7 +332,7 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) * Returns true if the request 'req' was successfully coalesced into the * existing list of pages 'desc'. */ -int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, +static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { while (!nfs_pageio_do_add_request(desc, req)) { @@ -340,17 +341,67 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, if (desc->pg_error < 0) return 0; desc->pg_moreio = 0; + if (desc->pg_recoalesce) + return 0; } return 1; } +static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) +{ + LIST_HEAD(head); + + do { + list_splice_init(&desc->pg_list, &head); + desc->pg_bytes_written -= desc->pg_count; + desc->pg_count = 0; + desc->pg_base = 0; + desc->pg_recoalesce = 0; + + while (!list_empty(&head)) { + struct nfs_page *req; + + req = list_first_entry(&head, struct nfs_page, wb_list); + nfs_list_remove_request(req); + if (__nfs_pageio_add_request(desc, req)) + continue; + if (desc->pg_error < 0) + return 0; + break; + } + } while (desc->pg_recoalesce); + return 1; +} + +int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, + struct nfs_page *req) +{ + int ret; + + do { + ret = __nfs_pageio_add_request(desc, req); + if (ret) + break; + if (desc->pg_error < 0) + break; + ret = nfs_do_recoalesce(desc); + } while (ret); + return ret; +} + /** * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor * @desc: pointer to io descriptor */ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) { - nfs_pageio_doio(desc); + for (;;) { + nfs_pageio_doio(desc); + if (!desc->pg_recoalesce) + break; + if (!nfs_do_recoalesce(desc)) + break; + } } /** @@ -369,7 +420,7 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) if (!list_empty(&desc->pg_list)) { struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev); if 
(index != prev->wb_index + 1) - nfs_pageio_doio(desc); + nfs_pageio_complete(desc); } } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 29c0ca7fc34..38e5508555c 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -28,6 +28,7 @@ */ #include <linux/nfs_fs.h> +#include <linux/nfs_page.h> #include "internal.h" #include "pnfs.h" #include "iostat.h" @@ -448,11 +449,20 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) void pnfs_destroy_all_layouts(struct nfs_client *clp) { + struct nfs_server *server; struct pnfs_layout_hdr *lo; LIST_HEAD(tmp_list); + nfs4_deviceid_mark_client_invalid(clp); + nfs4_deviceid_purge_client(clp); + spin_lock(&clp->cl_lock); - list_splice_init(&clp->cl_layouts, &tmp_list); + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + if (!list_empty(&server->layouts)) + list_splice_init(&server->layouts, &tmp_list); + } + rcu_read_unlock(); spin_unlock(&clp->cl_lock); while (!list_empty(&tmp_list)) { @@ -661,6 +671,7 @@ _pnfs_return_layout(struct inode *ino) lrp->args.stateid = stateid; lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; lrp->args.inode = ino; + lrp->args.layout = lo; lrp->clp = NFS_SERVER(ino)->nfs_client; status = nfs4_proc_layoutreturn(lrp); @@ -920,7 +931,8 @@ pnfs_update_layout(struct inode *ino, }; unsigned pg_offset; struct nfs_inode *nfsi = NFS_I(ino); - struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; + struct nfs_server *server = NFS_SERVER(ino); + struct nfs_client *clp = server->nfs_client; struct pnfs_layout_hdr *lo; struct pnfs_layout_segment *lseg = NULL; bool first = false; @@ -964,7 +976,7 @@ pnfs_update_layout(struct inode *ino, */ spin_lock(&clp->cl_lock); BUG_ON(!list_empty(&lo->plh_layouts)); - list_add_tail(&lo->plh_layouts, &clp->cl_layouts); + list_add_tail(&lo->plh_layouts, &server->layouts); spin_unlock(&clp->cl_lock); } @@ -973,7 +985,8 @@ pnfs_update_layout(struct inode *ino, arg.offset -= pg_offset; arg.length += pg_offset; } - arg.length = 
PAGE_CACHE_ALIGN(arg.length); + if (arg.length != NFS4_MAX_UINT64) + arg.length = PAGE_CACHE_ALIGN(arg.length); lseg = send_layoutget(lo, ctx, &arg, gfp_flags); if (!lseg && first) { @@ -991,6 +1004,7 @@ out_unlock: spin_unlock(&ino->i_lock); goto out; } +EXPORT_SYMBOL_GPL(pnfs_update_layout); int pnfs_layout_process(struct nfs4_layoutget *lgp) @@ -1048,35 +1062,71 @@ out_forget_reply: goto out; } +void +pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + BUG_ON(pgio->pg_lseg != NULL); + + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + req->wb_context, + req_offset(req), + req->wb_bytes, + IOMODE_READ, + GFP_KERNEL); + /* If no lseg, fall back to read through mds */ + if (pgio->pg_lseg == NULL) + nfs_pageio_reset_read_mds(pgio); + +} +EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read); + +void +pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + BUG_ON(pgio->pg_lseg != NULL); + + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + req->wb_context, + req_offset(req), + req->wb_bytes, + IOMODE_RW, + GFP_NOFS); + /* If no lseg, fall back to write through mds */ + if (pgio->pg_lseg == NULL) + nfs_pageio_reset_write_mds(pgio); +} +EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); + bool -pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, - struct nfs_page *req) +pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) { - enum pnfs_iomode access_type; - gfp_t gfp_flags; + struct nfs_server *server = NFS_SERVER(inode); + struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; - /* We assume that pg_ioflags == 0 iff we're reading a page */ - if (pgio->pg_ioflags == 0) { - access_type = IOMODE_READ; - gfp_flags = GFP_KERNEL; - } else { - access_type = IOMODE_RW; - gfp_flags = GFP_NOFS; - } + if (ld == NULL) + return false; + nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0); + return true; +} - if (pgio->pg_lseg == NULL) { - if 
(pgio->pg_count != prev->wb_bytes) - return true; - /* This is first coelesce call for a series of nfs_pages */ - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, - prev->wb_context, - req_offset(prev), - pgio->pg_count, - access_type, - gfp_flags); - if (pgio->pg_lseg == NULL) - return true; - } +bool +pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; + + if (ld == NULL) + return false; + nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags); + return true; +} + +bool +pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, + struct nfs_page *req) +{ + if (pgio->pg_lseg == NULL) + return nfs_generic_pg_test(pgio, prev, req); /* * Test if a nfs_page is fully contained in the pnfs_layout_range. @@ -1120,15 +1170,30 @@ pnfs_ld_write_done(struct nfs_write_data *data) } EXPORT_SYMBOL_GPL(pnfs_ld_write_done); -enum pnfs_try_status +static void +pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, + struct nfs_write_data *data) +{ + list_splice_tail_init(&data->pages, &desc->pg_list); + if (data->req && list_empty(&data->req->wb_list)) + nfs_list_add_request(data->req, &desc->pg_list); + nfs_pageio_reset_write_mds(desc); + desc->pg_recoalesce = 1; + nfs_writedata_release(data); +} + +static enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *wdata, - const struct rpc_call_ops *call_ops, int how) + const struct rpc_call_ops *call_ops, + struct pnfs_layout_segment *lseg, + int how) { struct inode *inode = wdata->inode; enum pnfs_try_status trypnfs; struct nfs_server *nfss = NFS_SERVER(inode); wdata->mds_ops = call_ops; + wdata->lseg = get_lseg(lseg); dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, inode->i_ino, wdata->args.count, wdata->args.offset, how); @@ -1144,6 +1209,44 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata, return trypnfs; 
} +static void +pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how) +{ + struct nfs_write_data *data; + const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; + struct pnfs_layout_segment *lseg = desc->pg_lseg; + + desc->pg_lseg = NULL; + while (!list_empty(head)) { + enum pnfs_try_status trypnfs; + + data = list_entry(head->next, struct nfs_write_data, list); + list_del_init(&data->list); + + trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); + if (trypnfs == PNFS_NOT_ATTEMPTED) + pnfs_write_through_mds(desc, data); + } + put_lseg(lseg); +} + +int +pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) +{ + LIST_HEAD(head); + int ret; + + ret = nfs_generic_flush(desc, &head); + if (ret != 0) { + put_lseg(desc->pg_lseg); + desc->pg_lseg = NULL; + return ret; + } + pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags); + return 0; +} +EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); + /* * Called by non rpc-based layout drivers */ @@ -1167,18 +1270,32 @@ pnfs_ld_read_done(struct nfs_read_data *data) } EXPORT_SYMBOL_GPL(pnfs_ld_read_done); +static void +pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, + struct nfs_read_data *data) +{ + list_splice_tail_init(&data->pages, &desc->pg_list); + if (data->req && list_empty(&data->req->wb_list)) + nfs_list_add_request(data->req, &desc->pg_list); + nfs_pageio_reset_read_mds(desc); + desc->pg_recoalesce = 1; + nfs_readdata_release(data); +} + /* * Call the appropriate parallel I/O subsystem read function. 
*/ -enum pnfs_try_status +static enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *rdata, - const struct rpc_call_ops *call_ops) + const struct rpc_call_ops *call_ops, + struct pnfs_layout_segment *lseg) { struct inode *inode = rdata->inode; struct nfs_server *nfss = NFS_SERVER(inode); enum pnfs_try_status trypnfs; rdata->mds_ops = call_ops; + rdata->lseg = get_lseg(lseg); dprintk("%s: Reading ino:%lu %u@%llu\n", __func__, inode->i_ino, rdata->args.count, rdata->args.offset); @@ -1194,6 +1311,44 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata, return trypnfs; } +static void +pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head) +{ + struct nfs_read_data *data; + const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; + struct pnfs_layout_segment *lseg = desc->pg_lseg; + + desc->pg_lseg = NULL; + while (!list_empty(head)) { + enum pnfs_try_status trypnfs; + + data = list_entry(head->next, struct nfs_read_data, list); + list_del_init(&data->list); + + trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); + if (trypnfs == PNFS_NOT_ATTEMPTED) + pnfs_read_through_mds(desc, data); + } + put_lseg(lseg); +} + +int +pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) +{ + LIST_HEAD(head); + int ret; + + ret = nfs_generic_pagein(desc, &head); + if (ret != 0) { + put_lseg(desc->pg_lseg); + desc->pg_lseg = NULL; + return ret; + } + pnfs_do_multiple_reads(desc, &head); + return 0; +} +EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); + /* * Currently there is only one (whole file) write lseg. 
*/ diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 96bf4e6f45b..078670dfbe0 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -87,7 +87,8 @@ struct pnfs_layoutdriver_type { void (*free_lseg) (struct pnfs_layout_segment *lseg); /* test for nfs page cache coalescing */ - bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); + const struct nfs_pageio_ops *pg_read_ops; + const struct nfs_pageio_ops *pg_write_ops; /* Returns true if layoutdriver wants to divert this request to * driver's commit routine. @@ -148,16 +149,16 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); /* pnfs.c */ void get_layout_hdr(struct pnfs_layout_hdr *lo); void put_lseg(struct pnfs_layout_segment *lseg); -struct pnfs_layout_segment * -pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, - loff_t pos, u64 count, enum pnfs_iomode access_type, - gfp_t gfp_flags); + +bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *); +bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int); + void set_pnfs_layoutdriver(struct nfs_server *, u32 id); void unset_pnfs_layoutdriver(struct nfs_server *); -enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, - const struct rpc_call_ops *, int); -enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *, - const struct rpc_call_ops *); +void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); +int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); +void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *); +int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); int pnfs_layout_process(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); @@ -182,6 +183,19 @@ int pnfs_layoutcommit_inode(struct inode *inode, bool sync); int 
_pnfs_return_layout(struct inode *); int pnfs_ld_write_done(struct nfs_write_data *); int pnfs_ld_read_done(struct nfs_read_data *); +struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, + struct nfs_open_context *ctx, + loff_t pos, + u64 count, + enum pnfs_iomode iomode, + gfp_t gfp_flags); + +void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); + +/* nfs4_deviceid_flags */ +enum { + NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */ +}; /* pnfs_dev.c */ struct nfs4_deviceid_node { @@ -189,13 +203,13 @@ struct nfs4_deviceid_node { struct hlist_node tmpnode; const struct pnfs_layoutdriver_type *ld; const struct nfs_client *nfs_client; + unsigned long flags; struct nfs4_deviceid deviceid; atomic_t ref; }; void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); -struct nfs4_deviceid_node *nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, const struct pnfs_layoutdriver_type *, @@ -293,15 +307,6 @@ static inline int pnfs_return_layout(struct inode *ino) return 0; } -static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio, - struct inode *inode) -{ - struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; - - if (ld) - pgio->pg_test = ld->pg_test; -} - #else /* CONFIG_NFS_V4_1 */ static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) @@ -322,28 +327,6 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg) { } -static inline struct pnfs_layout_segment * -pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, - loff_t pos, u64 count, enum pnfs_iomode access_type, - gfp_t 
gfp_flags) -{ - return NULL; -} - -static inline enum pnfs_try_status -pnfs_try_to_read_data(struct nfs_read_data *data, - const struct rpc_call_ops *call_ops) -{ - return PNFS_NOT_ATTEMPTED; -} - -static inline enum pnfs_try_status -pnfs_try_to_write_data(struct nfs_write_data *data, - const struct rpc_call_ops *call_ops, int how) -{ - return PNFS_NOT_ATTEMPTED; -} - static inline int pnfs_return_layout(struct inode *ino) { return 0; @@ -385,9 +368,14 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s) { } -static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio, - struct inode *inode) +static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) { + return false; +} + +static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) +{ + return false; } static inline void diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index f0f8e1e22f6..6fda5228ef5 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -100,8 +100,8 @@ _find_get_deviceid(const struct pnfs_layoutdriver_type *ld, rcu_read_lock(); d = _lookup_deviceid(ld, clp, id, hash); - if (d && !atomic_inc_not_zero(&d->ref)) - d = NULL; + if (d != NULL) + atomic_inc(&d->ref); rcu_read_unlock(); return d; } @@ -115,15 +115,15 @@ nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld, EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); /* - * Unhash and put deviceid + * Remove a deviceid from cache * * @clp nfs_client associated with deviceid * @id the deviceid to unhash * * @ret the unhashed node, if found and dereferenced to zero, NULL otherwise. 
*/ -struct nfs4_deviceid_node * -nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld, +void +nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld, const struct nfs_client *clp, const struct nfs4_deviceid *id) { struct nfs4_deviceid_node *d; @@ -134,7 +134,7 @@ nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld, rcu_read_unlock(); if (!d) { spin_unlock(&nfs4_deviceid_lock); - return NULL; + return; } hlist_del_init_rcu(&d->node); spin_unlock(&nfs4_deviceid_lock); @@ -142,28 +142,7 @@ nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld, /* balance the initial ref set in pnfs_insert_deviceid */ if (atomic_dec_and_test(&d->ref)) - return d; - - return NULL; -} -EXPORT_SYMBOL_GPL(nfs4_unhash_put_deviceid); - -/* - * Delete a deviceid from cache - * - * @clp struct nfs_client qualifying the deviceid - * @id deviceid to delete - */ -void -nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld, - const struct nfs_client *clp, const struct nfs4_deviceid *id) -{ - struct nfs4_deviceid_node *d; - - d = nfs4_unhash_put_deviceid(ld, clp, id); - if (!d) - return; - d->ld->free_deviceid_node(d); + d->ld->free_deviceid_node(d); } EXPORT_SYMBOL_GPL(nfs4_delete_deviceid); @@ -177,6 +156,7 @@ nfs4_init_deviceid_node(struct nfs4_deviceid_node *d, INIT_HLIST_NODE(&d->tmpnode); d->ld = ld; d->nfs_client = nfs_client; + d->flags = 0; d->deviceid = *id; atomic_set(&d->ref, 1); } @@ -221,16 +201,15 @@ EXPORT_SYMBOL_GPL(nfs4_insert_deviceid_node); * * @d deviceid node to put * - * @ret true iff the node was deleted + * return true iff the node was deleted + * Note that since the test for d->ref == 0 is sufficient to establish + * that the node is no longer hashed in the global device id cache. 
*/ bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) { - if (!atomic_dec_and_lock(&d->ref, &nfs4_deviceid_lock)) + if (!atomic_dec_and_test(&d->ref)) return false; - hlist_del_init_rcu(&d->node); - spin_unlock(&nfs4_deviceid_lock); - synchronize_rcu(); d->ld->free_deviceid_node(d); return true; } @@ -275,3 +254,22 @@ nfs4_deviceid_purge_client(const struct nfs_client *clp) for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) _deviceid_purge_client(clp, h); } + +/* + * Stop use of all deviceids associated with an nfs_client + */ +void +nfs4_deviceid_mark_client_invalid(struct nfs_client *clp) +{ + struct nfs4_deviceid_node *d; + struct hlist_node *n; + int i; + + rcu_read_lock(); + for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i ++){ + hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[i], node) + if (d->nfs_client == clp) + set_bit(NFS_DEVICEID_INVALID, &d->flags); + } + rcu_read_unlock(); +} diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 20a7f952e24..2171c043ab0 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -30,8 +30,7 @@ #define NFSDBG_FACILITY NFSDBG_PAGECACHE -static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc); -static int nfs_pagein_one(struct nfs_pageio_descriptor *desc); +static const struct nfs_pageio_ops nfs_pageio_read_ops; static const struct rpc_call_ops nfs_read_partial_ops; static const struct rpc_call_ops nfs_read_full_ops; @@ -68,7 +67,7 @@ void nfs_readdata_free(struct nfs_read_data *p) mempool_free(p, nfs_rdata_mempool); } -static void nfs_readdata_release(struct nfs_read_data *rdata) +void nfs_readdata_release(struct nfs_read_data *rdata) { put_lseg(rdata->lseg); put_nfs_open_context(rdata->args.context); @@ -113,6 +112,27 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) } } +static void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, + struct inode *inode) +{ + nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, + NFS_SERVER(inode)->rsize, 0); +} + +void 
nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) +{ + pgio->pg_ops = &nfs_pageio_read_ops; + pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; +} +EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); + +static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, + struct inode *inode) +{ + if (!pnfs_pageio_init_read(pgio, inode)) + nfs_pageio_init_read_mds(pgio, inode); +} + int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, struct page *page) { @@ -131,20 +151,15 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, if (len < PAGE_CACHE_SIZE) zero_user_segment(page, len, PAGE_CACHE_SIZE); - nfs_pageio_init(&pgio, inode, NULL, 0, 0); - nfs_list_add_request(new, &pgio.pg_list); - pgio.pg_count = len; - - if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) - nfs_pagein_multi(&pgio); - else - nfs_pagein_one(&pgio); + nfs_pageio_init_read(&pgio, inode); + nfs_pageio_add_request(&pgio, new); + nfs_pageio_complete(&pgio); return 0; } static void nfs_readpage_release(struct nfs_page *req) { - struct inode *d_inode = req->wb_context->path.dentry->d_inode; + struct inode *d_inode = req->wb_context->dentry->d_inode; if (PageUptodate(req->wb_page)) nfs_readpage_to_fscache(d_inode, req->wb_page, 0); @@ -152,8 +167,8 @@ static void nfs_readpage_release(struct nfs_page *req) unlock_page(req->wb_page); dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", - req->wb_context->path.dentry->d_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), + req->wb_context->dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); nfs_release_request(req); @@ -202,17 +217,14 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read); /* * Set up the NFS read request struct */ -static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, - const struct rpc_call_ops *call_ops, - unsigned int count, unsigned int offset, - struct pnfs_layout_segment 
*lseg) +static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, + unsigned int count, unsigned int offset) { - struct inode *inode = req->wb_context->path.dentry->d_inode; + struct inode *inode = req->wb_context->dentry->d_inode; data->req = req; data->inode = inode; data->cred = req->wb_context->cred; - data->lseg = get_lseg(lseg); data->args.fh = NFS_FH(inode); data->args.offset = req_offset(req) + offset; @@ -226,14 +238,36 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, data->res.count = count; data->res.eof = 0; nfs_fattr_init(&data->fattr); +} - if (data->lseg && - (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED)) - return 0; +static int nfs_do_read(struct nfs_read_data *data, + const struct rpc_call_ops *call_ops) +{ + struct inode *inode = data->args.context->dentry->d_inode; return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops); } +static int +nfs_do_multiple_reads(struct list_head *head, + const struct rpc_call_ops *call_ops) +{ + struct nfs_read_data *data; + int ret = 0; + + while (!list_empty(head)) { + int ret2; + + data = list_entry(head->next, struct nfs_read_data, list); + list_del_init(&data->list); + + ret2 = nfs_do_read(data, call_ops); + if (ret == 0) + ret = ret2; + } + return ret; +} + static void nfs_async_read_error(struct list_head *head) { @@ -260,20 +294,19 @@ nfs_async_read_error(struct list_head *head) * won't see the new data until our attribute cache is updated. This is more * or less conventional NFS client behavior. 
*/ -static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) +static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) { struct nfs_page *req = nfs_list_entry(desc->pg_list.next); struct page *page = req->wb_page; struct nfs_read_data *data; - size_t rsize = NFS_SERVER(desc->pg_inode)->rsize, nbytes; + size_t rsize = desc->pg_bsize, nbytes; unsigned int offset; int requests = 0; int ret = 0; - struct pnfs_layout_segment *lseg; - LIST_HEAD(list); nfs_list_remove_request(req); + offset = 0; nbytes = desc->pg_count; do { size_t len = min(nbytes,rsize); @@ -281,45 +314,21 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) data = nfs_readdata_alloc(1); if (!data) goto out_bad; - list_add(&data->pages, &list); + data->pagevec[0] = page; + nfs_read_rpcsetup(req, data, len, offset); + list_add(&data->list, res); requests++; nbytes -= len; + offset += len; } while(nbytes != 0); atomic_set(&req->wb_complete, requests); - - BUG_ON(desc->pg_lseg != NULL); - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, - req_offset(req), desc->pg_count, - IOMODE_READ, GFP_KERNEL); ClearPageError(page); - offset = 0; - nbytes = desc->pg_count; - do { - int ret2; - - data = list_entry(list.next, struct nfs_read_data, pages); - list_del_init(&data->pages); - - data->pagevec[0] = page; - - if (nbytes < rsize) - rsize = nbytes; - ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, - rsize, offset, lseg); - if (ret == 0) - ret = ret2; - offset += rsize; - nbytes -= rsize; - } while (nbytes != 0); - put_lseg(lseg); - desc->pg_lseg = NULL; - + desc->pg_rpc_callops = &nfs_read_partial_ops; return ret; - out_bad: - while (!list_empty(&list)) { - data = list_entry(list.next, struct nfs_read_data, pages); - list_del(&data->pages); + while (!list_empty(res)) { + data = list_entry(res->next, struct nfs_read_data, list); + list_del(&data->list); nfs_readdata_free(data); } SetPageError(page); @@ -327,19 +336,19 @@ out_bad: return 
-ENOMEM; } -static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) +static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res) { struct nfs_page *req; struct page **pages; struct nfs_read_data *data; struct list_head *head = &desc->pg_list; - struct pnfs_layout_segment *lseg = desc->pg_lseg; - int ret = -ENOMEM; + int ret = 0; data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base, desc->pg_count)); if (!data) { nfs_async_read_error(head); + ret = -ENOMEM; goto out; } @@ -352,19 +361,37 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) *pages++ = req->wb_page; } req = nfs_list_entry(data->pages.next); - if ((!lseg) && list_is_singular(&data->pages)) - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, - req_offset(req), desc->pg_count, - IOMODE_READ, GFP_KERNEL); - ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count, - 0, lseg); + nfs_read_rpcsetup(req, data, desc->pg_count, 0); + list_add(&data->list, res); + desc->pg_rpc_callops = &nfs_read_full_ops; out: - put_lseg(lseg); - desc->pg_lseg = NULL; return ret; } +int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head) +{ + if (desc->pg_bsize < PAGE_CACHE_SIZE) + return nfs_pagein_multi(desc, head); + return nfs_pagein_one(desc, head); +} + +static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) +{ + LIST_HEAD(head); + int ret; + + ret = nfs_generic_pagein(desc, &head); + if (ret == 0) + ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops); + return ret; +} + +static const struct nfs_pageio_ops nfs_pageio_read_ops = { + .pg_test = nfs_generic_pg_test, + .pg_doio = nfs_generic_pg_readpages, +}; + /* * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). 
@@ -635,8 +662,6 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, .pgio = &pgio, }; struct inode *inode = mapping->host; - struct nfs_server *server = NFS_SERVER(inode); - size_t rsize = server->rsize; unsigned long npages; int ret = -ESTALE; @@ -664,10 +689,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, if (ret == 0) goto read_complete; /* all pages were read */ - if (rsize < PAGE_CACHE_SIZE) - nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); - else - nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0); + nfs_pageio_init_read(&pgio, inode); ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ce40e5c568b..b961ceac66b 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2773,16 +2773,12 @@ static void nfs_referral_loop_unprotect(void) static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt, const char *export_path) { - struct nameidata *nd = NULL; struct mnt_namespace *ns_private; struct super_block *s; struct dentry *dentry; + struct path path; int ret; - nd = kmalloc(sizeof(*nd), GFP_KERNEL); - if (nd == NULL) - return ERR_PTR(-ENOMEM); - ns_private = create_mnt_ns(root_mnt); ret = PTR_ERR(ns_private); if (IS_ERR(ns_private)) @@ -2793,7 +2789,7 @@ static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt, goto out_put_mnt_ns; ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt, - export_path, LOOKUP_FOLLOW, nd); + export_path, LOOKUP_FOLLOW, &path); nfs_referral_loop_unprotect(); put_mnt_ns(ns_private); @@ -2801,12 +2797,11 @@ static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt, if (ret != 0) goto out_err; - s = nd->path.mnt->mnt_sb; + s = path.mnt->mnt_sb; atomic_inc(&s->s_active); - dentry = dget(nd->path.dentry); + dentry = dget(path.dentry); - path_put(&nd->path); - kfree(nd); + path_put(&path); down_write(&s->s_umount); return dentry; out_put_mnt_ns: @@ -2814,7 +2809,6 @@ 
out_put_mnt_ns: out_mntput: mntput(root_mnt); out_err: - kfree(nd); return ERR_PTR(ret); } diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 8d6864c2a5f..b2fbbde58e4 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -147,7 +147,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n alias = d_lookup(parent, &data->args.name); if (alias != NULL) { - int ret = 0; + int ret; void *devname_garbage = NULL; /* @@ -155,14 +155,16 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n * the sillyrename information to the aliased dentry. */ nfs_free_dname(data); + ret = nfs_copy_dname(alias, data); spin_lock(&alias->d_lock); - if (alias->d_inode != NULL && + if (ret == 0 && alias->d_inode != NULL && !(alias->d_flags & DCACHE_NFSFS_RENAMED)) { devname_garbage = alias->d_fsdata; alias->d_fsdata = data; alias->d_flags |= DCACHE_NFSFS_RENAMED; ret = 1; - } + } else + ret = 0; spin_unlock(&alias->d_lock); nfs_dec_sillycount(dir); dput(alias); @@ -171,8 +173,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n * point dentry is definitely not a root, so we won't need * that anymore. 
*/ - if (devname_garbage) - kfree(devname_garbage); + kfree(devname_garbage); return ret; } data->dir = igrab(dir); @@ -204,8 +205,6 @@ static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) if (parent == NULL) goto out_free; dir = parent->d_inode; - if (nfs_copy_dname(dentry, data) != 0) - goto out_dput; /* Non-exclusive lock protects against concurrent lookup() calls */ spin_lock(&dir->i_lock); if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) { @@ -366,6 +365,8 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata) struct nfs_renamedata *data = calldata; struct inode *old_dir = data->old_dir; struct inode *new_dir = data->new_dir; + struct dentry *old_dentry = data->old_dentry; + struct dentry *new_dentry = data->new_dentry; if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) { nfs_restart_rpc(task, NFS_SERVER(old_dir)->nfs_client); @@ -373,12 +374,12 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata) } if (task->tk_status != 0) { - nfs_cancel_async_unlink(data->old_dentry); + nfs_cancel_async_unlink(old_dentry); return; } - nfs_set_verifier(data->old_dentry, nfs_save_change_attribute(old_dir)); - d_move(data->old_dentry, data->new_dentry); + d_drop(old_dentry); + d_drop(new_dentry); } /** @@ -501,6 +502,14 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir, * and only performs the unlink once the last reference to it is put. * * The final cleanup is done during dentry_iput. + * + * (Note: NFSv4 is stateful, and has opens, so in theory an NFSv4 server + * could take responsibility for keeping open files referenced. The server + * would also need to ensure that opened-but-deleted files were kept over + * reboots. However, we may not assume a server does so. 
(RFC 5661 + * does provide an OPEN4_RESULT_PRESERVE_UNLINKED flag that a server can + * use to advertise that it does this; some day we may take advantage of + * it.)) */ int nfs_sillyrename(struct inode *dir, struct dentry *dentry) @@ -560,6 +569,14 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) if (error) goto out_dput; + /* populate unlinkdata with the right dname */ + error = nfs_copy_dname(sdentry, + (struct nfs_unlinkdata *)dentry->d_fsdata); + if (error) { + nfs_cancel_async_unlink(dentry); + goto out_dput; + } + /* run the rename task, undo unlink if it fails */ task = nfs_async_rename(dir, dir, dentry, sdentry); if (IS_ERR(task)) { diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e268e3b2349..b39b37f8091 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -97,7 +97,7 @@ void nfs_writedata_free(struct nfs_write_data *p) mempool_free(p, nfs_wdata_mempool); } -static void nfs_writedata_release(struct nfs_write_data *wdata) +void nfs_writedata_release(struct nfs_write_data *wdata) { put_lseg(wdata->lseg); put_nfs_open_context(wdata->args.context); @@ -409,7 +409,7 @@ out: */ static void nfs_inode_remove_request(struct nfs_page *req) { - struct inode *inode = req->wb_context->path.dentry->d_inode; + struct inode *inode = req->wb_context->dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); BUG_ON (!NFS_WBACK_BUSY(req)); @@ -438,7 +438,7 @@ nfs_mark_request_dirty(struct nfs_page *req) static void nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) { - struct inode *inode = req->wb_context->path.dentry->d_inode; + struct inode *inode = req->wb_context->dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&inode->i_lock); @@ -845,49 +845,75 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write); /* * Set up the argument/result storage required for the RPC call. 
*/ -static int nfs_write_rpcsetup(struct nfs_page *req, +static void nfs_write_rpcsetup(struct nfs_page *req, struct nfs_write_data *data, - const struct rpc_call_ops *call_ops, unsigned int count, unsigned int offset, - struct pnfs_layout_segment *lseg, int how) { - struct inode *inode = req->wb_context->path.dentry->d_inode; + struct inode *inode = req->wb_context->dentry->d_inode; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ data->req = req; - data->inode = inode = req->wb_context->path.dentry->d_inode; + data->inode = inode = req->wb_context->dentry->d_inode; data->cred = req->wb_context->cred; - data->lseg = get_lseg(lseg); data->args.fh = NFS_FH(inode); data->args.offset = req_offset(req) + offset; + /* pnfs_set_layoutcommit needs this */ + data->mds_offset = data->args.offset; data->args.pgbase = req->wb_pgbase + offset; data->args.pages = data->pagevec; data->args.count = count; data->args.context = get_nfs_open_context(req->wb_context); data->args.lock_context = req->wb_lock_context; data->args.stable = NFS_UNSTABLE; - if (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { - data->args.stable = NFS_DATA_SYNC; - if (!nfs_need_commit(NFS_I(inode))) - data->args.stable = NFS_FILE_SYNC; + switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { + case 0: + break; + case FLUSH_COND_STABLE: + if (nfs_need_commit(NFS_I(inode))) + break; + default: + data->args.stable = NFS_FILE_SYNC; } data->res.fattr = &data->fattr; data->res.count = count; data->res.verf = &data->verf; nfs_fattr_init(&data->fattr); +} - if (data->lseg && - (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED)) - return 0; +static int nfs_do_write(struct nfs_write_data *data, + const struct rpc_call_ops *call_ops, + int how) +{ + struct inode *inode = data->args.context->dentry->d_inode; return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how); } +static int nfs_do_multiple_writes(struct list_head *head, + const struct 
rpc_call_ops *call_ops, + int how) +{ + struct nfs_write_data *data; + int ret = 0; + + while (!list_empty(head)) { + int ret2; + + data = list_entry(head->next, struct nfs_write_data, list); + list_del_init(&data->list); + + ret2 = nfs_do_write(data, call_ops, how); + if (ret == 0) + ret = ret2; + } + return ret; +} + /* If a nfs_flush_* function fails, it should remove reqs from @head and * call this on each, which will prepare them to be retried on next * writeback using standard nfs. @@ -905,17 +931,15 @@ static void nfs_redirty_request(struct nfs_page *req) * Generate multiple small requests to write out a single * contiguous dirty area on one page. */ -static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) +static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) { struct nfs_page *req = nfs_list_entry(desc->pg_list.next); struct page *page = req->wb_page; struct nfs_write_data *data; - size_t wsize = NFS_SERVER(desc->pg_inode)->wsize, nbytes; + size_t wsize = desc->pg_bsize, nbytes; unsigned int offset; int requests = 0; int ret = 0; - struct pnfs_layout_segment *lseg; - LIST_HEAD(list); nfs_list_remove_request(req); @@ -925,6 +949,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) desc->pg_ioflags &= ~FLUSH_COND_STABLE; + offset = 0; nbytes = desc->pg_count; do { size_t len = min(nbytes, wsize); @@ -932,45 +957,21 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) data = nfs_writedata_alloc(1); if (!data) goto out_bad; - list_add(&data->pages, &list); + data->pagevec[0] = page; + nfs_write_rpcsetup(req, data, wsize, offset, desc->pg_ioflags); + list_add(&data->list, res); requests++; nbytes -= len; + offset += len; } while (nbytes != 0); atomic_set(&req->wb_complete, requests); - - BUG_ON(desc->pg_lseg); - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, - req_offset(req), desc->pg_count, - IOMODE_RW, GFP_NOFS); - ClearPageError(page); - offset = 0; - nbytes = desc->pg_count; - 
do { - int ret2; - - data = list_entry(list.next, struct nfs_write_data, pages); - list_del_init(&data->pages); - - data->pagevec[0] = page; - - if (nbytes < wsize) - wsize = nbytes; - ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, - wsize, offset, lseg, desc->pg_ioflags); - if (ret == 0) - ret = ret2; - offset += wsize; - nbytes -= wsize; - } while (nbytes != 0); - - put_lseg(lseg); - desc->pg_lseg = NULL; + desc->pg_rpc_callops = &nfs_write_partial_ops; return ret; out_bad: - while (!list_empty(&list)) { - data = list_entry(list.next, struct nfs_write_data, pages); - list_del(&data->pages); + while (!list_empty(res)) { + data = list_entry(res->next, struct nfs_write_data, list); + list_del(&data->list); nfs_writedata_free(data); } nfs_redirty_request(req); @@ -985,14 +986,13 @@ out_bad: * This is the case if nfs_updatepage detects a conflicting request * that has been written but not committed. */ -static int nfs_flush_one(struct nfs_pageio_descriptor *desc) +static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res) { struct nfs_page *req; struct page **pages; struct nfs_write_data *data; struct list_head *head = &desc->pg_list; - struct pnfs_layout_segment *lseg = desc->pg_lseg; - int ret; + int ret = 0; data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base, desc->pg_count)); @@ -1014,32 +1014,62 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc) *pages++ = req->wb_page; } req = nfs_list_entry(data->pages.next); - if ((!lseg) && list_is_singular(&data->pages)) - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, - req_offset(req), desc->pg_count, - IOMODE_RW, GFP_NOFS); if ((desc->pg_ioflags & FLUSH_COND_STABLE) && (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) desc->pg_ioflags &= ~FLUSH_COND_STABLE; /* Set up the argument struct */ - ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags); + nfs_write_rpcsetup(req, data, desc->pg_count, 0, 
desc->pg_ioflags); + list_add(&data->list, res); + desc->pg_rpc_callops = &nfs_write_full_ops; out: - put_lseg(lseg); /* Cleans any gotten in ->pg_test */ - desc->pg_lseg = NULL; return ret; } -static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, +int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head) +{ + if (desc->pg_bsize < PAGE_CACHE_SIZE) + return nfs_flush_multi(desc, head); + return nfs_flush_one(desc, head); +} + +static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) +{ + LIST_HEAD(head); + int ret; + + ret = nfs_generic_flush(desc, &head); + if (ret == 0) + ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops, + desc->pg_ioflags); + return ret; +} + +static const struct nfs_pageio_ops nfs_pageio_write_ops = { + .pg_test = nfs_generic_pg_test, + .pg_doio = nfs_generic_pg_writepages, +}; + +static void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) { - size_t wsize = NFS_SERVER(inode)->wsize; + nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, + NFS_SERVER(inode)->wsize, ioflags); +} + +void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) +{ + pgio->pg_ops = &nfs_pageio_write_ops; + pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; +} +EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); - if (wsize < PAGE_CACHE_SIZE) - nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags); - else - nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags); +static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, + struct inode *inode, int ioflags) +{ + if (!pnfs_pageio_init_write(pgio, inode, ioflags)) + nfs_pageio_init_write_mds(pgio, inode, ioflags); } /* @@ -1051,9 +1081,9 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) dprintk("NFS: %5u write(%s/%lld %d@%lld)", task->tk_pid, - data->req->wb_context->path.dentry->d_inode->i_sb->s_id, + data->req->wb_context->dentry->d_inode->i_sb->s_id, 
(long long) - NFS_FILEID(data->req->wb_context->path.dentry->d_inode), + NFS_FILEID(data->req->wb_context->dentry->d_inode), data->req->wb_bytes, (long long)req_offset(data->req)); nfs_writeback_done(task, data); @@ -1146,8 +1176,8 @@ static void nfs_writeback_release_full(void *calldata) dprintk("NFS: %5u write (%s/%lld %d@%lld)", data->task.tk_pid, - req->wb_context->path.dentry->d_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), + req->wb_context->dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); @@ -1345,7 +1375,7 @@ void nfs_init_commit(struct nfs_write_data *data, struct pnfs_layout_segment *lseg) { struct nfs_page *first = nfs_list_entry(head->next); - struct inode *inode = first->wb_context->path.dentry->d_inode; + struct inode *inode = first->wb_context->dentry->d_inode; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ @@ -1433,8 +1463,8 @@ void nfs_commit_release_pages(struct nfs_write_data *data) nfs_clear_request_commit(req); dprintk("NFS: commit (%s/%lld %d@%lld)", - req->wb_context->path.dentry->d_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), + req->wb_context->dentry->d_sb->s_id, + (long long)NFS_FILEID(req->wb_context->dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); if (status < 0) { @@ -1564,8 +1594,7 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) int status; bool sync = true; - if (wbc->sync_mode == WB_SYNC_NONE || wbc->nonblocking || - wbc->for_background) + if (wbc->sync_mode == WB_SYNC_NONE) sync = false; status = pnfs_layoutcommit_inode(inode, sync); |