summaryrefslogtreecommitdiffstats
path: root/fs/nfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfs')
-rw-r--r--fs/nfs/callback.c31
-rw-r--r--fs/nfs/callback_xdr.c28
-rw-r--r--fs/nfs/delegation.c19
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/dir.c116
-rw-r--r--fs/nfs/direct.c946
-rw-r--r--fs/nfs/file.c50
-rw-r--r--fs/nfs/idmap.c47
-rw-r--r--fs/nfs/inode.c240
-rw-r--r--fs/nfs/iostat.h164
-rw-r--r--fs/nfs/mount_clnt.c19
-rw-r--r--fs/nfs/nfs2xdr.c6
-rw-r--r--fs/nfs/nfs3acl.c16
-rw-r--r--fs/nfs/nfs3proc.c246
-rw-r--r--fs/nfs/nfs3xdr.c8
-rw-r--r--fs/nfs/nfs4proc.c182
-rw-r--r--fs/nfs/nfs4state.c1
-rw-r--r--fs/nfs/nfs4xdr.c4
-rw-r--r--fs/nfs/pagelist.c16
-rw-r--r--fs/nfs/proc.c156
-rw-r--r--fs/nfs/read.c108
-rw-r--r--fs/nfs/unlink.c3
-rw-r--r--fs/nfs/write.c300
23 files changed, 1719 insertions, 988 deletions
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index fcd97406a77..90c95adc8c1 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -14,6 +14,7 @@
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/nfs_fs.h>
+#include <linux/mutex.h>
#include <net/inet_sock.h>
@@ -31,7 +32,7 @@ struct nfs_callback_data {
};
static struct nfs_callback_data nfs_callback_info;
-static DECLARE_MUTEX(nfs_callback_sema);
+static DEFINE_MUTEX(nfs_callback_mutex);
static struct svc_program nfs4_callback_program;
unsigned int nfs_callback_set_tcpport;
@@ -55,7 +56,12 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
complete(&nfs_callback_info.started);
- while (nfs_callback_info.users != 0 || !signalled()) {
+ for(;;) {
+ if (signalled()) {
+ if (nfs_callback_info.users == 0)
+ break;
+ flush_signals(current);
+ }
/*
* Listen for a request on the socket
*/
@@ -73,6 +79,7 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
svc_process(serv, rqstp);
}
+ svc_exit_thread(rqstp);
nfs_callback_info.pid = 0;
complete(&nfs_callback_info.stopped);
unlock_kernel();
@@ -89,7 +96,7 @@ int nfs_callback_up(void)
int ret = 0;
lock_kernel();
- down(&nfs_callback_sema);
+ mutex_lock(&nfs_callback_mutex);
if (nfs_callback_info.users++ || nfs_callback_info.pid != 0)
goto out;
init_completion(&nfs_callback_info.started);
@@ -115,7 +122,7 @@ int nfs_callback_up(void)
nfs_callback_info.serv = serv;
wait_for_completion(&nfs_callback_info.started);
out:
- up(&nfs_callback_sema);
+ mutex_unlock(&nfs_callback_mutex);
unlock_kernel();
return ret;
out_destroy:
@@ -133,13 +140,15 @@ int nfs_callback_down(void)
int ret = 0;
lock_kernel();
- down(&nfs_callback_sema);
- if (--nfs_callback_info.users || nfs_callback_info.pid == 0)
- goto out;
- kill_proc(nfs_callback_info.pid, SIGKILL, 1);
- wait_for_completion(&nfs_callback_info.stopped);
-out:
- up(&nfs_callback_sema);
+ mutex_lock(&nfs_callback_mutex);
+ nfs_callback_info.users--;
+ do {
+ if (nfs_callback_info.users != 0 || nfs_callback_info.pid == 0)
+ break;
+ if (kill_proc(nfs_callback_info.pid, SIGKILL, 1) < 0)
+ break;
+ } while (wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0);
+ mutex_unlock(&nfs_callback_mutex);
unlock_kernel();
return ret;
}
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 7c33b9a81a9..05c38cf40b6 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -330,7 +330,7 @@ static unsigned encode_op_hdr(struct xdr_stream *xdr, uint32_t op, uint32_t res)
static unsigned encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res)
{
- uint32_t *savep;
+ uint32_t *savep = NULL;
unsigned status = res->status;
if (unlikely(status != 0))
@@ -358,23 +358,26 @@ static unsigned process_op(struct svc_rqst *rqstp,
struct xdr_stream *xdr_in, void *argp,
struct xdr_stream *xdr_out, void *resp)
{
- struct callback_op *op;
- unsigned int op_nr;
+ struct callback_op *op = &callback_ops[0];
+ unsigned int op_nr = OP_CB_ILLEGAL;
unsigned int status = 0;
long maxlen;
unsigned res;
dprintk("%s: start\n", __FUNCTION__);
status = decode_op_hdr(xdr_in, &op_nr);
- if (unlikely(status != 0)) {
- op_nr = OP_CB_ILLEGAL;
- op = &callback_ops[0];
- } else if (unlikely(op_nr != OP_CB_GETATTR && op_nr != OP_CB_RECALL)) {
- op_nr = OP_CB_ILLEGAL;
- op = &callback_ops[0];
- status = htonl(NFS4ERR_OP_ILLEGAL);
- } else
- op = &callback_ops[op_nr];
+ if (likely(status == 0)) {
+ switch (op_nr) {
+ case OP_CB_GETATTR:
+ case OP_CB_RECALL:
+ op = &callback_ops[op_nr];
+ break;
+ default:
+ op_nr = OP_CB_ILLEGAL;
+ op = &callback_ops[0];
+ status = htonl(NFS4ERR_OP_ILLEGAL);
+ }
+ }
maxlen = xdr_out->end - xdr_out->p;
if (maxlen > 0 && maxlen < PAGE_SIZE) {
@@ -416,6 +419,7 @@ static int nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp
decode_compound_hdr_arg(&xdr_in, &hdr_arg);
hdr_res.taglen = hdr_arg.taglen;
hdr_res.tag = hdr_arg.tag;
+ hdr_res.nops = NULL;
encode_compound_hdr_res(&xdr_out, &hdr_res);
for (;;) {
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index c6f07c1c71e..d3be923d4e4 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -421,3 +421,22 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp)
nfs_free_delegation(delegation);
}
}
+
+int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
+{
+ struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_delegation *delegation;
+ int res = 0;
+
+ if (nfsi->delegation_state == 0)
+ return 0;
+ spin_lock(&clp->cl_lock);
+ delegation = nfsi->delegation;
+ if (delegation != NULL) {
+ memcpy(dst->data, delegation->stateid.data, sizeof(dst->data));
+ res = 1;
+ }
+ spin_unlock(&clp->cl_lock);
+ return res;
+}
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 7a0b2bfce77..3858694652f 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -41,6 +41,7 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp);
int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state);
int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
+int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
static inline int nfs_have_delegation(struct inode *inode, int flags)
{
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a1554bead69..a23f3489416 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -34,6 +34,7 @@
#include "nfs4_fs.h"
#include "delegation.h"
+#include "iostat.h"
#define NFS_PARANOIA 1
/* #define NFS_DEBUG_VERBOSE 1 */
@@ -53,7 +54,7 @@ static int nfs_rename(struct inode *, struct dentry *,
static int nfs_fsync_dir(struct file *, struct dentry *, int);
static loff_t nfs_llseek_dir(struct file *, loff_t, int);
-struct file_operations nfs_dir_operations = {
+const struct file_operations nfs_dir_operations = {
.llseek = nfs_llseek_dir,
.read = generic_read_dir,
.readdir = nfs_readdir,
@@ -129,6 +130,9 @@ nfs_opendir(struct inode *inode, struct file *filp)
{
int res = 0;
+ dfprintk(VFS, "NFS: opendir(%s/%ld)\n",
+ inode->i_sb->s_id, inode->i_ino);
+
lock_kernel();
/* Call generic open code in order to cache credentials */
if (!res)
@@ -172,7 +176,9 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
unsigned long timestamp;
int error;
- dfprintk(VFS, "NFS: nfs_readdir_filler() reading cookie %Lu into page %lu.\n", (long long)desc->entry->cookie, page->index);
+ dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n",
+ __FUNCTION__, (long long)desc->entry->cookie,
+ page->index);
again:
timestamp = jiffies;
@@ -244,7 +250,8 @@ int find_dirent(nfs_readdir_descriptor_t *desc)
status;
while((status = dir_decode(desc)) == 0) {
- dfprintk(VFS, "NFS: found cookie %Lu\n", (unsigned long long)entry->cookie);
+ dfprintk(DIRCACHE, "NFS: %s: examining cookie %Lu\n",
+ __FUNCTION__, (unsigned long long)entry->cookie);
if (entry->prev_cookie == *desc->dir_cookie)
break;
if (loop_count++ > 200) {
@@ -252,7 +259,6 @@ int find_dirent(nfs_readdir_descriptor_t *desc)
schedule();
}
}
- dfprintk(VFS, "NFS: find_dirent() returns %d\n", status);
return status;
}
@@ -276,7 +282,8 @@ int find_dirent_index(nfs_readdir_descriptor_t *desc)
if (status)
break;
- dfprintk(VFS, "NFS: found cookie %Lu at index %Ld\n", (unsigned long long)entry->cookie, desc->current_index);
+ dfprintk(DIRCACHE, "NFS: found cookie %Lu at index %Ld\n",
+ (unsigned long long)entry->cookie, desc->current_index);
if (desc->file->f_pos == desc->current_index) {
*desc->dir_cookie = entry->cookie;
@@ -288,7 +295,6 @@ int find_dirent_index(nfs_readdir_descriptor_t *desc)
schedule();
}
}
- dfprintk(VFS, "NFS: find_dirent_index() returns %d\n", status);
return status;
}
@@ -303,7 +309,9 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc)
struct page *page;
int status;
- dfprintk(VFS, "NFS: find_dirent_page() searching directory page %ld\n", desc->page_index);
+ dfprintk(DIRCACHE, "NFS: %s: searching page %ld for target %Lu\n",
+ __FUNCTION__, desc->page_index,
+ (long long) *desc->dir_cookie);
page = read_cache_page(inode->i_mapping, desc->page_index,
(filler_t *)nfs_readdir_filler, desc);
@@ -324,7 +332,7 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc)
if (status < 0)
dir_page_release(desc);
out:
- dfprintk(VFS, "NFS: find_dirent_page() returns %d\n", status);
+ dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, status);
return status;
read_error:
page_cache_release(page);
@@ -346,13 +354,15 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
/* Always search-by-index from the beginning of the cache */
if (*desc->dir_cookie == 0) {
- dfprintk(VFS, "NFS: readdir_search_pagecache() searching for offset %Ld\n", (long long)desc->file->f_pos);
+ dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for offset %Ld\n",
+ (long long)desc->file->f_pos);
desc->page_index = 0;
desc->entry->cookie = desc->entry->prev_cookie = 0;
desc->entry->eof = 0;
desc->current_index = 0;
} else
- dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie);
+ dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for cookie %Lu\n",
+ (unsigned long long)*desc->dir_cookie);
for (;;) {
res = find_dirent_page(desc);
@@ -365,7 +375,8 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
schedule();
}
}
- dfprintk(VFS, "NFS: readdir_search_pagecache() returned %d\n", res);
+
+ dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, res);
return res;
}
@@ -390,7 +401,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
int loop_count = 0,
res;
- dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)entry->cookie);
+ dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n",
+ (unsigned long long)entry->cookie);
for(;;) {
unsigned d_type = DT_UNKNOWN;
@@ -427,7 +439,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
dir_page_release(desc);
if (dentry != NULL)
dput(dentry);
- dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res);
+ dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
+ (unsigned long long)*desc->dir_cookie, res);
return res;
}
@@ -453,7 +466,8 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
struct page *page = NULL;
int status;
- dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie);
+ dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
+ (unsigned long long)*desc->dir_cookie);
page = alloc_page(GFP_HIGHUSER);
if (!page) {
@@ -485,7 +499,8 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
desc->entry->cookie = desc->entry->prev_cookie = 0;
desc->entry->eof = 0;
out:
- dfprintk(VFS, "NFS: uncached_readdir() returns %d\n", status);
+ dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
+ __FUNCTION__, status);
return status;
out_release:
dir_page_release(desc);
@@ -507,6 +522,11 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
struct nfs_fattr fattr;
long res;
+ dfprintk(VFS, "NFS: readdir(%s/%s) starting at cookie %Lu\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name,
+ (long long)filp->f_pos);
+ nfs_inc_stats(inode, NFSIOS_VFSGETDENTS);
+
lock_kernel();
res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
@@ -566,9 +586,12 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
}
}
unlock_kernel();
- if (res < 0)
- return res;
- return 0;
+ if (res > 0)
+ res = 0;
+ dfprintk(VFS, "NFS: readdir(%s/%s) returns %ld\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name,
+ res);
+ return res;
}
loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
@@ -599,6 +622,10 @@ out:
*/
int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync)
{
+ dfprintk(VFS, "NFS: fsync_dir(%s/%s) datasync %d\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name,
+ datasync);
+
return 0;
}
@@ -713,6 +740,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
parent = dget_parent(dentry);
lock_kernel();
dir = parent->d_inode;
+ nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
inode = dentry->d_inode;
if (!inode) {
@@ -722,8 +750,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
}
if (is_bad_inode(inode)) {
- dfprintk(VFS, "nfs_lookup_validate: %s/%s has dud inode\n",
- dentry->d_parent->d_name.name, dentry->d_name.name);
+ dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n",
+ __FUNCTION__, dentry->d_parent->d_name.name,
+ dentry->d_name.name);
goto out_bad;
}
@@ -755,6 +784,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
out_valid:
unlock_kernel();
dput(parent);
+ dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n",
+ __FUNCTION__, dentry->d_parent->d_name.name,
+ dentry->d_name.name);
return 1;
out_zap_parent:
nfs_zap_caches(dir);
@@ -771,6 +803,9 @@ out_zap_parent:
d_drop(dentry);
unlock_kernel();
dput(parent);
+ dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n",
+ __FUNCTION__, dentry->d_parent->d_name.name,
+ dentry->d_name.name);
return 0;
}
@@ -844,6 +879,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
dfprintk(VFS, "NFS: lookup(%s/%s)\n",
dentry->d_parent->d_name.name, dentry->d_name.name);
+ nfs_inc_stats(dir, NFSIOS_VFSLOOKUP);
res = ERR_PTR(-ENAMETOOLONG);
if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
@@ -865,9 +901,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
res = ERR_PTR(error);
goto out_unlock;
}
- res = ERR_PTR(-EACCES);
inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
- if (!inode)
+ res = (struct dentry *)inode;
+ if (IS_ERR(res))
goto out_unlock;
no_entry:
res = d_add_unique(dentry, inode);
@@ -912,6 +948,9 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
struct dentry *res = NULL;
int error;
+ dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n",
+ dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
+
/* Check that we are indeed trying to open this file */
if (!is_atomic_open(dir, nd))
goto no_open;
@@ -1057,7 +1096,7 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
return NULL;
dentry->d_op = NFS_PROTO(dir)->dentry_ops;
inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
- if (!inode) {
+ if (IS_ERR(inode)) {
dput(dentry);
return NULL;
}
@@ -1095,9 +1134,9 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
if (error < 0)
goto out_err;
}
- error = -ENOMEM;
inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
- if (inode == NULL)
+ error = PTR_ERR(inode);
+ if (IS_ERR(inode))
goto out_err;
d_instantiate(dentry, inode);
return 0;
@@ -1119,8 +1158,8 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
int error;
int open_flags = 0;
- dfprintk(VFS, "NFS: create(%s/%ld, %s\n", dir->i_sb->s_id,
- dir->i_ino, dentry->d_name.name);
+ dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
+ dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
attr.ia_mode = mode;
attr.ia_valid = ATTR_MODE;
@@ -1153,8 +1192,8 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
struct iattr attr;
int status;
- dfprintk(VFS, "NFS: mknod(%s/%ld, %s\n", dir->i_sb->s_id,
- dir->i_ino, dentry->d_name.name);
+ dfprintk(VFS, "NFS: mknod(%s/%ld), %s\n",
+ dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
if (!new_valid_dev(rdev))
return -EINVAL;
@@ -1186,8 +1225,8 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
struct iattr attr;
int error;
- dfprintk(VFS, "NFS: mkdir(%s/%ld, %s\n", dir->i_sb->s_id,
- dir->i_ino, dentry->d_name.name);
+ dfprintk(VFS, "NFS: mkdir(%s/%ld), %s\n",
+ dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
attr.ia_valid = ATTR_MODE;
attr.ia_mode = mode | S_IFDIR;
@@ -1212,8 +1251,8 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
{
int error;
- dfprintk(VFS, "NFS: rmdir(%s/%ld, %s\n", dir->i_sb->s_id,
- dir->i_ino, dentry->d_name.name);
+ dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n",
+ dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
lock_kernel();
nfs_begin_data_update(dir);
@@ -1241,6 +1280,7 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
atomic_read(&dentry->d_count));
+ nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
#ifdef NFS_PARANOIA
if (!dentry->d_inode)
@@ -1268,8 +1308,8 @@ dentry->d_parent->d_name.name, dentry->d_name.name);
sillycounter++;
sprintf(suffix, "%*.*x", countersize, countersize, sillycounter);
- dfprintk(VFS, "trying to rename %s to %s\n",
- dentry->d_name.name, silly);
+ dfprintk(VFS, "NFS: trying to rename %s to %s\n",
+ dentry->d_name.name, silly);
sdentry = lookup_one_len(silly, dentry->d_parent, slen);
/*
@@ -1640,6 +1680,8 @@ int nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
struct rpc_cred *cred;
int res = 0;
+ nfs_inc_stats(inode, NFSIOS_VFSACCESS);
+
if (mask == 0)
goto out;
/* Is this sys_access() ? */
@@ -1679,13 +1721,15 @@ force_lookup:
res = PTR_ERR(cred);
unlock_kernel();
out:
+ dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n",
+ inode->i_sb->s_id, inode->i_ino, mask, res);
return res;
out_notsup:
res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
if (res == 0)
res = generic_permission(inode, mask, NULL);
unlock_kernel();
- return res;
+ goto out;
}
/*
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 04ab2fc360e..0f583cb16dd 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -7,11 +7,11 @@
*
* There are important applications whose performance or correctness
* depends on uncached access to file data. Database clusters
- * (multiple copies of the same instance running on separate hosts)
+ * (multiple copies of the same instance running on separate hosts)
* implement their own cache coherency protocol that subsumes file
- * system cache protocols. Applications that process datasets
- * considerably larger than the client's memory do not always benefit
- * from a local cache. A streaming video server, for instance, has no
+ * system cache protocols. Applications that process datasets
+ * considerably larger than the client's memory do not always benefit
+ * from a local cache. A streaming video server, for instance, has no
* need to cache the contents of a file.
*
* When an application requests uncached I/O, all read and write requests
@@ -34,6 +34,7 @@
* 08 Jun 2003 Port to 2.5 APIs --cel
* 31 Mar 2004 Handle direct I/O without VFS support --cel
* 15 Sep 2004 Parallel async reads --cel
+ * 04 May 2005 support O_DIRECT with aio --cel
*
*/
@@ -54,8 +55,9 @@
#include <asm/uaccess.h>
#include <asm/atomic.h>
+#include "iostat.h"
+
#define NFSDBG_FACILITY NFSDBG_VFS
-#define MAX_DIRECTIO_SIZE (4096UL << PAGE_SHIFT)
static kmem_cache_t *nfs_direct_cachep;
@@ -64,38 +66,78 @@ static kmem_cache_t *nfs_direct_cachep;
*/
struct nfs_direct_req {
struct kref kref; /* release manager */
- struct list_head list; /* nfs_read_data structs */
- wait_queue_head_t wait; /* wait for i/o completion */
+
+ /* I/O parameters */
+ struct list_head list, /* nfs_read/write_data structs */
+ rewrite_list; /* saved nfs_write_data structs */
+ struct nfs_open_context *ctx; /* file open context info */
+ struct kiocb * iocb; /* controlling i/o request */
+ struct inode * inode; /* target file of i/o */
+ unsigned long user_addr; /* location of user's buffer */
+ size_t user_count; /* total bytes to move */
+ loff_t pos; /* starting offset in file */
struct page ** pages; /* pages in our buffer */
unsigned int npages; /* count of pages */
- atomic_t complete, /* i/os we're waiting for */
- count, /* bytes actually processed */
+
+ /* completion state */
+ spinlock_t lock; /* protect completion state */
+ int outstanding; /* i/os we're waiting for */
+ ssize_t count, /* bytes actually processed */
error; /* any reported error */
+ struct completion completion; /* wait for i/o completion */
+
+ /* commit state */
+ struct nfs_write_data * commit_data; /* special write_data for commits */
+ int flags;
+#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */
+#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */
+ struct nfs_writeverf verf; /* unstable write verifier */
};
+static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync);
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
/**
- * nfs_get_user_pages - find and set up pages underlying user's buffer
- * rw: direction (read or write)
- * user_addr: starting address of this segment of user's buffer
- * count: size of this segment
- * @pages: returned array of page struct pointers underlying user's buffer
+ * nfs_direct_IO - NFS address space operation for direct I/O
+ * @rw: direction (read or write)
+ * @iocb: target I/O control block
+ * @iov: array of vectors that define I/O buffer
+ * @pos: offset in file to begin the operation
+ * @nr_segs: size of iovec array
+ *
+ * The presence of this routine in the address space ops vector means
+ * the NFS client supports direct I/O. However, we shunt off direct
+ * read and write requests before the VFS gets them, so this method
+ * should never be called.
*/
-static inline int
-nfs_get_user_pages(int rw, unsigned long user_addr, size_t size,
- struct page ***pages)
+ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
+{
+ struct dentry *dentry = iocb->ki_filp->f_dentry;
+
+ dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
+ dentry->d_name.name, (long long) pos, nr_segs);
+
+ return -EINVAL;
+}
+
+static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
+{
+ int i;
+ for (i = 0; i < npages; i++) {
+ struct page *page = pages[i];
+ if (do_dirty && !PageCompound(page))
+ set_page_dirty_lock(page);
+ page_cache_release(page);
+ }
+ kfree(pages);
+}
+
+static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, struct page ***pages)
{
int result = -ENOMEM;
unsigned long page_count;
size_t array_size;
- /* set an arbitrary limit to prevent type overflow */
- /* XXX: this can probably be as large as INT_MAX */
- if (size > MAX_DIRECTIO_SIZE) {
- *pages = NULL;
- return -EFBIG;
- }
-
page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
page_count -= user_addr >> PAGE_SHIFT;
@@ -107,66 +149,117 @@ nfs_get_user_pages(int rw, unsigned long user_addr, size_t size,
page_count, (rw == READ), 0,
*pages, NULL);
up_read(&current->mm->mmap_sem);
+ if (result != page_count) {
+ /*
+ * If we got fewer pages than expected from
+ * get_user_pages(), the user buffer runs off the
+ * end of a mapping; return EFAULT.
+ */
+ if (result >= 0) {
+ nfs_free_user_pages(*pages, result, 0);
+ result = -EFAULT;
+ } else
+ kfree(*pages);
+ *pages = NULL;
+ }
}
return result;
}
-/**
- * nfs_free_user_pages - tear down page struct array
- * @pages: array of page struct pointers underlying target buffer
- * @npages: number of pages in the array
- * @do_dirty: dirty the pages as we release them
- */
-static void
-nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
+static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
{
- int i;
- for (i = 0; i < npages; i++) {
- struct page *page = pages[i];
- if (do_dirty && !PageCompound(page))
- set_page_dirty_lock(page);
- page_cache_release(page);
- }
- kfree(pages);
+ struct nfs_direct_req *dreq;
+
+ dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
+ if (!dreq)
+ return NULL;
+
+ kref_init(&dreq->kref);
+ init_completion(&dreq->completion);
+ INIT_LIST_HEAD(&dreq->list);
+ INIT_LIST_HEAD(&dreq->rewrite_list);
+ dreq->iocb = NULL;
+ dreq->ctx = NULL;
+ spin_lock_init(&dreq->lock);
+ dreq->outstanding = 0;
+ dreq->count = 0;
+ dreq->error = 0;
+ dreq->flags = 0;
+
+ return dreq;
}
-/**
- * nfs_direct_req_release - release nfs_direct_req structure for direct read
- * @kref: kref object embedded in an nfs_direct_req structure
- *
- */
static void nfs_direct_req_release(struct kref *kref)
{
struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
+
+ if (dreq->ctx != NULL)
+ put_nfs_open_context(dreq->ctx);
kmem_cache_free(nfs_direct_cachep, dreq);
}
-/**
- * nfs_direct_read_alloc - allocate nfs_read_data structures for direct read
- * @count: count of bytes for the read request
- * @rsize: local rsize setting
+/*
+ * Collects and returns the final error value/byte-count.
+ */
+static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq)
+{
+ ssize_t result = -EIOCBQUEUED;
+
+ /* Async requests don't wait here */
+ if (dreq->iocb)
+ goto out;
+
+ result = wait_for_completion_interruptible(&dreq->completion);
+
+ if (!result)
+ result = dreq->error;
+ if (!result)
+ result = dreq->count;
+
+out:
+ kref_put(&dreq->kref, nfs_direct_req_release);
+ return (ssize_t) result;
+}
+
+/*
+ * We must hold a reference to all the pages in this direct read request
+ * until the RPCs complete. This could be long *after* we are woken up in
+ * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
*
+ * In addition, synchronous I/O uses a stack-allocated iocb. Thus we
+ * can't trust the iocb is still valid here if this is a synchronous
+ * request. If the waiter is woken prematurely, the iocb is long gone.
+ */
+static void nfs_direct_complete(struct nfs_direct_req *dreq)
+{
+ nfs_free_user_pages(dreq->pages, dreq->npages, 1);
+
+ if (dreq->iocb) {
+ long res = (long) dreq->error;
+ if (!res)
+ res = (long) dreq->count;
+ aio_complete(dreq->iocb, res, 0);
+ }
+ complete_all(&dreq->completion);
+
+ kref_put(&dreq->kref, nfs_direct_req_release);
+}
+
+/*
* Note we also set the number of requests we have in the dreq when we are
* done. This prevents races with I/O completion so we will always wait
* until all requests have been dispatched and completed.
*/
-static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int rsize)
+static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize)
{
struct list_head *list;
struct nfs_direct_req *dreq;
- unsigned int reads = 0;
unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
+ dreq = nfs_direct_req_alloc();
if (!dreq)
return NULL;
- kref_init(&dreq->kref);
- init_waitqueue_head(&dreq->wait);
- INIT_LIST_HEAD(&dreq->list);
- atomic_set(&dreq->count, 0);
- atomic_set(&dreq->error, 0);
-
list = &dreq->list;
for(;;) {
struct nfs_read_data *data = nfs_readdata_alloc(rpages);
@@ -186,72 +279,70 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int
list_add(&data->pages, list);
data->req = (struct nfs_page *) dreq;
- reads++;
+ dreq->outstanding++;
if (nbytes <= rsize)
break;
nbytes -= rsize;
}
kref_get(&dreq->kref);
- atomic_set(&dreq->complete, reads);
return dreq;
}
-/**
- * nfs_direct_read_result - handle a read reply for a direct read request
- * @data: address of NFS READ operation control block
- * @status: status of this NFS READ operation
- *
- * We must hold a reference to all the pages in this direct read request
- * until the RPCs complete. This could be long *after* we are woken up in
- * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server).
- */
-static void nfs_direct_read_result(struct nfs_read_data *data, int status)
+static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
{
+ struct nfs_read_data *data = calldata;
struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
- if (likely(status >= 0))
- atomic_add(data->res.count, &dreq->count);
+ if (nfs_readpage_result(task, data) != 0)
+ return;
+
+ spin_lock(&dreq->lock);
+
+ if (likely(task->tk_status >= 0))
+ dreq->count += data->res.count;
else
- atomic_set(&dreq->error, status);
+ dreq->error = task->tk_status;
- if (unlikely(atomic_dec_and_test(&dreq->complete))) {
- nfs_free_user_pages(dreq->pages, dreq->npages, 1);
- wake_up(&dreq->wait);
- kref_put(&dreq->kref, nfs_direct_req_release);
+ if (--dreq->outstanding) {
+ spin_unlock(&dreq->lock);
+ return;
}
+
+ spin_unlock(&dreq->lock);
+ nfs_direct_complete(dreq);
}
-/**
- * nfs_direct_read_schedule - dispatch NFS READ operations for a direct read
- * @dreq: address of nfs_direct_req struct for this request
- * @inode: target inode
- * @ctx: target file open context
- * @user_addr: starting address of this segment of user's buffer
- * @count: size of this segment
- * @file_offset: offset in file to begin the operation
- *
+static const struct rpc_call_ops nfs_read_direct_ops = {
+ .rpc_call_done = nfs_direct_read_result,
+ .rpc_release = nfs_readdata_release,
+};
+
+/*
* For each nfs_read_data struct that was allocated on the list, dispatch
* an NFS READ operation
*/
-static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
- struct inode *inode, struct nfs_open_context *ctx,
- unsigned long user_addr, size_t count, loff_t file_offset)
+static void nfs_direct_read_schedule(struct nfs_direct_req *dreq)
{
+ struct nfs_open_context *ctx = dreq->ctx;
+ struct inode *inode = ctx->dentry->d_inode;
struct list_head *list = &dreq->list;
struct page **pages = dreq->pages;
+ size_t count = dreq->user_count;
+ loff_t pos = dreq->pos;
+ size_t rsize = NFS_SERVER(inode)->rsize;
unsigned int curpage, pgbase;
- unsigned int rsize = NFS_SERVER(inode)->rsize;
curpage = 0;
- pgbase = user_addr & ~PAGE_MASK;
+ pgbase = dreq->user_addr & ~PAGE_MASK;
do {
struct nfs_read_data *data;
- unsigned int bytes;
+ size_t bytes;
bytes = rsize;
if (count < rsize)
bytes = count;
+ BUG_ON(list_empty(list));
data = list_entry(list->next, struct nfs_read_data, pages);
list_del_init(&data->pages);
@@ -259,7 +350,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
data->cred = ctx->cred;
data->args.fh = NFS_FH(inode);
data->args.context = ctx;
- data->args.offset = file_offset;
+ data->args.offset = pos;
data->args.pgbase = pgbase;
data->args.pages = &pages[curpage];
data->args.count = bytes;
@@ -267,77 +358,38 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
data->res.eof = 0;
data->res.count = bytes;
+ rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
+ &nfs_read_direct_ops, data);
NFS_PROTO(inode)->read_setup(data);
data->task.tk_cookie = (unsigned long) inode;
- data->complete = nfs_direct_read_result;
lock_kernel();
rpc_execute(&data->task);
unlock_kernel();
- dfprintk(VFS, "NFS: %4d initiated direct read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+ dfprintk(VFS, "NFS: %5u initiated direct read call (req %s/%Ld, %zu bytes @ offset %Lu)\n",
data->task.tk_pid,
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
bytes,
(unsigned long long)data->args.offset);
- file_offset += bytes;
+ pos += bytes;
pgbase += bytes;
curpage += pgbase >> PAGE_SHIFT;
pgbase &= ~PAGE_MASK;
count -= bytes;
} while (count != 0);
+ BUG_ON(!list_empty(list));
}
-/**
- * nfs_direct_read_wait - wait for I/O completion for direct reads
- * @dreq: request on which we are to wait
- * @intr: whether or not this wait can be interrupted
- *
- * Collects and returns the final error value/byte-count.
- */
-static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr)
-{
- int result = 0;
-
- if (intr) {
- result = wait_event_interruptible(dreq->wait,
- (atomic_read(&dreq->complete) == 0));
- } else {
- wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0));
- }
-
- if (!result)
- result = atomic_read(&dreq->error);
- if (!result)
- result = atomic_read(&dreq->count);
-
- kref_put(&dreq->kref, nfs_direct_req_release);
- return (ssize_t) result;
-}
-
-/**
- * nfs_direct_read_seg - Read in one iov segment. Generate separate
- * read RPCs for each "rsize" bytes.
- * @inode: target inode
- * @ctx: target file open context
- * @user_addr: starting address of this segment of user's buffer
- * @count: size of this segment
- * @file_offset: offset in file to begin the operation
- * @pages: array of addresses of page structs defining user's buffer
- * @nr_pages: number of pages in the array
- *
- */
-static ssize_t nfs_direct_read_seg(struct inode *inode,
- struct nfs_open_context *ctx, unsigned long user_addr,
- size_t count, loff_t file_offset, struct page **pages,
- unsigned int nr_pages)
+static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, unsigned int nr_pages)
{
ssize_t result;
sigset_t oldset;
+ struct inode *inode = iocb->ki_filp->f_mapping->host;
struct rpc_clnt *clnt = NFS_CLIENT(inode);
struct nfs_direct_req *dreq;
@@ -345,284 +397,350 @@ static ssize_t nfs_direct_read_seg(struct inode *inode,
if (!dreq)
return -ENOMEM;
+ dreq->user_addr = user_addr;
+ dreq->user_count = count;
+ dreq->pos = pos;
dreq->pages = pages;
dreq->npages = nr_pages;
+ dreq->inode = inode;
+ dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
+ if (!is_sync_kiocb(iocb))
+ dreq->iocb = iocb;
+ nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count);
rpc_clnt_sigmask(clnt, &oldset);
- nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count,
- file_offset);
- result = nfs_direct_read_wait(dreq, clnt->cl_intr);
+ nfs_direct_read_schedule(dreq);
+ result = nfs_direct_wait(dreq);
rpc_clnt_sigunmask(clnt, &oldset);
return result;
}
-/**
- * nfs_direct_read - For each iov segment, map the user's buffer
- * then generate read RPCs.
- * @inode: target inode
- * @ctx: target file open context
- * @iov: array of vectors that define I/O buffer
- * file_offset: offset in file to begin the operation
- * nr_segs: size of iovec array
- *
- * We've already pushed out any non-direct writes so that this read
- * will see them when we read from the server.
- */
-static ssize_t
-nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx,
- const struct iovec *iov, loff_t file_offset,
- unsigned long nr_segs)
+static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
{
- ssize_t tot_bytes = 0;
- unsigned long seg = 0;
-
- while ((seg < nr_segs) && (tot_bytes >= 0)) {
- ssize_t result;
- int page_count;
- struct page **pages;
- const struct iovec *vec = &iov[seg++];
- unsigned long user_addr = (unsigned long) vec->iov_base;
- size_t size = vec->iov_len;
-
- page_count = nfs_get_user_pages(READ, user_addr, size, &pages);
- if (page_count < 0) {
- nfs_free_user_pages(pages, 0, 0);
- if (tot_bytes > 0)
- break;
- return page_count;
- }
+ list_splice_init(&dreq->rewrite_list, &dreq->list);
+ while (!list_empty(&dreq->list)) {
+ struct nfs_write_data *data = list_entry(dreq->list.next, struct nfs_write_data, pages);
+ list_del(&data->pages);
+ nfs_writedata_release(data);
+ }
+}
- result = nfs_direct_read_seg(inode, ctx, user_addr, size,
- file_offset, pages, page_count);
+#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
+{
+ struct list_head *pos;
- if (result <= 0) {
- if (tot_bytes > 0)
- break;
- return result;
- }
- tot_bytes += result;
- file_offset += result;
- if (result < size)
- break;
+ list_splice_init(&dreq->rewrite_list, &dreq->list);
+ list_for_each(pos, &dreq->list)
+ dreq->outstanding++;
+ dreq->count = 0;
+
+ nfs_direct_write_schedule(dreq, FLUSH_STABLE);
+}
+
+static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
+{
+ struct nfs_write_data *data = calldata;
+ struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+
+ /* Call the NFS version-specific code */
+ if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
+ return;
+ if (unlikely(task->tk_status < 0)) {
+ dreq->error = task->tk_status;
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ }
+ if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
+ dprintk("NFS: %5u commit verify failed\n", task->tk_pid);
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
}
- return tot_bytes;
+ dprintk("NFS: %5u commit returned %d\n", task->tk_pid, task->tk_status);
+ nfs_direct_write_complete(dreq, data->inode);
}
-/**
- * nfs_direct_write_seg - Write out one iov segment. Generate separate
- * write RPCs for each "wsize" bytes, then commit.
- * @inode: target inode
- * @ctx: target file open context
- * user_addr: starting address of this segment of user's buffer
- * count: size of this segment
- * file_offset: offset in file to begin the operation
- * @pages: array of addresses of page structs defining user's buffer
- * nr_pages: size of pages array
- */
-static ssize_t nfs_direct_write_seg(struct inode *inode,
- struct nfs_open_context *ctx, unsigned long user_addr,
- size_t count, loff_t file_offset, struct page **pages,
- int nr_pages)
+static const struct rpc_call_ops nfs_commit_direct_ops = {
+ .rpc_call_done = nfs_direct_commit_result,
+ .rpc_release = nfs_commit_release,
+};
+
+static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
{
- const unsigned int wsize = NFS_SERVER(inode)->wsize;
- size_t request;
- int curpage, need_commit;
- ssize_t result, tot_bytes;
- struct nfs_writeverf first_verf;
- struct nfs_write_data *wdata;
-
- wdata = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
- if (!wdata)
- return -ENOMEM;
+ struct nfs_write_data *data = dreq->commit_data;
+ struct rpc_task *task = &data->task;
- wdata->inode = inode;
- wdata->cred = ctx->cred;
- wdata->args.fh = NFS_FH(inode);
- wdata->args.context = ctx;
- wdata->args.stable = NFS_UNSTABLE;
- if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize)
- wdata->args.stable = NFS_FILE_SYNC;
- wdata->res.fattr = &wdata->fattr;
- wdata->res.verf = &wdata->verf;
+ data->inode = dreq->inode;
+ data->cred = dreq->ctx->cred;
- nfs_begin_data_update(inode);
-retry:
- need_commit = 0;
- tot_bytes = 0;
- curpage = 0;
- request = count;
- wdata->args.pgbase = user_addr & ~PAGE_MASK;
- wdata->args.offset = file_offset;
- do {
- wdata->args.count = request;
- if (wdata->args.count > wsize)
- wdata->args.count = wsize;
- wdata->args.pages = &pages[curpage];
+ data->args.fh = NFS_FH(data->inode);
+ data->args.offset = dreq->pos;
+ data->args.count = dreq->user_count;
+ data->res.count = 0;
+ data->res.fattr = &data->fattr;
+ data->res.verf = &data->verf;
- dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n",
- wdata->args.count, (long long) wdata->args.offset,
- user_addr + tot_bytes, wdata->args.pgbase, curpage);
+ rpc_init_task(&data->task, NFS_CLIENT(dreq->inode), RPC_TASK_ASYNC,
+ &nfs_commit_direct_ops, data);
+ NFS_PROTO(data->inode)->commit_setup(data, 0);
- lock_kernel();
- result = NFS_PROTO(inode)->write(wdata);
- unlock_kernel();
+ data->task.tk_priority = RPC_PRIORITY_NORMAL;
+ data->task.tk_cookie = (unsigned long)data->inode;
+ /* Note: task.tk_ops->rpc_release will free dreq->commit_data */
+ dreq->commit_data = NULL;
- if (result <= 0) {
- if (tot_bytes > 0)
- break;
- goto out;
- }
+ dprintk("NFS: %5u initiated commit call\n", task->tk_pid);
- if (tot_bytes == 0)
- memcpy(&first_verf.verifier, &wdata->verf.verifier,
- sizeof(first_verf.verifier));
- if (wdata->verf.committed != NFS_FILE_SYNC) {
- need_commit = 1;
- if (memcmp(&first_verf.verifier, &wdata->verf.verifier,
- sizeof(first_verf.verifier)))
- goto sync_retry;
- }
+ lock_kernel();
+ rpc_execute(&data->task);
+ unlock_kernel();
+}
- tot_bytes += result;
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+{
+ int flags = dreq->flags;
- /* in case of a short write: stop now, let the app recover */
- if (result < wdata->args.count)
+ dreq->flags = 0;
+ switch (flags) {
+ case NFS_ODIRECT_DO_COMMIT:
+ nfs_direct_commit_schedule(dreq);
+ break;
+ case NFS_ODIRECT_RESCHED_WRITES:
+ nfs_direct_write_reschedule(dreq);
break;
+ default:
+ nfs_end_data_update(inode);
+ if (dreq->commit_data != NULL)
+ nfs_commit_free(dreq->commit_data);
+ nfs_direct_free_writedata(dreq);
+ nfs_direct_complete(dreq);
+ }
+}
- wdata->args.offset += result;
- wdata->args.pgbase += result;
- curpage += wdata->args.pgbase >> PAGE_SHIFT;
- wdata->args.pgbase &= ~PAGE_MASK;
- request -= result;
- } while (request != 0);
+static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
+{
+ dreq->commit_data = nfs_commit_alloc(0);
+ if (dreq->commit_data != NULL)
+ dreq->commit_data->req = (struct nfs_page *) dreq;
+}
+#else
+static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
+{
+ dreq->commit_data = NULL;
+}
- /*
- * Commit data written so far, even in the event of an error
- */
- if (need_commit) {
- wdata->args.count = tot_bytes;
- wdata->args.offset = file_offset;
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+{
+ nfs_end_data_update(inode);
+ nfs_direct_free_writedata(dreq);
+ nfs_direct_complete(dreq);
+}
+#endif
- lock_kernel();
- result = NFS_PROTO(inode)->commit(wdata);
- unlock_kernel();
+static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize)
+{
+ struct list_head *list;
+ struct nfs_direct_req *dreq;
+ unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+
+ dreq = nfs_direct_req_alloc();
+ if (!dreq)
+ return NULL;
+
+ list = &dreq->list;
+ for(;;) {
+ struct nfs_write_data *data = nfs_writedata_alloc(wpages);
+
+ if (unlikely(!data)) {
+ while (!list_empty(list)) {
+ data = list_entry(list->next,
+ struct nfs_write_data, pages);
+ list_del(&data->pages);
+ nfs_writedata_free(data);
+ }
+ kref_put(&dreq->kref, nfs_direct_req_release);
+ return NULL;
+ }
- if (result < 0 || memcmp(&first_verf.verifier,
- &wdata->verf.verifier,
- sizeof(first_verf.verifier)) != 0)
- goto sync_retry;
+ INIT_LIST_HEAD(&data->pages);
+ list_add(&data->pages, list);
+
+ data->req = (struct nfs_page *) dreq;
+ dreq->outstanding++;
+ if (nbytes <= wsize)
+ break;
+ nbytes -= wsize;
}
- result = tot_bytes;
-out:
- nfs_end_data_update(inode);
- nfs_writedata_free(wdata);
- return result;
+ nfs_alloc_commit_data(dreq);
-sync_retry:
- wdata->args.stable = NFS_FILE_SYNC;
- goto retry;
+ kref_get(&dreq->kref);
+ return dreq;
}
-/**
- * nfs_direct_write - For each iov segment, map the user's buffer
- * then generate write and commit RPCs.
- * @inode: target inode
- * @ctx: target file open context
- * @iov: array of vectors that define I/O buffer
- * file_offset: offset in file to begin the operation
- * nr_segs: size of iovec array
- *
- * Upon return, generic_file_direct_IO invalidates any cached pages
- * that non-direct readers might access, so they will pick up these
- * writes immediately.
- */
-static ssize_t nfs_direct_write(struct inode *inode,
- struct nfs_open_context *ctx, const struct iovec *iov,
- loff_t file_offset, unsigned long nr_segs)
+static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
{
- ssize_t tot_bytes = 0;
- unsigned long seg = 0;
-
- while ((seg < nr_segs) && (tot_bytes >= 0)) {
- ssize_t result;
- int page_count;
- struct page **pages;
- const struct iovec *vec = &iov[seg++];
- unsigned long user_addr = (unsigned long) vec->iov_base;
- size_t size = vec->iov_len;
-
- page_count = nfs_get_user_pages(WRITE, user_addr, size, &pages);
- if (page_count < 0) {
- nfs_free_user_pages(pages, 0, 0);
- if (tot_bytes > 0)
- break;
- return page_count;
- }
+ struct nfs_write_data *data = calldata;
+ struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+ int status = task->tk_status;
+
+ if (nfs_writeback_done(task, data) != 0)
+ return;
- result = nfs_direct_write_seg(inode, ctx, user_addr, size,
- file_offset, pages, page_count);
- nfs_free_user_pages(pages, page_count, 0);
+ spin_lock(&dreq->lock);
- if (result <= 0) {
- if (tot_bytes > 0)
+ if (likely(status >= 0))
+ dreq->count += data->res.count;
+ else
+ dreq->error = task->tk_status;
+
+ if (data->res.verf->committed != NFS_FILE_SYNC) {
+ switch (dreq->flags) {
+ case 0:
+ memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf));
+ dreq->flags = NFS_ODIRECT_DO_COMMIT;
break;
- return result;
+ case NFS_ODIRECT_DO_COMMIT:
+ if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) {
+ dprintk("NFS: %5u write verify failed\n", task->tk_pid);
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ }
}
- tot_bytes += result;
- file_offset += result;
- if (result < size)
- break;
}
- return tot_bytes;
+ /* In case we have to resend */
+ data->args.stable = NFS_FILE_SYNC;
+
+ spin_unlock(&dreq->lock);
}
-/**
- * nfs_direct_IO - NFS address space operation for direct I/O
- * rw: direction (read or write)
- * @iocb: target I/O control block
- * @iov: array of vectors that define I/O buffer
- * file_offset: offset in file to begin the operation
- * nr_segs: size of iovec array
- *
+/*
+ * NB: Return the value of the first error return code. Subsequent
+ * errors after the first one are ignored.
*/
-ssize_t
-nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
- loff_t file_offset, unsigned long nr_segs)
+static void nfs_direct_write_release(void *calldata)
{
- ssize_t result = -EINVAL;
- struct file *file = iocb->ki_filp;
- struct nfs_open_context *ctx;
- struct dentry *dentry = file->f_dentry;
- struct inode *inode = dentry->d_inode;
+ struct nfs_write_data *data = calldata;
+ struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
- /*
- * No support for async yet
- */
- if (!is_sync_kiocb(iocb))
- return result;
-
- ctx = (struct nfs_open_context *)file->private_data;
- switch (rw) {
- case READ:
- dprintk("NFS: direct_IO(read) (%s) off/no(%Lu/%lu)\n",
- dentry->d_name.name, file_offset, nr_segs);
-
- result = nfs_direct_read(inode, ctx, iov,
- file_offset, nr_segs);
- break;
- case WRITE:
- dprintk("NFS: direct_IO(write) (%s) off/no(%Lu/%lu)\n",
- dentry->d_name.name, file_offset, nr_segs);
-
- result = nfs_direct_write(inode, ctx, iov,
- file_offset, nr_segs);
- break;
- default:
- break;
+ spin_lock(&dreq->lock);
+ if (--dreq->outstanding) {
+ spin_unlock(&dreq->lock);
+ return;
}
+ spin_unlock(&dreq->lock);
+
+ nfs_direct_write_complete(dreq, data->inode);
+}
+
+static const struct rpc_call_ops nfs_write_direct_ops = {
+ .rpc_call_done = nfs_direct_write_result,
+ .rpc_release = nfs_direct_write_release,
+};
+
+/*
+ * For each nfs_write_data struct that was allocated on the list, dispatch
+ * an NFS WRITE operation
+ */
+static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync)
+{
+ struct nfs_open_context *ctx = dreq->ctx;
+ struct inode *inode = ctx->dentry->d_inode;
+ struct list_head *list = &dreq->list;
+ struct page **pages = dreq->pages;
+ size_t count = dreq->user_count;
+ loff_t pos = dreq->pos;
+ size_t wsize = NFS_SERVER(inode)->wsize;
+ unsigned int curpage, pgbase;
+
+ curpage = 0;
+ pgbase = dreq->user_addr & ~PAGE_MASK;
+ do {
+ struct nfs_write_data *data;
+ size_t bytes;
+
+ bytes = wsize;
+ if (count < wsize)
+ bytes = count;
+
+ BUG_ON(list_empty(list));
+ data = list_entry(list->next, struct nfs_write_data, pages);
+ list_move_tail(&data->pages, &dreq->rewrite_list);
+
+ data->inode = inode;
+ data->cred = ctx->cred;
+ data->args.fh = NFS_FH(inode);
+ data->args.context = ctx;
+ data->args.offset = pos;
+ data->args.pgbase = pgbase;
+ data->args.pages = &pages[curpage];
+ data->args.count = bytes;
+ data->res.fattr = &data->fattr;
+ data->res.count = bytes;
+ data->res.verf = &data->verf;
+
+ rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
+ &nfs_write_direct_ops, data);
+ NFS_PROTO(inode)->write_setup(data, sync);
+
+ data->task.tk_priority = RPC_PRIORITY_NORMAL;
+ data->task.tk_cookie = (unsigned long) inode;
+
+ lock_kernel();
+ rpc_execute(&data->task);
+ unlock_kernel();
+
+ dfprintk(VFS, "NFS: %5u initiated direct write call (req %s/%Ld, %zu bytes @ offset %Lu)\n",
+ data->task.tk_pid,
+ inode->i_sb->s_id,
+ (long long)NFS_FILEID(inode),
+ bytes,
+ (unsigned long long)data->args.offset);
+
+ pos += bytes;
+ pgbase += bytes;
+ curpage += pgbase >> PAGE_SHIFT;
+ pgbase &= ~PAGE_MASK;
+
+ count -= bytes;
+ } while (count != 0);
+ BUG_ON(!list_empty(list));
+}
+
+static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, int nr_pages)
+{
+ ssize_t result;
+ sigset_t oldset;
+ struct inode *inode = iocb->ki_filp->f_mapping->host;
+ struct rpc_clnt *clnt = NFS_CLIENT(inode);
+ struct nfs_direct_req *dreq;
+ size_t wsize = NFS_SERVER(inode)->wsize;
+ int sync = 0;
+
+ dreq = nfs_direct_write_alloc(count, wsize);
+ if (!dreq)
+ return -ENOMEM;
+ if (dreq->commit_data == NULL || count < wsize)
+ sync = FLUSH_STABLE;
+
+ dreq->user_addr = user_addr;
+ dreq->user_count = count;
+ dreq->pos = pos;
+ dreq->pages = pages;
+ dreq->npages = nr_pages;
+ dreq->inode = inode;
+ dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
+ if (!is_sync_kiocb(iocb))
+ dreq->iocb = iocb;
+
+ nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, count);
+
+ nfs_begin_data_update(inode);
+
+ rpc_clnt_sigmask(clnt, &oldset);
+ nfs_direct_write_schedule(dreq, sync);
+ result = nfs_direct_wait(dreq);
+ rpc_clnt_sigunmask(clnt, &oldset);
+
return result;
}
@@ -630,49 +748,40 @@ nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
* nfs_file_direct_read - file direct read operation for NFS files
* @iocb: target I/O control block
* @buf: user's buffer into which to read data
- * count: number of bytes to read
- * pos: byte offset in file where reading starts
+ * @count: number of bytes to read
+ * @pos: byte offset in file where reading starts
*
* We use this function for direct reads instead of calling
* generic_file_aio_read() in order to avoid gfar's check to see if
* the request starts before the end of the file. For that check
* to work, we must generate a GETATTR before each direct read, and
* even then there is a window between the GETATTR and the subsequent
- * READ where the file size could change. So our preference is simply
+ * READ where the file size could change. Our preference is simply
* to do all reads the application wants, and the server will take
* care of managing the end of file boundary.
- *
+ *
* This function also eliminates unnecessarily updating the file's
* atime locally, as the NFS server sets the file's atime, and this
* client must read the updated atime from the server back into its
* cache.
*/
-ssize_t
-nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
+ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
{
ssize_t retval = -EINVAL;
- loff_t *ppos = &iocb->ki_pos;
+ int page_count;
+ struct page **pages;
struct file *file = iocb->ki_filp;
- struct nfs_open_context *ctx =
- (struct nfs_open_context *) file->private_data;
struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- struct iovec iov = {
- .iov_base = buf,
- .iov_len = count,
- };
dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n",
file->f_dentry->d_parent->d_name.name,
file->f_dentry->d_name.name,
(unsigned long) count, (long long) pos);
- if (!is_sync_kiocb(iocb))
- goto out;
if (count < 0)
goto out;
retval = -EFAULT;
- if (!access_ok(VERIFY_WRITE, iov.iov_base, iov.iov_len))
+ if (!access_ok(VERIFY_WRITE, buf, count))
goto out;
retval = 0;
if (!count)
@@ -682,9 +791,16 @@ nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t
if (retval)
goto out;
- retval = nfs_direct_read(inode, ctx, &iov, pos, 1);
+ retval = nfs_get_user_pages(READ, (unsigned long) buf,
+ count, &pages);
+ if (retval < 0)
+ goto out;
+ page_count = retval;
+
+ retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos,
+ pages, page_count);
if (retval > 0)
- *ppos = pos + retval;
+ iocb->ki_pos = pos + retval;
out:
return retval;
@@ -694,8 +810,8 @@ out:
* nfs_file_direct_write - file direct write operation for NFS files
* @iocb: target I/O control block
* @buf: user's buffer from which to write data
- * count: number of bytes to write
- * pos: byte offset in file where writing starts
+ * @count: number of bytes to write
+ * @pos: byte offset in file where writing starts
*
* We use this function for direct writes instead of calling
* generic_file_aio_write() in order to avoid taking the inode
@@ -715,28 +831,19 @@ out:
* Note that O_APPEND is not supported for NFS direct writes, as there
* is no atomic O_APPEND write facility in the NFS protocol.
*/
-ssize_t
-nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
{
ssize_t retval;
+ int page_count;
+ struct page **pages;
struct file *file = iocb->ki_filp;
- struct nfs_open_context *ctx =
- (struct nfs_open_context *) file->private_data;
struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- struct iovec iov = {
- .iov_base = (char __user *)buf,
- };
dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n",
file->f_dentry->d_parent->d_name.name,
file->f_dentry->d_name.name,
(unsigned long) count, (long long) pos);
- retval = -EINVAL;
- if (!is_sync_kiocb(iocb))
- goto out;
-
retval = generic_write_checks(file, &pos, &count, 0);
if (retval)
goto out;
@@ -747,19 +854,35 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count,
retval = 0;
if (!count)
goto out;
- iov.iov_len = count,
retval = -EFAULT;
- if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len))
+ if (!access_ok(VERIFY_READ, buf, count))
goto out;
retval = nfs_sync_mapping(mapping);
if (retval)
goto out;
- retval = nfs_direct_write(inode, ctx, &iov, pos, 1);
+ retval = nfs_get_user_pages(WRITE, (unsigned long) buf,
+ count, &pages);
+ if (retval < 0)
+ goto out;
+ page_count = retval;
+
+ retval = nfs_direct_write(iocb, (unsigned long) buf, count,
+ pos, pages, page_count);
+
+ /*
+ * XXX: nfs_end_data_update() already ensures this file's
+ * cached data is subsequently invalidated. Do we really
+ * need to call invalidate_inode_pages2() again here?
+ *
+ * For aio writes, this invalidation will almost certainly
+ * occur before the writes complete. Kind of racey.
+ */
if (mapping->nrpages)
invalidate_inode_pages2(mapping);
+
if (retval > 0)
iocb->ki_pos = pos + retval;
@@ -767,11 +890,16 @@ out:
return retval;
}
+/**
+ * nfs_init_directcache - create a slab cache for nfs_direct_req structures
+ *
+ */
int nfs_init_directcache(void)
{
nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
sizeof(struct nfs_direct_req),
- 0, SLAB_RECLAIM_ACCOUNT,
+ 0, (SLAB_RECLAIM_ACCOUNT|
+ SLAB_MEM_SPREAD),
NULL, NULL);
if (nfs_direct_cachep == NULL)
return -ENOMEM;
@@ -779,6 +907,10 @@ int nfs_init_directcache(void)
return 0;
}
+/**
+ * nfs_init_directcache - destroy the slab cache for nfs_direct_req structures
+ *
+ */
void nfs_destroy_directcache(void)
{
if (kmem_cache_destroy(nfs_direct_cachep))
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 7a79fbe9f53..f1df2c8d925 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -32,6 +32,7 @@
#include <asm/system.h>
#include "delegation.h"
+#include "iostat.h"
#define NFSDBG_FACILITY NFSDBG_FILE
@@ -48,7 +49,7 @@ static int nfs_check_flags(int flags);
static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl);
static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl);
-struct file_operations nfs_file_operations = {
+const struct file_operations nfs_file_operations = {
.llseek = nfs_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
@@ -102,18 +103,15 @@ static int nfs_check_flags(int flags)
static int
nfs_file_open(struct inode *inode, struct file *filp)
{
- struct nfs_server *server = NFS_SERVER(inode);
- int (*open)(struct inode *, struct file *);
int res;
res = nfs_check_flags(filp->f_flags);
if (res)
return res;
+ nfs_inc_stats(inode, NFSIOS_VFSOPEN);
lock_kernel();
- /* Do NFSv4 open() call */
- if ((open = server->rpc_ops->file_open) != NULL)
- res = open(inode, filp);
+ res = NFS_SERVER(inode)->rpc_ops->file_open(inode, filp);
unlock_kernel();
return res;
}
@@ -124,6 +122,7 @@ nfs_file_release(struct inode *inode, struct file *filp)
/* Ensure that dirty pages are flushed out with the right creds */
if (filp->f_mode & FMODE_WRITE)
filemap_fdatawrite(filp->f_mapping);
+ nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
return NFS_PROTO(inode)->file_release(inode, filp);
}
@@ -199,6 +198,7 @@ nfs_file_flush(struct file *file)
if ((file->f_mode & FMODE_WRITE) == 0)
return 0;
+ nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
lock_kernel();
/* Ensure that data+attribute caches are up to date after close() */
status = nfs_wb_all(inode);
@@ -229,6 +229,7 @@ nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos)
(unsigned long) count, (unsigned long) pos);
result = nfs_revalidate_file(inode, iocb->ki_filp);
+ nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count);
if (!result)
result = generic_file_aio_read(iocb, buf, count, pos);
return result;
@@ -282,6 +283,7 @@ nfs_fsync(struct file *file, struct dentry *dentry, int datasync)
dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
+ nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
lock_kernel();
status = nfs_wb_all(inode);
if (!status) {
@@ -316,6 +318,16 @@ static int nfs_commit_write(struct file *file, struct page *page, unsigned offse
return status;
}
+static void nfs_invalidate_page(struct page *page, unsigned long offset)
+{
+ /* FIXME: we really should cancel any unstarted writes on this page */
+}
+
+static int nfs_release_page(struct page *page, gfp_t gfp)
+{
+ return !nfs_wb_page(page->mapping->host, page);
+}
+
struct address_space_operations nfs_file_aops = {
.readpage = nfs_readpage,
.readpages = nfs_readpages,
@@ -324,6 +336,8 @@ struct address_space_operations nfs_file_aops = {
.writepages = nfs_writepages,
.prepare_write = nfs_prepare_write,
.commit_write = nfs_commit_write,
+ .invalidatepage = nfs_invalidate_page,
+ .releasepage = nfs_release_page,
#ifdef CONFIG_NFS_DIRECTIO
.direct_IO = nfs_direct_IO,
#endif
@@ -365,6 +379,7 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t
if (!count)
goto out;
+ nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
result = generic_file_aio_write(iocb, buf, count, pos);
out:
return result;
@@ -376,15 +391,17 @@ out_swapfile:
static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
{
- struct file_lock *cfl;
+ struct file_lock cfl;
struct inode *inode = filp->f_mapping->host;
int status = 0;
lock_kernel();
/* Try local locking first */
- cfl = posix_test_lock(filp, fl);
- if (cfl != NULL) {
- locks_copy_lock(fl, cfl);
+ if (posix_test_lock(filp, fl, &cfl)) {
+ fl->fl_start = cfl.fl_start;
+ fl->fl_end = cfl.fl_end;
+ fl->fl_type = cfl.fl_type;
+ fl->fl_pid = cfl.fl_pid;
goto out;
}
@@ -425,10 +442,8 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl)
static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
{
struct inode *inode = filp->f_mapping->host;
- sigset_t oldset;
int status;
- rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset);
/*
* Flush all pending writes before doing anything
* with locks..
@@ -446,17 +461,14 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
else
status = do_vfs_lock(filp, fl);
unlock_kernel();
- rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
return status;
}
static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
{
struct inode *inode = filp->f_mapping->host;
- sigset_t oldset;
int status;
- rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset);
/*
* Flush all pending writes before doing anything
* with locks..
@@ -489,7 +501,6 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
nfs_sync_mapping(filp->f_mapping);
nfs_zap_caches(inode);
out:
- rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
return status;
}
@@ -504,9 +515,7 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
inode->i_sb->s_id, inode->i_ino,
fl->fl_type, fl->fl_flags,
(long long)fl->fl_start, (long long)fl->fl_end);
-
- if (!inode)
- return -EINVAL;
+ nfs_inc_stats(inode, NFSIOS_VFSLOCK);
/* No mandatory locks over NFS */
if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID &&
@@ -531,9 +540,6 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
inode->i_sb->s_id, inode->i_ino,
fl->fl_type, fl->fl_flags);
- if (!inode)
- return -EINVAL;
-
/*
* No BSD flocks over NFS allowed.
* Note: we could try to fake a POSIX lock request here by
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 821edd30333..3fab5b0cfc5 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -35,6 +35,7 @@
*/
#include <linux/module.h>
+#include <linux/mutex.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/slab.h>
@@ -74,8 +75,8 @@ struct idmap {
struct dentry *idmap_dentry;
wait_queue_head_t idmap_wq;
struct idmap_msg idmap_im;
- struct semaphore idmap_lock; /* Serializes upcalls */
- struct semaphore idmap_im_lock; /* Protects the hashtable */
+ struct mutex idmap_lock; /* Serializes upcalls */
+ struct mutex idmap_im_lock; /* Protects the hashtable */
struct idmap_hashtable idmap_user_hash;
struct idmap_hashtable idmap_group_hash;
};
@@ -101,11 +102,9 @@ nfs_idmap_new(struct nfs4_client *clp)
if (clp->cl_idmap != NULL)
return;
- if ((idmap = kmalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
+ if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
return;
- memset(idmap, 0, sizeof(*idmap));
-
snprintf(idmap->idmap_path, sizeof(idmap->idmap_path),
"%s/idmap", clp->cl_rpcclient->cl_pathname);
@@ -116,8 +115,8 @@ nfs_idmap_new(struct nfs4_client *clp)
return;
}
- init_MUTEX(&idmap->idmap_lock);
- init_MUTEX(&idmap->idmap_im_lock);
+ mutex_init(&idmap->idmap_lock);
+ mutex_init(&idmap->idmap_im_lock);
init_waitqueue_head(&idmap->idmap_wq);
idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER;
idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP;
@@ -132,6 +131,8 @@ nfs_idmap_delete(struct nfs4_client *clp)
if (!idmap)
return;
+ dput(idmap->idmap_dentry);
+ idmap->idmap_dentry = NULL;
rpc_unlink(idmap->idmap_path);
clp->cl_idmap = NULL;
kfree(idmap);
@@ -232,8 +233,8 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
if (namelen >= IDMAP_NAMESZ)
return -EINVAL;
- down(&idmap->idmap_lock);
- down(&idmap->idmap_im_lock);
+ mutex_lock(&idmap->idmap_lock);
+ mutex_lock(&idmap->idmap_im_lock);
he = idmap_lookup_name(h, name, namelen);
if (he != NULL) {
@@ -259,11 +260,11 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
}
set_current_state(TASK_UNINTERRUPTIBLE);
- up(&idmap->idmap_im_lock);
+ mutex_unlock(&idmap->idmap_im_lock);
schedule();
current->state = TASK_RUNNING;
remove_wait_queue(&idmap->idmap_wq, &wq);
- down(&idmap->idmap_im_lock);
+ mutex_lock(&idmap->idmap_im_lock);
if (im->im_status & IDMAP_STATUS_SUCCESS) {
*id = im->im_id;
@@ -272,8 +273,8 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
out:
memset(im, 0, sizeof(*im));
- up(&idmap->idmap_im_lock);
- up(&idmap->idmap_lock);
+ mutex_unlock(&idmap->idmap_im_lock);
+ mutex_unlock(&idmap->idmap_lock);
return (ret);
}
@@ -293,8 +294,8 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
im = &idmap->idmap_im;
- down(&idmap->idmap_lock);
- down(&idmap->idmap_im_lock);
+ mutex_lock(&idmap->idmap_lock);
+ mutex_lock(&idmap->idmap_im_lock);
he = idmap_lookup_id(h, id);
if (he != 0) {
@@ -320,11 +321,11 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
}
set_current_state(TASK_UNINTERRUPTIBLE);
- up(&idmap->idmap_im_lock);
+ mutex_unlock(&idmap->idmap_im_lock);
schedule();
current->state = TASK_RUNNING;
remove_wait_queue(&idmap->idmap_wq, &wq);
- down(&idmap->idmap_im_lock);
+ mutex_lock(&idmap->idmap_im_lock);
if (im->im_status & IDMAP_STATUS_SUCCESS) {
if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0)
@@ -335,8 +336,8 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
out:
memset(im, 0, sizeof(*im));
- up(&idmap->idmap_im_lock);
- up(&idmap->idmap_lock);
+ mutex_unlock(&idmap->idmap_im_lock);
+ mutex_unlock(&idmap->idmap_lock);
return ret;
}
@@ -380,7 +381,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
if (copy_from_user(&im_in, src, mlen) != 0)
return (-EFAULT);
- down(&idmap->idmap_im_lock);
+ mutex_lock(&idmap->idmap_im_lock);
ret = mlen;
im->im_status = im_in.im_status;
@@ -440,7 +441,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id);
ret = mlen;
out:
- up(&idmap->idmap_im_lock);
+ mutex_unlock(&idmap->idmap_im_lock);
return ret;
}
@@ -452,10 +453,10 @@ idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
if (msg->errno >= 0)
return;
- down(&idmap->idmap_im_lock);
+ mutex_lock(&idmap->idmap_im_lock);
im->im_status = IDMAP_STATUS_LOOKUPFAIL;
wake_up(&idmap->idmap_wq);
- up(&idmap->idmap_im_lock);
+ mutex_unlock(&idmap->idmap_im_lock);
}
/*
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index a77ee95b7ef..2f7656b911b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -26,6 +26,7 @@
#include <linux/unistd.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/stats.h>
+#include <linux/sunrpc/metrics.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/nfs4_mount.h>
@@ -42,6 +43,7 @@
#include "nfs4_fs.h"
#include "callback.h"
#include "delegation.h"
+#include "iostat.h"
#define NFSDBG_FACILITY NFSDBG_VFS
#define NFS_PARANOIA 1
@@ -65,6 +67,7 @@ static void nfs_clear_inode(struct inode *);
static void nfs_umount_begin(struct super_block *);
static int nfs_statfs(struct super_block *, struct kstatfs *);
static int nfs_show_options(struct seq_file *, struct vfsmount *);
+static int nfs_show_stats(struct seq_file *, struct vfsmount *);
static void nfs_zap_acl_cache(struct inode *);
static struct rpc_program nfs_program;
@@ -78,6 +81,7 @@ static struct super_operations nfs_sops = {
.clear_inode = nfs_clear_inode,
.umount_begin = nfs_umount_begin,
.show_options = nfs_show_options,
+ .show_stats = nfs_show_stats,
};
/*
@@ -103,7 +107,7 @@ static struct rpc_version * nfs_version[] = {
static struct rpc_program nfs_program = {
.name = "nfs",
.number = NFS_PROGRAM,
- .nrvers = sizeof(nfs_version) / sizeof(nfs_version[0]),
+ .nrvers = ARRAY_SIZE(nfs_version),
.version = nfs_version,
.stats = &nfs_rpcstat,
.pipe_dir_name = "/nfs",
@@ -118,7 +122,7 @@ static struct rpc_version * nfsacl_version[] = {
struct rpc_program nfsacl_program = {
.name = "nfsacl",
.number = NFS_ACL_PROGRAM,
- .nrvers = sizeof(nfsacl_version) / sizeof(nfsacl_version[0]),
+ .nrvers = ARRAY_SIZE(nfsacl_version),
.version = nfsacl_version,
.stats = &nfsacl_rpcstat,
};
@@ -133,7 +137,7 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
static int
nfs_write_inode(struct inode *inode, int sync)
{
- int flags = sync ? FLUSH_WAIT : 0;
+ int flags = sync ? FLUSH_SYNC : 0;
int ret;
ret = nfs_commit_inode(inode, flags);
@@ -237,7 +241,6 @@ static struct inode *
nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo)
{
struct nfs_server *server = NFS_SB(sb);
- struct inode *rooti;
int error;
error = server->rpc_ops->getroot(server, rootfh, fsinfo);
@@ -246,10 +249,7 @@ nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *f
return ERR_PTR(error);
}
- rooti = nfs_fhget(sb, rootfh, fsinfo->fattr);
- if (!rooti)
- return ERR_PTR(-ENOMEM);
- return rooti;
+ return nfs_fhget(sb, rootfh, fsinfo->fattr);
}
/*
@@ -277,6 +277,10 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
sb->s_magic = NFS_SUPER_MAGIC;
+ server->io_stats = nfs_alloc_iostats();
+ if (server->io_stats == NULL)
+ return -ENOMEM;
+
root_inode = nfs_get_root(sb, &server->fh, &fsinfo);
/* Did getting the root inode fail? */
if (IS_ERR(root_inode)) {
@@ -290,6 +294,9 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
}
sb->s_root->d_op = server->rpc_ops->dentry_ops;
+ /* mount time stamp, in seconds */
+ server->mount_time = jiffies;
+
/* Get some general file system info */
if (server->namelen == 0 &&
server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
@@ -396,6 +403,9 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans);
+ server->retrans_timeo = timeparms.to_initval;
+ server->retrans_count = timeparms.to_retries;
+
/* create transport and client */
xprt = xprt_create_proto(proto, &server->addr, &timeparms);
if (IS_ERR(xprt)) {
@@ -579,7 +589,7 @@ nfs_statfs(struct super_block *sb, struct kstatfs *buf)
}
-static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
+static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults)
{
static struct proc_nfs_info {
int flag;
@@ -588,28 +598,26 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
} nfs_info[] = {
{ NFS_MOUNT_SOFT, ",soft", ",hard" },
{ NFS_MOUNT_INTR, ",intr", "" },
- { NFS_MOUNT_POSIX, ",posix", "" },
{ NFS_MOUNT_NOCTO, ",nocto", "" },
{ NFS_MOUNT_NOAC, ",noac", "" },
- { NFS_MOUNT_NONLM, ",nolock", ",lock" },
+ { NFS_MOUNT_NONLM, ",nolock", "" },
{ NFS_MOUNT_NOACL, ",noacl", "" },
{ 0, NULL, NULL }
};
struct proc_nfs_info *nfs_infop;
- struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
char buf[12];
char *proto;
- seq_printf(m, ",v%d", nfss->rpc_ops->version);
+ seq_printf(m, ",vers=%d", nfss->rpc_ops->version);
seq_printf(m, ",rsize=%d", nfss->rsize);
seq_printf(m, ",wsize=%d", nfss->wsize);
- if (nfss->acregmin != 3*HZ)
+ if (nfss->acregmin != 3*HZ || showdefaults)
seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ);
- if (nfss->acregmax != 60*HZ)
+ if (nfss->acregmax != 60*HZ || showdefaults)
seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ);
- if (nfss->acdirmin != 30*HZ)
+ if (nfss->acdirmin != 30*HZ || showdefaults)
seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ);
- if (nfss->acdirmax != 60*HZ)
+ if (nfss->acdirmax != 60*HZ || showdefaults)
seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ);
for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
if (nfss->flags & nfs_infop->flag)
@@ -629,8 +637,96 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
proto = buf;
}
seq_printf(m, ",proto=%s", proto);
+ seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ);
+ seq_printf(m, ",retrans=%u", nfss->retrans_count);
+}
+
+static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+ struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
+
+ nfs_show_mount_options(m, nfss, 0);
+
seq_puts(m, ",addr=");
seq_escape(m, nfss->hostname, " \t\n\\");
+
+ return 0;
+}
+
+static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
+{
+ int i, cpu;
+ struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
+ struct rpc_auth *auth = nfss->client->cl_auth;
+ struct nfs_iostats totals = { };
+
+ seq_printf(m, "statvers=%s", NFS_IOSTAT_VERS);
+
+ /*
+ * Display all mount option settings
+ */
+ seq_printf(m, "\n\topts:\t");
+ seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw");
+ seq_puts(m, mnt->mnt_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : "");
+ seq_puts(m, mnt->mnt_sb->s_flags & MS_NOATIME ? ",noatime" : "");
+ seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : "");
+ nfs_show_mount_options(m, nfss, 1);
+
+ seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ);
+
+ seq_printf(m, "\n\tcaps:\t");
+ seq_printf(m, "caps=0x%x", nfss->caps);
+ seq_printf(m, ",wtmult=%d", nfss->wtmult);
+ seq_printf(m, ",dtsize=%d", nfss->dtsize);
+ seq_printf(m, ",bsize=%d", nfss->bsize);
+ seq_printf(m, ",namelen=%d", nfss->namelen);
+
+#ifdef CONFIG_NFS_V4
+ if (nfss->rpc_ops->version == 4) {
+ seq_printf(m, "\n\tnfsv4:\t");
+ seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
+ seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
+ seq_printf(m, ",acl=0x%x", nfss->acl_bitmask);
+ }
+#endif
+
+ /*
+ * Display security flavor in effect for this mount
+ */
+ seq_printf(m, "\n\tsec:\tflavor=%d", auth->au_ops->au_flavor);
+ if (auth->au_flavor)
+ seq_printf(m, ",pseudoflavor=%d", auth->au_flavor);
+
+ /*
+ * Display superblock I/O counters
+ */
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ struct nfs_iostats *stats;
+
+ if (!cpu_possible(cpu))
+ continue;
+
+ preempt_disable();
+ stats = per_cpu_ptr(nfss->io_stats, cpu);
+
+ for (i = 0; i < __NFSIOS_COUNTSMAX; i++)
+ totals.events[i] += stats->events[i];
+ for (i = 0; i < __NFSIOS_BYTESMAX; i++)
+ totals.bytes[i] += stats->bytes[i];
+
+ preempt_enable();
+ }
+
+ seq_printf(m, "\n\tevents:\t");
+ for (i = 0; i < __NFSIOS_COUNTSMAX; i++)
+ seq_printf(m, "%lu ", totals.events[i]);
+ seq_printf(m, "\n\tbytes:\t");
+ for (i = 0; i < __NFSIOS_BYTESMAX; i++)
+ seq_printf(m, "%Lu ", totals.bytes[i]);
+ seq_printf(m, "\n");
+
+ rpc_print_iostats(m, nfss->client);
+
return 0;
}
@@ -660,6 +756,8 @@ static void nfs_zap_caches_locked(struct inode *inode)
struct nfs_inode *nfsi = NFS_I(inode);
int mode = inode->i_mode;
+ nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
+
NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
NFS_ATTRTIMEO_UPDATE(inode) = jiffies;
@@ -751,7 +849,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
.fh = fh,
.fattr = fattr
};
- struct inode *inode = NULL;
+ struct inode *inode = ERR_PTR(-ENOENT);
unsigned long hash;
if ((fattr->valid & NFS_ATTR_FATTR) == 0)
@@ -764,8 +862,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
hash = nfs_fattr_to_ino_t(fattr);
- if (!(inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc)))
+ inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc);
+ if (inode == NULL) {
+ inode = ERR_PTR(-ENOMEM);
goto out_no_inode;
+ }
if (inode->i_state & I_NEW) {
struct nfs_inode *nfsi = NFS_I(inode);
@@ -834,7 +935,7 @@ out:
return inode;
out_no_inode:
- printk("nfs_fhget: iget failed\n");
+ dprintk("nfs_fhget: iget failed with error %ld\n", PTR_ERR(inode));
goto out;
}
@@ -847,6 +948,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
struct nfs_fattr fattr;
int error;
+ nfs_inc_stats(inode, NFSIOS_VFSSETATTR);
+
if (attr->ia_valid & ATTR_SIZE) {
if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode))
attr->ia_valid &= ~ATTR_SIZE;
@@ -859,11 +962,9 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
lock_kernel();
nfs_begin_data_update(inode);
- /* Write all dirty data if we're changing file permissions or size */
- if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) {
- filemap_write_and_wait(inode->i_mapping);
- nfs_wb_all(inode);
- }
+ /* Write all dirty data */
+ filemap_write_and_wait(inode->i_mapping);
+ nfs_wb_all(inode);
/*
* Return any delegations if we're going to change ACLs
*/
@@ -902,6 +1003,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
spin_unlock(&inode->i_lock);
}
if ((attr->ia_valid & ATTR_SIZE) != 0) {
+ nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
inode->i_size = attr->ia_size;
vmtruncate(inode, attr->ia_size);
}
@@ -949,7 +1051,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
int err;
/* Flush out writes to the server in order to update c/mtime */
- nfs_sync_inode(inode, 0, 0, FLUSH_WAIT|FLUSH_NOCOMMIT);
+ nfs_sync_inode_wait(inode, 0, 0, FLUSH_NOCOMMIT);
/*
* We may force a getattr if the user cares about atime.
@@ -973,7 +1075,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
return err;
}
-struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred)
+static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, struct dentry *dentry, struct rpc_cred *cred)
{
struct nfs_open_context *ctx;
@@ -981,6 +1083,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rp
if (ctx != NULL) {
atomic_set(&ctx->count, 1);
ctx->dentry = dget(dentry);
+ ctx->vfsmnt = mntget(mnt);
ctx->cred = get_rpccred(cred);
ctx->state = NULL;
ctx->lockowner = current->files;
@@ -1011,6 +1114,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx)
if (ctx->cred != NULL)
put_rpccred(ctx->cred);
dput(ctx->dentry);
+ mntput(ctx->vfsmnt);
kfree(ctx);
}
}
@@ -1019,7 +1123,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx)
* Ensure that mmap has a recent RPC credential for use when writing out
* shared pages
*/
-void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
+static void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
{
struct inode *inode = filp->f_dentry->d_inode;
struct nfs_inode *nfsi = NFS_I(inode);
@@ -1051,7 +1155,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c
return ctx;
}
-void nfs_file_clear_open_context(struct file *filp)
+static void nfs_file_clear_open_context(struct file *filp)
{
struct inode *inode = filp->f_dentry->d_inode;
struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data;
@@ -1076,7 +1180,7 @@ int nfs_open(struct inode *inode, struct file *filp)
cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
if (IS_ERR(cred))
return PTR_ERR(cred);
- ctx = alloc_nfs_open_context(filp->f_dentry, cred);
+ ctx = alloc_nfs_open_context(filp->f_vfsmnt, filp->f_dentry, cred);
put_rpccred(cred);
if (ctx == NULL)
return -ENOMEM;
@@ -1185,6 +1289,7 @@ int nfs_attribute_timeout(struct inode *inode)
*/
int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
{
+ nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
if (!(NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))
&& !nfs_attribute_timeout(inode))
return NFS_STALE(inode) ? -ESTALE : 0;
@@ -1201,6 +1306,7 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
struct nfs_inode *nfsi = NFS_I(inode);
if (nfsi->cache_validity & NFS_INO_INVALID_DATA) {
+ nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
if (S_ISREG(inode->i_mode))
nfs_sync_mapping(mapping);
invalidate_inode_pages2(mapping);
@@ -1299,39 +1405,37 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
if ((fattr->valid & NFS_ATTR_FATTR) == 0)
return 0;
+ /* Has the inode gone and changed behind our back? */
+ if (nfsi->fileid != fattr->fileid
+ || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
+ return -EIO;
+ }
+
/* Are we in the process of updating data on the server? */
data_unstable = nfs_caches_unstable(inode);
/* Do atomic weak cache consistency updates */
nfs_wcc_update_inode(inode, fattr);
- if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
- nfsi->change_attr != fattr->change_attr) {
+ if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0) {
+ if (nfsi->change_attr == fattr->change_attr)
+ goto out;
nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
if (!data_unstable)
nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
}
- /* Has the inode gone and changed behind our back? */
- if (nfsi->fileid != fattr->fileid
- || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
- return -EIO;
- }
-
- cur_size = i_size_read(inode);
- new_isize = nfs_size_to_loff_t(fattr->size);
-
/* Verify a few of the more important attributes */
if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
if (!data_unstable)
nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
}
- if (cur_size != new_isize) {
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
- if (nfsi->npages == 0)
- nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
- }
+
+ cur_size = i_size_read(inode);
+ new_isize = nfs_size_to_loff_t(fattr->size);
+ if (cur_size != new_isize && nfsi->npages == 0)
+ nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
/* Have any file permissions changed? */
if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
@@ -1343,6 +1447,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
if (inode->i_nlink != fattr->nlink)
nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+out:
if (!timespec_equal(&inode->i_atime, &fattr->atime))
nfsi->cache_validity |= NFS_INO_INVALID_ATIME;
@@ -1481,15 +1586,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfsi->cache_change_attribute = jiffies;
}
- if ((fattr->valid & NFS_ATTR_FATTR_V4)
- && nfsi->change_attr != fattr->change_attr) {
- dprintk("NFS: change_attr change on server for file %s/%ld\n",
- inode->i_sb->s_id, inode->i_ino);
- nfsi->change_attr = fattr->change_attr;
- invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
- nfsi->cache_change_attribute = jiffies;
- }
-
/* If ctime has changed we should definitely clear access+acl caches */
if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
@@ -1519,8 +1615,20 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
inode->i_blksize = fattr->du.nfs2.blocksize;
}
+ if ((fattr->valid & NFS_ATTR_FATTR_V4)) {
+ if (nfsi->change_attr != fattr->change_attr) {
+ dprintk("NFS: change_attr change on server for file %s/%ld\n",
+ inode->i_sb->s_id, inode->i_ino);
+ nfsi->change_attr = fattr->change_attr;
+ invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+ nfsi->cache_change_attribute = jiffies;
+ } else
+ invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA);
+ }
+
/* Update attrtimeo value if we're out of the unstable period */
if (invalid & NFS_INO_INVALID_ATTR) {
+ nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
nfsi->attrtimeo_timestamp = jiffies;
} else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) {
@@ -1637,10 +1745,9 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
#endif /* CONFIG_NFS_V3 */
s = ERR_PTR(-ENOMEM);
- server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
+ server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
if (!server)
goto out_err;
- memset(server, 0, sizeof(struct nfs_server));
/* Zero out the NFS state stuff */
init_nfsv4_state(server);
server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
@@ -1679,7 +1786,7 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
s->s_flags = flags;
- error = nfs_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
+ error = nfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
if (error) {
up_write(&s->s_umount);
deactivate_super(s);
@@ -1712,6 +1819,7 @@ static void nfs_kill_super(struct super_block *s)
rpciod_down(); /* release rpciod */
+ nfs_free_iostats(server->io_stats);
kfree(server->hostname);
kfree(server);
}
@@ -1738,6 +1846,7 @@ static struct super_operations nfs4_sops = {
.clear_inode = nfs4_clear_inode,
.umount_begin = nfs_umount_begin,
.show_options = nfs_show_options,
+ .show_stats = nfs_show_stats,
};
/*
@@ -1800,6 +1909,9 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans);
+ server->retrans_timeo = timeparms.to_initval;
+ server->retrans_count = timeparms.to_retries;
+
clp = nfs4_get_client(&server->addr.sin_addr);
if (!clp) {
dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
@@ -1941,10 +2053,9 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
return ERR_PTR(-EINVAL);
}
- server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
+ server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
if (!server)
return ERR_PTR(-ENOMEM);
- memset(server, 0, sizeof(struct nfs_server));
/* Zero out the NFS state stuff */
init_nfsv4_state(server);
server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
@@ -1996,7 +2107,7 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
s->s_flags = flags;
- error = nfs4_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
+ error = nfs4_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
if (error) {
up_write(&s->s_umount);
deactivate_super(s);
@@ -2024,10 +2135,12 @@ static void nfs4_kill_super(struct super_block *sb)
if (server->client != NULL && !IS_ERR(server->client))
rpc_shutdown_client(server->client);
- rpciod_down(); /* release rpciod */
destroy_nfsv4_state(server);
+ rpciod_down();
+
+ nfs_free_iostats(server->io_stats);
kfree(server->hostname);
kfree(server);
}
@@ -2163,7 +2276,8 @@ static int nfs_init_inodecache(void)
{
nfs_inode_cachep = kmem_cache_create("nfs_inode_cache",
sizeof(struct nfs_inode),
- 0, SLAB_RECLAIM_ACCOUNT,
+ 0, (SLAB_RECLAIM_ACCOUNT|
+ SLAB_MEM_SPREAD),
init_once, NULL);
if (nfs_inode_cachep == NULL)
return -ENOMEM;
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h
new file mode 100644
index 00000000000..6350ecbde58
--- /dev/null
+++ b/fs/nfs/iostat.h
@@ -0,0 +1,164 @@
+/*
+ * linux/fs/nfs/iostat.h
+ *
+ * Declarations for NFS client per-mount statistics
+ *
+ * Copyright (C) 2005, 2006 Chuck Lever <cel@netapp.com>
+ *
+ * NFS client per-mount statistics provide information about the health of
+ * the NFS client and the health of each NFS mount point. Generally these
+ * are not for detailed problem diagnosis, but simply to indicate that there
+ * is a problem.
+ *
+ * These counters are not meant to be human-readable, but are meant to be
+ * integrated into system monitoring tools such as "sar" and "iostat". As
+ * such, the counters are sampled by the tools over time, and are never
+ * zeroed after a file system is mounted. Moving averages can be computed
+ * by the tools by taking the difference between two instantaneous samples
+ * and dividing that by the time between the samples.
+ */
+
+#ifndef _NFS_IOSTAT
+#define _NFS_IOSTAT
+
+#define NFS_IOSTAT_VERS "1.0"
+
+/*
+ * NFS byte counters
+ *
+ * 1. SERVER - the number of payload bytes read from or written to the
+ * server by the NFS client via an NFS READ or WRITE request.
+ *
+ * 2. NORMAL - the number of bytes read or written by applications via
+ * the read(2) and write(2) system call interfaces.
+ *
+ * 3. DIRECT - the number of bytes read or written from files opened
+ * with the O_DIRECT flag.
+ *
+ * These counters give a view of the data throughput into and out of the NFS
+ * client. Comparing the number of bytes requested by an application with the
+ * number of bytes the client requests from the server can provide an
+ * indication of client efficiency (per-op, cache hits, etc).
+ *
+ * These counters can also help characterize which access methods are in
+ * use. DIRECT by itself shows whether there is any O_DIRECT traffic.
+ * NORMAL + DIRECT shows how much data is going through the system call
+ * interface. A large amount of SERVER traffic without much NORMAL or
+ * DIRECT traffic shows that applications are using mapped files.
+ *
+ * NFS page counters
+ *
+ * These count the number of pages read or written via nfs_readpage(),
+ * nfs_readpages(), or their write equivalents.
+ */
+enum nfs_stat_bytecounters {
+ NFSIOS_NORMALREADBYTES = 0,
+ NFSIOS_NORMALWRITTENBYTES,
+ NFSIOS_DIRECTREADBYTES,
+ NFSIOS_DIRECTWRITTENBYTES,
+ NFSIOS_SERVERREADBYTES,
+ NFSIOS_SERVERWRITTENBYTES,
+ NFSIOS_READPAGES,
+ NFSIOS_WRITEPAGES,
+ __NFSIOS_BYTESMAX,
+};
+
+/*
+ * NFS event counters
+ *
+ * These counters provide a low-overhead way of monitoring client activity
+ * without enabling NFS trace debugging. The counters show the rate at
+ * which VFS requests are made, and how often the client invalidates its
+ * data and attribute caches. This allows system administrators to monitor
+ * such things as how close-to-open is working, and answer questions such
+ * as "why are there so many GETATTR requests on the wire?"
+ *
+ * They also count anamolous events such as short reads and writes, silly
+ * renames due to close-after-delete, and operations that change the size
+ * of a file (such operations can often be the source of data corruption
+ * if applications aren't using file locking properly).
+ */
+enum nfs_stat_eventcounters {
+ NFSIOS_INODEREVALIDATE = 0,
+ NFSIOS_DENTRYREVALIDATE,
+ NFSIOS_DATAINVALIDATE,
+ NFSIOS_ATTRINVALIDATE,
+ NFSIOS_VFSOPEN,
+ NFSIOS_VFSLOOKUP,
+ NFSIOS_VFSACCESS,
+ NFSIOS_VFSUPDATEPAGE,
+ NFSIOS_VFSREADPAGE,
+ NFSIOS_VFSREADPAGES,
+ NFSIOS_VFSWRITEPAGE,
+ NFSIOS_VFSWRITEPAGES,
+ NFSIOS_VFSGETDENTS,
+ NFSIOS_VFSSETATTR,
+ NFSIOS_VFSFLUSH,
+ NFSIOS_VFSFSYNC,
+ NFSIOS_VFSLOCK,
+ NFSIOS_VFSRELEASE,
+ NFSIOS_CONGESTIONWAIT,
+ NFSIOS_SETATTRTRUNC,
+ NFSIOS_EXTENDWRITE,
+ NFSIOS_SILLYRENAME,
+ NFSIOS_SHORTREAD,
+ NFSIOS_SHORTWRITE,
+ NFSIOS_DELAY,
+ __NFSIOS_COUNTSMAX,
+};
+
+#ifdef __KERNEL__
+
+#include <linux/percpu.h>
+#include <linux/cache.h>
+
+struct nfs_iostats {
+ unsigned long long bytes[__NFSIOS_BYTESMAX];
+ unsigned long events[__NFSIOS_COUNTSMAX];
+} ____cacheline_aligned;
+
+static inline void nfs_inc_server_stats(struct nfs_server *server, enum nfs_stat_eventcounters stat)
+{
+ struct nfs_iostats *iostats;
+ int cpu;
+
+ cpu = get_cpu();
+ iostats = per_cpu_ptr(server->io_stats, cpu);
+ iostats->events[stat] ++;
+ put_cpu_no_resched();
+}
+
+static inline void nfs_inc_stats(struct inode *inode, enum nfs_stat_eventcounters stat)
+{
+ nfs_inc_server_stats(NFS_SERVER(inode), stat);
+}
+
+static inline void nfs_add_server_stats(struct nfs_server *server, enum nfs_stat_bytecounters stat, unsigned long addend)
+{
+ struct nfs_iostats *iostats;
+ int cpu;
+
+ cpu = get_cpu();
+ iostats = per_cpu_ptr(server->io_stats, cpu);
+ iostats->bytes[stat] += addend;
+ put_cpu_no_resched();
+}
+
+static inline void nfs_add_stats(struct inode *inode, enum nfs_stat_bytecounters stat, unsigned long addend)
+{
+ nfs_add_server_stats(NFS_SERVER(inode), stat, addend);
+}
+
+static inline struct nfs_iostats *nfs_alloc_iostats(void)
+{
+ return alloc_percpu(struct nfs_iostats);
+}
+
+static inline void nfs_free_iostats(struct nfs_iostats *stats)
+{
+ if (stats != NULL)
+ free_percpu(stats);
+}
+
+#endif
+#endif
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index db99b8f678f..445abb4d421 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -49,9 +49,12 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
struct mnt_fhstatus result = {
.fh = fh
};
+ struct rpc_message msg = {
+ .rpc_argp = path,
+ .rpc_resp = &result,
+ };
char hostname[32];
int status;
- int call;
dprintk("NFS: nfs_mount(%08x:%s)\n",
(unsigned)ntohl(addr->sin_addr.s_addr), path);
@@ -61,8 +64,12 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
if (IS_ERR(mnt_clnt))
return PTR_ERR(mnt_clnt);
- call = (version == NFS_MNT3_VERSION) ? MOUNTPROC3_MNT : MNTPROC_MNT;
- status = rpc_call(mnt_clnt, call, path, &result, 0);
+ if (version == NFS_MNT3_VERSION)
+ msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT];
+ else
+ msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT];
+
+ status = rpc_call_sync(mnt_clnt, &msg, 0);
return status < 0? status : (result.status? -EACCES : 0);
}
@@ -137,6 +144,8 @@ static struct rpc_procinfo mnt_procedures[] = {
.p_encode = (kxdrproc_t) xdr_encode_dirpath,
.p_decode = (kxdrproc_t) xdr_decode_fhstatus,
.p_bufsiz = MNT_dirpath_sz << 2,
+ .p_statidx = MNTPROC_MNT,
+ .p_name = "MOUNT",
},
};
@@ -146,6 +155,8 @@ static struct rpc_procinfo mnt3_procedures[] = {
.p_encode = (kxdrproc_t) xdr_encode_dirpath,
.p_decode = (kxdrproc_t) xdr_decode_fhstatus3,
.p_bufsiz = MNT_dirpath_sz << 2,
+ .p_statidx = MOUNTPROC3_MNT,
+ .p_name = "MOUNT",
},
};
@@ -174,7 +185,7 @@ static struct rpc_stat mnt_stats;
static struct rpc_program mnt_program = {
.name = "mount",
.number = NFS_MNT_PROGRAM,
- .nrvers = sizeof(mnt_version)/sizeof(mnt_version[0]),
+ .nrvers = ARRAY_SIZE(mnt_version),
.version = mnt_version,
.stats = &mnt_stats,
};
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 7fc0560c89c..f0015fa876e 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -682,7 +682,9 @@ nfs_stat_to_errno(int stat)
.p_encode = (kxdrproc_t) nfs_xdr_##argtype, \
.p_decode = (kxdrproc_t) nfs_xdr_##restype, \
.p_bufsiz = MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2, \
- .p_timer = timer \
+ .p_timer = timer, \
+ .p_statidx = NFSPROC_##proc, \
+ .p_name = #proc, \
}
struct rpc_procinfo nfs_procedures[] = {
PROC(GETATTR, fhandle, attrstat, 1),
@@ -704,6 +706,6 @@ struct rpc_procinfo nfs_procedures[] = {
struct rpc_version nfs_version2 = {
.number = 2,
- .nrprocs = sizeof(nfs_procedures)/sizeof(nfs_procedures[0]),
+ .nrprocs = ARRAY_SIZE(nfs_procedures),
.procs = nfs_procedures
};
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 6a5bbc0ae94..33287879bd2 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -190,6 +190,10 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
struct nfs3_getaclres res = {
.fattr = &fattr,
};
+ struct rpc_message msg = {
+ .rpc_argp = &args,
+ .rpc_resp = &res,
+ };
struct posix_acl *acl;
int status, count;
@@ -218,8 +222,8 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
return NULL;
dprintk("NFS call getacl\n");
- status = rpc_call(server->client_acl, ACLPROC3_GETACL,
- &args, &res, 0);
+ msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL];
+ status = rpc_call_sync(server->client_acl, &msg, 0);
dprintk("NFS reply getacl: %d\n", status);
/* pages may have been allocated at the xdr layer. */
@@ -286,6 +290,10 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
.acl_access = acl,
.pages = pages,
};
+ struct rpc_message msg = {
+ .rpc_argp = &args,
+ .rpc_resp = &fattr,
+ };
int status, count;
status = -EOPNOTSUPP;
@@ -306,8 +314,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
dprintk("NFS call setacl\n");
nfs_begin_data_update(inode);
- status = rpc_call(server->client_acl, ACLPROC3_SETACL,
- &args, &fattr, 0);
+ msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL];
+ status = rpc_call_sync(server->client_acl, &msg, 0);
spin_lock(&inode->i_lock);
NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS;
spin_unlock(&inode->i_lock);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index ed67567f055..cf186f0d2b3 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -19,6 +19,8 @@
#include <linux/smp_lock.h>
#include <linux/nfs_mount.h>
+#include "iostat.h"
+
#define NFSDBG_FACILITY NFSDBG_PROC
extern struct rpc_procinfo nfs3_procedures[];
@@ -41,27 +43,14 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
return res;
}
-static inline int
-nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)
-{
- struct rpc_message msg = {
- .rpc_proc = &clnt->cl_procinfo[proc],
- .rpc_argp = argp,
- .rpc_resp = resp,
- };
- return nfs3_rpc_wrapper(clnt, &msg, flags);
-}
-
-#define rpc_call(clnt, proc, argp, resp, flags) \
- nfs3_rpc_call_wrapper(clnt, proc, argp, resp, flags)
-#define rpc_call_sync(clnt, msg, flags) \
- nfs3_rpc_wrapper(clnt, msg, flags)
+#define rpc_call_sync(clnt, msg, flags) nfs3_rpc_wrapper(clnt, msg, flags)
static int
-nfs3_async_handle_jukebox(struct rpc_task *task)
+nfs3_async_handle_jukebox(struct rpc_task *task, struct inode *inode)
{
if (task->tk_status != -EJUKEBOX)
return 0;
+ nfs_inc_stats(inode, NFSIOS_DELAY);
task->tk_status = 0;
rpc_restart_call(task);
rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
@@ -72,14 +61,21 @@ static int
do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_FSINFO],
+ .rpc_argp = fhandle,
+ .rpc_resp = info,
+ };
int status;
dprintk("%s: call fsinfo\n", __FUNCTION__);
nfs_fattr_init(info->fattr);
- status = rpc_call(client, NFS3PROC_FSINFO, fhandle, info, 0);
+ status = rpc_call_sync(client, &msg, 0);
dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status);
if (!(info->fattr->valid & NFS_ATTR_FATTR)) {
- status = rpc_call(client, NFS3PROC_GETATTR, fhandle, info->fattr, 0);
+ msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
+ msg.rpc_resp = info->fattr;
+ status = rpc_call_sync(client, &msg, 0);
dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
}
return status;
@@ -107,12 +103,16 @@ static int
nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fattr *fattr)
{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR],
+ .rpc_argp = fhandle,
+ .rpc_resp = fattr,
+ };
int status;
dprintk("NFS call getattr\n");
nfs_fattr_init(fattr);
- status = rpc_call(server->client, NFS3PROC_GETATTR,
- fhandle, fattr, 0);
+ status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply getattr: %d\n", status);
return status;
}
@@ -126,11 +126,16 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
.fh = NFS_FH(inode),
.sattr = sattr,
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_SETATTR],
+ .rpc_argp = &arg,
+ .rpc_resp = fattr,
+ };
int status;
dprintk("NFS call setattr\n");
nfs_fattr_init(fattr);
- status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0);
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
if (status == 0)
nfs_setattr_update_inode(inode, sattr);
dprintk("NFS reply setattr: %d\n", status);
@@ -152,15 +157,23 @@ nfs3_proc_lookup(struct inode *dir, struct qstr *name,
.fh = fhandle,
.fattr = fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_LOOKUP],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status;
dprintk("NFS call lookup %s\n", name->name);
nfs_fattr_init(&dir_attr);
nfs_fattr_init(fattr);
- status = rpc_call(NFS_CLIENT(dir), NFS3PROC_LOOKUP, &arg, &res, 0);
- if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR))
- status = rpc_call(NFS_CLIENT(dir), NFS3PROC_GETATTR,
- fhandle, fattr, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) {
+ msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
+ msg.rpc_argp = fhandle;
+ msg.rpc_resp = fattr;
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ }
dprintk("NFS reply lookup: %d\n", status);
if (status >= 0)
status = nfs_refresh_inode(dir, &dir_attr);
@@ -180,7 +193,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
.rpc_proc = &nfs3_procedures[NFS3PROC_ACCESS],
.rpc_argp = &arg,
.rpc_resp = &res,
- .rpc_cred = entry->cred
+ .rpc_cred = entry->cred,
};
int mode = entry->mask;
int status;
@@ -226,12 +239,16 @@ static int nfs3_proc_readlink(struct inode *inode, struct page *page,
.pglen = pglen,
.pages = &page
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_READLINK],
+ .rpc_argp = &args,
+ .rpc_resp = &fattr,
+ };
int status;
dprintk("NFS call readlink\n");
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(inode), NFS3PROC_READLINK,
- &args, &fattr, 0);
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
nfs_refresh_inode(inode, &fattr);
dprintk("NFS reply readlink: %d\n", status);
return status;
@@ -327,6 +344,11 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
.fh = &fhandle,
.fattr = &fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_CREATE],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
mode_t mode = sattr->ia_mode;
int status;
@@ -343,8 +365,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
again:
nfs_fattr_init(&dir_attr);
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(dir), NFS3PROC_CREATE, &arg, &res, 0);
- nfs_post_op_update_inode(dir, &dir_attr);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ nfs_refresh_inode(dir, &dir_attr);
/* If the server doesn't support the exclusive creation semantics,
* try again with simple 'guarded' mode. */
@@ -447,7 +469,7 @@ nfs3_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
struct rpc_message *msg = &task->tk_msg;
struct nfs_fattr *dir_attr;
- if (nfs3_async_handle_jukebox(task))
+ if (nfs3_async_handle_jukebox(task, dir->d_inode))
return 1;
if (msg->rpc_argp) {
dir_attr = (struct nfs_fattr*)msg->rpc_resp;
@@ -474,12 +496,17 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
.fromattr = &old_dir_attr,
.toattr = &new_dir_attr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status;
dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
nfs_fattr_init(&old_dir_attr);
nfs_fattr_init(&new_dir_attr);
- status = rpc_call(NFS_CLIENT(old_dir), NFS3PROC_RENAME, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
nfs_post_op_update_inode(old_dir, &old_dir_attr);
nfs_post_op_update_inode(new_dir, &new_dir_attr);
dprintk("NFS reply rename: %d\n", status);
@@ -500,12 +527,17 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
.dir_attr = &dir_attr,
.fattr = &fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_LINK],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status;
dprintk("NFS call link %s\n", name->name);
nfs_fattr_init(&dir_attr);
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(inode), NFS3PROC_LINK, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
nfs_post_op_update_inode(dir, &dir_attr);
nfs_post_op_update_inode(inode, &fattr);
dprintk("NFS reply link: %d\n", status);
@@ -531,6 +563,11 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
.fh = fhandle,
.fattr = fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_SYMLINK],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status;
if (path->len > NFS3_MAXPATHLEN)
@@ -538,7 +575,7 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
dprintk("NFS call symlink %s -> %s\n", name->name, path->name);
nfs_fattr_init(&dir_attr);
nfs_fattr_init(fattr);
- status = rpc_call(NFS_CLIENT(dir), NFS3PROC_SYMLINK, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_post_op_update_inode(dir, &dir_attr);
dprintk("NFS reply symlink: %d\n", status);
return status;
@@ -560,6 +597,11 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
.fh = &fhandle,
.fattr = &fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_MKDIR],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int mode = sattr->ia_mode;
int status;
@@ -569,7 +611,7 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
nfs_fattr_init(&dir_attr);
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_post_op_update_inode(dir, &dir_attr);
if (status != 0)
goto out;
@@ -591,11 +633,16 @@ nfs3_proc_rmdir(struct inode *dir, struct qstr *name)
.name = name->name,
.len = name->len
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_RMDIR],
+ .rpc_argp = &arg,
+ .rpc_resp = &dir_attr,
+ };
int status;
dprintk("NFS call rmdir %s\n", name->name);
nfs_fattr_init(&dir_attr);
- status = rpc_call(NFS_CLIENT(dir), NFS3PROC_RMDIR, &arg, &dir_attr, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_post_op_update_inode(dir, &dir_attr);
dprintk("NFS reply rmdir: %d\n", status);
return status;
@@ -672,6 +719,11 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
.fh = &fh,
.fattr = &fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_MKNOD],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
mode_t mode = sattr->ia_mode;
int status;
@@ -690,7 +742,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
nfs_fattr_init(&dir_attr);
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_post_op_update_inode(dir, &dir_attr);
if (status != 0)
goto out;
@@ -707,11 +759,16 @@ static int
nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsstat *stat)
{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_FSSTAT],
+ .rpc_argp = fhandle,
+ .rpc_resp = stat,
+ };
int status;
dprintk("NFS call fsstat\n");
nfs_fattr_init(stat->fattr);
- status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0);
+ status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply statfs: %d\n", status);
return status;
}
@@ -720,11 +777,16 @@ static int
nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_FSINFO],
+ .rpc_argp = fhandle,
+ .rpc_resp = info,
+ };
int status;
dprintk("NFS call fsinfo\n");
nfs_fattr_init(info->fattr);
- status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
+ status = rpc_call_sync(server->client_sys, &msg, 0);
dprintk("NFS reply fsinfo: %d\n", status);
return status;
}
@@ -733,40 +795,34 @@ static int
nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_pathconf *info)
{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_PATHCONF],
+ .rpc_argp = fhandle,
+ .rpc_resp = info,
+ };
int status;
dprintk("NFS call pathconf\n");
nfs_fattr_init(info->fattr);
- status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0);
+ status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply pathconf: %d\n", status);
return status;
}
extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
-static void nfs3_read_done(struct rpc_task *task, void *calldata)
+static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
{
- struct nfs_read_data *data = calldata;
-
- if (nfs3_async_handle_jukebox(task))
- return;
+ if (nfs3_async_handle_jukebox(task, data->inode))
+ return -EAGAIN;
/* Call back common NFS readpage processing */
if (task->tk_status >= 0)
nfs_refresh_inode(data->inode, &data->fattr);
- nfs_readpage_result(task, calldata);
+ return 0;
}
-static const struct rpc_call_ops nfs3_read_ops = {
- .rpc_call_done = nfs3_read_done,
- .rpc_release = nfs_readdata_release,
-};
-
-static void
-nfs3_proc_read_setup(struct nfs_read_data *data)
+static void nfs3_proc_read_setup(struct nfs_read_data *data)
{
- struct rpc_task *task = &data->task;
- struct inode *inode = data->inode;
- int flags;
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_READ],
.rpc_argp = &data->args,
@@ -774,37 +830,20 @@ nfs3_proc_read_setup(struct nfs_read_data *data)
.rpc_cred = data->cred,
};
- /* N.B. Do we need to test? Never called for swapfile inode */
- flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
-
- /* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_read_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
-static void nfs3_write_done(struct rpc_task *task, void *calldata)
+static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
{
- struct nfs_write_data *data = calldata;
-
- if (nfs3_async_handle_jukebox(task))
- return;
+ if (nfs3_async_handle_jukebox(task, data->inode))
+ return -EAGAIN;
if (task->tk_status >= 0)
nfs_post_op_update_inode(data->inode, data->res.fattr);
- nfs_writeback_done(task, calldata);
+ return 0;
}
-static const struct rpc_call_ops nfs3_write_ops = {
- .rpc_call_done = nfs3_write_done,
- .rpc_release = nfs_writedata_release,
-};
-
-static void
-nfs3_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs3_proc_write_setup(struct nfs_write_data *data, int how)
{
- struct rpc_task *task = &data->task;
- struct inode *inode = data->inode;
- int stable;
- int flags;
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_WRITE],
.rpc_argp = &data->args,
@@ -812,45 +851,28 @@ nfs3_proc_write_setup(struct nfs_write_data *data, int how)
.rpc_cred = data->cred,
};
+ data->args.stable = NFS_UNSTABLE;
if (how & FLUSH_STABLE) {
- if (!NFS_I(inode)->ncommit)
- stable = NFS_FILE_SYNC;
- else
- stable = NFS_DATA_SYNC;
- } else
- stable = NFS_UNSTABLE;
- data->args.stable = stable;
-
- /* Set the initial flags for the task. */
- flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+ data->args.stable = NFS_FILE_SYNC;
+ if (NFS_I(data->inode)->ncommit)
+ data->args.stable = NFS_DATA_SYNC;
+ }
/* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_write_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
-static void nfs3_commit_done(struct rpc_task *task, void *calldata)
+static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
{
- struct nfs_write_data *data = calldata;
-
- if (nfs3_async_handle_jukebox(task))
- return;
+ if (nfs3_async_handle_jukebox(task, data->inode))
+ return -EAGAIN;
if (task->tk_status >= 0)
nfs_post_op_update_inode(data->inode, data->res.fattr);
- nfs_commit_done(task, calldata);
+ return 0;
}
-static const struct rpc_call_ops nfs3_commit_ops = {
- .rpc_call_done = nfs3_commit_done,
- .rpc_release = nfs_commit_release,
-};
-
-static void
-nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
+static void nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
{
- struct rpc_task *task = &data->task;
- struct inode *inode = data->inode;
- int flags;
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT],
.rpc_argp = &data->args,
@@ -858,12 +880,7 @@ nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
.rpc_cred = data->cred,
};
- /* Set the initial flags for the task. */
- flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
- /* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_commit_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
static int
@@ -902,8 +919,11 @@ struct nfs_rpc_ops nfs_v3_clientops = {
.pathconf = nfs3_proc_pathconf,
.decode_dirent = nfs3_decode_dirent,
.read_setup = nfs3_proc_read_setup,
+ .read_done = nfs3_read_done,
.write_setup = nfs3_proc_write_setup,
+ .write_done = nfs3_write_done,
.commit_setup = nfs3_proc_commit_setup,
+ .commit_done = nfs3_commit_done,
.file_open = nfs_open,
.file_release = nfs_release,
.lock = nfs3_proc_lock,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index b6c0b5012bc..ec233619687 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1109,7 +1109,9 @@ nfs3_xdr_setaclres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
.p_encode = (kxdrproc_t) nfs3_xdr_##argtype, \
.p_decode = (kxdrproc_t) nfs3_xdr_##restype, \
.p_bufsiz = MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2, \
- .p_timer = timer \
+ .p_timer = timer, \
+ .p_statidx = NFS3PROC_##proc, \
+ .p_name = #proc, \
}
struct rpc_procinfo nfs3_procedures[] = {
@@ -1138,7 +1140,7 @@ struct rpc_procinfo nfs3_procedures[] = {
struct rpc_version nfs_version3 = {
.number = 3,
- .nrprocs = sizeof(nfs3_procedures)/sizeof(nfs3_procedures[0]),
+ .nrprocs = ARRAY_SIZE(nfs3_procedures),
.procs = nfs3_procedures
};
@@ -1150,6 +1152,7 @@ static struct rpc_procinfo nfs3_acl_procedures[] = {
.p_decode = (kxdrproc_t) nfs3_xdr_getaclres,
.p_bufsiz = MAX(ACL3_getaclargs_sz, ACL3_getaclres_sz) << 2,
.p_timer = 1,
+ .p_name = "GETACL",
},
[ACLPROC3_SETACL] = {
.p_proc = ACLPROC3_SETACL,
@@ -1157,6 +1160,7 @@ static struct rpc_procinfo nfs3_acl_procedures[] = {
.p_decode = (kxdrproc_t) nfs3_xdr_setaclres,
.p_bufsiz = MAX(ACL3_setaclargs_sz, ACL3_setaclres_sz) << 2,
.p_timer = 0,
+ .p_name = "SETACL",
},
};
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 984ca3454d0..47ece1dd3c6 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -51,6 +51,7 @@
#include "nfs4_fs.h"
#include "delegation.h"
+#include "iostat.h"
#define NFSDBG_FACILITY NFSDBG_PROC
@@ -335,7 +336,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
if (!(data->f_attr.valid & NFS_ATTR_FATTR))
goto out;
inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr);
- if (inode == NULL)
+ if (IS_ERR(inode))
goto out;
state = nfs4_get_open_state(inode, data->owner);
if (state == NULL)
@@ -604,11 +605,14 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
int status;
atomic_inc(&data->count);
+ /*
+ * If rpc_run_task() ends up calling ->rpc_release(), we
+ * want to ensure that it takes the 'error' code path.
+ */
+ data->rpc_status = -ENOMEM;
task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data);
- if (IS_ERR(task)) {
- nfs4_opendata_free(data);
+ if (IS_ERR(task))
return PTR_ERR(task);
- }
status = nfs4_wait_for_completion_rpc_task(task);
if (status != 0) {
data->cancelled = 1;
@@ -707,11 +711,14 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
int status;
atomic_inc(&data->count);
+ /*
+ * If rpc_run_task() ends up calling ->rpc_release(), we
+ * want to ensure that it takes the 'error' code path.
+ */
+ data->rpc_status = -ENOMEM;
task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data);
- if (IS_ERR(task)) {
- nfs4_opendata_free(data);
+ if (IS_ERR(task))
return PTR_ERR(task);
- }
status = nfs4_wait_for_completion_rpc_task(task);
if (status != 0) {
data->cancelled = 1;
@@ -908,7 +915,7 @@ out_put_state_owner:
static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred)
{
struct nfs4_exception exception = { };
- struct nfs4_state *res;
+ struct nfs4_state *res = ERR_PTR(-EIO);
int err;
do {
@@ -1017,12 +1024,12 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
return res;
}
-static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
- struct nfs_fh *fhandle, struct iattr *sattr,
- struct nfs4_state *state)
+static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
+ struct iattr *sattr, struct nfs4_state *state)
{
+ struct nfs_server *server = NFS_SERVER(inode);
struct nfs_setattrargs arg = {
- .fh = fhandle,
+ .fh = NFS_FH(inode),
.iap = sattr,
.server = server,
.bitmask = server->attr_bitmask,
@@ -1041,7 +1048,9 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
nfs_fattr_init(fattr);
- if (state != NULL) {
+ if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) {
+ /* Use that stateid */
+ } else if (state != NULL) {
msg.rpc_cred = state->owner->so_cred;
nfs4_copy_stateid(&arg.stateid, state, current->files);
} else
@@ -1053,16 +1062,15 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
return status;
}
-static int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
- struct nfs_fh *fhandle, struct iattr *sattr,
- struct nfs4_state *state)
+static int nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
+ struct iattr *sattr, struct nfs4_state *state)
{
+ struct nfs_server *server = NFS_SERVER(inode);
struct nfs4_exception exception = { };
int err;
do {
err = nfs4_handle_exception(server,
- _nfs4_do_setattr(server, fattr, fhandle, sattr,
- state),
+ _nfs4_do_setattr(inode, fattr, sattr, state),
&exception);
} while (exception.retry);
return err;
@@ -1430,7 +1438,7 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
if (status == 0)
status = nfs4_do_fsinfo(server, fhandle, info);
out:
- return status;
+ return nfs4_map_errors(status);
}
static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
@@ -1503,8 +1511,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
if (ctx != NULL)
state = ctx->state;
- status = nfs4_do_setattr(NFS_SERVER(inode), fattr,
- NFS_FH(inode), sattr, state);
+ status = nfs4_do_setattr(inode, fattr, sattr, state);
if (status == 0)
nfs_setattr_update_inode(inode, sattr);
if (ctx != NULL)
@@ -1823,8 +1830,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
d_instantiate(dentry, igrab(state->inode));
if (flags & O_EXCL) {
struct nfs_fattr fattr;
- status = nfs4_do_setattr(NFS_SERVER(dir), &fattr,
- NFS_FH(state->inode), sattr, state);
+ status = nfs4_do_setattr(state->inode, &fattr, sattr, state);
if (status == 0)
nfs_setattr_update_inode(state->inode, sattr);
}
@@ -2344,75 +2350,50 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
return err;
}
-static void nfs4_read_done(struct rpc_task *task, void *calldata)
+static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
{
- struct nfs_read_data *data = calldata;
- struct inode *inode = data->inode;
+ struct nfs_server *server = NFS_SERVER(data->inode);
- if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
+ if (nfs4_async_handle_error(task, server) == -EAGAIN) {
rpc_restart_call(task);
- return;
+ return -EAGAIN;
}
if (task->tk_status > 0)
- renew_lease(NFS_SERVER(inode), data->timestamp);
- /* Call back common NFS readpage processing */
- nfs_readpage_result(task, calldata);
+ renew_lease(server, data->timestamp);
+ return 0;
}
-static const struct rpc_call_ops nfs4_read_ops = {
- .rpc_call_done = nfs4_read_done,
- .rpc_release = nfs_readdata_release,
-};
-
-static void
-nfs4_proc_read_setup(struct nfs_read_data *data)
+static void nfs4_proc_read_setup(struct nfs_read_data *data)
{
- struct rpc_task *task = &data->task;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ],
.rpc_argp = &data->args,
.rpc_resp = &data->res,
.rpc_cred = data->cred,
};
- struct inode *inode = data->inode;
- int flags;
data->timestamp = jiffies;
- /* N.B. Do we need to test? Never called for swapfile inode */
- flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
-
- /* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_read_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
-static void nfs4_write_done(struct rpc_task *task, void *calldata)
+static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
{
- struct nfs_write_data *data = calldata;
struct inode *inode = data->inode;
if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
rpc_restart_call(task);
- return;
+ return -EAGAIN;
}
if (task->tk_status >= 0) {
renew_lease(NFS_SERVER(inode), data->timestamp);
nfs_post_op_update_inode(inode, data->res.fattr);
}
- /* Call back common NFS writeback processing */
- nfs_writeback_done(task, calldata);
+ return 0;
}
-static const struct rpc_call_ops nfs4_write_ops = {
- .rpc_call_done = nfs4_write_done,
- .rpc_release = nfs_writedata_release,
-};
-
-static void
-nfs4_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs4_proc_write_setup(struct nfs_write_data *data, int how)
{
- struct rpc_task *task = &data->task;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE],
.rpc_argp = &data->args,
@@ -2422,7 +2403,6 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how)
struct inode *inode = data->inode;
struct nfs_server *server = NFS_SERVER(inode);
int stable;
- int flags;
if (how & FLUSH_STABLE) {
if (!NFS_I(inode)->ncommit)
@@ -2437,57 +2417,37 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how)
data->timestamp = jiffies;
- /* Set the initial flags for the task. */
- flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
/* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_write_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
-static void nfs4_commit_done(struct rpc_task *task, void *calldata)
+static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
{
- struct nfs_write_data *data = calldata;
struct inode *inode = data->inode;
if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
rpc_restart_call(task);
- return;
+ return -EAGAIN;
}
if (task->tk_status >= 0)
nfs_post_op_update_inode(inode, data->res.fattr);
- /* Call back common NFS writeback processing */
- nfs_commit_done(task, calldata);
+ return 0;
}
-static const struct rpc_call_ops nfs4_commit_ops = {
- .rpc_call_done = nfs4_commit_done,
- .rpc_release = nfs_commit_release,
-};
-
-static void
-nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
+static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
{
- struct rpc_task *task = &data->task;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT],
.rpc_argp = &data->args,
.rpc_resp = &data->res,
.rpc_cred = data->cred,
};
- struct inode *inode = data->inode;
- struct nfs_server *server = NFS_SERVER(inode);
- int flags;
+ struct nfs_server *server = NFS_SERVER(data->inode);
data->args.bitmask = server->attr_bitmask;
data->res.server = server;
- /* Set the initial flags for the task. */
- flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
- /* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_commit_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
/*
@@ -2755,8 +2715,10 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
rpc_wake_up_task(task);
task->tk_status = 0;
return -EAGAIN;
- case -NFS4ERR_GRACE:
case -NFS4ERR_DELAY:
+ nfs_inc_server_stats((struct nfs_server *) server,
+ NFSIOS_DELAY);
+ case -NFS4ERR_GRACE:
rpc_delay(task, NFS4_POLL_RETRY_MAX);
task->tk_status = 0;
return -EAGAIN;
@@ -2893,8 +2855,7 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p
return status;
}
-int
-nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
+static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
{
struct nfs_fsinfo fsinfo;
struct rpc_message msg = {
@@ -2918,6 +2879,24 @@ nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
return status;
}
+int nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
+{
+ long timeout;
+ int err;
+ do {
+ err = _nfs4_proc_setclientid_confirm(clp, cred);
+ switch (err) {
+ case 0:
+ return err;
+ case -NFS4ERR_RESOURCE:
+ /* The IBM lawyers misread another document! */
+ case -NFS4ERR_DELAY:
+ err = nfs4_delay(clp->cl_rpcclient, &timeout);
+ }
+ } while (err == 0);
+ return err;
+}
+
struct nfs4_delegreturndata {
struct nfs4_delegreturnargs args;
struct nfs4_delegreturnres res;
@@ -2958,7 +2937,7 @@ static void nfs4_delegreturn_release(void *calldata)
kfree(calldata);
}
-const static struct rpc_call_ops nfs4_delegreturn_ops = {
+static const struct rpc_call_ops nfs4_delegreturn_ops = {
.rpc_call_prepare = nfs4_delegreturn_prepare,
.rpc_call_done = nfs4_delegreturn_done,
.rpc_release = nfs4_delegreturn_release,
@@ -2986,10 +2965,8 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
data->rpc_status = 0;
task = rpc_run_task(NFS_CLIENT(inode), RPC_TASK_ASYNC, &nfs4_delegreturn_ops, data);
- if (IS_ERR(task)) {
- nfs4_delegreturn_release(data);
+ if (IS_ERR(task))
return PTR_ERR(task);
- }
status = nfs4_wait_for_completion_rpc_task(task);
if (status == 0) {
status = data->rpc_status;
@@ -3209,7 +3186,6 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
struct nfs_seqid *seqid)
{
struct nfs4_unlockdata *data;
- struct rpc_task *task;
data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid);
if (data == NULL) {
@@ -3219,10 +3195,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
/* Unlock _before_ we do the RPC call */
do_vfs_lock(fl->fl_file, fl);
- task = rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data);
- if (IS_ERR(task))
- nfs4_locku_release_calldata(data);
- return task;
+ return rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data);
}
static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
@@ -3403,10 +3376,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
data->arg.reclaim = 1;
task = rpc_run_task(NFS_CLIENT(state->inode), RPC_TASK_ASYNC,
&nfs4_lock_ops, data);
- if (IS_ERR(task)) {
- nfs4_lock_release(data);
+ if (IS_ERR(task))
return PTR_ERR(task);
- }
ret = nfs4_wait_for_completion_rpc_task(task);
if (ret == 0) {
ret = data->rpc_status;
@@ -3588,6 +3559,8 @@ ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen)
{
size_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1;
+ if (!nfs4_server_supports_acls(NFS_SERVER(dentry->d_inode)))
+ return 0;
if (buf && buflen < len)
return -ERANGE;
if (buf)
@@ -3644,8 +3617,11 @@ struct nfs_rpc_ops nfs_v4_clientops = {
.pathconf = nfs4_proc_pathconf,
.decode_dirent = nfs4_decode_dirent,
.read_setup = nfs4_proc_read_setup,
+ .read_done = nfs4_read_done,
.write_setup = nfs4_proc_write_setup,
+ .write_done = nfs4_write_done,
.commit_setup = nfs4_proc_commit_setup,
+ .commit_done = nfs4_commit_done,
.file_open = nfs_open,
.file_release = nfs_release,
.lock = nfs4_proc_lock,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index afad0255e7d..96e5b82c153 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -977,6 +977,7 @@ out:
out_error:
printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n",
NIPQUAD(clp->cl_addr.s_addr), -status);
+ set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
goto out;
}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4bbf5ef5778..7c5d70efe72 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4344,6 +4344,8 @@ nfs_stat_to_errno(int stat)
.p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \
.p_decode = (kxdrproc_t) nfs4_xdr_##restype, \
.p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \
+ .p_statidx = NFSPROC4_CLNT_##proc, \
+ .p_name = #proc, \
}
struct rpc_procinfo nfs4_procedures[] = {
@@ -4384,7 +4386,7 @@ struct rpc_procinfo nfs4_procedures[] = {
struct rpc_version nfs_version4 = {
.number = 4,
- .nrprocs = sizeof(nfs4_procedures)/sizeof(nfs4_procedures[0]),
+ .nrprocs = ARRAY_SIZE(nfs4_procedures),
.procs = nfs4_procedures
};
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d53857b148e..106aca388eb 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -85,6 +85,9 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
atomic_set(&req->wb_complete, 0);
req->wb_index = page->index;
page_cache_get(page);
+ BUG_ON(PagePrivate(page));
+ BUG_ON(!PageLocked(page));
+ BUG_ON(page->mapping->host != inode);
req->wb_offset = offset;
req->wb_pgbase = offset;
req->wb_bytes = count;
@@ -132,9 +135,11 @@ void nfs_clear_page_writeback(struct nfs_page *req)
{
struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
- spin_lock(&nfsi->req_lock);
- radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
- spin_unlock(&nfsi->req_lock);
+ if (req->wb_page != NULL) {
+ spin_lock(&nfsi->req_lock);
+ radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
+ spin_unlock(&nfsi->req_lock);
+ }
nfs_unlock_request(req);
}
@@ -147,8 +152,9 @@ void nfs_clear_page_writeback(struct nfs_page *req)
*/
void nfs_clear_request(struct nfs_page *req)
{
- if (req->wb_page) {
- page_cache_release(req->wb_page);
+ struct page *page = req->wb_page;
+ if (page != NULL) {
+ page_cache_release(page);
req->wb_page = NULL;
}
}
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index f5150d71c03..9dd85cac2df 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -58,16 +58,23 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
{
struct nfs_fattr *fattr = info->fattr;
struct nfs2_fsstat fsinfo;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_GETATTR],
+ .rpc_argp = fhandle,
+ .rpc_resp = fattr,
+ };
int status;
dprintk("%s: call getattr\n", __FUNCTION__);
nfs_fattr_init(fattr);
- status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0);
+ status = rpc_call_sync(server->client_sys, &msg, 0);
dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
if (status)
return status;
dprintk("%s: call statfs\n", __FUNCTION__);
- status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+ msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS];
+ msg.rpc_resp = &fsinfo;
+ status = rpc_call_sync(server->client_sys, &msg, 0);
dprintk("%s: reply statfs: %d\n", __FUNCTION__, status);
if (status)
return status;
@@ -90,12 +97,16 @@ static int
nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fattr *fattr)
{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_GETATTR],
+ .rpc_argp = fhandle,
+ .rpc_resp = fattr,
+ };
int status;
dprintk("NFS call getattr\n");
nfs_fattr_init(fattr);
- status = rpc_call(server->client, NFSPROC_GETATTR,
- fhandle, fattr, 0);
+ status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply getattr: %d\n", status);
return status;
}
@@ -109,6 +120,11 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
.fh = NFS_FH(inode),
.sattr = sattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_SETATTR],
+ .rpc_argp = &arg,
+ .rpc_resp = fattr,
+ };
int status;
/* Mask out the non-modebit related stuff from attr->ia_mode */
@@ -116,7 +132,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
dprintk("NFS call setattr\n");
nfs_fattr_init(fattr);
- status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0);
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
if (status == 0)
nfs_setattr_update_inode(inode, sattr);
dprintk("NFS reply setattr: %d\n", status);
@@ -136,11 +152,16 @@ nfs_proc_lookup(struct inode *dir, struct qstr *name,
.fh = fhandle,
.fattr = fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_LOOKUP],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status;
dprintk("NFS call lookup %s\n", name->name);
nfs_fattr_init(fattr);
- status = rpc_call(NFS_CLIENT(dir), NFSPROC_LOOKUP, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
dprintk("NFS reply lookup: %d\n", status);
return status;
}
@@ -154,10 +175,14 @@ static int nfs_proc_readlink(struct inode *inode, struct page *page,
.pglen = pglen,
.pages = &page
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_READLINK],
+ .rpc_argp = &args,
+ };
int status;
dprintk("NFS call readlink\n");
- status = rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK, &args, NULL, 0);
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
dprintk("NFS reply readlink: %d\n", status);
return status;
}
@@ -233,11 +258,16 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
.fh = &fhandle,
.fattr = &fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_CREATE],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status;
nfs_fattr_init(&fattr);
dprintk("NFS call create %s\n", dentry->d_name.name);
- status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
if (status == 0)
status = nfs_instantiate(dentry, &fhandle, &fattr);
dprintk("NFS reply create: %d\n", status);
@@ -263,6 +293,11 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
.fh = &fhandle,
.fattr = &fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_CREATE],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status, mode;
dprintk("NFS call mknod %s\n", dentry->d_name.name);
@@ -277,13 +312,13 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
}
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
if (status == -EINVAL && S_ISFIFO(mode)) {
sattr->ia_mode = mode;
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
}
if (status == 0)
status = nfs_instantiate(dentry, &fhandle, &fattr);
@@ -302,8 +337,6 @@ nfs_proc_remove(struct inode *dir, struct qstr *name)
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_REMOVE],
.rpc_argp = &arg,
- .rpc_resp = NULL,
- .rpc_cred = NULL
};
int status;
@@ -355,10 +388,14 @@ nfs_proc_rename(struct inode *old_dir, struct qstr *old_name,
.toname = new_name->name,
.tolen = new_name->len
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_RENAME],
+ .rpc_argp = &arg,
+ };
int status;
dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
- status = rpc_call(NFS_CLIENT(old_dir), NFSPROC_RENAME, &arg, NULL, 0);
+ status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
nfs_mark_for_revalidate(old_dir);
nfs_mark_for_revalidate(new_dir);
dprintk("NFS reply rename: %d\n", status);
@@ -374,10 +411,14 @@ nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
.toname = name->name,
.tolen = name->len
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_LINK],
+ .rpc_argp = &arg,
+ };
int status;
dprintk("NFS call link %s\n", name->name);
- status = rpc_call(NFS_CLIENT(inode), NFSPROC_LINK, &arg, NULL, 0);
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
nfs_mark_for_revalidate(inode);
nfs_mark_for_revalidate(dir);
dprintk("NFS reply link: %d\n", status);
@@ -397,6 +438,10 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
.tolen = path->len,
.sattr = sattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_SYMLINK],
+ .rpc_argp = &arg,
+ };
int status;
if (path->len > NFS2_MAXPATHLEN)
@@ -404,7 +449,7 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
dprintk("NFS call symlink %s -> %s\n", name->name, path->name);
nfs_fattr_init(fattr);
fhandle->size = 0;
- status = rpc_call(NFS_CLIENT(dir), NFSPROC_SYMLINK, &arg, NULL, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
dprintk("NFS reply symlink: %d\n", status);
return status;
@@ -425,11 +470,16 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
.fh = &fhandle,
.fattr = &fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_MKDIR],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status;
dprintk("NFS call mkdir %s\n", dentry->d_name.name);
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(dir), NFSPROC_MKDIR, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
if (status == 0)
status = nfs_instantiate(dentry, &fhandle, &fattr);
@@ -445,10 +495,14 @@ nfs_proc_rmdir(struct inode *dir, struct qstr *name)
.name = name->name,
.len = name->len
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_RMDIR],
+ .rpc_argp = &arg,
+ };
int status;
dprintk("NFS call rmdir %s\n", name->name);
- status = rpc_call(NFS_CLIENT(dir), NFSPROC_RMDIR, &arg, NULL, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
dprintk("NFS reply rmdir: %d\n", status);
return status;
@@ -470,13 +524,12 @@ nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
.fh = NFS_FH(dir),
.cookie = cookie,
.count = count,
- .pages = &page
+ .pages = &page,
};
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_READDIR],
.rpc_argp = &arg,
- .rpc_resp = NULL,
- .rpc_cred = cred
+ .rpc_cred = cred,
};
int status;
@@ -495,11 +548,16 @@ nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsstat *stat)
{
struct nfs2_fsstat fsinfo;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_STATFS],
+ .rpc_argp = fhandle,
+ .rpc_resp = &fsinfo,
+ };
int status;
dprintk("NFS call statfs\n");
nfs_fattr_init(stat->fattr);
- status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+ status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply statfs: %d\n", status);
if (status)
goto out;
@@ -518,11 +576,16 @@ nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
struct nfs2_fsstat fsinfo;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_STATFS],
+ .rpc_argp = fhandle,
+ .rpc_resp = &fsinfo,
+ };
int status;
dprintk("NFS call fsinfo\n");
nfs_fattr_init(info->fattr);
- status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+ status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply fsinfo: %d\n", status);
if (status)
goto out;
@@ -550,10 +613,8 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
-static void nfs_read_done(struct rpc_task *task, void *calldata)
+static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
{
- struct nfs_read_data *data = calldata;
-
if (task->tk_status >= 0) {
nfs_refresh_inode(data->inode, data->res.fattr);
/* Emulate the eof flag, which isn't normally needed in NFSv2
@@ -562,20 +623,11 @@ static void nfs_read_done(struct rpc_task *task, void *calldata)
if (data->args.offset + data->args.count >= data->res.fattr->size)
data->res.eof = 1;
}
- nfs_readpage_result(task, calldata);
+ return 0;
}
-static const struct rpc_call_ops nfs_read_ops = {
- .rpc_call_done = nfs_read_done,
- .rpc_release = nfs_readdata_release,
-};
-
-static void
-nfs_proc_read_setup(struct nfs_read_data *data)
+static void nfs_proc_read_setup(struct nfs_read_data *data)
{
- struct rpc_task *task = &data->task;
- struct inode *inode = data->inode;
- int flags;
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_READ],
.rpc_argp = &data->args,
@@ -583,34 +635,18 @@ nfs_proc_read_setup(struct nfs_read_data *data)
.rpc_cred = data->cred,
};
- /* N.B. Do we need to test? Never called for swapfile inode */
- flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
-
- /* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_read_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
-static void nfs_write_done(struct rpc_task *task, void *calldata)
+static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
{
- struct nfs_write_data *data = calldata;
-
if (task->tk_status >= 0)
nfs_post_op_update_inode(data->inode, data->res.fattr);
- nfs_writeback_done(task, calldata);
+ return 0;
}
-static const struct rpc_call_ops nfs_write_ops = {
- .rpc_call_done = nfs_write_done,
- .rpc_release = nfs_writedata_release,
-};
-
-static void
-nfs_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs_proc_write_setup(struct nfs_write_data *data, int how)
{
- struct rpc_task *task = &data->task;
- struct inode *inode = data->inode;
- int flags;
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_WRITE],
.rpc_argp = &data->args,
@@ -621,12 +657,8 @@ nfs_proc_write_setup(struct nfs_write_data *data, int how)
/* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
data->args.stable = NFS_FILE_SYNC;
- /* Set the initial flags for the task. */
- flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
/* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_write_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
static void
@@ -672,7 +704,9 @@ struct nfs_rpc_ops nfs_v2_clientops = {
.pathconf = nfs_proc_pathconf,
.decode_dirent = nfs_decode_dirent,
.read_setup = nfs_proc_read_setup,
+ .read_done = nfs_read_done,
.write_setup = nfs_proc_write_setup,
+ .write_done = nfs_write_done,
.commit_setup = nfs_proc_commit_setup,
.file_open = nfs_open,
.file_release = nfs_release,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 05eb43fadf8..624ca7146b6 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -31,17 +31,49 @@
#include <asm/system.h>
+#include "iostat.h"
+
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
static int nfs_pagein_one(struct list_head *, struct inode *);
-static void nfs_readpage_result_partial(struct nfs_read_data *, int);
-static void nfs_readpage_result_full(struct nfs_read_data *, int);
+static const struct rpc_call_ops nfs_read_partial_ops;
+static const struct rpc_call_ops nfs_read_full_ops;
static kmem_cache_t *nfs_rdata_cachep;
-mempool_t *nfs_rdata_mempool;
+static mempool_t *nfs_rdata_mempool;
#define MIN_POOL_READ (32)
+struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
+{
+ struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
+
+ if (p) {
+ memset(p, 0, sizeof(*p));
+ INIT_LIST_HEAD(&p->pages);
+ if (pagecount < NFS_PAGEVEC_SIZE)
+ p->pagevec = &p->page_array[0];
+ else {
+ size_t size = ++pagecount * sizeof(struct page *);
+ p->pagevec = kmalloc(size, GFP_NOFS);
+ if (p->pagevec) {
+ memset(p->pagevec, 0, size);
+ } else {
+ mempool_free(p, nfs_rdata_mempool);
+ p = NULL;
+ }
+ }
+ }
+ return p;
+}
+
+void nfs_readdata_free(struct nfs_read_data *p)
+{
+ if (p && (p->pagevec != &p->page_array[0]))
+ kfree(p->pagevec);
+ mempool_free(p, nfs_rdata_mempool);
+}
+
void nfs_readdata_release(void *data)
{
nfs_readdata_free(data);
@@ -133,6 +165,8 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
}
count -= result;
rdata->args.pgbase += result;
+ nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, result);
+
/* Note: result == 0 should only happen if we're caching
* a write that extends the file and punches a hole.
*/
@@ -196,9 +230,11 @@ static void nfs_readpage_release(struct nfs_page *req)
* Set up the NFS read request struct
*/
static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+ const struct rpc_call_ops *call_ops,
unsigned int count, unsigned int offset)
{
struct inode *inode;
+ int flags;
data->req = req;
data->inode = inode = req->wb_context->dentry->d_inode;
@@ -216,6 +252,9 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
data->res.eof = 0;
nfs_fattr_init(&data->fattr);
+ /* Set up the initial task struct. */
+ flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
+ rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
NFS_PROTO(inode)->read_setup(data);
data->task.tk_cookie = (unsigned long)inode;
@@ -303,14 +342,15 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
list_del_init(&data->pages);
data->pagevec[0] = page;
- data->complete = nfs_readpage_result_partial;
if (nbytes > rsize) {
- nfs_read_rpcsetup(req, data, rsize, offset);
+ nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
+ rsize, offset);
offset += rsize;
nbytes -= rsize;
} else {
- nfs_read_rpcsetup(req, data, nbytes, offset);
+ nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
+ nbytes, offset);
nbytes = 0;
}
nfs_execute_read(data);
@@ -356,8 +396,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode)
}
req = nfs_list_entry(data->pages.next);
- data->complete = nfs_readpage_result_full;
- nfs_read_rpcsetup(req, data, count, 0);
+ nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0);
nfs_execute_read(data);
return 0;
@@ -391,12 +430,15 @@ nfs_pagein_list(struct list_head *head, int rpages)
/*
* Handle a read reply that fills part of a page.
*/
-static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
+static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
{
+ struct nfs_read_data *data = calldata;
struct nfs_page *req = data->req;
struct page *page = req->wb_page;
- if (status >= 0) {
+ if (nfs_readpage_result(task, data) != 0)
+ return;
+ if (task->tk_status >= 0) {
unsigned int request = data->args.count;
unsigned int result = data->res.count;
@@ -415,20 +457,28 @@ static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
}
}
+static const struct rpc_call_ops nfs_read_partial_ops = {
+ .rpc_call_done = nfs_readpage_result_partial,
+ .rpc_release = nfs_readdata_release,
+};
+
/*
* This is the callback from RPC telling us whether a reply was
* received or some error occurred (timeout or socket shutdown).
*/
-static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
+static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
{
+ struct nfs_read_data *data = calldata;
unsigned int count = data->res.count;
+ if (nfs_readpage_result(task, data) != 0)
+ return;
while (!list_empty(&data->pages)) {
struct nfs_page *req = nfs_list_entry(data->pages.next);
struct page *page = req->wb_page;
nfs_list_remove_request(req);
- if (status >= 0) {
+ if (task->tk_status >= 0) {
if (count < PAGE_CACHE_SIZE) {
if (count < req->wb_bytes)
memclear_highpage_flush(page,
@@ -444,22 +494,33 @@ static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
}
}
+static const struct rpc_call_ops nfs_read_full_ops = {
+ .rpc_call_done = nfs_readpage_result_full,
+ .rpc_release = nfs_readdata_release,
+};
+
/*
* This is the callback from RPC telling us whether a reply was
* received or some error occurred (timeout or socket shutdown).
*/
-void nfs_readpage_result(struct rpc_task *task, void *calldata)
+int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
{
- struct nfs_read_data *data = calldata;
struct nfs_readargs *argp = &data->args;
struct nfs_readres *resp = &data->res;
- int status = task->tk_status;
+ int status;
dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
- task->tk_pid, status);
+ task->tk_pid, task->tk_status);
+
+ status = NFS_PROTO(data->inode)->read_done(task, data);
+ if (status != 0)
+ return status;
+
+ nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);
/* Is this a short read? */
if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) {
+ nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
/* Has the server at least made some progress? */
if (resp->count != 0) {
/* Yes, so retry the read at the end of the data */
@@ -467,14 +528,14 @@ void nfs_readpage_result(struct rpc_task *task, void *calldata)
argp->pgbase += resp->count;
argp->count -= resp->count;
rpc_restart_call(task);
- return;
+ return -EAGAIN;
}
task->tk_status = -EIO;
}
spin_lock(&data->inode->i_lock);
NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
spin_unlock(&data->inode->i_lock);
- data->complete(data, status);
+ return 0;
}
/*
@@ -491,6 +552,9 @@ int nfs_readpage(struct file *file, struct page *page)
dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
page, PAGE_CACHE_SIZE, page->index);
+ nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
+ nfs_add_stats(inode, NFSIOS_READPAGES, 1);
+
/*
* Try to flush any pending writes to the file..
*
@@ -570,6 +634,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
nr_pages);
+ nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
if (filp == NULL) {
desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
@@ -582,6 +647,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
if (!list_empty(&head)) {
int err = nfs_pagein_list(&head, server->rpages);
if (!ret)
+ nfs_add_stats(inode, NFSIOS_READPAGES, err);
ret = err;
}
put_nfs_open_context(desc.ctx);
@@ -597,10 +663,8 @@ int nfs_init_readpagecache(void)
if (nfs_rdata_cachep == NULL)
return -ENOMEM;
- nfs_rdata_mempool = mempool_create(MIN_POOL_READ,
- mempool_alloc_slab,
- mempool_free_slab,
- nfs_rdata_cachep);
+ nfs_rdata_mempool = mempool_create_slab_pool(MIN_POOL_READ,
+ nfs_rdata_cachep);
if (nfs_rdata_mempool == NULL)
return -ENOMEM;
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index a65c7b53d55..0e28189c215 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -163,10 +163,9 @@ nfs_async_unlink(struct dentry *dentry)
struct rpc_clnt *clnt = NFS_CLIENT(dir->d_inode);
int status = -ENOMEM;
- data = kmalloc(sizeof(*data), GFP_KERNEL);
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
goto out;
- memset(data, 0, sizeof(*data));
data->cred = rpcauth_lookupcred(clnt->cl_auth, 0);
if (IS_ERR(data->cred)) {
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9449b683550..4cfada2cc09 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -63,6 +63,7 @@
#include <linux/smp_lock.h>
#include "delegation.h"
+#include "iostat.h"
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
@@ -76,20 +77,21 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*,
struct inode *,
struct page *,
unsigned int, unsigned int);
-static void nfs_writeback_done_partial(struct nfs_write_data *, int);
-static void nfs_writeback_done_full(struct nfs_write_data *, int);
static int nfs_wait_on_write_congestion(struct address_space *, int);
static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
unsigned int npages, int how);
+static const struct rpc_call_ops nfs_write_partial_ops;
+static const struct rpc_call_ops nfs_write_full_ops;
+static const struct rpc_call_ops nfs_commit_ops;
static kmem_cache_t *nfs_wdata_cachep;
-mempool_t *nfs_wdata_mempool;
+static mempool_t *nfs_wdata_mempool;
static mempool_t *nfs_commit_mempool;
static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
-static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
+struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
{
struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
@@ -100,11 +102,39 @@ static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
p->pagevec = &p->page_array[0];
else {
size_t size = ++pagecount * sizeof(struct page *);
+ p->pagevec = kzalloc(size, GFP_NOFS);
+ if (!p->pagevec) {
+ mempool_free(p, nfs_commit_mempool);
+ p = NULL;
+ }
+ }
+ }
+ return p;
+}
+
+void nfs_commit_free(struct nfs_write_data *p)
+{
+ if (p && (p->pagevec != &p->page_array[0]))
+ kfree(p->pagevec);
+ mempool_free(p, nfs_commit_mempool);
+}
+
+struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
+{
+ struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
+
+ if (p) {
+ memset(p, 0, sizeof(*p));
+ INIT_LIST_HEAD(&p->pages);
+ if (pagecount < NFS_PAGEVEC_SIZE)
+ p->pagevec = &p->page_array[0];
+ else {
+ size_t size = ++pagecount * sizeof(struct page *);
p->pagevec = kmalloc(size, GFP_NOFS);
if (p->pagevec) {
memset(p->pagevec, 0, size);
} else {
- mempool_free(p, nfs_commit_mempool);
+ mempool_free(p, nfs_wdata_mempool);
p = NULL;
}
}
@@ -112,11 +142,11 @@ static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
return p;
}
-static inline void nfs_commit_free(struct nfs_write_data *p)
+void nfs_writedata_free(struct nfs_write_data *p)
{
if (p && (p->pagevec != &p->page_array[0]))
kfree(p->pagevec);
- mempool_free(p, nfs_commit_mempool);
+ mempool_free(p, nfs_wdata_mempool);
}
void nfs_writedata_release(void *wdata)
@@ -136,6 +166,7 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c
end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
if (i_size >= end)
return;
+ nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
i_size_write(inode, end);
}
@@ -225,6 +256,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
wdata->args.pgbase += result;
written += result;
count -= result;
+ nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result);
} while (count);
/* Update file length */
nfs_grow_file(page, offset, written);
@@ -281,6 +313,9 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
int priority = wb_priority(wbc);
int err;
+ nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
+ nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
+
/*
* Note: We need to ensure that we have a reference to the inode
* if we are to do asynchronous writes. If not, waiting
@@ -345,6 +380,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
struct inode *inode = mapping->host;
int err;
+ nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
+
err = generic_writepages(mapping, wbc);
if (err)
return err;
@@ -356,6 +393,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
err = nfs_flush_inode(inode, 0, 0, wb_priority(wbc));
if (err < 0)
goto out;
+ nfs_add_stats(inode, NFSIOS_WRITEPAGES, err);
wbc->nr_to_write -= err;
if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) {
err = nfs_wait_on_requests(inode, 0, 0);
@@ -391,6 +429,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
if (nfs_have_delegation(inode, FMODE_WRITE))
nfsi->change_attr++;
}
+ SetPagePrivate(req->wb_page);
nfsi->npages++;
atomic_inc(&req->wb_count);
return 0;
@@ -407,6 +446,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
BUG_ON (!NFS_WBACK_BUSY(req));
spin_lock(&nfsi->req_lock);
+ ClearPagePrivate(req->wb_page);
radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
nfsi->npages--;
if (!nfsi->npages) {
@@ -499,8 +539,7 @@ nfs_mark_request_commit(struct nfs_page *req)
*
* Interruptible by signals only if mounted with intr flag.
*/
-static int
-nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
+static int nfs_wait_on_requests_locked(struct inode *inode, unsigned long idx_start, unsigned int npages)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_page *req;
@@ -513,7 +552,6 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int
else
idx_end = idx_start + npages - 1;
- spin_lock(&nfsi->req_lock);
next = idx_start;
while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) {
if (req->wb_index > idx_end)
@@ -526,15 +564,25 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int
spin_unlock(&nfsi->req_lock);
error = nfs_wait_on_request(req);
nfs_release_request(req);
+ spin_lock(&nfsi->req_lock);
if (error < 0)
return error;
- spin_lock(&nfsi->req_lock);
res++;
}
- spin_unlock(&nfsi->req_lock);
return res;
}
+static int nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ int ret;
+
+ spin_lock(&nfsi->req_lock);
+ ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
+ spin_unlock(&nfsi->req_lock);
+ return ret;
+}
+
/*
* nfs_scan_dirty - Scan an inode for dirty requests
* @inode: NFS inode to scan
@@ -586,6 +634,11 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_st
}
return res;
}
+#else
+static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
+{
+ return 0;
+}
#endif
static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
@@ -598,6 +651,9 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
if (!bdi_write_congested(bdi))
return 0;
+
+ nfs_inc_stats(mapping->host, NFSIOS_CONGESTIONWAIT);
+
if (intr) {
struct rpc_clnt *clnt = NFS_CLIENT(mapping->host);
sigset_t oldset;
@@ -653,8 +709,11 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
spin_unlock(&nfsi->req_lock);
error = nfs_wait_on_request(req);
nfs_release_request(req);
- if (error < 0)
+ if (error < 0) {
+ if (new)
+ nfs_release_request(new);
return ERR_PTR(error);
+ }
continue;
}
spin_unlock(&nfsi->req_lock);
@@ -748,6 +807,8 @@ int nfs_updatepage(struct file *file, struct page *page,
struct nfs_page *req;
int status = 0;
+ nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
+
dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n",
file->f_dentry->d_parent->d_name.name,
file->f_dentry->d_name.name, count,
@@ -857,10 +918,12 @@ static inline int flush_task_priority(int how)
*/
static void nfs_write_rpcsetup(struct nfs_page *req,
struct nfs_write_data *data,
+ const struct rpc_call_ops *call_ops,
unsigned int count, unsigned int offset,
int how)
{
struct inode *inode;
+ int flags;
/* Set up the RPC argument and reply structs
* NB: take care not to mess about with data->commit et al. */
@@ -881,6 +944,9 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
data->res.verf = &data->verf;
nfs_fattr_init(&data->fattr);
+ /* Set up the initial task struct. */
+ flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+ rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
NFS_PROTO(inode)->write_setup(data, how);
data->task.tk_priority = flush_task_priority(how);
@@ -910,7 +976,7 @@ static void nfs_execute_write(struct nfs_write_data *data)
* Generate multiple small requests to write out a single
* contiguous dirty area on one page.
*/
-static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how)
+static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how)
{
struct nfs_page *req = nfs_list_entry(head->next);
struct page *page = req->wb_page;
@@ -944,14 +1010,15 @@ static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how)
list_del_init(&data->pages);
data->pagevec[0] = page;
- data->complete = nfs_writeback_done_partial;
if (nbytes > wsize) {
- nfs_write_rpcsetup(req, data, wsize, offset, how);
+ nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
+ wsize, offset, how);
offset += wsize;
nbytes -= wsize;
} else {
- nfs_write_rpcsetup(req, data, nbytes, offset, how);
+ nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
+ nbytes, offset, how);
nbytes = 0;
}
nfs_execute_write(data);
@@ -978,16 +1045,13 @@ out_bad:
* This is the case if nfs_updatepage detects a conflicting request
* that has been written but not committed.
*/
-static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
+static int nfs_flush_one(struct inode *inode, struct list_head *head, int how)
{
struct nfs_page *req;
struct page **pages;
struct nfs_write_data *data;
unsigned int count;
- if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE)
- return nfs_flush_multi(head, inode, how);
-
data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
if (!data)
goto out_bad;
@@ -1005,9 +1069,8 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
}
req = nfs_list_entry(data->pages.next);
- data->complete = nfs_writeback_done_full;
/* Set up the argument struct */
- nfs_write_rpcsetup(req, data, count, 0, how);
+ nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
nfs_execute_write(data);
return 0;
@@ -1021,24 +1084,32 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
return -ENOMEM;
}
-static int
-nfs_flush_list(struct list_head *head, int wpages, int how)
+static int nfs_flush_list(struct inode *inode, struct list_head *head, int npages, int how)
{
LIST_HEAD(one_request);
- struct nfs_page *req;
- int error = 0;
- unsigned int pages = 0;
+ int (*flush_one)(struct inode *, struct list_head *, int);
+ struct nfs_page *req;
+ int wpages = NFS_SERVER(inode)->wpages;
+ int wsize = NFS_SERVER(inode)->wsize;
+ int error;
- while (!list_empty(head)) {
- pages += nfs_coalesce_requests(head, &one_request, wpages);
+ flush_one = nfs_flush_one;
+ if (wsize < PAGE_CACHE_SIZE)
+ flush_one = nfs_flush_multi;
+ /* For single writes, FLUSH_STABLE is more efficient */
+ if (npages <= wpages && npages == NFS_I(inode)->npages
+ && nfs_list_entry(head->next)->wb_bytes <= wsize)
+ how |= FLUSH_STABLE;
+
+ do {
+ nfs_coalesce_requests(head, &one_request, wpages);
req = nfs_list_entry(one_request.next);
- error = nfs_flush_one(&one_request, req->wb_context->dentry->d_inode, how);
+ error = flush_one(inode, &one_request, how);
if (error < 0)
- break;
- }
- if (error >= 0)
- return pages;
-
+ goto out_err;
+ } while (!list_empty(head));
+ return 0;
+out_err:
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
@@ -1051,8 +1122,9 @@ nfs_flush_list(struct list_head *head, int wpages, int how)
/*
* Handle a write reply that flushed part of a page.
*/
-static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
+static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
{
+ struct nfs_write_data *data = calldata;
struct nfs_page *req = data->req;
struct page *page = req->wb_page;
@@ -1062,11 +1134,14 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
req->wb_bytes,
(long long)req_offset(req));
- if (status < 0) {
+ if (nfs_writeback_done(task, data) != 0)
+ return;
+
+ if (task->tk_status < 0) {
ClearPageUptodate(page);
SetPageError(page);
- req->wb_context->error = status;
- dprintk(", error = %d\n", status);
+ req->wb_context->error = task->tk_status;
+ dprintk(", error = %d\n", task->tk_status);
} else {
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
if (data->verf.committed < NFS_FILE_SYNC) {
@@ -1087,6 +1162,11 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
nfs_writepage_release(req);
}
+static const struct rpc_call_ops nfs_write_partial_ops = {
+ .rpc_call_done = nfs_writeback_done_partial,
+ .rpc_release = nfs_writedata_release,
+};
+
/*
* Handle a write reply that flushes a whole page.
*
@@ -1094,11 +1174,15 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
* writebacks since the page->count is kept > 1 for as long
* as the page has a write request pending.
*/
-static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
+static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
{
+ struct nfs_write_data *data = calldata;
struct nfs_page *req;
struct page *page;
+ if (nfs_writeback_done(task, data) != 0)
+ return;
+
/* Update attributes as result of writeback. */
while (!list_empty(&data->pages)) {
req = nfs_list_entry(data->pages.next);
@@ -1111,13 +1195,13 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
req->wb_bytes,
(long long)req_offset(req));
- if (status < 0) {
+ if (task->tk_status < 0) {
ClearPageUptodate(page);
SetPageError(page);
- req->wb_context->error = status;
+ req->wb_context->error = task->tk_status;
end_page_writeback(page);
nfs_inode_remove_request(req);
- dprintk(", error = %d\n", status);
+ dprintk(", error = %d\n", task->tk_status);
goto next;
}
end_page_writeback(page);
@@ -1139,18 +1223,30 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
}
}
+static const struct rpc_call_ops nfs_write_full_ops = {
+ .rpc_call_done = nfs_writeback_done_full,
+ .rpc_release = nfs_writedata_release,
+};
+
+
/*
* This function is called when the WRITE call is complete.
*/
-void nfs_writeback_done(struct rpc_task *task, void *calldata)
+int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
{
- struct nfs_write_data *data = calldata;
struct nfs_writeargs *argp = &data->args;
struct nfs_writeres *resp = &data->res;
+ int status;
dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
task->tk_pid, task->tk_status);
+ /* Call the NFS version-specific code */
+ status = NFS_PROTO(data->inode)->write_done(task, data);
+ if (status != 0)
+ return status;
+ nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
+
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
/* We tried a write call, but the server did not
@@ -1176,6 +1272,8 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata)
if (task->tk_status >= 0 && resp->count < argp->count) {
static unsigned long complain;
+ nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);
+
/* Has the server at least made some progress? */
if (resp->count != 0) {
/* Was this an NFSv2 write or an NFSv3 stable write? */
@@ -1191,7 +1289,7 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata)
argp->stable = NFS_FILE_SYNC;
}
rpc_restart_call(task);
- return;
+ return -EAGAIN;
}
if (time_before(complain, jiffies)) {
printk(KERN_WARNING
@@ -1202,11 +1300,7 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata)
/* Can't do anything about it except throw an error. */
task->tk_status = -EIO;
}
-
- /*
- * Process the nfs_page list
- */
- data->complete(data, task->tk_status);
+ return 0;
}
@@ -1220,10 +1314,12 @@ void nfs_commit_release(void *wdata)
* Set up the argument/result storage required for the RPC call.
*/
static void nfs_commit_rpcsetup(struct list_head *head,
- struct nfs_write_data *data, int how)
+ struct nfs_write_data *data,
+ int how)
{
struct nfs_page *first;
struct inode *inode;
+ int flags;
/* Set up the RPC argument and reply structs
* NB: take care not to mess about with data->commit et al. */
@@ -1243,7 +1339,10 @@ static void nfs_commit_rpcsetup(struct list_head *head,
data->res.fattr = &data->fattr;
data->res.verf = &data->verf;
nfs_fattr_init(&data->fattr);
-
+
+ /* Set up the initial task struct. */
+ flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+ rpc_init_task(&data->task, NFS_CLIENT(inode), flags, &nfs_commit_ops, data);
NFS_PROTO(inode)->commit_setup(data, how);
data->task.tk_priority = flush_task_priority(how);
@@ -1284,7 +1383,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
/*
* COMMIT call returned
*/
-void nfs_commit_done(struct rpc_task *task, void *calldata)
+static void nfs_commit_done(struct rpc_task *task, void *calldata)
{
struct nfs_write_data *data = calldata;
struct nfs_page *req;
@@ -1293,6 +1392,10 @@ void nfs_commit_done(struct rpc_task *task, void *calldata)
dprintk("NFS: %4d nfs_commit_done (status %d)\n",
task->tk_pid, task->tk_status);
+ /* Call the NFS version-specific code */
+ if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
+ return;
+
while (!list_empty(&data->pages)) {
req = nfs_list_entry(data->pages.next);
nfs_list_remove_request(req);
@@ -1326,6 +1429,16 @@ void nfs_commit_done(struct rpc_task *task, void *calldata)
}
sub_page_state(nr_unstable,res);
}
+
+static const struct rpc_call_ops nfs_commit_ops = {
+ .rpc_call_done = nfs_commit_done,
+ .rpc_release = nfs_commit_release,
+};
+#else
+static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how)
+{
+ return 0;
+}
#endif
static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
@@ -1333,24 +1446,16 @@ static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
{
struct nfs_inode *nfsi = NFS_I(inode);
LIST_HEAD(head);
- int res,
- error = 0;
+ int res;
spin_lock(&nfsi->req_lock);
res = nfs_scan_dirty(inode, &head, idx_start, npages);
spin_unlock(&nfsi->req_lock);
if (res) {
- struct nfs_server *server = NFS_SERVER(inode);
-
- /* For single writes, FLUSH_STABLE is more efficient */
- if (res == nfsi->npages && nfsi->npages <= server->wpages) {
- if (res > 1 || nfs_list_entry(head.next)->wb_bytes <= server->wsize)
- how |= FLUSH_STABLE;
- }
- error = nfs_flush_list(&head, server->wpages, how);
+ int error = nfs_flush_list(inode, &head, res, how);
+ if (error < 0)
+ return error;
}
- if (error < 0)
- return error;
return res;
}
@@ -1359,14 +1464,13 @@ int nfs_commit_inode(struct inode *inode, int how)
{
struct nfs_inode *nfsi = NFS_I(inode);
LIST_HEAD(head);
- int res,
- error = 0;
+ int res;
spin_lock(&nfsi->req_lock);
res = nfs_scan_commit(inode, &head, 0, 0);
spin_unlock(&nfsi->req_lock);
if (res) {
- error = nfs_commit_list(inode, &head, how);
+ int error = nfs_commit_list(inode, &head, how);
if (error < 0)
return error;
}
@@ -1374,28 +1478,38 @@ int nfs_commit_inode(struct inode *inode, int how)
}
#endif
-int nfs_sync_inode(struct inode *inode, unsigned long idx_start,
- unsigned int npages, int how)
+int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start,
+ unsigned int npages, int how)
{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ LIST_HEAD(head);
int nocommit = how & FLUSH_NOCOMMIT;
- int wait = how & FLUSH_WAIT;
- int error;
-
- how &= ~(FLUSH_WAIT|FLUSH_NOCOMMIT);
+ int pages, ret;
+ how &= ~FLUSH_NOCOMMIT;
+ spin_lock(&nfsi->req_lock);
do {
- if (wait) {
- error = nfs_wait_on_requests(inode, idx_start, npages);
- if (error != 0)
- continue;
- }
- error = nfs_flush_inode(inode, idx_start, npages, how);
- if (error != 0)
+ ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
+ if (ret != 0)
continue;
- if (!nocommit)
- error = nfs_commit_inode(inode, how);
- } while (error > 0);
- return error;
+ pages = nfs_scan_dirty(inode, &head, idx_start, npages);
+ if (pages != 0) {
+ spin_unlock(&nfsi->req_lock);
+ ret = nfs_flush_list(inode, &head, pages, how);
+ spin_lock(&nfsi->req_lock);
+ continue;
+ }
+ if (nocommit)
+ break;
+ pages = nfs_scan_commit(inode, &head, 0, 0);
+ if (pages == 0)
+ break;
+ spin_unlock(&nfsi->req_lock);
+ ret = nfs_commit_list(inode, &head, how);
+ spin_lock(&nfsi->req_lock);
+ } while (ret >= 0);
+ spin_unlock(&nfsi->req_lock);
+ return ret;
}
int nfs_init_writepagecache(void)
@@ -1407,17 +1521,13 @@ int nfs_init_writepagecache(void)
if (nfs_wdata_cachep == NULL)
return -ENOMEM;
- nfs_wdata_mempool = mempool_create(MIN_POOL_WRITE,
- mempool_alloc_slab,
- mempool_free_slab,
- nfs_wdata_cachep);
+ nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE,
+ nfs_wdata_cachep);
if (nfs_wdata_mempool == NULL)
return -ENOMEM;
- nfs_commit_mempool = mempool_create(MIN_POOL_COMMIT,
- mempool_alloc_slab,
- mempool_free_slab,
- nfs_wdata_cachep);
+ nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
+ nfs_wdata_cachep);
if (nfs_commit_mempool == NULL)
return -ENOMEM;