137 files changed, 4197 insertions, 3049 deletions
diff --git a/fs/9p/9p.h b/fs/9p/9p.h
index 95d72aec1c1..94e2f92ab2e 100644
--- a/fs/9p/9p.h
+++ b/fs/9p/9p.h
@@ -8,9 +8,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -235,6 +234,7 @@ struct Tcreate {
 	struct v9fs_str name;
 	u32 perm;
 	u8 mode;
+	struct v9fs_str extension;
 };
 
 struct Rcreate {
@@ -364,7 +364,7 @@ int v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
 		  struct v9fs_fcall **rcall);
 
 int v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
-		  u32 perm, u8 mode, struct v9fs_fcall **rcall);
+	u32 perm, u8 mode, char *extension, struct v9fs_fcall **rcall);
 
 int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid,
 		u64 offset, u32 count, struct v9fs_fcall **rcall);
@@ -372,3 +372,4 @@ int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid,
 int v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
 		 u32 count, const char __user * data,
 		 struct v9fs_fcall **rcall);
+int v9fs_printfcall(char *, int, struct v9fs_fcall *, int);
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index 2f4ce43f7b6..87897f84dfb 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -1,10 +1,9 @@
-obj-$(CONFIG_9P_FS) := 9p2000.o
+obj-$(CONFIG_9P_FS) := 9p.o
 
-9p2000-objs := \
+9p-objs := \
 	trans_fd.o \
-	trans_sock.o \
 	mux.o \
-	9p.o \
+	fcall.o \
 	conv.o \
 	vfs_super.o \
 	vfs_inode.o \
@@ -14,5 +13,6 @@ obj-$(CONFIG_9P_FS) := 9p2000.o
 	vfs_dentry.o \
 	error.o \
 	v9fs.o \
-	fid.o
+	fid.o \
+	fcprint.o
 
diff --git a/fs/9p/conv.c b/fs/9p/conv.c
index bba81714246..a767e05b60b 100644
--- a/fs/9p/conv.c
+++ b/fs/9p/conv.c
@@ -8,9 +8,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -666,7 +665,8 @@ struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode)
 	return fc;
 }
 
-struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode)
+struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode,
+	char *extension, int extended)
 {
 	int size;
 	struct v9fs_fcall *fc;
@@ -674,6 +674,9 @@ struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode)
 	struct cbuf *bufp = &buffer;
 
 	size = 4 + 2 + strlen(name) + 4 + 1;	/* fid[4] name[s] perm[4] mode[1] */
+	if (extended && extension!=NULL)
+		size += 2 + strlen(extension);	/* extension[s] */
+
 	fc = v9fs_create_common(bufp, size, TCREATE);
 	if (IS_ERR(fc))
 		goto error;
@@ -682,6 +685,8 @@ struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode)
 	v9fs_put_str(bufp, name, &fc->params.tcreate.name);
 	v9fs_put_int32(bufp, perm, &fc->params.tcreate.perm);
 	v9fs_put_int8(bufp, mode, &fc->params.tcreate.mode);
+	if (extended)
+		v9fs_put_str(bufp, extension, &fc->params.tcreate.extension);
 
 	if (buf_check_overflow(bufp)) {
 		kfree(fc);
diff --git a/fs/9p/conv.h b/fs/9p/conv.h
index f5896628dae..dd5b6b1b610 100644
--- a/fs/9p/conv.h
+++ b/fs/9p/conv.h
@@ -8,9 +8,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -39,7 +38,8 @@ struct v9fs_fcall *v9fs_create_tflush(u16 oldtag);
 struct v9fs_fcall *v9fs_create_twalk(u32 fid, u32 newfid, u16 nwname,
 	char **wnames);
 struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode);
-struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode);
+struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode,
+	char *extension, int extended);
 struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count);
 struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count,
 	const char __user *data);
diff --git a/fs/9p/debug.h b/fs/9p/debug.h
index fe551032788..4228c0bb3c3 100644
--- a/fs/9p/debug.h
+++ b/fs/9p/debug.h
@@ -5,9 +5,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -30,6 +29,7 @@
 #define DEBUG_MUX		(1<<5)
 #define DEBUG_TRANS		(1<<6)
 #define DEBUG_SLABS	      	(1<<7)
+#define DEBUG_FCALL		(1<<8)
 
 #define DEBUG_DUMP_PKT		0
 
diff --git a/fs/9p/error.c b/fs/9p/error.c
index e4b6f8f38b6..981fe8ecd78 100644
--- a/fs/9p/error.c
+++ b/fs/9p/error.c
@@ -11,9 +11,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/error.h b/fs/9p/error.h
index a9794e85fe5..5f3ca522b31 100644
--- a/fs/9p/error.h
+++ b/fs/9p/error.h
@@ -12,9 +12,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/9p.c b/fs/9p/fcall.c
index c148e6ba07e..71742ba150c 100644
--- a/fs/9p/9p.c
+++ b/fs/9p/fcall.c
@@ -1,5 +1,5 @@
 /*
- *  linux/fs/9p/9p.c
+ *  linux/fs/9p/fcall.c
  *
  *  This file contains functions to perform synchronous 9P calls
  *
@@ -8,9 +8,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -152,10 +151,9 @@ v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid)
 /**
  * v9fs_v9fs_t_flush - flush a pending transaction
  * @v9ses: 9P2000 session information
- * @tag: tid to release
+ * @tag: tag to release
  *
  */
-
 int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag)
 {
 	int ret;
@@ -172,7 +170,7 @@ int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag)
 
 	return ret;
 }
-#endif  /*  0  */
+#endif
 
 /**
  * v9fs_t_stat - read a file's meta-data
@@ -334,8 +332,8 @@ v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
  */
 
 int
-v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
-	      u32 perm, u8 mode, struct v9fs_fcall **rcp)
+v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name, u32 perm,
+	u8 mode, char *extension, struct v9fs_fcall **rcp)
 {
 	int ret;
 	struct v9fs_fcall *tc;
@@ -343,7 +341,9 @@ v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
 	dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n",
 		fid, name, perm, mode);
 
-	tc = v9fs_create_tcreate(fid, name, perm, mode);
+	tc = v9fs_create_tcreate(fid, name, perm, mode, extension,
+		v9ses->extended);
+
 	if (!IS_ERR(tc)) {
 		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
 		kfree(tc);
diff --git a/fs/9p/fcprint.c b/fs/9p/fcprint.c
new file mode 100644
index 00000000000..583e827baeb
--- /dev/null
+++ b/fs/9p/fcprint.c
@@ -0,0 +1,346 @@
+/*
+ *  linux/fs/9p/fcprint.c
+ *
+ *  Print 9P call.
+ *
+ *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "mux.h"
+
+static int
+v9fs_printqid(char *buf, int buflen, struct v9fs_qid *q)
+{
+	int n;
+	char b[10];
+
+	n = 0;
+	if (q->type & V9FS_QTDIR)
+		b[n++] = 'd';
+	if (q->type & V9FS_QTAPPEND)
+		b[n++] = 'a';
+	if (q->type & V9FS_QTAUTH)
+		b[n++] = 'A';
+	if (q->type & V9FS_QTEXCL)
+		b[n++] = 'l';
+	if (q->type & V9FS_QTTMP)
+		b[n++] = 't';
+	if (q->type & V9FS_QTSYMLINK)
+		b[n++] = 'L';
+	b[n] = '\0';
+
+	return scnprintf(buf, buflen, "(%.16llx %x %s)", (long long int) q->path,
+		q->version, b);
+}
+
+static int
+v9fs_printperm(char *buf, int buflen, int perm)
+{
+	int n;
+	char b[15];
+
+	n = 0;
+	if (perm & V9FS_DMDIR)
+		b[n++] = 'd';
+	if (perm & V9FS_DMAPPEND)
+		b[n++] = 'a';
+	if (perm & V9FS_DMAUTH)
+		b[n++] = 'A';
+	if (perm & V9FS_DMEXCL)
+		b[n++] = 'l';
+	if (perm & V9FS_DMTMP)
+		b[n++] = 't';
+	if (perm & V9FS_DMDEVICE)
+		b[n++] = 'D';
+	if (perm & V9FS_DMSOCKET)
+		b[n++] = 'S';
+	if (perm & V9FS_DMNAMEDPIPE)
+		b[n++] = 'P';
+	if (perm & V9FS_DMSYMLINK)
+		b[n++] = 'L';
+	b[n] = '\0';
+
+	return scnprintf(buf, buflen, "%s%03o", b, perm&077);
+}
+
+static int
+v9fs_printstat(char *buf, int buflen, struct v9fs_stat *st, int extended)
+{
+	int n;
+
+	n = scnprintf(buf, buflen, "'%.*s' '%.*s'", st->name.len,
+		st->name.str, st->uid.len, st->uid.str);
+	if (extended)
+		n += scnprintf(buf+n, buflen-n, "(%d)", st->n_uid);
+
+	n += scnprintf(buf+n, buflen-n, " '%.*s'", st->gid.len, st->gid.str);
+	if (extended)
+		n += scnprintf(buf+n, buflen-n, "(%d)", st->n_gid);
+
+	n += scnprintf(buf+n, buflen-n, " '%.*s'", st->muid.len, st->muid.str);
+	if (extended)
+		n += scnprintf(buf+n, buflen-n, "(%d)", st->n_muid);
+
+	n += scnprintf(buf+n, buflen-n, " q ");
+	n += v9fs_printqid(buf+n, buflen-n, &st->qid);
+	n += scnprintf(buf+n, buflen-n, " m ");
+	n += v9fs_printperm(buf+n, buflen-n, st->mode);
+	n += scnprintf(buf+n, buflen-n, " at %d mt %d l %lld",
+		st->atime, st->mtime, (long long int) st->length);
+
+	if (extended)
+		n += scnprintf(buf+n, buflen-n, " ext '%.*s'",
+			st->extension.len, st->extension.str);
+
+	return n;
+}
+
+static int
+v9fs_dumpdata(char *buf, int buflen, u8 *data, int datalen)
+{
+	int i, n;
+
+	i = n = 0;
+	while (i < datalen) {
+		n += scnprintf(buf + n, buflen - n, "%02x", data[i]);
+		if (i%4 == 3)
+			n += scnprintf(buf + n, buflen - n, " ");
+		if (i%32 == 31)
+			n += scnprintf(buf + n, buflen - n, "\n");
+
+		i++;
+	}
+	n += scnprintf(buf + n, buflen - n, "\n");
+
+	return n;
+}
+
+static int
+v9fs_printdata(char *buf, int buflen, u8 *data, int datalen)
+{
+	return v9fs_dumpdata(buf, buflen, data, datalen<16?datalen:16);
+}
+
+int
+v9fs_printfcall(char *buf, int buflen, struct v9fs_fcall *fc, int extended)
+{
+	int i, ret, type, tag;
+
+	if (!fc)
+		return scnprintf(buf, buflen, "<NULL>");
+
+	type = fc->id;
+	tag = fc->tag;
+
+	ret = 0;
+	switch (type) {
+	case TVERSION:
+		ret += scnprintf(buf+ret, buflen-ret,
+			"Tversion tag %u msize %u version '%.*s'", tag,
+			fc->params.tversion.msize, fc->params.tversion.version.len,
+			fc->params.tversion.version.str);
+		break;
+
+	case RVERSION:
+		ret += scnprintf(buf+ret, buflen-ret,
+			"Rversion tag %u msize %u version '%.*s'", tag,
+			fc->params.rversion.msize, fc->params.rversion.version.len,
+			fc->params.rversion.version.str);
+		break;
+
+	case TAUTH:
+		ret += scnprintf(buf+ret, buflen-ret,
+			"Tauth tag %u afid %d uname '%.*s' aname '%.*s'", tag,
+			fc->params.tauth.afid, fc->params.tauth.uname.len,
+			fc->params.tauth.uname.str, fc->params.tauth.aname.len,
+			fc->params.tauth.aname.str);
+		break;
+
+	case RAUTH:
+		ret += scnprintf(buf+ret, buflen-ret, "Rauth tag %u qid ", tag);
+		v9fs_printqid(buf+ret, buflen-ret, &fc->params.rauth.qid);
+		break;
+
+	case TATTACH:
+		ret += scnprintf(buf+ret, buflen-ret,
+			"Tattach tag %u fid %d afid %d uname '%.*s' aname '%.*s'",
+			tag, fc->params.tattach.fid, fc->params.tattach.afid,
+			fc->params.tattach.uname.len, fc->params.tattach.uname.str,
+			fc->params.tattach.aname.len, fc->params.tattach.aname.str);
+		break;
+
+	case RATTACH:
+		ret += scnprintf(buf+ret, buflen-ret, "Rattach tag %u qid ", tag);
+		v9fs_printqid(buf+ret, buflen-ret, &fc->params.rattach.qid);
+		break;
+
+	case RERROR:
+		ret += scnprintf(buf+ret, buflen-ret, "Rerror tag %u ename '%.*s'",
+			tag, fc->params.rerror.error.len,
+			fc->params.rerror.error.str);
+		if (extended)
+			ret += scnprintf(buf+ret, buflen-ret, " ecode %d\n",
+				fc->params.rerror.errno);
+		break;
+
+	case TFLUSH:
+		ret += scnprintf(buf+ret, buflen-ret, "Tflush tag %u oldtag %u",
+			tag, fc->params.tflush.oldtag);
+		break;
+
+	case RFLUSH:
+		ret += scnprintf(buf+ret, buflen-ret, "Rflush tag %u", tag);
+		break;
+
+	case TWALK:
+		ret += scnprintf(buf+ret, buflen-ret,
+			"Twalk tag %u fid %d newfid %d nwname %d", tag,
+			fc->params.twalk.fid, fc->params.twalk.newfid,
+			fc->params.twalk.nwname);
+		for(i = 0; i < fc->params.twalk.nwname; i++)
+			ret += scnprintf(buf+ret, buflen-ret," '%.*s'",
+				fc->params.twalk.wnames[i].len,
+				fc->params.twalk.wnames[i].str);
+		break;
+
+	case RWALK:
+		ret += scnprintf(buf+ret, buflen-ret, "Rwalk tag %u nwqid %d",
+			tag, fc->params.rwalk.nwqid);
+		for(i = 0; i < fc->params.rwalk.nwqid; i++)
+			ret += v9fs_printqid(buf+ret, buflen-ret,
+				&fc->params.rwalk.wqids[i]);
+		break;
+
+	case TOPEN:
+		ret += scnprintf(buf+ret, buflen-ret,
+			"Topen tag %u fid %d mode %d", tag,
+			fc->params.topen.fid, fc->params.topen.mode);
+		break;
+
+	case ROPEN:
+		ret += scnprintf(buf+ret, buflen-ret, "Ropen tag %u", tag);
+		ret += v9fs_printqid(buf+ret, buflen-ret, &fc->params.ropen.qid);
+		ret += scnprintf(buf+ret, buflen-ret," iounit %d",
+			fc->params.ropen.iounit);
+		break;
+
+	case TCREATE:
+		ret += scnprintf(buf+ret, buflen-ret,
+			"Tcreate tag %u fid %d name '%.*s' perm ", tag,
+			fc->params.tcreate.fid, fc->params.tcreate.name.len,
+			fc->params.tcreate.name.str);
+
+		ret += v9fs_printperm(buf+ret, buflen-ret, fc->params.tcreate.perm);
+		ret += scnprintf(buf+ret, buflen-ret, " mode %d",
+			fc->params.tcreate.mode);
+		break;
+
+	case RCREATE:
+		ret += scnprintf(buf+ret, buflen-ret, "Rcreate tag %u", tag);
+		ret += v9fs_printqid(buf+ret, buflen-ret, &fc->params.rcreate.qid);
+		ret += scnprintf(buf+ret, buflen-ret, " iounit %d",
+			fc->params.rcreate.iounit);
+		break;
+
+	case TREAD:
+		ret += scnprintf(buf+ret, buflen-ret,
+			"Tread tag %u fid %d offset %lld count %u", tag,
+			fc->params.tread.fid,
+			(long long int) fc->params.tread.offset,
+			fc->params.tread.count);
+		break;
+
+	case RREAD:
+		ret += scnprintf(buf+ret, buflen-ret,
+			"Rread tag %u count %u data ", tag,
+			fc->params.rread.count);
+		ret += v9fs_printdata(buf+ret, buflen-ret, fc->params.rread.data,
+			fc->params.rread.count);
+		break;
+
+	case TWRITE:
+		ret += scnprintf(buf+ret, buflen-ret,
+			"Twrite tag %u fid %d offset %lld count %u data ",
+			tag, fc->params.twrite.fid,
+			(long long int) fc->params.twrite.offset,
+			fc->params.twrite.count);
+		ret += v9fs_printdata(buf+ret, buflen-ret, fc->params.twrite.data,
+			fc->params.twrite.count);
+		break;
+
+	case RWRITE:
+		ret += scnprintf(buf+ret, buflen-ret, "Rwrite tag %u count %u",
+			tag, fc->params.rwrite.count);
+		break;
+
+	case TCLUNK:
+		ret += scnprintf(buf+ret, buflen-ret, "Tclunk tag %u fid %d",
+			tag, fc->params.tclunk.fid);
+		break;
+
+	case RCLUNK:
+		ret += scnprintf(buf+ret, buflen-ret, "Rclunk tag %u", tag);
+		break;
+
+	case TREMOVE:
+		ret += scnprintf(buf+ret, buflen-ret, "Tremove tag %u fid %d",
+			tag, fc->params.tremove.fid);
+		break;
+
+	case RREMOVE:
+		ret += scnprintf(buf+ret, buflen-ret, "Rremove tag %u", tag);
+		break;
+
+	case TSTAT:
+		ret += scnprintf(buf+ret, buflen-ret, "Tstat tag %u fid %d",
+			tag, fc->params.tstat.fid);
+		break;
+
+	case RSTAT:
+		ret += scnprintf(buf+ret, buflen-ret, "Rstat tag %u ", tag);
+		ret += v9fs_printstat(buf+ret, buflen-ret, &fc->params.rstat.stat,
+			extended);
+		break;
+
+	case TWSTAT:
+		ret += scnprintf(buf+ret, buflen-ret, "Twstat tag %u fid %d ",
+			tag, fc->params.twstat.fid);
+		ret += v9fs_printstat(buf+ret, buflen-ret, &fc->params.twstat.stat,
+			extended);
+		break;
+
+	case RWSTAT:
+		ret += scnprintf(buf+ret, buflen-ret, "Rwstat tag %u", tag);
+		break;
+
+	default:
+		ret += scnprintf(buf+ret, buflen-ret, "unknown type %d", type);
+		break;
+	}
+
+	return ret;
+}
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index c4d13bf904d..b7608af07ce 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -4,9 +4,8 @@
  *  Copyright (C) 2005, 2006 by Eric Van Hensbergen <ericvh@gmail.com>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
index 1fc2dd08d75..aa974d6875c 100644
--- a/fs/9p/fid.h
+++ b/fs/9p/fid.h
@@ -4,9 +4,8 @@
  *  Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/mux.c b/fs/9p/mux.c
index e2ae60adda9..3e5b124a721 100644
--- a/fs/9p/mux.c
+++ b/fs/9p/mux.c
@@ -7,9 +7,8 @@
  *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -70,7 +69,7 @@ struct v9fs_mux_data {
 	int msize;
 	unsigned char *extended;
 	struct v9fs_transport *trans;
-	struct v9fs_idpool tidpool;
+	struct v9fs_idpool tagpool;
 	int err;
 	wait_queue_head_t equeue;
 	struct list_head req_list;
@@ -280,8 +279,8 @@ struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize,
 	m->msize = msize;
 	m->extended = extended;
 	m->trans = trans;
-	idr_init(&m->tidpool.pool);
-	init_MUTEX(&m->tidpool.lock);
+	idr_init(&m->tagpool.pool);
+	init_MUTEX(&m->tagpool.lock);
 	m->err = 0;
 	init_waitqueue_head(&m->equeue);
 	INIT_LIST_HEAD(&m->req_list);
@@ -635,6 +634,14 @@ static void v9fs_read_work(void *a)
 			goto error;
 		}
 
+		if ((v9fs_debug_level&DEBUG_FCALL) == DEBUG_FCALL) {
+			char buf[150];
+
+			v9fs_printfcall(buf, sizeof(buf), m->rcall,
+				*m->extended);
+			printk(KERN_NOTICE ">>> %p %s\n", m, buf);
+		}
+
 		rcall = m->rcall;
 		rbuf = m->rbuf;
 		if (m->rpos > n) {
@@ -740,6 +747,13 @@ static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m,
 
 	v9fs_set_tag(tc, n);
 
+	if ((v9fs_debug_level&DEBUG_FCALL) == DEBUG_FCALL) {
+		char buf[150];
+
+		v9fs_printfcall(buf, sizeof(buf), tc, *m->extended);
+		printk(KERN_NOTICE "<<< %p %s\n", m, buf);
+	}
+
 	req->tag = n;
 	req->tcall = tc;
 	req->rcall = NULL;
@@ -965,7 +979,7 @@ static u16 v9fs_mux_get_tag(struct v9fs_mux_data *m)
 {
 	int tag;
 
-	tag = v9fs_get_idpool(&m->tidpool);
+	tag = v9fs_get_idpool(&m->tagpool);
 	if (tag < 0)
 		return V9FS_NOTAG;
 	else
@@ -974,6 +988,6 @@ static u16 v9fs_mux_get_tag(struct v9fs_mux_data *m)
 
 static void v9fs_mux_put_tag(struct v9fs_mux_data *m, u16 tag)
 {
-	if (tag != V9FS_NOTAG && v9fs_check_idpool(tag, &m->tidpool))
-		v9fs_put_idpool(tag, &m->tidpool);
+	if (tag != V9FS_NOTAG && v9fs_check_idpool(tag, &m->tagpool))
+		v9fs_put_idpool(tag, &m->tagpool);
 }
diff --git a/fs/9p/mux.h b/fs/9p/mux.h
index 17144fdfa11..e90bfd32ea4 100644
--- a/fs/9p/mux.h
+++ b/fs/9p/mux.h
@@ -7,9 +7,8 @@
  *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/trans_fd.c b/fs/9p/trans_fd.c
index 5b2ce21b10f..94e0a7fd9fc 100644
--- a/fs/9p/trans_fd.c
+++ b/fs/9p/trans_fd.c
@@ -1,15 +1,16 @@
 /*
  * linux/fs/9p/trans_fd.c
  *
- * File Descriptor Transport Layer
+ * Fd transport layer.  Includes deprecated socket layer.
  *
- *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
- *  Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2006 by Russ Cox <rsc@swtch.com>
+ *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
+ *  Copyright (C) 2004-2005 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -25,6 +26,7 @@
  */
 
 #include <linux/config.h>
+#include <linux/in.h>
 #include <linux/module.h>
 #include <linux/net.h>
 #include <linux/ipv6.h>
@@ -40,89 +42,119 @@
 #include "v9fs.h"
 #include "transport.h"
 
+#define V9FS_PORT 564
+
 struct v9fs_trans_fd {
-	struct file *in_file;
-	struct file *out_file;
+	struct file *rd;
+	struct file *wr;
 };
 
 /**
- * v9fs_fd_recv - receive from a socket
+ * v9fs_fd_read- read from a fd
  * @v9ses: session information
  * @v: buffer to receive data into
  * @len: size of receive buffer
  *
  */
-
-static int v9fs_fd_recv(struct v9fs_transport *trans, void *v, int len)
+static int v9fs_fd_read(struct v9fs_transport *trans, void *v, int len)
 {
-	struct v9fs_trans_fd *ts = trans ? trans->priv : NULL;
+	int ret;
+	struct v9fs_trans_fd *ts;
 
-	if (!trans || trans->status != Connected || !ts)
-		return -EIO;
+	if (!trans || trans->status == Disconnected || !(ts = trans->priv))
+		return -EREMOTEIO;
 
-	return kernel_read(ts->in_file, ts->in_file->f_pos, v, len);
+	if (!(ts->rd->f_flags & O_NONBLOCK))
+		dprintk(DEBUG_ERROR, "blocking read ...\n");
+
+	ret = kernel_read(ts->rd, ts->rd->f_pos, v, len);
+	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
+		trans->status = Disconnected;
+	return ret;
 }
 
 /**
- * v9fs_fd_send - send to a socket
+ * v9fs_fd_write - write to a socket
  * @v9ses: session information
  * @v: buffer to send data from
  * @len: size of send buffer
  *
  */
-
-static int v9fs_fd_send(struct v9fs_transport *trans, void *v, int len)
+static int v9fs_fd_write(struct v9fs_transport *trans, void *v, int len)
 {
-	struct v9fs_trans_fd *ts = trans ? trans->priv : NULL;
-	mm_segment_t oldfs = get_fs();
-	int ret = 0;
+	int ret;
+	mm_segment_t oldfs;
+	struct v9fs_trans_fd *ts;
 
-	if (!trans || trans->status != Connected || !ts)
-		return -EIO;
+	if (!trans || trans->status == Disconnected || !(ts = trans->priv))
+		return -EREMOTEIO;
+
+	if (!(ts->wr->f_flags & O_NONBLOCK))
+		dprintk(DEBUG_ERROR, "blocking write ...\n");
 
 	oldfs = get_fs();
 	set_fs(get_ds());
 	/* The cast to a user pointer is valid due to the set_fs() */
-	ret = vfs_write(ts->out_file, (void __user *)v, len, &ts->out_file->f_pos);
+	ret = vfs_write(ts->wr, (void __user *)v, len, &ts->wr->f_pos);
 	set_fs(oldfs);
 
+	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
+		trans->status = Disconnected;
 	return ret;
 }
 
-/**
- * v9fs_fd_init - initialize file descriptor transport
- * @v9ses: session information
- * @addr: address of server to mount
- * @data: mount options
- *
- */
-
-static int
-v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
+static unsigned int
+v9fs_fd_poll(struct v9fs_transport *trans, struct poll_table_struct *pt)
 {
-	struct v9fs_trans_fd *ts = NULL;
-	struct v9fs_transport *trans = v9ses->transport;
+	int ret, n;
+	struct v9fs_trans_fd *ts;
+	mm_segment_t oldfs;
 
-	if((v9ses->wfdno == ~0) || (v9ses->rfdno == ~0)) {
-		printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n");
-		return -ENOPROTOOPT;
-	}
+	if (!trans || trans->status != Connected || !(ts = trans->priv))
+		return -EREMOTEIO;
 
-	ts = kmalloc(sizeof(struct v9fs_trans_fd), GFP_KERNEL);
+	if (!ts->rd->f_op || !ts->rd->f_op->poll)
+		return -EIO;
 
-	if (!ts)
-		return -ENOMEM;
+	if (!ts->wr->f_op || !ts->wr->f_op->poll)
+		return -EIO;
 
-	ts->in_file = fget( v9ses->rfdno );
-	ts->out_file = fget( v9ses->wfdno );
+	oldfs = get_fs();
+	set_fs(get_ds());
 
-	if (!ts->in_file || !ts->out_file) {
-		if (ts->in_file)
-			fput(ts->in_file);
+	ret = ts->rd->f_op->poll(ts->rd, pt);
+	if (ret < 0)
+		goto end;
 
-		if (ts->out_file)
-			fput(ts->out_file);
+	if (ts->rd != ts->wr) {
+		n = ts->wr->f_op->poll(ts->wr, pt);
+		if (n < 0) {
+			ret = n;
+			goto end;
+		}
+		ret = (ret & ~POLLOUT) | (n & ~POLLIN);
+	}
 
+      end:
+	set_fs(oldfs);
+	return ret;
+}
+
+static int v9fs_fd_open(struct v9fs_session_info *v9ses, int rfd, int wfd)
+{
+	struct v9fs_transport *trans = v9ses->transport;
+	struct v9fs_trans_fd *ts = kmalloc(sizeof(struct v9fs_trans_fd),
+					   GFP_KERNEL);
+	if (!ts)
+		return -ENOMEM;
+
+	ts->rd = fget(rfd);
+	ts->wr = fget(wfd);
+	if (!ts->rd || !ts->wr) {
+		if (ts->rd)
+			fput(ts->rd);
+		if (ts->wr)
+			fput(ts->wr);
 		kfree(ts);
 		return -EIO;
 	}
@@ -133,84 +165,145 @@ v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
 	return 0;
 }
 
-
-/**
- * v9fs_fd_close - shutdown file descriptor
- * @trans: private socket structure
- *
- */
-
-static void v9fs_fd_close(struct v9fs_transport *trans)
+static int v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr,
+			char *data)
 {
-	struct v9fs_trans_fd *ts;
-
-	if (!trans)
-		return;
-
-	ts = xchg(&trans->priv, NULL);
+	if (v9ses->rfdno == ~0 || v9ses->wfdno == ~0) {
+		printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n");
+		return -ENOPROTOOPT;
+	}
 
-	if (!ts)
-		return;
+	return v9fs_fd_open(v9ses, v9ses->rfdno, v9ses->wfdno);
+}
 
-	trans->status = Disconnected;
-	if (ts->in_file)
-		fput(ts->in_file);
+static int v9fs_socket_open(struct v9fs_session_info *v9ses,
+			    struct socket *csocket)
+{
+	int fd, ret;
+
+	csocket->sk->sk_allocation = GFP_NOIO;
+	if ((fd = sock_map_fd(csocket)) < 0) {
+		eprintk(KERN_ERR, "v9fs_socket_open: failed to map fd\n");
+		ret = fd;
+	      release_csocket:
+		sock_release(csocket);
+		return ret;
+	}
 
-	if (ts->out_file)
-		fput(ts->out_file);
+	if ((ret = v9fs_fd_open(v9ses, fd, fd)) < 0) {
+		sockfd_put(csocket);
+		eprintk(KERN_ERR, "v9fs_socket_open: failed to open fd\n");
+		goto release_csocket;
+	}
 
-	kfree(ts);
+	((struct v9fs_trans_fd *)v9ses->transport->priv)->rd->f_flags |=
+	    O_NONBLOCK;
+	return 0;
 }
 
-static unsigned int
-v9fs_fd_poll(struct v9fs_transport *trans, struct poll_table_struct *pt)
+static int v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr,
+			 char *data)
 {
-	int ret, n;
-	struct v9fs_trans_fd *ts;
-	mm_segment_t oldfs;
+	int ret;
+	struct socket *csocket = NULL;
+	struct sockaddr_in sin_server;
+
+	sin_server.sin_family = AF_INET;
+	sin_server.sin_addr.s_addr = in_aton(addr);
+	sin_server.sin_port = htons(v9ses->port);
+	sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket);
+
+	if (!csocket) {
+		eprintk(KERN_ERR, "v9fs_trans_tcp: problem creating socket\n");
+		return -1;
+	}
 
-	if (!trans)
-		return -EIO;
+	ret = csocket->ops->connect(csocket,
+				    (struct sockaddr *)&sin_server,
+				    sizeof(struct sockaddr_in), 0);
+	if (ret < 0) {
+		eprintk(KERN_ERR,
+			"v9fs_trans_tcp: problem connecting socket to %s\n",
+			addr);
+		return ret;
+	}
 
-	ts = trans->priv;
-	if (trans->status != Connected || !ts)
-		return -EIO;
+	return v9fs_socket_open(v9ses, csocket);
+}
 
-	oldfs = get_fs();
-	set_fs(get_ds());
+static int
+v9fs_unix_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
+{
+	int ret;
+	struct socket *csocket;
+	struct sockaddr_un sun_server;
+
+	if (strlen(addr) > UNIX_PATH_MAX) {
+		eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n",
+			addr);
+		return -ENAMETOOLONG;
+	}
 
-	if (!ts->in_file->f_op || !ts->in_file->f_op->poll) {
-		ret = -EIO;
-		goto end;
+	sun_server.sun_family = PF_UNIX;
+	strcpy(sun_server.sun_path, addr);
+	sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket);
+	ret = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server,
+			sizeof(struct sockaddr_un) - 1, 0);
+	if (ret < 0) {
+		eprintk(KERN_ERR,
+			"v9fs_trans_unix: problem connecting socket: %s: %d\n",
+			addr, ret);
+		return ret;
 	}
 
-	ret = ts->in_file->f_op->poll(ts->in_file, pt);
+	return v9fs_socket_open(v9ses, csocket);
+}
 
-	if (ts->out_file != ts->in_file) {
-		if (!ts->out_file->f_op || !ts->out_file->f_op->poll) {
-			ret = -EIO;
-			goto end;
-		}
+/**
+ * v9fs_sock_close - shutdown socket
+ * @trans: private socket structure
+ *
+ */
+static void v9fs_fd_close(struct v9fs_transport *trans)
+{
+	struct v9fs_trans_fd *ts;
 
-		n = ts->out_file->f_op->poll(ts->out_file, pt);
+	if (!trans)
+		return;
 
-		ret &= ~POLLOUT;
-		n &= ~POLLIN;
+	ts = xchg(&trans->priv, NULL);
 
-		ret |= n;
-	}
+	if (!ts)
+		return;
 
-end:
-	set_fs(oldfs);
-	return ret;
+	trans->status = Disconnected;
+	if (ts->rd)
+		fput(ts->rd);
+	if (ts->wr)
+		fput(ts->wr);
+	kfree(ts);
 }
 
-
 struct v9fs_transport v9fs_trans_fd = {
 	.init = v9fs_fd_init,
-	.write = v9fs_fd_send,
-	.read = v9fs_fd_recv,
+	.write = v9fs_fd_write,
+	.read = v9fs_fd_read,
 	.close = v9fs_fd_close,
 	.poll = v9fs_fd_poll,
 };
 
+struct v9fs_transport v9fs_trans_tcp = {
+	.init = v9fs_tcp_init,
+	.write = v9fs_fd_write,
+	.read = v9fs_fd_read,
+	.close = v9fs_fd_close,
+	.poll = v9fs_fd_poll,
+};
+
+struct v9fs_transport v9fs_trans_unix = {
+	.init = v9fs_unix_init,
+	.write = v9fs_fd_write,
+	.read = v9fs_fd_read,
+	.close = v9fs_fd_close,
+	.poll = v9fs_fd_poll,
+};
diff --git a/fs/9p/trans_sock.c b/fs/9p/trans_sock.c
deleted file mode 100644
index 44e830697ac..00000000000
--- a/fs/9p/trans_sock.c
+++ /dev/null
@@ -1,334 +0,0 @@
-/*
- * linux/fs/9p/trans_socket.c
- *
- * Socket Transport Layer
- *
- *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
- *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *  Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
- *  Copyright (C) 1995, 1996 by Olaf Kirch <okir@monad.swb.de>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-
-#include <linux/config.h>
-#include <linux/in.h>
-#include <linux/module.h>
-#include <linux/net.h>
-#include <linux/ipv6.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/un.h>
-#include <asm/uaccess.h>
-#include <linux/inet.h>
-#include <linux/idr.h>
-#include <linux/file.h>
-
-#include "debug.h"
-#include "v9fs.h"
-#include "transport.h"
-
-#define V9FS_PORT 564
-
-struct v9fs_trans_sock {
-	struct socket *s;
-	struct file *filp;
-};
-
-/**
- * v9fs_sock_recv - receive from a socket
- * @v9ses: session information
- * @v: buffer to receive data into
- * @len: size of receive buffer
- *
- */
-
-static int v9fs_sock_recv(struct v9fs_transport *trans, void *v, int len)
-{
-	int ret;
-	struct v9fs_trans_sock *ts;
-
-	if (!trans || trans->status == Disconnected) {
-		dprintk(DEBUG_ERROR, "disconnected ...\n");
-		return -EREMOTEIO;
-	}
-
-	ts = trans->priv;
-
-	if (!(ts->filp->f_flags & O_NONBLOCK))
-		dprintk(DEBUG_ERROR, "blocking read ...\n");
-
-	ret = kernel_read(ts->filp, ts->filp->f_pos, v, len);
-	if (ret <= 0) {
-		if (ret != -ERESTARTSYS && ret != -EAGAIN)
-			trans->status = Disconnected;
-	}
-
-	return ret;
-}
-
-/**
- * v9fs_sock_send - send to a socket
- * @v9ses: session information
- * @v: buffer to send data from
- * @len: size of send buffer
- *
- */
-
-static int v9fs_sock_send(struct v9fs_transport *trans, void *v, int len)
-{
-	int ret;
-	mm_segment_t oldfs;
-	struct v9fs_trans_sock *ts;
-
-	if (!trans || trans->status == Disconnected) {
-		dprintk(DEBUG_ERROR, "disconnected ...\n");
-		return -EREMOTEIO;
-	}
-
-	ts = trans->priv;
-	if (!ts) {
-		dprintk(DEBUG_ERROR, "no transport ...\n");
-		return -EREMOTEIO;
-	}
-
-	if (!(ts->filp->f_flags & O_NONBLOCK))
-		dprintk(DEBUG_ERROR, "blocking write ...\n");
-
-	oldfs = get_fs();
-	set_fs(get_ds());
-	ret = vfs_write(ts->filp, (void __user *)v, len, &ts->filp->f_pos);
-	set_fs(oldfs);
-
-	if (ret < 0) {
-		if (ret != -ERESTARTSYS)
-			trans->status = Disconnected;
-	}
-
-	return ret;
-}
-
-static unsigned int v9fs_sock_poll(struct v9fs_transport *trans,
-	struct poll_table_struct *pt) {
-
-	int ret;
-	struct v9fs_trans_sock *ts;
-	mm_segment_t oldfs;
-
-	if (!trans) {
-		dprintk(DEBUG_ERROR, "no transport\n");
-		return -EIO;
-	}
-
-	ts = trans->priv;
-	if (trans->status != Connected || !ts) {
-		dprintk(DEBUG_ERROR, "transport disconnected: %d\n", trans->status);
-		return -EIO;
-	}
-
-	oldfs = get_fs();
-	set_fs(get_ds());
-
-	if (!ts->filp->f_op || !ts->filp->f_op->poll) {
-		dprintk(DEBUG_ERROR, "no poll operation\n");
-		ret = -EIO;
-		goto end;
-	}
-
-	ret = ts->filp->f_op->poll(ts->filp, pt);
-
-end:
-	set_fs(oldfs);
-	return ret;
-}
-
-
-/**
- * v9fs_tcp_init - initialize TCP socket
- * @v9ses: session information
- * @addr: address of server to mount
- * @data: mount options
- *
- */
-
-static int
-v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
-{
-	struct socket *csocket = NULL;
-	struct sockaddr_in sin_server;
-	int rc = 0;
-	struct v9fs_trans_sock *ts = NULL;
-	struct v9fs_transport *trans = v9ses->transport;
-	int fd;
-
-	trans->status = Disconnected;
-
-	ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL);
-
-	if (!ts)
-		return -ENOMEM;
-
-	trans->priv = ts;
-	ts->s = NULL;
-	ts->filp = NULL;
-
-	if (!addr)
-		return -EINVAL;
-
-	dprintk(DEBUG_TRANS, "Connecting to %s\n", addr);
-
-	sin_server.sin_family = AF_INET;
-	sin_server.sin_addr.s_addr = in_aton(addr);
-	sin_server.sin_port = htons(v9ses->port);
-	sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket);
-	rc = csocket->ops->connect(csocket,
-				   (struct sockaddr *)&sin_server,
-				   sizeof(struct sockaddr_in), 0);
-	if (rc < 0) {
-		eprintk(KERN_ERR,
-			"v9fs_trans_tcp: problem connecting socket to %s\n",
-			addr);
-		return rc;
-	}
-	csocket->sk->sk_allocation = GFP_NOIO;
-
-	fd = sock_map_fd(csocket);
-	if (fd < 0) {
-		sock_release(csocket);
-		kfree(ts);
-		trans->priv = NULL;
-		return fd;
-	}
-
-	ts->s = csocket;
-	ts->filp = fget(fd);
-	ts->filp->f_flags |= O_NONBLOCK;
-	trans->status = Connected;
-
-	return 0;
-}
-
-/**
- * v9fs_unix_init - initialize UNIX domain socket
- * @v9ses: session information
- * @dev_name: path to named pipe
- * @data: mount options
- *
- */
-
-static int
-v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
-	       char *data)
-{
-	int rc, fd;
-	struct socket *csocket;
-	struct sockaddr_un sun_server;
-	struct v9fs_transport *trans;
-	struct v9fs_trans_sock *ts;
-
-	rc = 0;
-	csocket = NULL;
-	trans = v9ses->transport;
-
-	trans->status = Disconnected;
-
-	if (strlen(dev_name) > UNIX_PATH_MAX) {
-		eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n",
-			dev_name);
-		return -ENOMEM;
-	}
-
-	ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL);
-	if (!ts)
-		return -ENOMEM;
-
-	trans->priv = ts;
-	ts->s = NULL;
-	ts->filp = NULL;
-
-	sun_server.sun_family = PF_UNIX;
-	strcpy(sun_server.sun_path, dev_name);
-	sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket);
-	rc = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server,
-		sizeof(struct sockaddr_un) - 1, 0);	/* -1 *is* important */
-	if (rc < 0) {
-		eprintk(KERN_ERR,
-			"v9fs_trans_unix: problem connecting socket: %s: %d\n",
-			dev_name, rc);
-		return rc;
-	}
-	csocket->sk->sk_allocation = GFP_NOIO;
-
-	fd = sock_map_fd(csocket);
-	if (fd < 0) {
-		sock_release(csocket);
-		kfree(ts);
-		trans->priv = NULL;
-		return fd;
-	}
-
-	ts->s = csocket;
-	ts->filp = fget(fd);
-	ts->filp->f_flags |= O_NONBLOCK;
-	trans->status = Connected;
-
-	return 0;
-}
-
-/**
- * v9fs_sock_close - shutdown socket
- * @trans: private socket structure
- *
- */
-
-static void v9fs_sock_close(struct v9fs_transport *trans)
-{
-	struct v9fs_trans_sock *ts;
-
-	if (!trans)
-		return;
-
-	ts = trans->priv;
-
-	if ((ts) && (ts->filp)) {
-		fput(ts->filp);
-		ts->filp = NULL;
-		ts->s = NULL;
-		trans->status = Disconnected;
-	}
-
-	kfree(ts);
-
-	trans->priv = NULL;
-}
-
-struct v9fs_transport v9fs_trans_tcp = {
-	.init = v9fs_tcp_init,
-	.write = v9fs_sock_send,
-	.read = v9fs_sock_recv,
-	.close = v9fs_sock_close,
-	.poll = v9fs_sock_poll,
-};
-
-struct v9fs_transport v9fs_trans_unix = {
-	.init = v9fs_unix_init,
-	.write = v9fs_sock_send,
-	.read = v9fs_sock_recv,
-	.close = v9fs_sock_close,
-	.poll = v9fs_sock_poll,
-};
diff --git a/fs/9p/transport.h b/fs/9p/transport.h
index 91fcdb94b36..b38a4b8a41c 100644
--- a/fs/9p/transport.h
+++ b/fs/9p/transport.h
@@ -7,9 +7,8 @@
  *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 61352491ba3..d37416eb579 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -7,9 +7,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -51,7 +50,7 @@ enum {
 	Opt_port, Opt_msize, Opt_uid, Opt_gid, Opt_afid, Opt_debug,
 	Opt_rfdno, Opt_wfdno,
 	/* String options */
-	Opt_name, Opt_remotename,
+	Opt_uname, Opt_remotename,
 	/* Options that take no arguments */
 	Opt_legacy, Opt_nodevmap, Opt_unix, Opt_tcp, Opt_fd,
 	/* Error token */
@@ -67,7 +66,7 @@ static match_table_t tokens = {
 	{Opt_rfdno, "rfdno=%u"},
 	{Opt_wfdno, "wfdno=%u"},
 	{Opt_debug, "debug=%x"},
-	{Opt_name, "name=%s"},
+	{Opt_uname, "uname=%s"},
 	{Opt_remotename, "aname=%s"},
 	{Opt_unix, "proto=unix"},
 	{Opt_tcp, "proto=tcp"},
@@ -116,7 +115,7 @@ static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses)
 		if (!*p)
 			continue;
 		token = match_token(p, tokens, args);
-		if (token < Opt_name) {
+		if (token < Opt_uname) {
 			if ((ret = match_int(&args[0], &option)) < 0) {
 				dprintk(DEBUG_ERROR,
 					"integer field, but no integer?\n");
@@ -158,7 +157,7 @@ static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses)
 		case Opt_fd:
 			v9ses->proto = PROTO_FD;
 			break;
-		case Opt_name:
+		case Opt_uname:
 			match_strcpy(v9ses->name, &args[0]);
 			break;
 		case Opt_remotename:
@@ -289,7 +288,7 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
 	/* set global debug level */
 	v9fs_debug_level = v9ses->debug;
 
-	/* id pools that are session-dependent: FIDs and TIDs */
+	/* id pools that are session-dependent: fids and tags */
 	idr_init(&v9ses->fidpool.pool);
 	init_MUTEX(&v9ses->fidpool.lock);
 
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index f337da7a0ee..c134d104cb2 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -5,9 +5,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -91,6 +90,3 @@ void v9fs_session_cancel(struct v9fs_session_info *v9ses);
 #define V9FS_DEFUSER	"nobody"
 #define V9FS_DEFANAME	""
 
-/* inital pool sizes for fids and tags */
-#define V9FS_START_FIDS 8192
-#define V9FS_START_TIDS 256
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index a759278acaa..43c9f7de031 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -5,9 +5,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 8100fb5171b..efda46fb64d 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -7,9 +7,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index 12c9cc926b7..062daa6000a 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -7,9 +7,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -51,7 +50,7 @@
  *
  */
 
-int v9fs_dentry_delete(struct dentry *dentry)
+static int v9fs_dentry_delete(struct dentry *dentry)
 {
 	dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
 	return 1;
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index cd5eeb032d6..766f11f1215 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -7,9 +7,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index de3a129698d..59e74416340 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -7,9 +7,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -69,29 +68,30 @@ int v9fs_file_open(struct inode *inode, struct file *file)
 
 	fid = v9fs_get_idpool(&v9ses->fidpool);
 	if (fid < 0) {
-			eprintk(KERN_WARNING, "newfid fails!\n");
-			return -ENOSPC;
-		}
+		eprintk(KERN_WARNING, "newfid fails!\n");
+		return -ENOSPC;
+	}
 
 	err = v9fs_t_walk(v9ses, vfid->fid, fid, NULL, NULL);
 	if (err < 0) {
-			dprintk(DEBUG_ERROR, "rewalk didn't work\n");
+		dprintk(DEBUG_ERROR, "rewalk didn't work\n");
 		goto put_fid;
 	}
 
-	vfid = kmalloc(sizeof(struct v9fs_fid), GFP_KERNEL);
-	if (vfid == NULL) {
-		dprintk(DEBUG_ERROR, "out of memory\n");
-		goto clunk_fid;
-		}
-
-		/* TODO: do special things for O_EXCL, O_NOFOLLOW, O_SYNC */
-		/* translate open mode appropriately */
+	/* TODO: do special things for O_EXCL, O_NOFOLLOW, O_SYNC */
+	/* translate open mode appropriately */
 	omode = v9fs_uflags2omode(file->f_flags);
 	err = v9fs_t_open(v9ses, fid, omode, &fcall);
 	if (err < 0) {
 		PRINT_FCALL_ERROR("open failed", fcall);
-		goto destroy_vfid;
+		goto clunk_fid;
+	}
+
+	vfid = kmalloc(sizeof(struct v9fs_fid), GFP_KERNEL);
+	if (vfid == NULL) {
+		dprintk(DEBUG_ERROR, "out of memory\n");
+		err = -ENOMEM;
+		goto clunk_fid;
 	}
 
 	file->private_data = vfid;
@@ -106,15 +106,12 @@ int v9fs_file_open(struct inode *inode, struct file *file)
 
 	return 0;
 
-destroy_vfid:
-	v9fs_fid_destroy(vfid);
-
 clunk_fid:
 	v9fs_t_clunk(v9ses, fid);
 
 put_fid:
 	v9fs_put_idpool(fid, &v9ses->fidpool);
-		kfree(fcall);
+	kfree(fcall);
 
 	return err;
 }
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 651a9e14d9a..133db366d30 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -7,9 +7,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -255,8 +254,8 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
 }
 
 static int
-v9fs_create(struct v9fs_session_info *v9ses, u32 pfid, char *name,
-	u32 perm, u8 mode, u32 *fidp, struct v9fs_qid *qid, u32 *iounit)
+v9fs_create(struct v9fs_session_info *v9ses, u32 pfid, char *name, u32 perm,
+	u8 mode, char *extension, u32 *fidp, struct v9fs_qid *qid, u32 *iounit)
 {
 	u32 fid;
 	int err;
@@ -271,14 +270,14 @@ v9fs_create(struct v9fs_session_info *v9ses, u32 pfid, char *name,
 	err = v9fs_t_walk(v9ses, pfid, fid, NULL, &fcall);
 	if (err < 0) {
 		PRINT_FCALL_ERROR("clone error", fcall);
-		goto error;
+		goto put_fid;
 	}
 	kfree(fcall);
 
-	err = v9fs_t_create(v9ses, fid, name, perm, mode, &fcall);
+	err = v9fs_t_create(v9ses, fid, name, perm, mode, extension, &fcall);
 	if (err < 0) {
 		PRINT_FCALL_ERROR("create fails", fcall);
-		goto error;
+		goto clunk_fid;
 	}
 
 	if (iounit)
@@ -293,7 +292,11 @@ v9fs_create(struct v9fs_session_info *v9ses, u32 pfid, char *name,
 	kfree(fcall);
 	return 0;
 
-error:
+clunk_fid:
+	v9fs_t_clunk(v9ses, fid);
+	fid = V9FS_NOFID;
+
+put_fid:
 	if (fid >= 0)
 		v9fs_put_idpool(fid, &v9ses->fidpool);
 
@@ -348,7 +351,7 @@ error:
 	return ERR_PTR(err);
 }
 
-struct inode *
+static struct inode *
 v9fs_inode_from_fid(struct v9fs_session_info *v9ses, u32 fid,
 	struct super_block *sb)
 {
@@ -474,7 +477,7 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
 		flags = O_RDWR;
 
 	err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name,
-		perm, v9fs_uflags2omode(flags), &fid, &qid, &iounit);
+		perm, v9fs_uflags2omode(flags), NULL, &fid, &qid, &iounit);
 
 	if (err)
 		goto error;
@@ -550,7 +553,7 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	perm = unixmode2p9mode(v9ses, mode | S_IFDIR);
 
 	err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name,
-		perm, V9FS_OREAD, &fid, NULL, NULL);
+		perm, V9FS_OREAD, NULL, &fid, NULL, NULL);
 
 	if (err) {
 		dprintk(DEBUG_ERROR, "create error %d\n", err);
@@ -1008,11 +1011,13 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
 
 	/* copy extension buffer into buffer */
 	if (fcall->params.rstat.stat.extension.len < buflen)
-		buflen = fcall->params.rstat.stat.extension.len;
+		buflen = fcall->params.rstat.stat.extension.len + 1;
 
-	memcpy(buffer, fcall->params.rstat.stat.extension.str, buflen - 1);
+	memmove(buffer, fcall->params.rstat.stat.extension.str, buflen - 1);
 	buffer[buflen-1] = 0;
 
+	dprintk(DEBUG_ERROR, "%s -> %.*s (%s)\n", dentry->d_name.name, fcall->params.rstat.stat.extension.len,
+		fcall->params.rstat.stat.extension.str, buffer);
 	retval = buflen;
 
       FreeFcall:
@@ -1072,7 +1077,7 @@ static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 	if (!link)
 		link = ERR_PTR(-ENOMEM);
 	else {
-		len = v9fs_readlink(dentry, link, strlen(link));
+		len = v9fs_readlink(dentry, link, PATH_MAX);
 
 		if (len < 0) {
 			__putname(link);
@@ -1109,10 +1114,7 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
 	struct v9fs_session_info *v9ses;
 	struct v9fs_fid *dfid, *vfid;
 	struct inode *inode;
-	struct v9fs_fcall *fcall;
-	struct v9fs_wstat wstat;
 
-	fcall = NULL;
 	inode = NULL;
 	vfid = NULL;
 	v9ses = v9fs_inode2v9ses(dir);
@@ -1125,7 +1127,7 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
 	}
 
 	err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name,
-		perm, V9FS_OREAD, &fid, NULL, NULL);
+		perm, V9FS_OREAD, (char *) extension, &fid, NULL, NULL);
 
 	if (err)
 		goto error;
@@ -1148,23 +1150,11 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
 		goto error;
 	}
 
-	/* issue a Twstat */
-	v9fs_blank_wstat(&wstat);
-	wstat.muid = v9ses->name;
-	wstat.extension = (char *) extension;
-	err = v9fs_t_wstat(v9ses, vfid->fid, &wstat, &fcall);
-	if (err < 0) {
-		PRINT_FCALL_ERROR("wstat error", fcall);
-		goto error;
-	}
-
-	kfree(fcall);
 	dentry->d_op = &v9fs_dentry_operations;
 	d_instantiate(dentry, inode);
 	return 0;
 
 error:
-	kfree(fcall);
 	if (vfid)
 		v9fs_fid_destroy(vfid);
 
@@ -1224,7 +1214,7 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
 	}
 
 	name = __getname();
-	sprintf(name, "hardlink(%d)\n", oldfid->fid);
+	sprintf(name, "%d\n", oldfid->fid);
 	retval = v9fs_vfs_mkspecial(dir, dentry, V9FS_DMLINK, name);
 	__putname(name);
 
@@ -1253,6 +1243,8 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 		return -EINVAL;
 
 	name = __getname();
+	if (!name)
+		return -ENOMEM;
 	/* build extension */
 	if (S_ISBLK(mode))
 		sprintf(name, "b %u %u", MAJOR(rdev), MINOR(rdev));
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index d05318fa684..b0a0ae509c0 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -8,9 +8,8 @@
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -262,7 +261,7 @@ static struct super_operations v9fs_super_ops = {
 };
 
 struct file_system_type v9fs_fs_type = {
-	.name = "9P",
+	.name = "9p",
 	.get_sb = v9fs_get_sb,
 	.kill_sb = v9fs_kill_super,
 	.owner = THIS_MODULE,
diff --git a/fs/Kconfig b/fs/Kconfig
index c8d0a209120..e207be68d4c 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1555,6 +1555,7 @@ config RPCSEC_GSS_SPKM3
 	select CRYPTO
 	select CRYPTO_MD5
 	select CRYPTO_DES
+	select CRYPTO_CAST5
 	help
 	  Provides for secure RPC calls by means of a gss-api
 	  mechanism based on the SPKM3 public-key mechanism.
diff --git a/fs/aio.c b/fs/aio.c
index aec2b1916d1..e41e932ba48 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -122,10 +122,9 @@ static int aio_setup_ring(struct kioctx *ctx)
 	info->nr = 0;
 	info->ring_pages = info->internal_pages;
 	if (nr_pages > AIO_RING_PAGES) {
-		info->ring_pages = kmalloc(sizeof(struct page *) * nr_pages, GFP_KERNEL);
+		info->ring_pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
 		if (!info->ring_pages)
 			return -ENOMEM;
-		memset(info->ring_pages, 0, sizeof(struct page *) * nr_pages);
 	}
 
 	info->mmap_size = nr_pages * PAGE_SIZE;
diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c
index 785f6b2d5d1..b7d6b920f65 100644
--- a/fs/befs/datastream.c
+++ b/fs/befs/datastream.c
@@ -118,7 +118,7 @@ befs_fblock2brun(struct super_block *sb, befs_data_stream * data,
  * befs_read_lsmylink - read long symlink from datastream.
  * @sb: Filesystem superblock 
  * @ds: Datastrem to read from
- * @buf: Buffer in wich to place long symlink data
+ * @buf: Buffer in which to place long symlink data
  * @len: Length of the long symlink in bytes
  *
  * Returns the number of bytes read
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index c2eac2a50bd..537893a1601 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -500,17 +500,22 @@ out:
 #define INTERPRETER_AOUT 1
 #define INTERPRETER_ELF 2
 
+#ifndef STACK_RND_MASK
+#define STACK_RND_MASK 0x7ff		/* with 4K pages 8MB of VA */
+#endif
 
 static unsigned long randomize_stack_top(unsigned long stack_top)
 {
 	unsigned int random_variable = 0;
 
-	if (current->flags & PF_RANDOMIZE)
-		random_variable = get_random_int() % (8*1024*1024);
+	if (current->flags & PF_RANDOMIZE) {
+		random_variable = get_random_int() & STACK_RND_MASK;
+		random_variable <<= PAGE_SHIFT;
+	}
 #ifdef CONFIG_STACK_GROWSUP
-	return PAGE_ALIGN(stack_top + random_variable);
+	return PAGE_ALIGN(stack_top) + random_variable;
 #else
-	return PAGE_ALIGN(stack_top - random_variable);
+	return PAGE_ALIGN(stack_top) - random_variable;
 #endif
 }
 
@@ -1334,7 +1339,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
 
 	i = p->state ? ffz(~p->state) + 1 : 0;
 	psinfo->pr_state = i;
-	psinfo->pr_sname = (i < 0 || i > 5) ? '.' : "RSDTZW"[i];
+	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
 	psinfo->pr_nice = task_nice(p);
 	psinfo->pr_flag = p->flags;
@@ -1465,12 +1470,11 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
 		read_lock(&tasklist_lock);
 		do_each_thread(g,p)
 			if (current->mm == p->mm && current != p) {
-				tmp = kmalloc(sizeof(*tmp), GFP_ATOMIC);
+				tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
 				if (!tmp) {
 					read_unlock(&tasklist_lock);
 					goto cleanup;
 				}
-				memset(tmp, 0, sizeof(*tmp));
 				INIT_LIST_HEAD(&tmp->list);
 				tmp->thread = p;
 				list_add(&tmp->list, &thread_list);
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 5b3076e8ee9..a2e48c999c2 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -572,8 +572,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 	csp -= sizeof(unsigned long);
 	__put_user(bprm->argc, (unsigned long *) csp);
 
-	if (csp != sp)
-		BUG();
+	BUG_ON(csp != sp);
 
 	/* fill in the argv[] array */
 #ifdef CONFIG_MMU
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 108d56bbd0d..69f44dcdb0b 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -36,6 +36,7 @@
 #include <linux/personality.h>
 #include <linux/init.h>
 #include <linux/flat.h>
+#include <linux/syscalls.h>
 
 #include <asm/byteorder.h>
 #include <asm/system.h>
@@ -426,6 +427,8 @@ static int load_flat_file(struct linux_binprm * bprm,
 	int i, rev, relocs = 0;
 	loff_t fpos;
 	unsigned long start_code, end_code;
+	int ret;
+	int exec_fileno;
 
 	hdr = ((struct flat_hdr *) bprm->buf);		/* exec-header */
 	inode = bprm->file->f_dentry->d_inode;
@@ -450,7 +453,8 @@ static int load_flat_file(struct linux_binprm * bprm,
 		 */
 		if (strncmp(hdr->magic, "#!", 2))
 			printk("BINFMT_FLAT: bad header magic\n");
-		return -ENOEXEC;
+		ret = -ENOEXEC;
+		goto err;
 	}
 
 	if (flags & FLAT_FLAG_KTRACE)
@@ -458,14 +462,16 @@ static int load_flat_file(struct linux_binprm * bprm,
 
 	if (rev != FLAT_VERSION && rev != OLD_FLAT_VERSION) {
 		printk("BINFMT_FLAT: bad flat file version 0x%x (supported 0x%x and 0x%x)\n", rev, FLAT_VERSION, OLD_FLAT_VERSION);
-		return -ENOEXEC;
+		ret = -ENOEXEC;
+		goto err;
 	}
 	
 	/* Don't allow old format executables to use shared libraries */
 	if (rev == OLD_FLAT_VERSION && id != 0) {
 		printk("BINFMT_FLAT: shared libraries are not available before rev 0x%x\n",
 				(int) FLAT_VERSION);
-		return -ENOEXEC;
+		ret = -ENOEXEC;
+		goto err;
 	}
 
 	/*
@@ -478,7 +484,8 @@ static int load_flat_file(struct linux_binprm * bprm,
 #ifndef CONFIG_BINFMT_ZFLAT
 	if (flags & (FLAT_FLAG_GZIP|FLAT_FLAG_GZDATA)) {
 		printk("Support for ZFLAT executables is not enabled.\n");
-		return -ENOEXEC;
+		ret = -ENOEXEC;
+		goto err;
 	}
 #endif
 
@@ -490,14 +497,27 @@ static int load_flat_file(struct linux_binprm * bprm,
 	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
 	if (rlim >= RLIM_INFINITY)
 		rlim = ~0;
-	if (data_len + bss_len > rlim)
-		return -ENOMEM;
+	if (data_len + bss_len > rlim) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/* check file descriptor */
+	exec_fileno = get_unused_fd();
+	if (exec_fileno < 0) {
+		ret = -EMFILE;
+		goto err;
+	}
+	get_file(bprm->file);
+	fd_install(exec_fileno, bprm->file);
 
 	/* Flush all traces of the currently running executable */
 	if (id == 0) {
 		result = flush_old_exec(bprm);
-		if (result)
-			return result;
+		if (result) {
+			ret = result;
+			goto err_close;
+		}
 
 		/* OK, This is the point of no return */
 		set_personality(PER_LINUX);
@@ -527,7 +547,8 @@ static int load_flat_file(struct linux_binprm * bprm,
 			if (!textpos)
 				textpos = (unsigned long) -ENOMEM;
 			printk("Unable to mmap process text, errno %d\n", (int)-textpos);
-			return(textpos);
+			ret = textpos;
+			goto err_close;
 		}
 
 		down_write(&current->mm->mmap_sem);
@@ -542,7 +563,8 @@ static int load_flat_file(struct linux_binprm * bprm,
 			printk("Unable to allocate RAM for process data, errno %d\n",
 					(int)-datapos);
 			do_munmap(current->mm, textpos, text_len);
-			return realdatastart;
+			ret = realdatastart;
+			goto err_close;
 		}
 		datapos = realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long);
 
@@ -564,7 +586,8 @@ static int load_flat_file(struct linux_binprm * bprm,
 			printk("Unable to read data+bss, errno %d\n", (int)-result);
 			do_munmap(current->mm, textpos, text_len);
 			do_munmap(current->mm, realdatastart, data_len + extra);
-			return result;
+			ret = result;
+			goto err_close;
 		}
 
 		reloc = (unsigned long *) (datapos+(ntohl(hdr->reloc_start)-text_len));
@@ -582,7 +605,8 @@ static int load_flat_file(struct linux_binprm * bprm,
 				textpos = (unsigned long) -ENOMEM;
 			printk("Unable to allocate RAM for process text/data, errno %d\n",
 					(int)-textpos);
-			return(textpos);
+			ret = textpos;
+			goto err_close;
 		}
 
 		realdatastart = textpos + ntohl(hdr->data_start);
@@ -627,7 +651,8 @@ static int load_flat_file(struct linux_binprm * bprm,
 			printk("Unable to read code+data+bss, errno %d\n",(int)-result);
 			do_munmap(current->mm, textpos, text_len + data_len + extra +
 				MAX_SHARED_LIBS * sizeof(unsigned long));
-			return result;
+			ret = result;
+			goto err_close;
 		}
 	}
 
@@ -690,8 +715,10 @@ static int load_flat_file(struct linux_binprm * bprm,
 			unsigned long addr;
 			if (*rp) {
 				addr = calc_reloc(*rp, libinfo, id, 0);
-				if (addr == RELOC_FAILED)
-					return -ENOEXEC;
+				if (addr == RELOC_FAILED) {
+					ret = -ENOEXEC;
+					goto err_close;
+				}
 				*rp = addr;
 			}
 		}
@@ -718,8 +745,10 @@ static int load_flat_file(struct linux_binprm * bprm,
 			relval = ntohl(reloc[i]);
 			addr = flat_get_relocate_addr(relval);
 			rp = (unsigned long *) calc_reloc(addr, libinfo, id, 1);
-			if (rp == (unsigned long *)RELOC_FAILED)
-				return -ENOEXEC;
+			if (rp == (unsigned long *)RELOC_FAILED) {
+				ret = -ENOEXEC;
+				goto err_close;
+			}
 
 			/* Get the pointer's value.  */
 			addr = flat_get_addr_from_rp(rp, relval, flags);
@@ -731,8 +760,10 @@ static int load_flat_file(struct linux_binprm * bprm,
 				if ((flags & FLAT_FLAG_GOTPIC) == 0)
 					addr = ntohl(addr);
 				addr = calc_reloc(addr, libinfo, id, 0);
-				if (addr == RELOC_FAILED)
-					return -ENOEXEC;
+				if (addr == RELOC_FAILED) {
+					ret = -ENOEXEC;
+					goto err_close;
+				}
 
 				/* Write back the relocated pointer.  */
 				flat_put_addr_at_rp(rp, addr, relval);
@@ -752,6 +783,10 @@ static int load_flat_file(struct linux_binprm * bprm,
 			stack_len);
 
 	return 0;
+err_close:
+	sys_close(exec_fileno);
+err:
+	return ret;
 }
 
 
diff --git a/fs/bio.c b/fs/bio.c
index 0a8c59cb68f..73e664c01d3 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -636,12 +636,10 @@ static struct bio *__bio_map_user_iov(request_queue_t *q,
 		return ERR_PTR(-ENOMEM);
 
 	ret = -ENOMEM;
-	pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
+	pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
 	if (!pages)
 		goto out;
 
-	memset(pages, 0, nr_pages * sizeof(struct page *));
-
 	for (i = 0; i < iov_count; i++) {
 		unsigned long uaddr = (unsigned long)iov[i].iov_base;
 		unsigned long len = iov[i].iov_len;
@@ -1186,12 +1184,11 @@ void bioset_free(struct bio_set *bs)
 
 struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size, int scale)
 {
-	struct bio_set *bs = kmalloc(sizeof(*bs), GFP_KERNEL);
+	struct bio_set *bs = kzalloc(sizeof(*bs), GFP_KERNEL);
 
 	if (!bs)
 		return NULL;
 
-	memset(bs, 0, sizeof(*bs));
 	bs->bio_pool = mempool_create(bio_pool_size, mempool_alloc_slab,
 			mempool_free_slab, bio_slab);
 
diff --git a/fs/buffer.c b/fs/buffer.c
index 6d77ce9f54e..3b3ab528192 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -160,12 +160,7 @@ int sync_blockdev(struct block_device *bdev)
 }
 EXPORT_SYMBOL(sync_blockdev);
 
-/*
- * Write out and wait upon all dirty data associated with this
- * superblock.  Filesystem data as well as the underlying block
- * device.  Takes the superblock lock.
- */
-int fsync_super(struct super_block *sb)
+static void __fsync_super(struct super_block *sb)
 {
 	sync_inodes_sb(sb, 0);
 	DQUOT_SYNC(sb);
@@ -177,7 +172,16 @@ int fsync_super(struct super_block *sb)
 		sb->s_op->sync_fs(sb, 1);
 	sync_blockdev(sb->s_bdev);
 	sync_inodes_sb(sb, 1);
+}
 
+/*
+ * Write out and wait upon all dirty data associated with this
+ * superblock.  Filesystem data as well as the underlying block
+ * device.  Takes the superblock lock.
+ */
+int fsync_super(struct super_block *sb)
+{
+	__fsync_super(sb);
 	return sync_blockdev(sb->s_bdev);
 }
 
@@ -216,19 +220,7 @@ struct super_block *freeze_bdev(struct block_device *bdev)
 		sb->s_frozen = SB_FREEZE_WRITE;
 		smp_wmb();
 
-		sync_inodes_sb(sb, 0);
-		DQUOT_SYNC(sb);
-
-		lock_super(sb);
-		if (sb->s_dirt && sb->s_op->write_super)
-			sb->s_op->write_super(sb);
-		unlock_super(sb);
-
-		if (sb->s_op->sync_fs)
-			sb->s_op->sync_fs(sb, 1);
-
-		sync_blockdev(sb->s_bdev);
-		sync_inodes_sb(sb, 1);
+		__fsync_super(sb);
 
 		sb->s_frozen = SB_FREEZE_TRANS;
 		smp_wmb();
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 5c36345c9bf..8c6eb04d31e 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -146,12 +146,10 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor,
 	int ret = 0;
 	int i;
 
-	cd = kmalloc(sizeof(struct char_device_struct), GFP_KERNEL);
+	cd = kzalloc(sizeof(struct char_device_struct), GFP_KERNEL);
 	if (cd == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	memset(cd, 0, sizeof(struct char_device_struct));
-
 	mutex_lock(&chrdevs_lock);
 
 	/* temporary */
@@ -466,9 +464,8 @@ static struct kobj_type ktype_cdev_dynamic = {
 
 struct cdev *cdev_alloc(void)
 {
-	struct cdev *p = kmalloc(sizeof(struct cdev), GFP_KERNEL);
+	struct cdev *p = kzalloc(sizeof(struct cdev), GFP_KERNEL);
 	if (p) {
-		memset(p, 0, sizeof(struct cdev));
 		p->kobj.ktype = &ktype_cdev_dynamic;
 		INIT_LIST_HEAD(&p->list);
 		kobject_init(&p->kobj);
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index c607d923350..5d052713326 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -51,7 +51,7 @@ void coda_cache_clear_all(struct super_block *sb)
         struct coda_sb_info *sbi;
 
         sbi = coda_sbp(sb);
-        if (!sbi) BUG();
+	BUG_ON(!sbi);
 
 	atomic_inc(&permission_epoch);
 }
diff --git a/fs/coda/cnode.c b/fs/coda/cnode.c
index 23aeef5aa81..4c9fecbfa91 100644
--- a/fs/coda/cnode.c
+++ b/fs/coda/cnode.c
@@ -120,8 +120,7 @@ void coda_replace_fid(struct inode *inode, struct CodaFid *oldfid,
 	
 	cii = ITOC(inode);
 
-	if (!coda_fideq(&cii->c_fid, oldfid))
-		BUG();
+	BUG_ON(!coda_fideq(&cii->c_fid, oldfid));
 
 	/* replace fid and rehash inode */
 	/* XXX we probably need to hold some lock here! */
diff --git a/fs/compat.c b/fs/compat.c
index 2a88477330f..ef5a0771592 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1476,10 +1476,9 @@ int compat_do_execve(char * filename,
 	int i;
 
 	retval = -ENOMEM;
-	bprm = kmalloc(sizeof(*bprm), GFP_KERNEL);
+	bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
 	if (!bprm)
 		goto out_ret;
-	memset(bprm, 0, sizeof(*bprm));
 
 	file = open_exec(filename);
 	retval = PTR_ERR(file);
@@ -2170,9 +2169,12 @@ asmlinkage long compat_sys_nfsservctl(int cmd, struct compat_nfsctl_arg __user *
 
 	default:
 		err = -EINVAL;
-		goto done;
+		break;
 	}
 
+	if (err)
+		goto done;
+
 	oldfs = get_fs();
 	set_fs(KERNEL_DS);
 	/* The __user pointer casts are valid because of the set_fs() */
diff --git a/fs/dcache.c b/fs/dcache.c
index 653f64ce98e..93958464850 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -325,10 +325,13 @@ static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
 
 struct dentry * d_find_alias(struct inode *inode)
 {
-	struct dentry *de;
-	spin_lock(&dcache_lock);
-	de = __d_find_alias(inode, 0);
-	spin_unlock(&dcache_lock);
+	struct dentry *de = NULL;
+
+	if (!list_empty(&inode->i_dentry)) {
+		spin_lock(&dcache_lock);
+		de = __d_find_alias(inode, 0);
+		spin_unlock(&dcache_lock);
+	}
 	return de;
 }
 
@@ -486,6 +489,7 @@ repeat:
 			continue;
 		}
 		prune_one_dentry(dentry);
+		cond_resched_lock(&dcache_lock);
 		goto repeat;
 	}
 	spin_unlock(&dcache_lock);
@@ -799,6 +803,7 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
 	if (inode)
 		list_add(&entry->d_alias, &inode->i_dentry);
 	entry->d_inode = inode;
+	fsnotify_d_instantiate(entry, inode);
 	spin_unlock(&dcache_lock);
 	security_d_instantiate(entry, inode);
 }
@@ -850,6 +855,7 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
 	list_add(&entry->d_alias, &inode->i_dentry);
 do_negative:
 	entry->d_inode = inode;
+	fsnotify_d_instantiate(entry, inode);
 	spin_unlock(&dcache_lock);
 	security_d_instantiate(entry, inode);
 	return NULL;
@@ -980,6 +986,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
 		new = __d_find_alias(inode, 1);
 		if (new) {
 			BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
+			fsnotify_d_instantiate(new, inode);
 			spin_unlock(&dcache_lock);
 			security_d_instantiate(new, inode);
 			d_rehash(dentry);
@@ -989,6 +996,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
 			/* d_instantiate takes dcache_lock, so we do it by hand */
 			list_add(&dentry->d_alias, &inode->i_dentry);
 			dentry->d_inode = inode;
+			fsnotify_d_instantiate(dentry, inode);
 			spin_unlock(&dcache_lock);
 			security_d_instantiate(dentry, inode);
 			d_rehash(dentry);
@@ -1173,6 +1181,9 @@ void d_delete(struct dentry * dentry)
 	spin_lock(&dentry->d_lock);
 	isdir = S_ISDIR(dentry->d_inode->i_mode);
 	if (atomic_read(&dentry->d_count) == 1) {
+		/* remove this and other inotify debug checks after 2.6.18 */
+		dentry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
+
 		dentry_iput(dentry);
 		fsnotify_nameremove(dentry, isdir);
 		return;
@@ -1339,6 +1350,7 @@ already_unhashed:
 
 	list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
 	spin_unlock(&target->d_lock);
+	fsnotify_d_move(dentry);
 	spin_unlock(&dentry->d_lock);
 	write_sequnlock(&rename_lock);
 	spin_unlock(&dcache_lock);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 27f3e787fac..235ed8d1f11 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -129,6 +129,7 @@ struct dio {
 	/* AIO related stuff */
 	struct kiocb *iocb;		/* kiocb */
 	int is_async;			/* is IO async ? */
+	int io_error;			/* IO error in completion path */
 	ssize_t result;                 /* IO result */
 };
 
@@ -250,6 +251,10 @@ static void finished_one_bio(struct dio *dio)
 			    ((offset + transferred) > dio->i_size))
 				transferred = dio->i_size - offset;
 
+			/* check for error in completion path */
+			if (dio->io_error)
+				transferred = dio->io_error;
+
 			dio_complete(dio, offset, transferred);
 
 			/* Complete AIO later if falling back to buffered i/o */
@@ -406,7 +411,7 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
 	int page_no;
 
 	if (!uptodate)
-		dio->result = -EIO;
+		dio->io_error = -EIO;
 
 	if (dio->is_async && dio->rw == READ) {
 		bio_check_pages_dirty(bio);	/* transfers ownership */
@@ -971,6 +976,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 	dio->next_block_for_io = -1;
 
 	dio->page_errors = 0;
+	dio->io_error = 0;
 	dio->result = 0;
 	dio->iocb = iocb;
 	dio->i_size = i_size_read(inode);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 1c2b16fda13..a0f682cdd03 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -599,7 +599,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
 	switch (op) {
 	case EPOLL_CTL_ADD:
 		if (!epi) {
-			epds.events |= POLLERR | POLLHUP;
+			epds.events |= POLLERR | POLLHUP | POLLRDHUP;
 
 			error = ep_insert(ep, &epds, tfile, fd);
 		} else
@@ -613,7 +613,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
 		break;
 	case EPOLL_CTL_MOD:
 		if (epi) {
-			epds.events |= POLLERR | POLLHUP;
+			epds.events |= POLLERR | POLLHUP | POLLRDHUP;
 			error = ep_modify(ep, epi, &epds);
 		} else
 			error = -ENOENT;
diff --git a/fs/exec.c b/fs/exec.c
index 0b515ac5313..995cba3c62b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -127,7 +127,7 @@ asmlinkage long sys_uselib(const char __user * library)
 	struct nameidata nd;
 	int error;
 
-	error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ);
+	error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC);
 	if (error)
 		goto out;
 
@@ -477,7 +477,7 @@ struct file *open_exec(const char *name)
 	int err;
 	struct file *file;
 
-	err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, FMODE_READ);
+	err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC);
 	file = ERR_PTR(err);
 
 	if (!err) {
@@ -1143,10 +1143,9 @@ int do_execve(char * filename,
 	int i;
 
 	retval = -ENOMEM;
-	bprm = kmalloc(sizeof(*bprm), GFP_KERNEL);
+	bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
 	if (!bprm)
 		goto out_ret;
-	memset(bprm, 0, sizeof(*bprm));
 
 	file = open_exec(filename);
 	retval = PTR_ERR(file);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 268b73f5847..7e30bae174e 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -211,8 +211,6 @@ static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs)
 
 	if (sbi->s_mount_opt & EXT2_MOUNT_GRPID)
 		seq_puts(seq, ",grpid");
-	else
-		seq_puts(seq, ",nogrpid");
 
 #if defined(CONFIG_QUOTA)
 	if (sbi->s_mount_opt & EXT2_MOUNT_USRQUOTA)
diff --git a/fs/ext3/bitmap.c b/fs/ext3/bitmap.c
index cb16b4c5d5d..ce4f82b9e52 100644
--- a/fs/ext3/bitmap.c
+++ b/fs/ext3/bitmap.c
@@ -7,11 +7,11 @@
  * Universite Pierre et Marie Curie (Paris VI)
  */
 
-#ifdef EXT3FS_DEBUG
-
 #include <linux/buffer_head.h>
+#include <linux/jbd.h>
+#include <linux/ext3_fs.h>
 
-#include "ext3_fs.h"
+#ifdef EXT3FS_DEBUG
 
 static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
 
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 785c7213a54..f3fbe2d030f 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -381,8 +381,8 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
 			list_move(&inode->i_list, &sb->s_dirty);
 		}
 		spin_unlock(&inode_lock);
-		cond_resched();
 		iput(inode);
+		cond_resched();
 		spin_lock(&inode_lock);
 		if (wbc->nr_to_write <= 0)
 			break;
diff --git a/fs/inode.c b/fs/inode.c
index a51c671c54c..85da11044ad 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -91,7 +91,7 @@ DEFINE_SPINLOCK(inode_lock);
  * from its final dispose_list, the struct super_block they refer to
  * (for inode->i_sb->s_op) may already have been freed and reused.
  */
-DEFINE_MUTEX(iprune_mutex);
+static DEFINE_MUTEX(iprune_mutex);
 
 /*
  * Statistics gathering..
diff --git a/fs/inotify.c b/fs/inotify.c
index 0ee39ef591c..a61e93e1785 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -38,7 +38,6 @@
 #include <asm/ioctls.h>
 
 static atomic_t inotify_cookie;
-static atomic_t inotify_watches;
 
 static kmem_cache_t *watch_cachep;
 static kmem_cache_t *event_cachep;
@@ -381,6 +380,48 @@ static int find_inode(const char __user *dirname, struct nameidata *nd,
 }
 
 /*
+ * inotify_inode_watched - returns nonzero if there are watches on this inode
+ * and zero otherwise.  We call this lockless, we do not care if we race.
+ */
+static inline int inotify_inode_watched(struct inode *inode)
+{
+	return !list_empty(&inode->inotify_watches);
+}
+
+/*
+ * Get child dentry flag into synch with parent inode.
+ * Flag should always be clear for negative dentrys.
+ */
+static void set_dentry_child_flags(struct inode *inode, int watched)
+{
+	struct dentry *alias;
+
+	spin_lock(&dcache_lock);
+	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
+		struct dentry *child;
+
+		list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
+			if (!child->d_inode) {
+				WARN_ON(child->d_flags & DCACHE_INOTIFY_PARENT_WATCHED);
+				continue;
+			}
+			spin_lock(&child->d_lock);
+			if (watched) {
+				WARN_ON(child->d_flags &
+						DCACHE_INOTIFY_PARENT_WATCHED);
+				child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
+			} else {
+				WARN_ON(!(child->d_flags &
+					DCACHE_INOTIFY_PARENT_WATCHED));
+				child->d_flags&=~DCACHE_INOTIFY_PARENT_WATCHED;
+			}
+			spin_unlock(&child->d_lock);
+		}
+	}
+	spin_unlock(&dcache_lock);
+}
+
+/*
  * create_watch - creates a watch on the given device.
  *
  * Callers must hold dev->mutex.  Calls inotify_dev_get_wd() so may sleep.
@@ -426,7 +467,6 @@ static struct inotify_watch *create_watch(struct inotify_device *dev,
 	get_inotify_watch(watch);
 
 	atomic_inc(&dev->user->inotify_watches);
-	atomic_inc(&inotify_watches);
 
 	return watch;
 }
@@ -458,8 +498,10 @@ static void remove_watch_no_event(struct inotify_watch *watch,
 	list_del(&watch->i_list);
 	list_del(&watch->d_list);
 
+	if (!inotify_inode_watched(watch->inode))
+		set_dentry_child_flags(watch->inode, 0);
+
 	atomic_dec(&dev->user->inotify_watches);
-	atomic_dec(&inotify_watches);
 	idr_remove(&dev->idr, watch->wd);
 	put_inotify_watch(watch);
 }
@@ -481,16 +523,39 @@ static void remove_watch(struct inotify_watch *watch,struct inotify_device *dev)
 	remove_watch_no_event(watch, dev);
 }
 
+/* Kernel API */
+
 /*
- * inotify_inode_watched - returns nonzero if there are watches on this inode
- * and zero otherwise.  We call this lockless, we do not care if we race.
+ * inotify_d_instantiate - instantiate dcache entry for inode
  */
-static inline int inotify_inode_watched(struct inode *inode)
+void inotify_d_instantiate(struct dentry *entry, struct inode *inode)
 {
-	return !list_empty(&inode->inotify_watches);
+	struct dentry *parent;
+
+	if (!inode)
+		return;
+
+	WARN_ON(entry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED);
+	spin_lock(&entry->d_lock);
+	parent = entry->d_parent;
+	if (inotify_inode_watched(parent->d_inode))
+		entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
+	spin_unlock(&entry->d_lock);
 }
 
-/* Kernel API */
+/*
+ * inotify_d_move - dcache entry has been moved
+ */
+void inotify_d_move(struct dentry *entry)
+{
+	struct dentry *parent;
+
+	parent = entry->d_parent;
+	if (inotify_inode_watched(parent->d_inode))
+		entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
+	else
+		entry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
+}
 
 /**
  * inotify_inode_queue_event - queue an event to all watches on this inode
@@ -538,7 +603,7 @@ void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
 	struct dentry *parent;
 	struct inode *inode;
 
-	if (!atomic_read (&inotify_watches))
+	if (!(dentry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED))
 		return;
 
 	spin_lock(&dentry->d_lock);
@@ -993,6 +1058,9 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
 		goto out;
 	}
 
+	if (!inotify_inode_watched(inode))
+		set_dentry_child_flags(inode, 1);
+
 	/* Add the watch to the device's and the inode's list */
 	list_add(&watch->d_list, &dev->watches);
 	list_add(&watch->i_list, &inode->inotify_watches);
@@ -1065,7 +1133,6 @@ static int __init inotify_setup(void)
 	inotify_max_user_watches = 8192;
 
 	atomic_set(&inotify_cookie, 0);
-	atomic_set(&inotify_watches, 0);
 
 	watch_cachep = kmem_cache_create("inotify_watch_cache",
 					 sizeof(struct inotify_watch),
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 95a628d8cac..7f96b5cb678 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -33,9 +33,11 @@
 #include <linux/mm.h>
 #include <linux/suspend.h>
 #include <linux/pagemap.h>
+#include <linux/kthread.h>
+#include <linux/proc_fs.h>
+
 #include <asm/uaccess.h>
 #include <asm/page.h>
-#include <linux/proc_fs.h>
 
 EXPORT_SYMBOL(journal_start);
 EXPORT_SYMBOL(journal_restart);
@@ -111,18 +113,15 @@ static void commit_timeout(unsigned long __data)
 
 static int kjournald(void *arg)
 {
-	journal_t *journal = (journal_t *) arg;
+	journal_t *journal = arg;
 	transaction_t *transaction;
-	struct timer_list timer;
 
-	daemonize("kjournald");
-
-	/* Set up an interval timer which can be used to trigger a
-           commit wakeup after the commit interval expires */
-	init_timer(&timer);
-	timer.data = (unsigned long) current;
-	timer.function = commit_timeout;
-	journal->j_commit_timer = &timer;
+	/*
+	 * Set up an interval timer which can be used to trigger a commit wakeup
+	 * after the commit interval expires
+	 */
+	setup_timer(&journal->j_commit_timer, commit_timeout,
+			(unsigned long)current);
 
 	/* Record that the journal thread is running */
 	journal->j_task = current;
@@ -146,7 +145,7 @@ loop:
 	if (journal->j_commit_sequence != journal->j_commit_request) {
 		jbd_debug(1, "OK, requests differ\n");
 		spin_unlock(&journal->j_state_lock);
-		del_timer_sync(journal->j_commit_timer);
+		del_timer_sync(&journal->j_commit_timer);
 		journal_commit_transaction(journal);
 		spin_lock(&journal->j_state_lock);
 		goto loop;
@@ -203,7 +202,7 @@ loop:
 
 end_loop:
 	spin_unlock(&journal->j_state_lock);
-	del_timer_sync(journal->j_commit_timer);
+	del_timer_sync(&journal->j_commit_timer);
 	journal->j_task = NULL;
 	wake_up(&journal->j_wait_done_commit);
 	jbd_debug(1, "Journal thread exiting.\n");
@@ -212,7 +211,7 @@ end_loop:
 
 static void journal_start_thread(journal_t *journal)
 {
-	kernel_thread(kjournald, journal, CLONE_VM|CLONE_FS|CLONE_FILES);
+	kthread_run(kjournald, journal, "kjournald");
 	wait_event(journal->j_wait_done_commit, journal->j_task != 0);
 }
 
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 5fc40888f4c..ada31fa272e 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -53,8 +53,8 @@ get_transaction(journal_t *journal, transaction_t *transaction)
 	spin_lock_init(&transaction->t_handle_lock);
 
 	/* Set up the commit timer for the new transaction. */
-	journal->j_commit_timer->expires = transaction->t_expires;
-	add_timer(journal->j_commit_timer);
+	journal->j_commit_timer.expires = transaction->t_expires;
+	add_timer(&journal->j_commit_timer);
 
 	J_ASSERT(journal->j_running_transaction == NULL);
 	journal->j_running_transaction = transaction;
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index da6354baa0b..bce74446870 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -44,32 +44,25 @@ static LIST_HEAD(nlm_blocked);
 /*
  * Queue up a lock for blocking so that the GRANTED request can see it
  */
-int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl)
+struct nlm_wait *nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl)
 {
 	struct nlm_wait *block;
 
-	BUG_ON(req->a_block != NULL);
 	block = kmalloc(sizeof(*block), GFP_KERNEL);
-	if (block == NULL)
-		return -ENOMEM;
-	block->b_host = host;
-	block->b_lock = fl;
-	init_waitqueue_head(&block->b_wait);
-	block->b_status = NLM_LCK_BLOCKED;
-
-	list_add(&block->b_list, &nlm_blocked);
-	req->a_block = block;
-
-	return 0;
+	if (block != NULL) {
+		block->b_host = host;
+		block->b_lock = fl;
+		init_waitqueue_head(&block->b_wait);
+		block->b_status = NLM_LCK_BLOCKED;
+		list_add(&block->b_list, &nlm_blocked);
+	}
+	return block;
 }
 
-void nlmclnt_finish_block(struct nlm_rqst *req)
+void nlmclnt_finish_block(struct nlm_wait *block)
 {
-	struct nlm_wait *block = req->a_block;
-
 	if (block == NULL)
 		return;
-	req->a_block = NULL;
 	list_del(&block->b_list);
 	kfree(block);
 }
@@ -77,15 +70,14 @@ void nlmclnt_finish_block(struct nlm_rqst *req)
 /*
  * Block on a lock
  */
-long nlmclnt_block(struct nlm_rqst *req, long timeout)
+int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
 {
-	struct nlm_wait	*block = req->a_block;
 	long ret;
 
 	/* A borken server might ask us to block even if we didn't
 	 * request it. Just say no!
 	 */
-	if (!req->a_args.block)
+	if (block == NULL)
 		return -EAGAIN;
 
 	/* Go to sleep waiting for GRANT callback. Some servers seem
@@ -99,13 +91,10 @@ long nlmclnt_block(struct nlm_rqst *req, long timeout)
 	ret = wait_event_interruptible_timeout(block->b_wait,
 			block->b_status != NLM_LCK_BLOCKED,
 			timeout);
-
-	if (block->b_status != NLM_LCK_BLOCKED) {
-		req->a_res.status = block->b_status;
-		block->b_status = NLM_LCK_BLOCKED;
-	}
-
-	return ret;
+	if (ret < 0)
+		return -ERESTARTSYS;
+	req->a_res.status = block->b_status;
+	return 0;
 }
 
 /*
@@ -125,7 +114,15 @@ u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock)
 	list_for_each_entry(block, &nlm_blocked, b_list) {
 		struct file_lock *fl_blocked = block->b_lock;
 
-		if (!nlm_compare_locks(fl_blocked, fl))
+		if (fl_blocked->fl_start != fl->fl_start)
+			continue;
+		if (fl_blocked->fl_end != fl->fl_end)
+			continue;
+		/*
+		 * Careful! The NLM server will return the 32-bit "pid" that
+		 * we put on the wire: in this case the lockowner "pid".
+		 */
+		if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid)
 			continue;
 		if (!nlm_cmp_addr(&block->b_host->h_addr, addr))
 			continue;
@@ -147,34 +144,6 @@ u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock)
  */
 
 /*
- * Mark the locks for reclaiming.
- * FIXME: In 2.5 we don't want to iterate through any global file_lock_list.
- *        Maintain NLM lock reclaiming lists in the nlm_host instead.
- */
-static
-void nlmclnt_mark_reclaim(struct nlm_host *host)
-{
-	struct file_lock *fl;
-	struct inode *inode;
-	struct list_head *tmp;
-
-	list_for_each(tmp, &file_lock_list) {
-		fl = list_entry(tmp, struct file_lock, fl_link);
-
-		inode = fl->fl_file->f_dentry->d_inode;
-		if (inode->i_sb->s_magic != NFS_SUPER_MAGIC)
-			continue;
-		if (fl->fl_u.nfs_fl.owner == NULL)
-			continue;
-		if (fl->fl_u.nfs_fl.owner->host != host)
-			continue;
-		if (!(fl->fl_u.nfs_fl.flags & NFS_LCK_GRANTED))
-			continue;
-		fl->fl_u.nfs_fl.flags |= NFS_LCK_RECLAIM;
-	}
-}
-
-/*
  * Someone has sent us an SM_NOTIFY. Ensure we bind to the new port number,
  * that we mark locks for reclaiming, and that we bump the pseudo NSM state.
  */
@@ -186,7 +155,12 @@ void nlmclnt_prepare_reclaim(struct nlm_host *host, u32 newstate)
 	host->h_state++;
 	host->h_nextrebind = 0;
 	nlm_rebind_host(host);
-	nlmclnt_mark_reclaim(host);
+
+	/*
+	 * Mark the locks for reclaiming.
+	 */
+	list_splice_init(&host->h_granted, &host->h_reclaim);
+
 	dprintk("NLM: reclaiming locks for host %s", host->h_name);
 }
 
@@ -215,9 +189,7 @@ reclaimer(void *ptr)
 {
 	struct nlm_host	  *host = (struct nlm_host *) ptr;
 	struct nlm_wait	  *block;
-	struct list_head *tmp;
-	struct file_lock *fl;
-	struct inode *inode;
+	struct file_lock *fl, *next;
 
 	daemonize("%s-reclaim", host->h_name);
 	allow_signal(SIGKILL);
@@ -229,23 +201,13 @@ reclaimer(void *ptr)
 
 	/* First, reclaim all locks that have been marked. */
 restart:
-	list_for_each(tmp, &file_lock_list) {
-		fl = list_entry(tmp, struct file_lock, fl_link);
+	list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) {
+		list_del_init(&fl->fl_u.nfs_fl.list);
 
-		inode = fl->fl_file->f_dentry->d_inode;
-		if (inode->i_sb->s_magic != NFS_SUPER_MAGIC)
-			continue;
-		if (fl->fl_u.nfs_fl.owner == NULL)
-			continue;
-		if (fl->fl_u.nfs_fl.owner->host != host)
-			continue;
-		if (!(fl->fl_u.nfs_fl.flags & NFS_LCK_RECLAIM))
-			continue;
-
-		fl->fl_u.nfs_fl.flags &= ~NFS_LCK_RECLAIM;
-		nlmclnt_reclaim(host, fl);
 		if (signalled())
-			break;
+			continue;
+		if (nlmclnt_reclaim(host, fl) == 0)
+			list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted);
 		goto restart;
 	}
 
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 970b6a6aa33..f96e38155b5 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -132,59 +132,18 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl)
 	memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh));
 	lock->caller  = system_utsname.nodename;
 	lock->oh.data = req->a_owner;
-	lock->oh.len  = sprintf(req->a_owner, "%d@%s",
-				current->pid, system_utsname.nodename);
-	locks_copy_lock(&lock->fl, fl);
+	lock->oh.len  = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s",
+				(unsigned int)fl->fl_u.nfs_fl.owner->pid,
+				system_utsname.nodename);
+	lock->svid = fl->fl_u.nfs_fl.owner->pid;
+	lock->fl.fl_start = fl->fl_start;
+	lock->fl.fl_end = fl->fl_end;
+	lock->fl.fl_type = fl->fl_type;
 }
 
 static void nlmclnt_release_lockargs(struct nlm_rqst *req)
 {
-	struct file_lock *fl = &req->a_args.lock.fl;
-
-	if (fl->fl_ops && fl->fl_ops->fl_release_private)
-		fl->fl_ops->fl_release_private(fl);
-}
-
-/*
- * Initialize arguments for GRANTED call. The nlm_rqst structure
- * has been cleared already.
- */
-int
-nlmclnt_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock)
-{
-	locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
-	memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
-	call->a_args.lock.caller = system_utsname.nodename;
-	call->a_args.lock.oh.len = lock->oh.len;
-
-	/* set default data area */
-	call->a_args.lock.oh.data = call->a_owner;
-
-	if (lock->oh.len > NLMCLNT_OHSIZE) {
-		void *data = kmalloc(lock->oh.len, GFP_KERNEL);
-		if (!data) {
-			nlmclnt_freegrantargs(call);
-			return 0;
-		}
-		call->a_args.lock.oh.data = (u8 *) data;
-	}
-
-	memcpy(call->a_args.lock.oh.data, lock->oh.data, lock->oh.len);
-	return 1;
-}
-
-void
-nlmclnt_freegrantargs(struct nlm_rqst *call)
-{
-	struct file_lock *fl = &call->a_args.lock.fl;
-	/*
-	 * Check whether we allocated memory for the owner.
-	 */
-	if (call->a_args.lock.oh.data != (u8 *) call->a_owner) {
-		kfree(call->a_args.lock.oh.data);
-	}
-	if (fl->fl_ops && fl->fl_ops->fl_release_private)
-		fl->fl_ops->fl_release_private(fl);
+	BUG_ON(req->a_args.lock.fl.fl_ops != NULL);
 }
 
 /*
@@ -193,9 +152,8 @@ nlmclnt_freegrantargs(struct nlm_rqst *call)
 int
 nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
 {
-	struct nfs_server	*nfssrv = NFS_SERVER(inode);
 	struct nlm_host		*host;
-	struct nlm_rqst		reqst, *call = &reqst;
+	struct nlm_rqst		*call;
 	sigset_t		oldset;
 	unsigned long		flags;
 	int			status, proto, vers;
@@ -209,23 +167,17 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
 	/* Retrieve transport protocol from NFS client */
 	proto = NFS_CLIENT(inode)->cl_xprt->prot;
 
-	if (!(host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers)))
+	host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers);
+	if (host == NULL)
 		return -ENOLCK;
 
-	/* Create RPC client handle if not there, and copy soft
-	 * and intr flags from NFS client. */
-	if (host->h_rpcclnt == NULL) {
-		struct rpc_clnt	*clnt;
+	call = nlm_alloc_call(host);
+	if (call == NULL)
+		return -ENOMEM;
 
-		/* Bind an rpc client to this host handle (does not
-		 * perform a portmapper lookup) */
-		if (!(clnt = nlm_bind_host(host))) {
-			status = -ENOLCK;
-			goto done;
-		}
-		clnt->cl_softrtry = nfssrv->client->cl_softrtry;
-		clnt->cl_intr = nfssrv->client->cl_intr;
-	}
+	nlmclnt_locks_init_private(fl, host);
+	/* Set up the argument struct */
+	nlmclnt_setlockargs(call, fl);
 
 	/* Keep the old signal mask */
 	spin_lock_irqsave(&current->sighand->siglock, flags);
@@ -238,26 +190,10 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
 	    && (current->flags & PF_EXITING)) {
 		sigfillset(&current->blocked);	/* Mask all signals */
 		recalc_sigpending();
-		spin_unlock_irqrestore(&current->sighand->siglock, flags);
 
-		call = nlmclnt_alloc_call();
-		if (!call) {
-			status = -ENOMEM;
-			goto out_restore;
-		}
 		call->a_flags = RPC_TASK_ASYNC;
-	} else {
-		spin_unlock_irqrestore(&current->sighand->siglock, flags);
-		memset(call, 0, sizeof(*call));
-		locks_init_lock(&call->a_args.lock.fl);
-		locks_init_lock(&call->a_res.lock.fl);
 	}
-	call->a_host = host;
-
-	nlmclnt_locks_init_private(fl, host);
-
-	/* Set up the argument struct */
-	nlmclnt_setlockargs(call, fl);
+	spin_unlock_irqrestore(&current->sighand->siglock, flags);
 
 	if (IS_SETLK(cmd) || IS_SETLKW(cmd)) {
 		if (fl->fl_type != F_UNLCK) {
@@ -270,41 +206,58 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
 	else
 		status = -EINVAL;
 
- out_restore:
+	fl->fl_ops->fl_release_private(fl);
+	fl->fl_ops = NULL;
+
 	spin_lock_irqsave(&current->sighand->siglock, flags);
 	current->blocked = oldset;
 	recalc_sigpending();
 	spin_unlock_irqrestore(&current->sighand->siglock, flags);
 
-done:
 	dprintk("lockd: clnt proc returns %d\n", status);
-	nlm_release_host(host);
 	return status;
 }
 EXPORT_SYMBOL(nlmclnt_proc);
 
 /*
  * Allocate an NLM RPC call struct
+ *
+ * Note: the caller must hold a reference to host. In case of failure,
+ * this reference will be released.
  */
-struct nlm_rqst *
-nlmclnt_alloc_call(void)
+struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
 {
 	struct nlm_rqst	*call;
 
-	while (!signalled()) {
-		call = (struct nlm_rqst *) kmalloc(sizeof(struct nlm_rqst), GFP_KERNEL);
-		if (call) {
-			memset(call, 0, sizeof(*call));
+	for(;;) {
+		call = kzalloc(sizeof(*call), GFP_KERNEL);
+		if (call != NULL) {
 			locks_init_lock(&call->a_args.lock.fl);
 			locks_init_lock(&call->a_res.lock.fl);
+			call->a_host = host;
 			return call;
 		}
-		printk("nlmclnt_alloc_call: failed, waiting for memory\n");
+		if (signalled())
+			break;
+		printk("nlm_alloc_call: failed, waiting for memory\n");
 		schedule_timeout_interruptible(5*HZ);
 	}
+	nlm_release_host(host);
 	return NULL;
 }
 
+void nlm_release_call(struct nlm_rqst *call)
+{
+	nlm_release_host(call->a_host);
+	nlmclnt_release_lockargs(call);
+	kfree(call);
+}
+
+static void nlmclnt_rpc_release(void *data)
+{
+	return nlm_release_call(data);
+}
+
 static int nlm_wait_on_grace(wait_queue_head_t *queue)
 {
 	DEFINE_WAIT(wait);
@@ -401,57 +354,45 @@ in_grace_period:
 /*
  * Generic NLM call, async version.
  */
-int nlmsvc_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
+static int __nlm_async_call(struct nlm_rqst *req, u32 proc, struct rpc_message *msg, const struct rpc_call_ops *tk_ops)
 {
 	struct nlm_host	*host = req->a_host;
 	struct rpc_clnt	*clnt;
-	struct rpc_message msg = {
-		.rpc_argp	= &req->a_args,
-		.rpc_resp	= &req->a_res,
-	};
-	int		status;
+	int status = -ENOLCK;
 
 	dprintk("lockd: call procedure %d on %s (async)\n",
 			(int)proc, host->h_name);
 
 	/* If we have no RPC client yet, create one. */
-	if ((clnt = nlm_bind_host(host)) == NULL)
-		return -ENOLCK;
-	msg.rpc_proc = &clnt->cl_procinfo[proc];
+	clnt = nlm_bind_host(host);
+	if (clnt == NULL)
+		goto out_err;
+	msg->rpc_proc = &clnt->cl_procinfo[proc];
 
         /* bootstrap and kick off the async RPC call */
-        status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, tk_ops, req);
-
+        status = rpc_call_async(clnt, msg, RPC_TASK_ASYNC, tk_ops, req);
+	if (status == 0)
+		return 0;
+out_err:
+	nlm_release_call(req);
 	return status;
 }
 
-static int nlmclnt_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
+int nlm_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
 {
-	struct nlm_host	*host = req->a_host;
-	struct rpc_clnt	*clnt;
-	struct nlm_args	*argp = &req->a_args;
-	struct nlm_res	*resp = &req->a_res;
 	struct rpc_message msg = {
-		.rpc_argp	= argp,
-		.rpc_resp	= resp,
+		.rpc_argp	= &req->a_args,
+		.rpc_resp	= &req->a_res,
 	};
-	int		status;
-
-	dprintk("lockd: call procedure %d on %s (async)\n",
-			(int)proc, host->h_name);
-
-	/* If we have no RPC client yet, create one. */
-	if ((clnt = nlm_bind_host(host)) == NULL)
-		return -ENOLCK;
-	msg.rpc_proc = &clnt->cl_procinfo[proc];
+	return __nlm_async_call(req, proc, &msg, tk_ops);
+}
 
-	/* Increment host refcount */
-	nlm_get_host(host);
-        /* bootstrap and kick off the async RPC call */
-        status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, tk_ops, req);
-	if (status < 0)
-		nlm_release_host(host);
-	return status;
+int nlm_async_reply(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
+{
+	struct rpc_message msg = {
+		.rpc_argp	= &req->a_res,
+	};
+	return __nlm_async_call(req, proc, &msg, tk_ops);
 }
 
 /*
@@ -463,36 +404,41 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl)
 	int	status;
 
 	status = nlmclnt_call(req, NLMPROC_TEST);
-	nlmclnt_release_lockargs(req);
 	if (status < 0)
-		return status;
+		goto out;
 
-	status = req->a_res.status;
-	if (status == NLM_LCK_GRANTED) {
-		fl->fl_type = F_UNLCK;
-	} if (status == NLM_LCK_DENIED) {
-		/*
-		 * Report the conflicting lock back to the application.
-		 */
-		locks_copy_lock(fl, &req->a_res.lock.fl);
-		fl->fl_pid = 0;
-	} else {
-		return nlm_stat_to_errno(req->a_res.status);
+	switch (req->a_res.status) {
+		case NLM_LCK_GRANTED:
+			fl->fl_type = F_UNLCK;
+			break;
+		case NLM_LCK_DENIED:
+			/*
+			 * Report the conflicting lock back to the application.
+			 */
+			fl->fl_start = req->a_res.lock.fl.fl_start;
+			fl->fl_end = req->a_res.lock.fl.fl_start;
+			fl->fl_type = req->a_res.lock.fl.fl_type;
+			fl->fl_pid = 0;
+			break;
+		default:
+			status = nlm_stat_to_errno(req->a_res.status);
 	}
-
-	return 0;
+out:
+	nlm_release_call(req);
+	return status;
 }
 
 static void nlmclnt_locks_copy_lock(struct file_lock *new, struct file_lock *fl)
 {
-	memcpy(&new->fl_u.nfs_fl, &fl->fl_u.nfs_fl, sizeof(new->fl_u.nfs_fl));
-	nlm_get_lockowner(new->fl_u.nfs_fl.owner);
+	new->fl_u.nfs_fl.state = fl->fl_u.nfs_fl.state;
+	new->fl_u.nfs_fl.owner = nlm_get_lockowner(fl->fl_u.nfs_fl.owner);
+	list_add_tail(&new->fl_u.nfs_fl.list, &fl->fl_u.nfs_fl.owner->host->h_granted);
 }
 
 static void nlmclnt_locks_release_private(struct file_lock *fl)
 {
+	list_del(&fl->fl_u.nfs_fl.list);
 	nlm_put_lockowner(fl->fl_u.nfs_fl.owner);
-	fl->fl_ops = NULL;
 }
 
 static struct file_lock_operations nlmclnt_lock_ops = {
@@ -504,8 +450,8 @@ static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *ho
 {
 	BUG_ON(fl->fl_ops != NULL);
 	fl->fl_u.nfs_fl.state = 0;
-	fl->fl_u.nfs_fl.flags = 0;
 	fl->fl_u.nfs_fl.owner = nlm_find_lockowner(host, fl->fl_owner);
+	INIT_LIST_HEAD(&fl->fl_u.nfs_fl.list);
 	fl->fl_ops = &nlmclnt_lock_ops;
 }
 
@@ -552,57 +498,52 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 {
 	struct nlm_host	*host = req->a_host;
 	struct nlm_res	*resp = &req->a_res;
-	long timeout;
-	int status;
+	struct nlm_wait *block = NULL;
+	int status = -ENOLCK;
 
 	if (!host->h_monitored && nsm_monitor(host) < 0) {
 		printk(KERN_NOTICE "lockd: failed to monitor %s\n",
 					host->h_name);
-		status = -ENOLCK;
 		goto out;
 	}
 
-	if (req->a_args.block) {
-		status = nlmclnt_prepare_block(req, host, fl);
-		if (status < 0)
-			goto out;
-	}
+	block = nlmclnt_prepare_block(host, fl);
 	for(;;) {
 		status = nlmclnt_call(req, NLMPROC_LOCK);
 		if (status < 0)
 			goto out_unblock;
-		if (resp->status != NLM_LCK_BLOCKED)
+		if (!req->a_args.block)
 			break;
-		/* Wait on an NLM blocking lock */
-		timeout = nlmclnt_block(req, NLMCLNT_POLL_TIMEOUT);
 		/* Did a reclaimer thread notify us of a server reboot? */
 		if (resp->status ==  NLM_LCK_DENIED_GRACE_PERIOD)
 			continue;
 		if (resp->status != NLM_LCK_BLOCKED)
 			break;
-		if (timeout >= 0)
-			continue;
-		/* We were interrupted. Send a CANCEL request to the server
+		/* Wait on an NLM blocking lock */
+		status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT);
+		/* if we were interrupted. Send a CANCEL request to the server
 		 * and exit
 		 */
-		status = (int)timeout;
-		goto out_unblock;
+		if (status < 0)
+			goto out_unblock;
+		if (resp->status != NLM_LCK_BLOCKED)
+			break;
 	}
 
 	if (resp->status == NLM_LCK_GRANTED) {
 		fl->fl_u.nfs_fl.state = host->h_state;
-		fl->fl_u.nfs_fl.flags |= NFS_LCK_GRANTED;
 		fl->fl_flags |= FL_SLEEP;
+		/* Ensure the resulting lock will get added to granted list */
 		do_vfs_lock(fl);
 	}
 	status = nlm_stat_to_errno(resp->status);
 out_unblock:
-	nlmclnt_finish_block(req);
+	nlmclnt_finish_block(block);
 	/* Cancel the blocked request if it is still pending */
 	if (resp->status == NLM_LCK_BLOCKED)
 		nlmclnt_cancel(host, req->a_args.block, fl);
 out:
-	nlmclnt_release_lockargs(req);
+	nlm_release_call(req);
 	return status;
 }
 
@@ -658,10 +599,6 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
 	struct nlm_res	*resp = &req->a_res;
 	int		status;
 
-	/* Clean the GRANTED flag now so the lock doesn't get
-	 * reclaimed while we're stuck in the unlock call. */
-	fl->fl_u.nfs_fl.flags &= ~NFS_LCK_GRANTED;
-
 	/*
 	 * Note: the server is supposed to either grant us the unlock
 	 * request, or to deny it with NLM_LCK_DENIED_GRACE_PERIOD. In either
@@ -669,32 +606,24 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
 	 */
 	do_vfs_lock(fl);
 
-	if (req->a_flags & RPC_TASK_ASYNC) {
-		status = nlmclnt_async_call(req, NLMPROC_UNLOCK,
-					&nlmclnt_unlock_ops);
-		/* Hrmf... Do the unlock early since locks_remove_posix()
-		 * really expects us to free the lock synchronously */
-		if (status < 0) {
-			nlmclnt_release_lockargs(req);
-			kfree(req);
-		}
-		return status;
-	}
+	if (req->a_flags & RPC_TASK_ASYNC)
+		return nlm_async_call(req, NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
 
 	status = nlmclnt_call(req, NLMPROC_UNLOCK);
-	nlmclnt_release_lockargs(req);
 	if (status < 0)
-		return status;
+		goto out;
 
+	status = 0;
 	if (resp->status == NLM_LCK_GRANTED)
-		return 0;
+		goto out;
 
 	if (resp->status != NLM_LCK_DENIED_NOLOCKS)
 		printk("lockd: unexpected unlock status: %d\n", resp->status);
-
 	/* What to do now? I'm out of my depth... */
-
-	return -ENOLCK;
+	status = -ENOLCK;
+out:
+	nlm_release_call(req);
+	return status;
 }
 
 static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
@@ -716,9 +645,6 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
 	if (status != NLM_LCK_GRANTED)
 		printk(KERN_WARNING "lockd: unexpected unlock status: %d\n", status);
 die:
-	nlm_release_host(req->a_host);
-	nlmclnt_release_lockargs(req);
-	kfree(req);
 	return;
  retry_rebind:
 	nlm_rebind_host(req->a_host);
@@ -728,6 +654,7 @@ die:
 
 static const struct rpc_call_ops nlmclnt_unlock_ops = {
 	.rpc_call_done = nlmclnt_unlock_callback,
+	.rpc_release = nlmclnt_rpc_release,
 };
 
 /*
@@ -749,20 +676,15 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl
 	recalc_sigpending();
 	spin_unlock_irqrestore(&current->sighand->siglock, flags);
 
-	req = nlmclnt_alloc_call();
+	req = nlm_alloc_call(nlm_get_host(host));
 	if (!req)
 		return -ENOMEM;
-	req->a_host  = host;
 	req->a_flags = RPC_TASK_ASYNC;
 
 	nlmclnt_setlockargs(req, fl);
 	req->a_args.block = block;
 
-	status = nlmclnt_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops);
-	if (status < 0) {
-		nlmclnt_release_lockargs(req);
-		kfree(req);
-	}
+	status = nlm_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops);
 
 	spin_lock_irqsave(&current->sighand->siglock, flags);
 	current->blocked = oldset;
@@ -791,6 +713,7 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data)
 	switch (req->a_res.status) {
 	case NLM_LCK_GRANTED:
 	case NLM_LCK_DENIED_GRACE_PERIOD:
+	case NLM_LCK_DENIED:
 		/* Everything's good */
 		break;
 	case NLM_LCK_DENIED_NOLOCKS:
@@ -802,9 +725,6 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data)
 	}
 
 die:
-	nlm_release_host(req->a_host);
-	nlmclnt_release_lockargs(req);
-	kfree(req);
 	return;
 
 retry_cancel:
@@ -818,6 +738,7 @@ retry_cancel:
 
 static const struct rpc_call_ops nlmclnt_cancel_ops = {
 	.rpc_call_done = nlmclnt_cancel_callback,
+	.rpc_release = nlmclnt_rpc_release,
 };
 
 /*
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 82f7a0b1d8a..112ebf8b8df 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -123,6 +123,8 @@ nlm_lookup_host(int server, struct sockaddr_in *sin,
 	nlm_hosts[hash]    = host;
 	INIT_LIST_HEAD(&host->h_lockowners);
 	spin_lock_init(&host->h_lock);
+	INIT_LIST_HEAD(&host->h_granted);
+	INIT_LIST_HEAD(&host->h_reclaim);
 
 	if (++nrhosts > NLM_HOST_MAX)
 		next_gc = 0;
@@ -191,11 +193,12 @@ nlm_bind_host(struct nlm_host *host)
 		xprt->resvport = 1;	/* NLM requires a reserved port */
 
 		/* Existing NLM servers accept AUTH_UNIX only */
-		clnt = rpc_create_client(xprt, host->h_name, &nlm_program,
+		clnt = rpc_new_client(xprt, host->h_name, &nlm_program,
 					host->h_version, RPC_AUTH_UNIX);
 		if (IS_ERR(clnt))
 			goto forgetit;
 		clnt->cl_autobind = 1;	/* turn on pmap queries */
+		clnt->cl_softrtry = 1; /* All queries are soft */
 
 		host->h_rpcclnt = clnt;
 	}
@@ -242,8 +245,12 @@ void nlm_release_host(struct nlm_host *host)
 {
 	if (host != NULL) {
 		dprintk("lockd: release host %s\n", host->h_name);
-		atomic_dec(&host->h_count);
 		BUG_ON(atomic_read(&host->h_count) < 0);
+		if (atomic_dec_and_test(&host->h_count)) {
+			BUG_ON(!list_empty(&host->h_lockowners));
+			BUG_ON(!list_empty(&host->h_granted));
+			BUG_ON(!list_empty(&host->h_reclaim));
+		}
 	}
 }
 
@@ -331,7 +338,6 @@ nlm_gc_hosts(void)
 					rpc_destroy_client(host->h_rpcclnt);
 				}
 			}
-			BUG_ON(!list_empty(&host->h_lockowners));
 			kfree(host);
 			nrhosts--;
 		}
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index a89cb8aa2c8..3fc683f46b3 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -35,6 +35,10 @@ nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res)
 	struct rpc_clnt	*clnt;
 	int		status;
 	struct nsm_args	args;
+	struct rpc_message msg = {
+		.rpc_argp	= &args,
+		.rpc_resp	= res,
+	};
 
 	clnt = nsm_create();
 	if (IS_ERR(clnt)) {
@@ -49,7 +53,8 @@ nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res)
 	args.proc = NLMPROC_NSM_NOTIFY;
 	memset(res, 0, sizeof(*res));
 
-	status = rpc_call(clnt, proc, &args, res, 0);
+	msg.rpc_proc = &clnt->cl_procinfo[proc];
+	status = rpc_call_sync(clnt, &msg, 0);
 	if (status < 0)
 		printk(KERN_DEBUG "nsm_mon_unmon: rpc failed, status=%d\n",
 			status);
@@ -214,12 +219,16 @@ static struct rpc_procinfo	nsm_procedures[] = {
 		.p_encode	= (kxdrproc_t) xdr_encode_mon,
 		.p_decode	= (kxdrproc_t) xdr_decode_stat_res,
 		.p_bufsiz	= MAX(SM_mon_sz, SM_monres_sz) << 2,
+		.p_statidx	= SM_MON,
+		.p_name		= "MONITOR",
 	},
 [SM_UNMON] = {
 		.p_proc		= SM_UNMON,
 		.p_encode	= (kxdrproc_t) xdr_encode_unmon,
 		.p_decode	= (kxdrproc_t) xdr_decode_stat,
 		.p_bufsiz	= MAX(SM_mon_id_sz, SM_unmonres_sz) << 2,
+		.p_statidx	= SM_UNMON,
+		.p_name		= "UNMONITOR",
 	},
 };
 
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index b10f913aa06..a2dd9ccb9b3 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -21,10 +21,6 @@
 
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
 
-static u32	nlm4svc_callback(struct svc_rqst *, u32, struct nlm_res *);
-
-static const struct rpc_call_ops nlm4svc_callback_ops;
-
 /*
  * Obtain client and file from arguments
  */
@@ -234,83 +230,89 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
 }
 
 /*
+ * This is the generic lockd callback for async RPC calls
+ */
+static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
+{
+	dprintk("lockd: %4d callback returned %d\n", task->tk_pid,
+			-task->tk_status);
+}
+
+static void nlm4svc_callback_release(void *data)
+{
+	nlm_release_call(data);
+}
+
+static const struct rpc_call_ops nlm4svc_callback_ops = {
+	.rpc_call_done = nlm4svc_callback_exit,
+	.rpc_release = nlm4svc_callback_release,
+};
+
+/*
  * `Async' versions of the above service routines. They aren't really,
  * because we send the callback before the reply proper. I hope this
  * doesn't break any clients.
  */
-static int
-nlm4svc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void	     *resp)
+static int nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
+		int (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res  *))
 {
-	struct nlm_res	res;
-	u32		stat;
+	struct nlm_host	*host;
+	struct nlm_rqst	*call;
+	int stat;
 
-	dprintk("lockd: TEST_MSG      called\n");
-	memset(&res, 0, sizeof(res));
+	host = nlmsvc_lookup_host(rqstp);
+	if (host == NULL)
+		return rpc_system_err;
+
+	call = nlm_alloc_call(host);
+	if (call == NULL)
+		return rpc_system_err;
 
-	if ((stat = nlm4svc_proc_test(rqstp, argp, &res)) == 0)
-		stat = nlm4svc_callback(rqstp, NLMPROC_TEST_RES, &res);
-	return stat;
+	stat = func(rqstp, argp, &call->a_res);
+	if (stat != 0) {
+		nlm_release_call(call);
+		return stat;
+	}
+
+	call->a_flags = RPC_TASK_ASYNC;
+	if (nlm_async_reply(call, proc, &nlm4svc_callback_ops) < 0)
+		return rpc_system_err;
+	return rpc_success;
 }
 
-static int
-nlm4svc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlm4svc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
 					     void	     *resp)
 {
-	struct nlm_res	res;
-	u32		stat;
+	dprintk("lockd: TEST_MSG      called\n");
+	return nlm4svc_callback(rqstp, NLMPROC_TEST_RES, argp, nlm4svc_proc_test);
+}
 
+static int nlm4svc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+					     void	     *resp)
+{
 	dprintk("lockd: LOCK_MSG      called\n");
-	memset(&res, 0, sizeof(res));
-
-	if ((stat = nlm4svc_proc_lock(rqstp, argp, &res)) == 0)
-		stat = nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, &res);
-	return stat;
+	return nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlm4svc_proc_lock);
 }
 
-static int
-nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
 					       void	       *resp)
 {
-	struct nlm_res	res;
-	u32		stat;
-
 	dprintk("lockd: CANCEL_MSG    called\n");
-	memset(&res, 0, sizeof(res));
-
-	if ((stat = nlm4svc_proc_cancel(rqstp, argp, &res)) == 0)
-		stat = nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, &res);
-	return stat;
+	return nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlm4svc_proc_cancel);
 }
 
-static int
-nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
                                                void            *resp)
 {
-	struct nlm_res	res;
-	u32		stat;
-
 	dprintk("lockd: UNLOCK_MSG    called\n");
-	memset(&res, 0, sizeof(res));
-
-	if ((stat = nlm4svc_proc_unlock(rqstp, argp, &res)) == 0)
-		stat = nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, &res);
-	return stat;
+	return nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlm4svc_proc_unlock);
 }
 
-static int
-nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
                                                 void            *resp)
 {
-	struct nlm_res	res;
-	u32		stat;
-
 	dprintk("lockd: GRANTED_MSG   called\n");
-	memset(&res, 0, sizeof(res));
-
-	if ((stat = nlm4svc_proc_granted(rqstp, argp, &res)) == 0)
-		stat = nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, &res);
-	return stat;
+	return nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, argp, nlm4svc_proc_granted);
 }
 
 /*
@@ -472,55 +474,6 @@ nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
 
 
 /*
- * This is the generic lockd callback for async RPC calls
- */
-static u32
-nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp)
-{
-	struct nlm_host	*host;
-	struct nlm_rqst	*call;
-
-	if (!(call = nlmclnt_alloc_call()))
-		return rpc_system_err;
-
-	host = nlmclnt_lookup_host(&rqstp->rq_addr,
-				rqstp->rq_prot, rqstp->rq_vers);
-	if (!host) {
-		kfree(call);
-		return rpc_system_err;
-	}
-
-	call->a_flags = RPC_TASK_ASYNC;
-	call->a_host  = host;
-	memcpy(&call->a_args, resp, sizeof(*resp));
-
-	if (nlmsvc_async_call(call, proc, &nlm4svc_callback_ops) < 0)
-		goto error;
-
-	return rpc_success;
- error:
-	kfree(call);
-	nlm_release_host(host);
-	return rpc_system_err;
-}
-
-static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
-{
-	struct nlm_rqst	*call = data;
-
-	if (task->tk_status < 0) {
-		dprintk("lockd: %4d callback failed (errno = %d)\n",
-					task->tk_pid, -task->tk_status);
-	}
-	nlm_release_host(call->a_host);
-	kfree(call);
-}
-
-static const struct rpc_call_ops nlm4svc_callback_ops = {
-	.rpc_call_done = nlm4svc_callback_exit,
-};
-
-/*
  * NLM Server procedures.
  */
 
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 9cfced65d4a..d2b66bad7d5 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -39,9 +39,12 @@
 #define nlm_deadlock	nlm_lck_denied
 #endif
 
+static void nlmsvc_release_block(struct nlm_block *block);
 static void	nlmsvc_insert_block(struct nlm_block *block, unsigned long);
 static int	nlmsvc_remove_block(struct nlm_block *block);
 
+static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock);
+static void nlmsvc_freegrantargs(struct nlm_rqst *call);
 static const struct rpc_call_ops nlmsvc_grant_ops;
 
 /*
@@ -58,6 +61,7 @@ nlmsvc_insert_block(struct nlm_block *block, unsigned long when)
 	struct nlm_block **bp, *b;
 
 	dprintk("lockd: nlmsvc_insert_block(%p, %ld)\n", block, when);
+	kref_get(&block->b_count);
 	if (block->b_queued)
 		nlmsvc_remove_block(block);
 	bp = &nlm_blocked;
@@ -90,6 +94,7 @@ nlmsvc_remove_block(struct nlm_block *block)
 		if (b == block) {
 			*bp = block->b_next;
 			block->b_queued = 0;
+			nlmsvc_release_block(block);
 			return 1;
 		}
 	}
@@ -98,11 +103,10 @@ nlmsvc_remove_block(struct nlm_block *block)
 }
 
 /*
- * Find a block for a given lock and optionally remove it from
- * the list.
+ * Find a block for a given lock
  */
 static struct nlm_block *
-nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock, int remove)
+nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock)
 {
 	struct nlm_block	**head, *block;
 	struct file_lock	*fl;
@@ -112,17 +116,14 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock, int remove)
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end, lock->fl.fl_type);
 	for (head = &nlm_blocked; (block = *head) != 0; head = &block->b_next) {
-		fl = &block->b_call.a_args.lock.fl;
+		fl = &block->b_call->a_args.lock.fl;
 		dprintk("lockd: check f=%p pd=%d %Ld-%Ld ty=%d cookie=%s\n",
 				block->b_file, fl->fl_pid,
 				(long long)fl->fl_start,
 				(long long)fl->fl_end, fl->fl_type,
-				nlmdbg_cookie2a(&block->b_call.a_args.cookie));
+				nlmdbg_cookie2a(&block->b_call->a_args.cookie));
 		if (block->b_file == file && nlm_compare_locks(fl, &lock->fl)) {
-			if (remove) {
-				*head = block->b_next;
-				block->b_queued = 0;
-			}
+			kref_get(&block->b_count);
 			return block;
 		}
 	}
@@ -150,11 +151,13 @@ nlmsvc_find_block(struct nlm_cookie *cookie,  struct sockaddr_in *sin)
 	for (block = nlm_blocked; block; block = block->b_next) {
 		dprintk("cookie: head of blocked queue %p, block %p\n", 
 			nlm_blocked, block);
-		if (nlm_cookie_match(&block->b_call.a_args.cookie,cookie)
+		if (nlm_cookie_match(&block->b_call->a_args.cookie,cookie)
 				&& nlm_cmp_addr(sin, &block->b_host->h_addr))
 			break;
 	}
 
+	if (block != NULL)
+		kref_get(&block->b_count);
 	return block;
 }
 
@@ -174,27 +177,30 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
 {
 	struct nlm_block	*block;
 	struct nlm_host		*host;
-	struct nlm_rqst		*call;
+	struct nlm_rqst		*call = NULL;
 
 	/* Create host handle for callback */
-	host = nlmclnt_lookup_host(&rqstp->rq_addr,
-				rqstp->rq_prot, rqstp->rq_vers);
+	host = nlmsvc_lookup_host(rqstp);
 	if (host == NULL)
 		return NULL;
 
+	call = nlm_alloc_call(host);
+	if (call == NULL)
+		return NULL;
+
 	/* Allocate memory for block, and initialize arguments */
-	if (!(block = (struct nlm_block *) kmalloc(sizeof(*block), GFP_KERNEL)))
+	block = kzalloc(sizeof(*block), GFP_KERNEL);
+	if (block == NULL)
 		goto failed;
-	memset(block, 0, sizeof(*block));
-	locks_init_lock(&block->b_call.a_args.lock.fl);
-	locks_init_lock(&block->b_call.a_res.lock.fl);
+	kref_init(&block->b_count);
 
-	if (!nlmclnt_setgrantargs(&block->b_call, lock))
+	if (!nlmsvc_setgrantargs(call, lock))
 		goto failed_free;
 
 	/* Set notifier function for VFS, and init args */
-	block->b_call.a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations;
-	block->b_call.a_args.cookie = *cookie;	/* see above */
+	call->a_args.lock.fl.fl_flags |= FL_SLEEP;
+	call->a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations;
+	call->a_args.cookie = *cookie;	/* see above */
 
 	dprintk("lockd: created block %p...\n", block);
 
@@ -202,22 +208,23 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
 	block->b_daemon = rqstp->rq_server;
 	block->b_host   = host;
 	block->b_file   = file;
+	file->f_count++;
 
 	/* Add to file's list of blocks */
 	block->b_fnext  = file->f_blocks;
 	file->f_blocks  = block;
 
 	/* Set up RPC arguments for callback */
-	call = &block->b_call;
-	call->a_host    = host;
+	block->b_call = call;
 	call->a_flags   = RPC_TASK_ASYNC;
+	call->a_block = block;
 
 	return block;
 
 failed_free:
 	kfree(block);
 failed:
-	nlm_release_host(host);
+	nlm_release_call(call);
 	return NULL;
 }
 
@@ -227,29 +234,26 @@ failed:
  * It is the caller's responsibility to check whether the file
  * can be closed hereafter.
  */
-static int
-nlmsvc_delete_block(struct nlm_block *block, int unlock)
+static int nlmsvc_unlink_block(struct nlm_block *block)
 {
-	struct file_lock	*fl = &block->b_call.a_args.lock.fl;
-	struct nlm_file		*file = block->b_file;
-	struct nlm_block	**bp;
-	int status = 0;
-
-	dprintk("lockd: deleting block %p...\n", block);
+	int status;
+	dprintk("lockd: unlinking block %p...\n", block);
 
 	/* Remove block from list */
+	status = posix_unblock_lock(block->b_file->f_file, &block->b_call->a_args.lock.fl);
 	nlmsvc_remove_block(block);
-	if (unlock)
-		status = posix_unblock_lock(file->f_file, fl);
+	return status;
+}
 
-	/* If the block is in the middle of a GRANT callback,
-	 * don't kill it yet. */
-	if (block->b_incall) {
-		nlmsvc_insert_block(block, NLM_NEVER);
-		block->b_done = 1;
-		return status;
-	}
+static void nlmsvc_free_block(struct kref *kref)
+{
+	struct nlm_block *block = container_of(kref, struct nlm_block, b_count);
+	struct nlm_file		*file = block->b_file;
+	struct nlm_block	**bp;
 
+	dprintk("lockd: freeing block %p...\n", block);
+
+	down(&file->f_sema);
 	/* Remove block from file's list of blocks */
 	for (bp = &file->f_blocks; *bp; bp = &(*bp)->b_fnext) {
 		if (*bp == block) {
@@ -257,36 +261,93 @@ nlmsvc_delete_block(struct nlm_block *block, int unlock)
 			break;
 		}
 	}
+	up(&file->f_sema);
 
-	if (block->b_host)
-		nlm_release_host(block->b_host);
-	nlmclnt_freegrantargs(&block->b_call);
+	nlmsvc_freegrantargs(block->b_call);
+	nlm_release_call(block->b_call);
+	nlm_release_file(block->b_file);
 	kfree(block);
-	return status;
+}
+
+static void nlmsvc_release_block(struct nlm_block *block)
+{
+	if (block != NULL)
+		kref_put(&block->b_count, nlmsvc_free_block);
+}
+
+static void nlmsvc_act_mark(struct nlm_host *host, struct nlm_file *file)
+{
+	struct nlm_block *block;
+
+	down(&file->f_sema);
+	for (block = file->f_blocks; block != NULL; block = block->b_fnext)
+		block->b_host->h_inuse = 1;
+	up(&file->f_sema);
+}
+
+static void nlmsvc_act_unlock(struct nlm_host *host, struct nlm_file *file)
+{
+	struct nlm_block *block;
+
+restart:
+	down(&file->f_sema);
+	for (block = file->f_blocks; block != NULL; block = block->b_fnext) {
+		if (host != NULL && host != block->b_host)
+			continue;
+		if (!block->b_queued)
+			continue;
+		kref_get(&block->b_count);
+		up(&file->f_sema);
+		nlmsvc_unlink_block(block);
+		nlmsvc_release_block(block);
+		goto restart;
+	}
+	up(&file->f_sema);
 }
 
 /*
  * Loop over all blocks and perform the action specified.
  * (NLM_ACT_CHECK handled by nlmsvc_inspect_file).
  */
-int
+void
 nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action)
 {
-	struct nlm_block	*block, *next;
-	/* XXX: Will everything get cleaned up if we don't unlock here? */
+	if (action == NLM_ACT_MARK)
+		nlmsvc_act_mark(host, file);
+	else
+		nlmsvc_act_unlock(host, file);
+}
 
-	down(&file->f_sema);
-	for (block = file->f_blocks; block; block = next) {
-		next = block->b_fnext;
-		if (action == NLM_ACT_MARK)
-			block->b_host->h_inuse = 1;
-		else if (action == NLM_ACT_UNLOCK) {
-			if (host == NULL || host == block->b_host)
-				nlmsvc_delete_block(block, 1);
-		}
+/*
+ * Initialize arguments for GRANTED call. The nlm_rqst structure
+ * has been cleared already.
+ */
+static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock)
+{
+	locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
+	memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
+	call->a_args.lock.caller = system_utsname.nodename;
+	call->a_args.lock.oh.len = lock->oh.len;
+
+	/* set default data area */
+	call->a_args.lock.oh.data = call->a_owner;
+	call->a_args.lock.svid = lock->fl.fl_pid;
+
+	if (lock->oh.len > NLMCLNT_OHSIZE) {
+		void *data = kmalloc(lock->oh.len, GFP_KERNEL);
+		if (!data)
+			return 0;
+		call->a_args.lock.oh.data = (u8 *) data;
 	}
-	up(&file->f_sema);
-	return 0;
+
+	memcpy(call->a_args.lock.oh.data, lock->oh.data, lock->oh.len);
+	return 1;
+}
+
+static void nlmsvc_freegrantargs(struct nlm_rqst *call)
+{
+	if (call->a_args.lock.oh.data != call->a_owner)
+		kfree(call->a_args.lock.oh.data);
 }
 
 /*
@@ -297,9 +358,9 @@ u32
 nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 			struct nlm_lock *lock, int wait, struct nlm_cookie *cookie)
 {
-	struct file_lock	*conflock;
-	struct nlm_block	*block;
+	struct nlm_block	*block, *newblock = NULL;
 	int			error;
+	u32			ret;
 
 	dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
 				file->f_file->f_dentry->d_inode->i_sb->s_id,
@@ -310,69 +371,65 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 				wait);
 
 
-	/* Get existing block (in case client is busy-waiting) */
-	block = nlmsvc_lookup_block(file, lock, 0);
-
-	lock->fl.fl_flags |= FL_LOCKD;
-
+	lock->fl.fl_flags &= ~FL_SLEEP;
 again:
 	/* Lock file against concurrent access */
 	down(&file->f_sema);
+	/* Get existing block (in case client is busy-waiting) */
+	block = nlmsvc_lookup_block(file, lock);
+	if (block == NULL) {
+		if (newblock != NULL)
+			lock = &newblock->b_call->a_args.lock;
+	} else
+		lock = &block->b_call->a_args.lock;
 
-	if (!(conflock = posix_test_lock(file->f_file, &lock->fl))) {
-		error = posix_lock_file(file->f_file, &lock->fl);
+	error = posix_lock_file(file->f_file, &lock->fl);
+	lock->fl.fl_flags &= ~FL_SLEEP;
 
-		if (block)
-			nlmsvc_delete_block(block, 0);
-		up(&file->f_sema);
+	dprintk("lockd: posix_lock_file returned %d\n", error);
 
-		dprintk("lockd: posix_lock_file returned %d\n", -error);
-		switch(-error) {
+	switch(error) {
 		case 0:
-			return nlm_granted;
-		case EDEADLK:
-			return nlm_deadlock;
-		case EAGAIN:
-			return nlm_lck_denied;
+			ret = nlm_granted;
+			goto out;
+		case -EAGAIN:
+			break;
+		case -EDEADLK:
+			ret = nlm_deadlock;
+			goto out;
 		default:			/* includes ENOLCK */
-			return nlm_lck_denied_nolocks;
-		}
+			ret = nlm_lck_denied_nolocks;
+			goto out;
 	}
 
-	if (!wait) {
-		up(&file->f_sema);
-		return nlm_lck_denied;
-	}
+	ret = nlm_lck_denied;
+	if (!wait)
+		goto out;
 
-	if (posix_locks_deadlock(&lock->fl, conflock)) {
-		up(&file->f_sema);
-		return nlm_deadlock;
-	}
+	ret = nlm_lck_blocked;
+	if (block != NULL)
+		goto out;
 
 	/* If we don't have a block, create and initialize it. Then
 	 * retry because we may have slept in kmalloc. */
 	/* We have to release f_sema as nlmsvc_create_block may try to
 	 * to claim it while doing host garbage collection */
-	if (block == NULL) {
+	if (newblock == NULL) {
 		up(&file->f_sema);
 		dprintk("lockd: blocking on this lock (allocating).\n");
-		if (!(block = nlmsvc_create_block(rqstp, file, lock, cookie)))
+		if (!(newblock = nlmsvc_create_block(rqstp, file, lock, cookie)))
 			return nlm_lck_denied_nolocks;
 		goto again;
 	}
 
 	/* Append to list of blocked */
-	nlmsvc_insert_block(block, NLM_NEVER);
-
-	if (list_empty(&block->b_call.a_args.lock.fl.fl_block)) {
-		/* Now add block to block list of the conflicting lock
-		   if we haven't done so. */
-		dprintk("lockd: blocking on this lock.\n");
-		posix_block_lock(conflock, &block->b_call.a_args.lock.fl);
-	}
-
+	nlmsvc_insert_block(newblock, NLM_NEVER);
+out:
 	up(&file->f_sema);
-	return nlm_lck_blocked;
+	nlmsvc_release_block(newblock);
+	nlmsvc_release_block(block);
+	dprintk("lockd: nlmsvc_lock returned %u\n", ret);
+	return ret;
 }
 
 /*
@@ -382,8 +439,6 @@ u32
 nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock,
 				       struct nlm_lock *conflock)
 {
-	struct file_lock	*fl;
-
 	dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
 				file->f_file->f_dentry->d_inode->i_sb->s_id,
 				file->f_file->f_dentry->d_inode->i_ino,
@@ -391,13 +446,14 @@ nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock,
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end);
 
-	if ((fl = posix_test_lock(file->f_file, &lock->fl)) != NULL) {
+	if (posix_test_lock(file->f_file, &lock->fl, &conflock->fl)) {
 		dprintk("lockd: conflicting lock(ty=%d, %Ld-%Ld)\n",
-				fl->fl_type, (long long)fl->fl_start,
-				(long long)fl->fl_end);
+				conflock->fl.fl_type,
+				(long long)conflock->fl.fl_start,
+				(long long)conflock->fl.fl_end);
 		conflock->caller = "somehost";	/* FIXME */
 		conflock->oh.len = 0;		/* don't return OH info */
-		conflock->fl = *fl;
+		conflock->svid = conflock->fl.fl_pid;
 		return nlm_lck_denied;
 	}
 
@@ -453,9 +509,12 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
 				(long long)lock->fl.fl_end);
 
 	down(&file->f_sema);
-	if ((block = nlmsvc_lookup_block(file, lock, 1)) != NULL)
-		status = nlmsvc_delete_block(block, 1);
+	block = nlmsvc_lookup_block(file, lock);
 	up(&file->f_sema);
+	if (block != NULL) {
+		status = nlmsvc_unlink_block(block);
+		nlmsvc_release_block(block);
+	}
 	return status ? nlm_lck_denied : nlm_granted;
 }
 
@@ -473,7 +532,7 @@ nlmsvc_notify_blocked(struct file_lock *fl)
 
 	dprintk("lockd: VFS unblock notification for block %p\n", fl);
 	for (bp = &nlm_blocked; (block = *bp) != 0; bp = &block->b_next) {
-		if (nlm_compare_locks(&block->b_call.a_args.lock.fl, fl)) {
+		if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) {
 			nlmsvc_insert_block(block, 0);
 			svc_wake_up(block->b_daemon);
 			return;
@@ -508,17 +567,13 @@ static void
 nlmsvc_grant_blocked(struct nlm_block *block)
 {
 	struct nlm_file		*file = block->b_file;
-	struct nlm_lock		*lock = &block->b_call.a_args.lock;
-	struct file_lock	*conflock;
+	struct nlm_lock		*lock = &block->b_call->a_args.lock;
 	int			error;
 
 	dprintk("lockd: grant blocked lock %p\n", block);
 
-	/* First thing is lock the file */
-	down(&file->f_sema);
-
 	/* Unlink block request from list */
-	nlmsvc_remove_block(block);
+	nlmsvc_unlink_block(block);
 
 	/* If b_granted is true this means we've been here before.
 	 * Just retry the grant callback, possibly refreshing the RPC
@@ -529,24 +584,21 @@ nlmsvc_grant_blocked(struct nlm_block *block)
 	}
 
 	/* Try the lock operation again */
-	if ((conflock = posix_test_lock(file->f_file, &lock->fl)) != NULL) {
-		/* Bummer, we blocked again */
+	lock->fl.fl_flags |= FL_SLEEP;
+	error = posix_lock_file(file->f_file, &lock->fl);
+	lock->fl.fl_flags &= ~FL_SLEEP;
+
+	switch (error) {
+	case 0:
+		break;
+	case -EAGAIN:
 		dprintk("lockd: lock still blocked\n");
 		nlmsvc_insert_block(block, NLM_NEVER);
-		posix_block_lock(conflock, &lock->fl);
-		up(&file->f_sema);
 		return;
-	}
-
-	/* Alright, no conflicting lock. Now lock it for real. If the
-	 * following yields an error, this is most probably due to low
-	 * memory. Retry the lock in a few seconds.
-	 */
-	if ((error = posix_lock_file(file->f_file, &lock->fl)) < 0) {
+	default:
 		printk(KERN_WARNING "lockd: unexpected error %d in %s!\n",
 				-error, __FUNCTION__);
 		nlmsvc_insert_block(block, 10 * HZ);
-		up(&file->f_sema);
 		return;
 	}
 
@@ -554,17 +606,15 @@ callback:
 	/* Lock was granted by VFS. */
 	dprintk("lockd: GRANTing blocked lock.\n");
 	block->b_granted = 1;
-	block->b_incall  = 1;
 
 	/* Schedule next grant callback in 30 seconds */
 	nlmsvc_insert_block(block, 30 * HZ);
 
 	/* Call the client */
-	nlm_get_host(block->b_call.a_host);
-	if (nlmsvc_async_call(&block->b_call, NLMPROC_GRANTED_MSG,
+	kref_get(&block->b_count);
+	if (nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG,
 						&nlmsvc_grant_ops) < 0)
-		nlm_release_host(block->b_call.a_host);
-	up(&file->f_sema);
+		nlmsvc_release_block(block);
 }
 
 /*
@@ -578,20 +628,10 @@ callback:
 static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
 {
 	struct nlm_rqst		*call = data;
-	struct nlm_block	*block;
+	struct nlm_block	*block = call->a_block;
 	unsigned long		timeout;
-	struct sockaddr_in	*peer_addr = RPC_PEERADDR(task->tk_client);
 
 	dprintk("lockd: GRANT_MSG RPC callback\n");
-	dprintk("callback: looking for cookie %s, host (%u.%u.%u.%u)\n",
-		nlmdbg_cookie2a(&call->a_args.cookie),
-		NIPQUAD(peer_addr->sin_addr.s_addr));
-	if (!(block = nlmsvc_find_block(&call->a_args.cookie, peer_addr))) {
-		dprintk("lockd: no block for cookie %s, host (%u.%u.%u.%u)\n",
-			nlmdbg_cookie2a(&call->a_args.cookie),
-			NIPQUAD(peer_addr->sin_addr.s_addr));
-		return;
-	}
 
 	/* Technically, we should down the file semaphore here. Since we
 	 * move the block towards the head of the queue only, no harm
@@ -608,13 +648,18 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
 	}
 	nlmsvc_insert_block(block, timeout);
 	svc_wake_up(block->b_daemon);
-	block->b_incall = 0;
+}
 
-	nlm_release_host(call->a_host);
+void nlmsvc_grant_release(void *data)
+{
+	struct nlm_rqst		*call = data;
+
+	nlmsvc_release_block(call->a_block);
 }
 
 static const struct rpc_call_ops nlmsvc_grant_ops = {
 	.rpc_call_done = nlmsvc_grant_callback,
+	.rpc_release = nlmsvc_grant_release,
 };
 
 /*
@@ -634,25 +679,17 @@ nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status
 		return;
 	file = block->b_file;
 
-	file->f_count++;
-	down(&file->f_sema);
-	block = nlmsvc_find_block(cookie, &rqstp->rq_addr);
 	if (block) {
 		if (status == NLM_LCK_DENIED_GRACE_PERIOD) {
 			/* Try again in a couple of seconds */
 			nlmsvc_insert_block(block, 10 * HZ);
-			up(&file->f_sema);
 		} else {
 			/* Lock is now held by client, or has been rejected.
 			 * In both cases, the block should be removed. */
-			up(&file->f_sema);
-			if (status == NLM_LCK_GRANTED)
-				nlmsvc_delete_block(block, 0);
-			else
-				nlmsvc_delete_block(block, 1);
+			nlmsvc_unlink_block(block);
 		}
 	}
-	nlm_release_file(file);
+	nlmsvc_release_block(block);
 }
 
 /*
@@ -675,10 +712,12 @@ nlmsvc_retry_blocked(void)
 			break;
 		dprintk("nlmsvc_retry_blocked(%p, when=%ld, done=%d)\n",
 			block, block->b_when, block->b_done);
+		kref_get(&block->b_count);
 		if (block->b_done)
-			nlmsvc_delete_block(block, 0);
+			nlmsvc_unlink_block(block);
 		else
 			nlmsvc_grant_blocked(block);
+		nlmsvc_release_block(block);
 	}
 
 	if ((block = nlm_blocked) && block->b_when != NLM_NEVER)
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 35681d9cf1f..d210cf304e9 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -22,10 +22,6 @@
 
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
 
-static u32	nlmsvc_callback(struct svc_rqst *, u32, struct nlm_res *);
-
-static const struct rpc_call_ops nlmsvc_callback_ops;
-
 #ifdef CONFIG_LOCKD_V4
 static u32
 cast_to_nlm(u32 status, u32 vers)
@@ -262,83 +258,91 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
 }
 
 /*
+ * This is the generic lockd callback for async RPC calls
+ */
+static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
+{
+	dprintk("lockd: %4d callback returned %d\n", task->tk_pid,
+			-task->tk_status);
+}
+
+static void nlmsvc_callback_release(void *data)
+{
+	nlm_release_call(data);
+}
+
+static const struct rpc_call_ops nlmsvc_callback_ops = {
+	.rpc_call_done = nlmsvc_callback_exit,
+	.rpc_release = nlmsvc_callback_release,
+};
+
+/*
  * `Async' versions of the above service routines. They aren't really,
  * because we send the callback before the reply proper. I hope this
  * doesn't break any clients.
  */
-static int
-nlmsvc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void	     *resp)
+static int nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
+		int (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res  *))
 {
-	struct nlm_res	res;
-	u32		stat;
+	struct nlm_host	*host;
+	struct nlm_rqst	*call;
+	int stat;
 
-	dprintk("lockd: TEST_MSG      called\n");
-	memset(&res, 0, sizeof(res));
+	host = nlmsvc_lookup_host(rqstp);
+	if (host == NULL)
+		return rpc_system_err;
 
-	if ((stat = nlmsvc_proc_test(rqstp, argp, &res)) == 0)
-		stat = nlmsvc_callback(rqstp, NLMPROC_TEST_RES, &res);
-	return stat;
+	call = nlm_alloc_call(host);
+	if (call == NULL)
+		return rpc_system_err;
+
+	stat = func(rqstp, argp, &call->a_res);
+	if (stat != 0) {
+		nlm_release_call(call);
+		return stat;
+	}
+
+	call->a_flags = RPC_TASK_ASYNC;
+	if (nlm_async_reply(call, proc, &nlmsvc_callback_ops) < 0)
+		return rpc_system_err;
+	return rpc_success;
 }
 
-static int
-nlmsvc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlmsvc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
 					     void	     *resp)
 {
-	struct nlm_res	res;
-	u32		stat;
+	dprintk("lockd: TEST_MSG      called\n");
+	return nlmsvc_callback(rqstp, NLMPROC_TEST_RES, argp, nlmsvc_proc_test);
+}
 
+static int nlmsvc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+					     void	     *resp)
+{
 	dprintk("lockd: LOCK_MSG      called\n");
-	memset(&res, 0, sizeof(res));
-
-	if ((stat = nlmsvc_proc_lock(rqstp, argp, &res)) == 0)
-		stat = nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, &res);
-	return stat;
+	return nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlmsvc_proc_lock);
 }
 
-static int
-nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
 					       void	       *resp)
 {
-	struct nlm_res	res;
-	u32		stat;
-
 	dprintk("lockd: CANCEL_MSG    called\n");
-	memset(&res, 0, sizeof(res));
-
-	if ((stat = nlmsvc_proc_cancel(rqstp, argp, &res)) == 0)
-		stat = nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, &res);
-	return stat;
+	return nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlmsvc_proc_cancel);
 }
 
 static int
 nlmsvc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
                                                void            *resp)
 {
-	struct nlm_res	res;
-	u32		stat;
-
 	dprintk("lockd: UNLOCK_MSG    called\n");
-	memset(&res, 0, sizeof(res));
-
-	if ((stat = nlmsvc_proc_unlock(rqstp, argp, &res)) == 0)
-		stat = nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, &res);
-	return stat;
+	return nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlmsvc_proc_unlock);
 }
 
 static int
 nlmsvc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
                                                 void            *resp)
 {
-	struct nlm_res	res;
-	u32		stat;
-
 	dprintk("lockd: GRANTED_MSG   called\n");
-	memset(&res, 0, sizeof(res));
-
-	if ((stat = nlmsvc_proc_granted(rqstp, argp, &res)) == 0)
-		stat = nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, &res);
-	return stat;
+	return nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, argp, nlmsvc_proc_granted);
 }
 
 /*
@@ -497,55 +501,6 @@ nlmsvc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
 }
 
 /*
- * This is the generic lockd callback for async RPC calls
- */
-static u32
-nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp)
-{
-	struct nlm_host	*host;
-	struct nlm_rqst	*call;
-
-	if (!(call = nlmclnt_alloc_call()))
-		return rpc_system_err;
-
-	host = nlmclnt_lookup_host(&rqstp->rq_addr,
-				rqstp->rq_prot, rqstp->rq_vers);
-	if (!host) {
-		kfree(call);
-		return rpc_system_err;
-	}
-
-	call->a_flags = RPC_TASK_ASYNC;
-	call->a_host  = host;
-	memcpy(&call->a_args, resp, sizeof(*resp));
-
-	if (nlmsvc_async_call(call, proc, &nlmsvc_callback_ops) < 0)
-		goto error;
-
-	return rpc_success;
- error:
-	nlm_release_host(host);
-	kfree(call);
-	return rpc_system_err;
-}
-
-static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
-{
-	struct nlm_rqst	*call = data;
-
-	if (task->tk_status < 0) {
-		dprintk("lockd: %4d callback failed (errno = %d)\n",
-					task->tk_pid, -task->tk_status);
-	}
-	nlm_release_host(call->a_host);
-	kfree(call);
-}
-
-static const struct rpc_call_ops nlmsvc_callback_ops = {
-	.rpc_call_done = nlmsvc_callback_exit,
-};
-
-/*
  * NLM Server procedures.
  */
 
diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c
index 4943fb7836c..27288c83da9 100644
--- a/fs/lockd/svcshare.c
+++ b/fs/lockd/svcshare.c
@@ -88,7 +88,7 @@ nlmsvc_unshare_file(struct nlm_host *host, struct nlm_file *file,
  * Traverse all shares for a given file (and host).
  * NLM_ACT_CHECK is handled by nlmsvc_inspect_file.
  */
-int
+void
 nlmsvc_traverse_shares(struct nlm_host *host, struct nlm_file *file, int action)
 {
 	struct nlm_share	*share, **shpp;
@@ -106,6 +106,4 @@ nlmsvc_traverse_shares(struct nlm_host *host, struct nlm_file *file, int action)
 		}
 		shpp = &share->s_next;
 	}
-
-	return 0;
 }
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 62f4a385177..c7a6e3ae44d 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -182,7 +182,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, int action)
 again:
 	file->f_locks = 0;
 	for (fl = inode->i_flock; fl; fl = fl->fl_next) {
-		if (!(fl->fl_flags & FL_LOCKD))
+		if (fl->fl_lmops != &nlmsvc_lock_operations)
 			continue;
 
 		/* update current lock count */
@@ -224,9 +224,8 @@ nlm_inspect_file(struct nlm_host *host, struct nlm_file *file, int action)
 		if (file->f_count || file->f_blocks || file->f_shares)
 			return 1;
 	} else {
-		if (nlmsvc_traverse_blocks(host, file, action)
-		 || nlmsvc_traverse_shares(host, file, action))
-			return 1;
+		nlmsvc_traverse_blocks(host, file, action);
+		nlmsvc_traverse_shares(host, file, action);
 	}
 	return nlm_traverse_locks(host, file, action);
 }
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 1d700a4dd0b..f22a3764461 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -131,10 +131,11 @@ nlm_decode_lock(u32 *p, struct nlm_lock *lock)
 	 || !(p = nlm_decode_fh(p, &lock->fh))
 	 || !(p = nlm_decode_oh(p, &lock->oh)))
 		return NULL;
+	lock->svid  = ntohl(*p++);
 
 	locks_init_lock(fl);
 	fl->fl_owner = current->files;
-	fl->fl_pid   = ntohl(*p++);
+	fl->fl_pid   = (pid_t)lock->svid;
 	fl->fl_flags = FL_POSIX;
 	fl->fl_type  = F_RDLCK;		/* as good as anything else */
 	start = ntohl(*p++);
@@ -174,7 +175,7 @@ nlm_encode_lock(u32 *p, struct nlm_lock *lock)
 	else
 		len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1);
 
-	*p++ = htonl(fl->fl_pid);
+	*p++ = htonl(lock->svid);
 	*p++ = htonl(start);
 	*p++ = htonl(len);
 
@@ -197,7 +198,7 @@ nlm_encode_testres(u32 *p, struct nlm_res *resp)
 		struct file_lock	*fl = &resp->lock.fl;
 
 		*p++ = (fl->fl_type == F_RDLCK)? xdr_zero : xdr_one;
-		*p++ = htonl(fl->fl_pid);
+		*p++ = htonl(resp->lock.svid);
 
 		/* Encode owner handle. */
 		if (!(p = xdr_encode_netobj(p, &resp->lock.oh)))
@@ -298,7 +299,8 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
 
 	memset(lock, 0, sizeof(*lock));
 	locks_init_lock(&lock->fl);
-	lock->fl.fl_pid = ~(u32) 0;
+	lock->svid = ~(u32) 0;
+	lock->fl.fl_pid = (pid_t)lock->svid;
 
 	if (!(p = nlm_decode_cookie(p, &argp->cookie))
 	 || !(p = xdr_decode_string_inplace(p, &lock->caller,
@@ -415,7 +417,8 @@ nlmclt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
 		memset(&resp->lock, 0, sizeof(resp->lock));
 		locks_init_lock(fl);
 		excl = ntohl(*p++);
-		fl->fl_pid = ntohl(*p++);
+		resp->lock.svid = ntohl(*p++);
+		fl->fl_pid = (pid_t)resp->lock.svid;
 		if (!(p = nlm_decode_oh(p, &resp->lock.oh)))
 			return -EIO;
 
@@ -543,7 +546,9 @@ nlmclt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
 	.p_proc      = NLMPROC_##proc,					\
 	.p_encode    = (kxdrproc_t) nlmclt_encode_##argtype,		\
 	.p_decode    = (kxdrproc_t) nlmclt_decode_##restype,		\
-	.p_bufsiz    = MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2	\
+	.p_bufsiz    = MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2,	\
+	.p_statidx   = NLMPROC_##proc,					\
+	.p_name      = #proc,						\
 	}
 
 static struct rpc_procinfo	nlm_procedures[] = {
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index fdcf105a530..36eb175ec33 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -130,10 +130,11 @@ nlm4_decode_lock(u32 *p, struct nlm_lock *lock)
 	 || !(p = nlm4_decode_fh(p, &lock->fh))
 	 || !(p = nlm4_decode_oh(p, &lock->oh)))
 		return NULL;
+	lock->svid  = ntohl(*p++);
 
 	locks_init_lock(fl);
 	fl->fl_owner = current->files;
-	fl->fl_pid   = ntohl(*p++);
+	fl->fl_pid   = (pid_t)lock->svid;
 	fl->fl_flags = FL_POSIX;
 	fl->fl_type  = F_RDLCK;		/* as good as anything else */
 	p = xdr_decode_hyper(p, &start);
@@ -167,7 +168,7 @@ nlm4_encode_lock(u32 *p, struct nlm_lock *lock)
 	 || (fl->fl_end > NLM4_OFFSET_MAX && fl->fl_end != OFFSET_MAX))
 		return NULL;
 
-	*p++ = htonl(fl->fl_pid);
+	*p++ = htonl(lock->svid);
 
 	start = loff_t_to_s64(fl->fl_start);
 	if (fl->fl_end == OFFSET_MAX)
@@ -198,7 +199,7 @@ nlm4_encode_testres(u32 *p, struct nlm_res *resp)
 		struct file_lock	*fl = &resp->lock.fl;
 
 		*p++ = (fl->fl_type == F_RDLCK)? xdr_zero : xdr_one;
-		*p++ = htonl(fl->fl_pid);
+		*p++ = htonl(resp->lock.svid);
 
 		/* Encode owner handle. */
 		if (!(p = xdr_encode_netobj(p, &resp->lock.oh)))
@@ -212,8 +213,8 @@ nlm4_encode_testres(u32 *p, struct nlm_res *resp)
 		
 		p = xdr_encode_hyper(p, start);
 		p = xdr_encode_hyper(p, len);
-		dprintk("xdr: encode_testres (status %d pid %d type %d start %Ld end %Ld)\n",
-			resp->status, fl->fl_pid, fl->fl_type,
+		dprintk("xdr: encode_testres (status %u pid %d type %d start %Ld end %Ld)\n",
+			resp->status, (int)resp->lock.svid, fl->fl_type,
 			(long long)fl->fl_start,  (long long)fl->fl_end);
 	}
 
@@ -303,7 +304,8 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
 
 	memset(lock, 0, sizeof(*lock));
 	locks_init_lock(&lock->fl);
-	lock->fl.fl_pid = ~(u32) 0;
+	lock->svid = ~(u32) 0;
+	lock->fl.fl_pid = (pid_t)lock->svid;
 
 	if (!(p = nlm4_decode_cookie(p, &argp->cookie))
 	 || !(p = xdr_decode_string_inplace(p, &lock->caller,
@@ -420,7 +422,8 @@ nlm4clt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
 		memset(&resp->lock, 0, sizeof(resp->lock));
 		locks_init_lock(fl);
 		excl = ntohl(*p++);
-		fl->fl_pid = ntohl(*p++);
+		resp->lock.svid = ntohl(*p++);
+		fl->fl_pid = (pid_t)resp->lock.svid;
 		if (!(p = nlm4_decode_oh(p, &resp->lock.oh)))
 			return -EIO;
 
@@ -548,7 +551,9 @@ nlm4clt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
 	.p_proc      = NLMPROC_##proc,					\
 	.p_encode    = (kxdrproc_t) nlm4clt_encode_##argtype,		\
 	.p_decode    = (kxdrproc_t) nlm4clt_decode_##restype,		\
-	.p_bufsiz    = MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2	\
+	.p_bufsiz    = MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2,	\
+	.p_statidx   = NLMPROC_##proc,					\
+	.p_name      = #proc,						\
 	}
 
 static struct rpc_procinfo	nlm4_procedures[] = {
diff --git a/fs/locks.c b/fs/locks.c
index 909eab8fb1d..56f996e98bb 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -139,10 +139,7 @@ int lease_break_time = 45;
 #define for_each_lock(inode, lockp) \
 	for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next)
 
-LIST_HEAD(file_lock_list);
-
-EXPORT_SYMBOL(file_lock_list);
-
+static LIST_HEAD(file_lock_list);
 static LIST_HEAD(blocked_list);
 
 static kmem_cache_t *filelock_cache;
@@ -153,6 +150,21 @@ static struct file_lock *locks_alloc_lock(void)
 	return kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
 }
 
+static void locks_release_private(struct file_lock *fl)
+{
+	if (fl->fl_ops) {
+		if (fl->fl_ops->fl_release_private)
+			fl->fl_ops->fl_release_private(fl);
+		fl->fl_ops = NULL;
+	}
+	if (fl->fl_lmops) {
+		if (fl->fl_lmops->fl_release_private)
+			fl->fl_lmops->fl_release_private(fl);
+		fl->fl_lmops = NULL;
+	}
+
+}
+
 /* Free a lock which is not in use. */
 static void locks_free_lock(struct file_lock *fl)
 {
@@ -169,18 +181,7 @@ static void locks_free_lock(struct file_lock *fl)
 	if (!list_empty(&fl->fl_link))
 		panic("Attempting to free lock on active lock list");
 
-	if (fl->fl_ops) {
-		if (fl->fl_ops->fl_release_private)
-			fl->fl_ops->fl_release_private(fl);
-		fl->fl_ops = NULL;
-	}
-
-	if (fl->fl_lmops) {
-		if (fl->fl_lmops->fl_release_private)
-			fl->fl_lmops->fl_release_private(fl);
-		fl->fl_lmops = NULL;
-	}
-
+	locks_release_private(fl);
 	kmem_cache_free(filelock_cache, fl);
 }
 
@@ -218,24 +219,46 @@ static void init_once(void *foo, kmem_cache_t *cache, unsigned long flags)
 	locks_init_lock(lock);
 }
 
+static void locks_copy_private(struct file_lock *new, struct file_lock *fl)
+{
+	if (fl->fl_ops) {
+		if (fl->fl_ops->fl_copy_lock)
+			fl->fl_ops->fl_copy_lock(new, fl);
+		new->fl_ops = fl->fl_ops;
+	}
+	if (fl->fl_lmops) {
+		if (fl->fl_lmops->fl_copy_lock)
+			fl->fl_lmops->fl_copy_lock(new, fl);
+		new->fl_lmops = fl->fl_lmops;
+	}
+}
+
 /*
  * Initialize a new lock from an existing file_lock structure.
  */
-void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
+static void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl)
 {
 	new->fl_owner = fl->fl_owner;
 	new->fl_pid = fl->fl_pid;
-	new->fl_file = fl->fl_file;
+	new->fl_file = NULL;
 	new->fl_flags = fl->fl_flags;
 	new->fl_type = fl->fl_type;
 	new->fl_start = fl->fl_start;
 	new->fl_end = fl->fl_end;
+	new->fl_ops = NULL;
+	new->fl_lmops = NULL;
+}
+
+void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
+{
+	locks_release_private(new);
+
+	__locks_copy_lock(new, fl);
+	new->fl_file = fl->fl_file;
 	new->fl_ops = fl->fl_ops;
 	new->fl_lmops = fl->fl_lmops;
-	if (fl->fl_ops && fl->fl_ops->fl_copy_lock)
-		fl->fl_ops->fl_copy_lock(new, fl);
-	if (fl->fl_lmops && fl->fl_lmops->fl_copy_lock)
-		fl->fl_lmops->fl_copy_lock(new, fl);
+
+	locks_copy_private(new, fl);
 }
 
 EXPORT_SYMBOL(locks_copy_lock);
@@ -654,8 +677,9 @@ static int locks_block_on_timeout(struct file_lock *blocker, struct file_lock *w
 	return result;
 }
 
-struct file_lock *
-posix_test_lock(struct file *filp, struct file_lock *fl)
+int
+posix_test_lock(struct file *filp, struct file_lock *fl,
+		struct file_lock *conflock)
 {
 	struct file_lock *cfl;
 
@@ -666,9 +690,13 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
 		if (posix_locks_conflict(cfl, fl))
 			break;
 	}
+	if (cfl) {
+		__locks_copy_lock(conflock, cfl);
+		unlock_kernel();
+		return 1;
+	}
 	unlock_kernel();
-
-	return (cfl);
+	return 0;
 }
 
 EXPORT_SYMBOL(posix_test_lock);
@@ -904,7 +932,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request)
 				fl->fl_start = request->fl_start;
 				fl->fl_end = request->fl_end;
 				fl->fl_type = request->fl_type;
-				fl->fl_u = request->fl_u;
+				locks_release_private(fl);
+				locks_copy_private(fl, request);
 				request = fl;
 				added = 1;
 			}
@@ -1544,7 +1573,7 @@ asmlinkage long sys_flock(unsigned int fd, unsigned int cmd)
  */
 int fcntl_getlk(struct file *filp, struct flock __user *l)
 {
-	struct file_lock *fl, file_lock;
+	struct file_lock *fl, cfl, file_lock;
 	struct flock flock;
 	int error;
 
@@ -1568,7 +1597,7 @@ int fcntl_getlk(struct file *filp, struct flock __user *l)
 		else
 		  fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
 	} else {
-		fl = posix_test_lock(filp, &file_lock);
+		fl = (posix_test_lock(filp, &file_lock, &cfl) ? &cfl : NULL);
 	}
  
 	flock.l_type = F_UNLCK;
@@ -1698,7 +1727,7 @@ out:
  */
 int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
 {
-	struct file_lock *fl, file_lock;
+	struct file_lock *fl, cfl, file_lock;
 	struct flock64 flock;
 	int error;
 
@@ -1722,7 +1751,7 @@ int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
 		else
 		  fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
 	} else {
-		fl = posix_test_lock(filp, &file_lock);
+		fl = (posix_test_lock(filp, &file_lock, &cfl) ? &cfl : NULL);
 	}
  
 	flock.l_type = F_UNLCK;
@@ -1936,21 +1965,6 @@ void locks_remove_flock(struct file *filp)
 }
 
 /**
- *	posix_block_lock - blocks waiting for a file lock
- *	@blocker: the lock which is blocking
- *	@waiter: the lock which conflicts and has to wait
- *
- * lockd needs to block waiting for locks.
- */
-void
-posix_block_lock(struct file_lock *blocker, struct file_lock *waiter)
-{
-	locks_insert_block(blocker, waiter);
-}
-
-EXPORT_SYMBOL(posix_block_lock);
-
-/**
  *	posix_unblock_lock - stop waiting for a file lock
  *      @filp:   how the file was opened
  *	@waiter: the lock which was waiting
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index dc6a4e4abcd..4a6abc49418 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -56,7 +56,7 @@ void minix_free_block(struct inode * inode, int block)
 	unsigned int bit,zone;
 
 	if (block < sbi->s_firstdatazone || block >= sbi->s_nzones) {
-		printk("trying to free block not in datazone\n");
+		printk("Trying to free block not in datazone\n");
 		return;
 	}
 	zone = block - sbi->s_firstdatazone + 1;
@@ -124,7 +124,7 @@ minix_V1_raw_inode(struct super_block *sb, ino_t ino, struct buffer_head **bh)
 		 ino / MINIX_INODES_PER_BLOCK;
 	*bh = sb_bread(sb, block);
 	if (!*bh) {
-		printk("unable to read i-node block\n");
+		printk("Unable to read inode block\n");
 		return NULL;
 	}
 	p = (void *)(*bh)->b_data;
@@ -149,7 +149,7 @@ minix_V2_raw_inode(struct super_block *sb, ino_t ino, struct buffer_head **bh)
 		 ino / MINIX2_INODES_PER_BLOCK;
 	*bh = sb_bread(sb, block);
 	if (!*bh) {
-		printk("unable to read i-node block\n");
+		printk("Unable to read inode block\n");
 		return NULL;
 	}
 	p = (void *)(*bh)->b_data;
@@ -204,7 +204,7 @@ void minix_free_inode(struct inode * inode)
 	bh = sbi->s_imap[ino >> 13];
 	lock_kernel();
 	if (!minix_test_and_clear_bit(ino & 8191, bh->b_data))
-		printk("minix_free_inode: bit %lu already cleared.\n", ino);
+		printk("minix_free_inode: bit %lu already cleared\n", ino);
 	unlock_kernel();
 	mark_buffer_dirty(bh);
  out:
@@ -238,7 +238,7 @@ struct inode * minix_new_inode(const struct inode * dir, int * error)
 		return NULL;
 	}
 	if (minix_test_and_set_bit(j,bh->b_data)) {	/* shouldn't happen */
-		printk("new_inode: bit already set");
+		printk("new_inode: bit already set\n");
 		unlock_kernel();
 		iput(inode);
 		return NULL;
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index d9ffc43fee5..2dcccf1d1b7 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -127,11 +127,11 @@ static int minix_remount (struct super_block * sb, int * flags, char * data)
 		mark_buffer_dirty(sbi->s_sbh);
 
 		if (!(sbi->s_mount_state & MINIX_VALID_FS))
-			printk ("MINIX-fs warning: remounting unchecked fs, "
-				"running fsck is recommended.\n");
+			printk("MINIX-fs warning: remounting unchecked fs, "
+				"running fsck is recommended\n");
 		else if ((sbi->s_mount_state & MINIX_ERROR_FS))
-			printk ("MINIX-fs warning: remounting fs with errors, "
-				"running fsck is recommended.\n");
+			printk("MINIX-fs warning: remounting fs with errors, "
+				"running fsck is recommended\n");
 	}
 	return 0;
 }
@@ -245,11 +245,11 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
 		mark_buffer_dirty(bh);
 	}
 	if (!(sbi->s_mount_state & MINIX_VALID_FS))
-		printk ("MINIX-fs: mounting unchecked file system, "
-			"running fsck is recommended.\n");
+		printk("MINIX-fs: mounting unchecked file system, "
+			"running fsck is recommended\n");
  	else if (sbi->s_mount_state & MINIX_ERROR_FS)
-		printk ("MINIX-fs: mounting file system with errors, "
-			"running fsck is recommended.\n");
+		printk("MINIX-fs: mounting file system with errors, "
+			"running fsck is recommended\n");
 	return 0;
 
 out_iput:
@@ -273,19 +273,19 @@ out_no_bitmap:
 
 out_no_map:
 	if (!silent)
-		printk ("MINIX-fs: can't allocate map\n");
+		printk("MINIX-fs: can't allocate map\n");
 	goto out_release;
 
 out_no_fs:
 	if (!silent)
-		printk("VFS: Can't find a Minix or Minix V2 filesystem on device "
-		       "%s.\n", s->s_id);
+		printk("VFS: Can't find a Minix or Minix V2 filesystem "
+			"on device %s\n", s->s_id);
     out_release:
 	brelse(bh);
 	goto out;
 
 out_bad_hblock:
-	printk("MINIX-fs: blocksize too small for device.\n");
+	printk("MINIX-fs: blocksize too small for device\n");
 	goto out;
 
 out_bad_sb:
@@ -524,7 +524,7 @@ int minix_sync_inode(struct inode * inode)
 		sync_dirty_buffer(bh);
 		if (buffer_req(bh) && !buffer_uptodate(bh))
 		{
-			printk ("IO error syncing minix inode [%s:%08lx]\n",
+			printk("IO error syncing minix inode [%s:%08lx]\n",
 				inode->i_sb->s_id, inode->i_ino);
 			err = -1;
 		}
diff --git a/fs/minix/itree_v1.c b/fs/minix/itree_v1.c
index ba06aef4aca..656b1347a25 100644
--- a/fs/minix/itree_v1.c
+++ b/fs/minix/itree_v1.c
@@ -25,9 +25,9 @@ static int block_to_path(struct inode * inode, long block, int offsets[DEPTH])
 	int n = 0;
 
 	if (block < 0) {
-		printk("minix_bmap: block<0");
+		printk("minix_bmap: block<0\n");
 	} else if (block >= (minix_sb(inode->i_sb)->s_max_size/BLOCK_SIZE)) {
-		printk("minix_bmap: block>big");
+		printk("minix_bmap: block>big\n");
 	} else if (block < 7) {
 		offsets[n++] = block;
 	} else if ((block -= 7) < 512) {
diff --git a/fs/minix/itree_v2.c b/fs/minix/itree_v2.c
index 3adc7675560..9adcdc754e0 100644
--- a/fs/minix/itree_v2.c
+++ b/fs/minix/itree_v2.c
@@ -25,9 +25,9 @@ static int block_to_path(struct inode * inode, long block, int offsets[DEPTH])
 	int n = 0;
 
 	if (block < 0) {
-		printk("minix_bmap: block<0");
+		printk("minix_bmap: block<0\n");
 	} else if (block >= (minix_sb(inode->i_sb)->s_max_size/BLOCK_SIZE)) {
-		printk("minix_bmap: block>big");
+		printk("minix_bmap: block>big\n");
 	} else if (block < 7) {
 		offsets[n++] = block;
 	} else if ((block -= 7) < 256) {
diff --git a/fs/namei.c b/fs/namei.c
index c72b940797f..98dc2e13436 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1353,6 +1353,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
 		return -ENOENT;
 
 	BUG_ON(victim->d_parent->d_inode != dir);
+	audit_inode_child(victim->d_name.name, victim->d_inode, dir->i_ino);
 
 	error = permission(dir,MAY_WRITE | MAY_EXEC, NULL);
 	if (error)
@@ -1472,7 +1473,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
 	DQUOT_INIT(dir);
 	error = dir->i_op->create(dir, dentry, mode, nd);
 	if (!error)
-		fsnotify_create(dir, dentry->d_name.name);
+		fsnotify_create(dir, dentry);
 	return error;
 }
 
@@ -1628,6 +1629,12 @@ do_last:
 		goto exit;
 	}
 
+	if (IS_ERR(nd->intent.open.file)) {
+		mutex_unlock(&dir->d_inode->i_mutex);
+		error = PTR_ERR(nd->intent.open.file);
+		goto exit_dput;
+	}
+
 	/* Negative dentry, just create the file */
 	if (!path.dentry->d_inode) {
 		if (!IS_POSIXACL(dir->d_inode))
@@ -1793,7 +1800,7 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 	DQUOT_INIT(dir);
 	error = dir->i_op->mknod(dir, dentry, mode, dev);
 	if (!error)
-		fsnotify_create(dir, dentry->d_name.name);
+		fsnotify_create(dir, dentry);
 	return error;
 }
 
@@ -1870,7 +1877,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	DQUOT_INIT(dir);
 	error = dir->i_op->mkdir(dir, dentry, mode);
 	if (!error)
-		fsnotify_mkdir(dir, dentry->d_name.name);
+		fsnotify_mkdir(dir, dentry);
 	return error;
 }
 
@@ -2133,7 +2140,7 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, i
 	DQUOT_INIT(dir);
 	error = dir->i_op->symlink(dir, dentry, oldname);
 	if (!error)
-		fsnotify_create(dir, dentry->d_name.name);
+		fsnotify_create(dir, dentry);
 	return error;
 }
 
@@ -2210,7 +2217,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
 	error = dir->i_op->link(old_dentry, dir, new_dentry);
 	mutex_unlock(&old_dentry->d_inode->i_mutex);
 	if (!error)
-		fsnotify_create(dir, new_dentry->d_name.name);
+		fsnotify_create(dir, new_dentry);
 	return error;
 }
 
@@ -2621,16 +2628,27 @@ int __page_symlink(struct inode *inode, const char *symname, int len,
 	int err = -ENOMEM;
 	char *kaddr;
 
+retry:
 	page = find_or_create_page(mapping, 0, gfp_mask);
 	if (!page)
 		goto fail;
 	err = mapping->a_ops->prepare_write(NULL, page, 0, len-1);
+	if (err == AOP_TRUNCATED_PAGE) {
+		page_cache_release(page);
+		goto retry;
+	}
 	if (err)
 		goto fail_map;
 	kaddr = kmap_atomic(page, KM_USER0);
 	memcpy(kaddr, symname, len-1);
 	kunmap_atomic(kaddr, KM_USER0);
-	mapping->a_ops->commit_write(NULL, page, 0, len-1);
+	err = mapping->a_ops->commit_write(NULL, page, 0, len-1);
+	if (err == AOP_TRUNCATED_PAGE) {
+		page_cache_release(page);
+		goto retry;
+	}
+	if (err)
+		goto fail_map;
 	/*
 	 * Notice that we are _not_ going to block here - end of page is
 	 * unmapped, so this will only try to map the rest of page, see
@@ -2640,7 +2658,8 @@ int __page_symlink(struct inode *inode, const char *symname, int len,
 	 */
 	if (!PageUptodate(page)) {
 		err = mapping->a_ops->readpage(NULL, page);
-		wait_on_page_locked(page);
+		if (err != AOP_TRUNCATED_PAGE)
+			wait_on_page_locked(page);
 	} else {
 		unlock_page(page);
 	}
diff --git a/fs/namespace.c b/fs/namespace.c
index 39c81a8d631..71e75bcf4d2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -399,6 +399,44 @@ struct seq_operations mounts_op = {
 	.show	= show_vfsmnt
 };
 
+static int show_vfsstat(struct seq_file *m, void *v)
+{
+	struct vfsmount *mnt = v;
+	int err = 0;
+
+	/* device */
+	if (mnt->mnt_devname) {
+		seq_puts(m, "device ");
+		mangle(m, mnt->mnt_devname);
+	} else
+		seq_puts(m, "no device");
+
+	/* mount point */
+	seq_puts(m, " mounted on ");
+	seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
+	seq_putc(m, ' ');
+
+	/* file system type */
+	seq_puts(m, "with fstype ");
+	mangle(m, mnt->mnt_sb->s_type->name);
+
+	/* optional statistics */
+	if (mnt->mnt_sb->s_op->show_stats) {
+		seq_putc(m, ' ');
+		err = mnt->mnt_sb->s_op->show_stats(m, mnt);
+	}
+
+	seq_putc(m, '\n');
+	return err;
+}
+
+struct seq_operations mountstats_op = {
+	.start	= m_start,
+	.next	= m_next,
+	.stop	= m_stop,
+	.show	= show_vfsstat,
+};
+
 /**
  * may_umount_tree - check if a mount tree is busy
  * @mnt: root of mount tree
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index fcd97406a77..99d2cfbce86 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -55,7 +55,12 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
 
 	complete(&nfs_callback_info.started);
 
-	while (nfs_callback_info.users != 0 || !signalled()) {
+	for(;;) {
+		if (signalled()) {
+			if (nfs_callback_info.users == 0)
+				break;
+			flush_signals(current);
+		}
 		/*
 		 * Listen for a request on the socket
 		 */
@@ -73,6 +78,7 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
 		svc_process(serv, rqstp);
 	}
 
+	svc_exit_thread(rqstp);
 	nfs_callback_info.pid = 0;
 	complete(&nfs_callback_info.stopped);
 	unlock_kernel();
@@ -134,11 +140,13 @@ int nfs_callback_down(void)
 
 	lock_kernel();
 	down(&nfs_callback_sema);
-	if (--nfs_callback_info.users || nfs_callback_info.pid == 0)
-		goto out;
-	kill_proc(nfs_callback_info.pid, SIGKILL, 1);
-	wait_for_completion(&nfs_callback_info.stopped);
-out:
+	nfs_callback_info.users--;
+	do {
+		if (nfs_callback_info.users != 0 || nfs_callback_info.pid == 0)
+			break;
+		if (kill_proc(nfs_callback_info.pid, SIGKILL, 1) < 0)
+			break;
+	} while (wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0);
 	up(&nfs_callback_sema);
 	unlock_kernel();
 	return ret;
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 7c33b9a81a9..05c38cf40b6 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -330,7 +330,7 @@ static unsigned encode_op_hdr(struct xdr_stream *xdr, uint32_t op, uint32_t res)
 
 static unsigned encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res)
 {
-	uint32_t *savep;
+	uint32_t *savep = NULL;
 	unsigned status = res->status;
 	
 	if (unlikely(status != 0))
@@ -358,23 +358,26 @@ static unsigned process_op(struct svc_rqst *rqstp,
 		struct xdr_stream *xdr_in, void *argp,
 		struct xdr_stream *xdr_out, void *resp)
 {
-	struct callback_op *op;
-	unsigned int op_nr;
+	struct callback_op *op = &callback_ops[0];
+	unsigned int op_nr = OP_CB_ILLEGAL;
 	unsigned int status = 0;
 	long maxlen;
 	unsigned res;
 
 	dprintk("%s: start\n", __FUNCTION__);
 	status = decode_op_hdr(xdr_in, &op_nr);
-	if (unlikely(status != 0)) {
-		op_nr = OP_CB_ILLEGAL;
-		op = &callback_ops[0];
-	} else if (unlikely(op_nr != OP_CB_GETATTR && op_nr != OP_CB_RECALL)) {
-		op_nr = OP_CB_ILLEGAL;
-		op = &callback_ops[0];
-		status = htonl(NFS4ERR_OP_ILLEGAL);
-	} else
-		op = &callback_ops[op_nr];
+	if (likely(status == 0)) {
+		switch (op_nr) {
+			case OP_CB_GETATTR:
+			case OP_CB_RECALL:
+				op = &callback_ops[op_nr];
+				break;
+			default:
+				op_nr = OP_CB_ILLEGAL;
+				op = &callback_ops[0];
+				status = htonl(NFS4ERR_OP_ILLEGAL);
+		}
+	}
 
 	maxlen = xdr_out->end - xdr_out->p;
 	if (maxlen > 0 && maxlen < PAGE_SIZE) {
@@ -416,6 +419,7 @@ static int nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp
 	decode_compound_hdr_arg(&xdr_in, &hdr_arg);
 	hdr_res.taglen = hdr_arg.taglen;
 	hdr_res.tag = hdr_arg.tag;
+	hdr_res.nops = NULL;
 	encode_compound_hdr_res(&xdr_out, &hdr_res);
 
 	for (;;) {
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index c6f07c1c71e..d3be923d4e4 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -421,3 +421,22 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp)
 		nfs_free_delegation(delegation);
 	}
 }
+
+int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
+{
+	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_delegation *delegation;
+	int res = 0;
+
+	if (nfsi->delegation_state == 0)
+		return 0;
+	spin_lock(&clp->cl_lock);
+	delegation = nfsi->delegation;
+	if (delegation != NULL) {
+		memcpy(dst->data, delegation->stateid.data, sizeof(dst->data));
+		res = 1;
+	}
+	spin_unlock(&clp->cl_lock);
+	return res;
+}
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 7a0b2bfce77..3858694652f 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -41,6 +41,7 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp);
 int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
 int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state);
 int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
+int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
 
 static inline int nfs_have_delegation(struct inode *inode, int flags)
 {
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a1554bead69..06c48b385c9 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -34,6 +34,7 @@
 
 #include "nfs4_fs.h"
 #include "delegation.h"
+#include "iostat.h"
 
 #define NFS_PARANOIA 1
 /* #define NFS_DEBUG_VERBOSE 1 */
@@ -129,6 +130,9 @@ nfs_opendir(struct inode *inode, struct file *filp)
 {
 	int res = 0;
 
+	dfprintk(VFS, "NFS: opendir(%s/%ld)\n",
+			inode->i_sb->s_id, inode->i_ino);
+
 	lock_kernel();
 	/* Call generic open code in order to cache credentials */
 	if (!res)
@@ -172,7 +176,9 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
 	unsigned long	timestamp;
 	int		error;
 
-	dfprintk(VFS, "NFS: nfs_readdir_filler() reading cookie %Lu into page %lu.\n", (long long)desc->entry->cookie, page->index);
+	dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n",
+			__FUNCTION__, (long long)desc->entry->cookie,
+			page->index);
 
  again:
 	timestamp = jiffies;
@@ -244,7 +250,8 @@ int find_dirent(nfs_readdir_descriptor_t *desc)
 			status;
 
 	while((status = dir_decode(desc)) == 0) {
-		dfprintk(VFS, "NFS: found cookie %Lu\n", (unsigned long long)entry->cookie);
+		dfprintk(DIRCACHE, "NFS: %s: examining cookie %Lu\n",
+				__FUNCTION__, (unsigned long long)entry->cookie);
 		if (entry->prev_cookie == *desc->dir_cookie)
 			break;
 		if (loop_count++ > 200) {
@@ -252,7 +259,6 @@ int find_dirent(nfs_readdir_descriptor_t *desc)
 			schedule();
 		}
 	}
-	dfprintk(VFS, "NFS: find_dirent() returns %d\n", status);
 	return status;
 }
 
@@ -276,7 +282,8 @@ int find_dirent_index(nfs_readdir_descriptor_t *desc)
 		if (status)
 			break;
 
-		dfprintk(VFS, "NFS: found cookie %Lu at index %Ld\n", (unsigned long long)entry->cookie, desc->current_index);
+		dfprintk(DIRCACHE, "NFS: found cookie %Lu at index %Ld\n",
+				(unsigned long long)entry->cookie, desc->current_index);
 
 		if (desc->file->f_pos == desc->current_index) {
 			*desc->dir_cookie = entry->cookie;
@@ -288,7 +295,6 @@ int find_dirent_index(nfs_readdir_descriptor_t *desc)
 			schedule();
 		}
 	}
-	dfprintk(VFS, "NFS: find_dirent_index() returns %d\n", status);
 	return status;
 }
 
@@ -303,7 +309,9 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc)
 	struct page	*page;
 	int		status;
 
-	dfprintk(VFS, "NFS: find_dirent_page() searching directory page %ld\n", desc->page_index);
+	dfprintk(DIRCACHE, "NFS: %s: searching page %ld for target %Lu\n",
+			__FUNCTION__, desc->page_index,
+			(long long) *desc->dir_cookie);
 
 	page = read_cache_page(inode->i_mapping, desc->page_index,
 			       (filler_t *)nfs_readdir_filler, desc);
@@ -324,7 +332,7 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc)
 	if (status < 0)
 		dir_page_release(desc);
  out:
-	dfprintk(VFS, "NFS: find_dirent_page() returns %d\n", status);
+	dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, status);
 	return status;
  read_error:
 	page_cache_release(page);
@@ -346,13 +354,15 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
 
 	/* Always search-by-index from the beginning of the cache */
 	if (*desc->dir_cookie == 0) {
-		dfprintk(VFS, "NFS: readdir_search_pagecache() searching for offset %Ld\n", (long long)desc->file->f_pos);
+		dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for offset %Ld\n",
+				(long long)desc->file->f_pos);
 		desc->page_index = 0;
 		desc->entry->cookie = desc->entry->prev_cookie = 0;
 		desc->entry->eof = 0;
 		desc->current_index = 0;
 	} else
-		dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie);
+		dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for cookie %Lu\n",
+				(unsigned long long)*desc->dir_cookie);
 
 	for (;;) {
 		res = find_dirent_page(desc);
@@ -365,7 +375,8 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
 			schedule();
 		}
 	}
-	dfprintk(VFS, "NFS: readdir_search_pagecache() returned %d\n", res);
+
+	dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, res);
 	return res;
 }
 
@@ -390,7 +401,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
 	int		loop_count = 0,
 			res;
 
-	dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)entry->cookie);
+	dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n",
+			(unsigned long long)entry->cookie);
 
 	for(;;) {
 		unsigned d_type = DT_UNKNOWN;
@@ -427,7 +439,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
 	dir_page_release(desc);
 	if (dentry != NULL)
 		dput(dentry);
-	dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res);
+	dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
+			(unsigned long long)*desc->dir_cookie, res);
 	return res;
 }
 
@@ -453,7 +466,8 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 	struct page	*page = NULL;
 	int		status;
 
-	dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie);
+	dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
+			(unsigned long long)*desc->dir_cookie);
 
 	page = alloc_page(GFP_HIGHUSER);
 	if (!page) {
@@ -485,7 +499,8 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 	desc->entry->cookie = desc->entry->prev_cookie = 0;
 	desc->entry->eof = 0;
  out:
-	dfprintk(VFS, "NFS: uncached_readdir() returns %d\n", status);
+	dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
+			__FUNCTION__, status);
 	return status;
  out_release:
 	dir_page_release(desc);
@@ -507,6 +522,11 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	struct nfs_fattr fattr;
 	long		res;
 
+	dfprintk(VFS, "NFS: readdir(%s/%s) starting at cookie %Lu\n",
+			dentry->d_parent->d_name.name, dentry->d_name.name,
+			(long long)filp->f_pos);
+	nfs_inc_stats(inode, NFSIOS_VFSGETDENTS);
+
 	lock_kernel();
 
 	res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
@@ -566,9 +586,12 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		}
 	}
 	unlock_kernel();
-	if (res < 0)
-		return res;
-	return 0;
+	if (res > 0)
+		res = 0;
+	dfprintk(VFS, "NFS: readdir(%s/%s) returns %ld\n",
+			dentry->d_parent->d_name.name, dentry->d_name.name,
+			res);
+	return res;
 }
 
 loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
@@ -599,6 +622,10 @@ out:
  */
 int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync)
 {
+	dfprintk(VFS, "NFS: fsync_dir(%s/%s) datasync %d\n",
+			dentry->d_parent->d_name.name, dentry->d_name.name,
+			datasync);
+
 	return 0;
 }
 
@@ -713,6 +740,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
 	parent = dget_parent(dentry);
 	lock_kernel();
 	dir = parent->d_inode;
+	nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
 	inode = dentry->d_inode;
 
 	if (!inode) {
@@ -722,8 +750,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
 	}
 
 	if (is_bad_inode(inode)) {
-		dfprintk(VFS, "nfs_lookup_validate: %s/%s has dud inode\n",
-			dentry->d_parent->d_name.name, dentry->d_name.name);
+		dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n",
+				__FUNCTION__, dentry->d_parent->d_name.name,
+				dentry->d_name.name);
 		goto out_bad;
 	}
 
@@ -755,6 +784,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
  out_valid:
 	unlock_kernel();
 	dput(parent);
+	dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n",
+			__FUNCTION__, dentry->d_parent->d_name.name,
+			dentry->d_name.name);
 	return 1;
 out_zap_parent:
 	nfs_zap_caches(dir);
@@ -771,6 +803,9 @@ out_zap_parent:
 	d_drop(dentry);
 	unlock_kernel();
 	dput(parent);
+	dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n",
+			__FUNCTION__, dentry->d_parent->d_name.name,
+			dentry->d_name.name);
 	return 0;
 }
 
@@ -844,6 +879,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 
 	dfprintk(VFS, "NFS: lookup(%s/%s)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name);
+	nfs_inc_stats(dir, NFSIOS_VFSLOOKUP);
 
 	res = ERR_PTR(-ENAMETOOLONG);
 	if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
@@ -865,9 +901,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 		res = ERR_PTR(error);
 		goto out_unlock;
 	}
-	res = ERR_PTR(-EACCES);
 	inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
-	if (!inode)
+	res = (struct dentry *)inode;
+	if (IS_ERR(res))
 		goto out_unlock;
 no_entry:
 	res = d_add_unique(dentry, inode);
@@ -912,6 +948,9 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
 	struct dentry *res = NULL;
 	int error;
 
+	dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n",
+			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
+
 	/* Check that we are indeed trying to open this file */
 	if (!is_atomic_open(dir, nd))
 		goto no_open;
@@ -1057,7 +1096,7 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
 		return NULL;
 	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
 	inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
-	if (!inode) {
+	if (IS_ERR(inode)) {
 		dput(dentry);
 		return NULL;
 	}
@@ -1095,9 +1134,9 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
 		if (error < 0)
 			goto out_err;
 	}
-	error = -ENOMEM;
 	inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
-	if (inode == NULL)
+	error = PTR_ERR(inode);
+	if (IS_ERR(inode))
 		goto out_err;
 	d_instantiate(dentry, inode);
 	return 0;
@@ -1119,8 +1158,8 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
 	int error;
 	int open_flags = 0;
 
-	dfprintk(VFS, "NFS: create(%s/%ld, %s\n", dir->i_sb->s_id, 
-		dir->i_ino, dentry->d_name.name);
+	dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
+			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
 
 	attr.ia_mode = mode;
 	attr.ia_valid = ATTR_MODE;
@@ -1153,8 +1192,8 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 	struct iattr attr;
 	int status;
 
-	dfprintk(VFS, "NFS: mknod(%s/%ld, %s\n", dir->i_sb->s_id,
-		dir->i_ino, dentry->d_name.name);
+	dfprintk(VFS, "NFS: mknod(%s/%ld), %s\n",
+			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
 
 	if (!new_valid_dev(rdev))
 		return -EINVAL;
@@ -1186,8 +1225,8 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	struct iattr attr;
 	int error;
 
-	dfprintk(VFS, "NFS: mkdir(%s/%ld, %s\n", dir->i_sb->s_id,
-		dir->i_ino, dentry->d_name.name);
+	dfprintk(VFS, "NFS: mkdir(%s/%ld), %s\n",
+			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
 
 	attr.ia_valid = ATTR_MODE;
 	attr.ia_mode = mode | S_IFDIR;
@@ -1212,8 +1251,8 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
 {
 	int error;
 
-	dfprintk(VFS, "NFS: rmdir(%s/%ld, %s\n", dir->i_sb->s_id,
-		dir->i_ino, dentry->d_name.name);
+	dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n",
+			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
 
 	lock_kernel();
 	nfs_begin_data_update(dir);
@@ -1241,6 +1280,7 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
 	dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name, 
 		atomic_read(&dentry->d_count));
+	nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
 
 #ifdef NFS_PARANOIA
 if (!dentry->d_inode)
@@ -1268,8 +1308,8 @@ dentry->d_parent->d_name.name, dentry->d_name.name);
 		sillycounter++;
 		sprintf(suffix, "%*.*x", countersize, countersize, sillycounter);
 
-		dfprintk(VFS, "trying to rename %s to %s\n",
-			 dentry->d_name.name, silly);
+		dfprintk(VFS, "NFS: trying to rename %s to %s\n",
+				dentry->d_name.name, silly);
 		
 		sdentry = lookup_one_len(silly, dentry->d_parent, slen);
 		/*
@@ -1640,6 +1680,8 @@ int nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
 	struct rpc_cred *cred;
 	int res = 0;
 
+	nfs_inc_stats(inode, NFSIOS_VFSACCESS);
+
 	if (mask == 0)
 		goto out;
 	/* Is this sys_access() ? */
@@ -1679,13 +1721,15 @@ force_lookup:
 		res = PTR_ERR(cred);
 	unlock_kernel();
 out:
+	dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n",
+		inode->i_sb->s_id, inode->i_ino, mask, res);
 	return res;
 out_notsup:
 	res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
 	if (res == 0)
 		res = generic_permission(inode, mask, NULL);
 	unlock_kernel();
-	return res;
+	goto out;
 }
 
 /*
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 4ae2f3b33fe..0f583cb16dd 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -7,11 +7,11 @@
  *
  * There are important applications whose performance or correctness
  * depends on uncached access to file data.  Database clusters
- * (multiple copies of the same instance running on separate hosts) 
+ * (multiple copies of the same instance running on separate hosts)
  * implement their own cache coherency protocol that subsumes file
- * system cache protocols.  Applications that process datasets 
- * considerably larger than the client's memory do not always benefit 
- * from a local cache.  A streaming video server, for instance, has no 
+ * system cache protocols.  Applications that process datasets
+ * considerably larger than the client's memory do not always benefit
+ * from a local cache.  A streaming video server, for instance, has no
  * need to cache the contents of a file.
  *
  * When an application requests uncached I/O, all read and write requests
@@ -34,6 +34,7 @@
  * 08 Jun 2003	Port to 2.5 APIs  --cel
  * 31 Mar 2004	Handle direct I/O without VFS support  --cel
  * 15 Sep 2004	Parallel async reads  --cel
+ * 04 May 2005	support O_DIRECT with aio  --cel
  *
  */
 
@@ -54,10 +55,10 @@
 #include <asm/uaccess.h>
 #include <asm/atomic.h>
 
+#include "iostat.h"
+
 #define NFSDBG_FACILITY		NFSDBG_VFS
-#define MAX_DIRECTIO_SIZE	(4096UL << PAGE_SHIFT)
 
-static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty);
 static kmem_cache_t *nfs_direct_cachep;
 
 /*
@@ -65,38 +66,78 @@ static kmem_cache_t *nfs_direct_cachep;
  */
 struct nfs_direct_req {
 	struct kref		kref;		/* release manager */
-	struct list_head	list;		/* nfs_read_data structs */
-	wait_queue_head_t	wait;		/* wait for i/o completion */
+
+	/* I/O parameters */
+	struct list_head	list,		/* nfs_read/write_data structs */
+				rewrite_list;	/* saved nfs_write_data structs */
+	struct nfs_open_context	*ctx;		/* file open context info */
+	struct kiocb *		iocb;		/* controlling i/o request */
+	struct inode *		inode;		/* target file of i/o */
+	unsigned long		user_addr;	/* location of user's buffer */
+	size_t			user_count;	/* total bytes to move */
+	loff_t			pos;		/* starting offset in file */
 	struct page **		pages;		/* pages in our buffer */
 	unsigned int		npages;		/* count of pages */
-	atomic_t		complete,	/* i/os we're waiting for */
-				count,		/* bytes actually processed */
+
+	/* completion state */
+	spinlock_t		lock;		/* protect completion state */
+	int			outstanding;	/* i/os we're waiting for */
+	ssize_t			count,		/* bytes actually processed */
 				error;		/* any reported error */
+	struct completion	completion;	/* wait for i/o completion */
+
+	/* commit state */
+	struct nfs_write_data *	commit_data;	/* special write_data for commits */
+	int			flags;
+#define NFS_ODIRECT_DO_COMMIT		(1)	/* an unstable reply was received */
+#define NFS_ODIRECT_RESCHED_WRITES	(2)	/* write verification failed */
+	struct nfs_writeverf	verf;		/* unstable write verifier */
 };
 
+static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync);
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
 
 /**
- * nfs_get_user_pages - find and set up pages underlying user's buffer
- * rw: direction (read or write)
- * user_addr: starting address of this segment of user's buffer
- * count: size of this segment
- * @pages: returned array of page struct pointers underlying user's buffer
+ * nfs_direct_IO - NFS address space operation for direct I/O
+ * @rw: direction (read or write)
+ * @iocb: target I/O control block
+ * @iov: array of vectors that define I/O buffer
+ * @pos: offset in file to begin the operation
+ * @nr_segs: size of iovec array
+ *
+ * The presence of this routine in the address space ops vector means
+ * the NFS client supports direct I/O.  However, we shunt off direct
+ * read and write requests before the VFS gets them, so this method
+ * should never be called.
  */
-static inline int
-nfs_get_user_pages(int rw, unsigned long user_addr, size_t size,
-		struct page ***pages)
+ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
+{
+	struct dentry *dentry = iocb->ki_filp->f_dentry;
+
+	dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
+			dentry->d_name.name, (long long) pos, nr_segs);
+
+	return -EINVAL;
+}
+
+static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
+{
+	int i;
+	for (i = 0; i < npages; i++) {
+		struct page *page = pages[i];
+		if (do_dirty && !PageCompound(page))
+			set_page_dirty_lock(page);
+		page_cache_release(page);
+	}
+	kfree(pages);
+}
+
+static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, struct page ***pages)
 {
 	int result = -ENOMEM;
 	unsigned long page_count;
 	size_t array_size;
 
-	/* set an arbitrary limit to prevent type overflow */
-	/* XXX: this can probably be as large as INT_MAX */
-	if (size > MAX_DIRECTIO_SIZE) {
-		*pages = NULL;
-		return -EFBIG;
-	}
-
 	page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	page_count -= user_addr >> PAGE_SHIFT;
 
@@ -108,75 +149,117 @@ nfs_get_user_pages(int rw, unsigned long user_addr, size_t size,
 					page_count, (rw == READ), 0,
 					*pages, NULL);
 		up_read(&current->mm->mmap_sem);
-		/*
-		 * If we got fewer pages than expected from get_user_pages(),
-		 * the user buffer runs off the end of a mapping; return EFAULT.
-		 */
-		if (result >= 0 && result < page_count) {
-			nfs_free_user_pages(*pages, result, 0);
+		if (result != page_count) {
+			/*
+			 * If we got fewer pages than expected from
+			 * get_user_pages(), the user buffer runs off the
+			 * end of a mapping; return EFAULT.
+			 */
+			if (result >= 0) {
+				nfs_free_user_pages(*pages, result, 0);
+				result = -EFAULT;
+			} else
+				kfree(*pages);
 			*pages = NULL;
-			result = -EFAULT;
 		}
 	}
 	return result;
 }
 
-/**
- * nfs_free_user_pages - tear down page struct array
- * @pages: array of page struct pointers underlying target buffer
- * @npages: number of pages in the array
- * @do_dirty: dirty the pages as we release them
- */
-static void
-nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
+static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 {
-	int i;
-	for (i = 0; i < npages; i++) {
-		struct page *page = pages[i];
-		if (do_dirty && !PageCompound(page))
-			set_page_dirty_lock(page);
-		page_cache_release(page);
-	}
-	kfree(pages);
+	struct nfs_direct_req *dreq;
+
+	dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
+	if (!dreq)
+		return NULL;
+
+	kref_init(&dreq->kref);
+	init_completion(&dreq->completion);
+	INIT_LIST_HEAD(&dreq->list);
+	INIT_LIST_HEAD(&dreq->rewrite_list);
+	dreq->iocb = NULL;
+	dreq->ctx = NULL;
+	spin_lock_init(&dreq->lock);
+	dreq->outstanding = 0;
+	dreq->count = 0;
+	dreq->error = 0;
+	dreq->flags = 0;
+
+	return dreq;
 }
 
-/**
- * nfs_direct_req_release - release  nfs_direct_req structure for direct read
- * @kref: kref object embedded in an nfs_direct_req structure
- *
- */
 static void nfs_direct_req_release(struct kref *kref)
 {
 	struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
+
+	if (dreq->ctx != NULL)
+		put_nfs_open_context(dreq->ctx);
 	kmem_cache_free(nfs_direct_cachep, dreq);
 }
 
-/**
- * nfs_direct_read_alloc - allocate nfs_read_data structures for direct read
- * @count: count of bytes for the read request
- * @rsize: local rsize setting
+/*
+ * Collects and returns the final error value/byte-count.
+ */
+static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq)
+{
+	ssize_t result = -EIOCBQUEUED;
+
+	/* Async requests don't wait here */
+	if (dreq->iocb)
+		goto out;
+
+	result = wait_for_completion_interruptible(&dreq->completion);
+
+	if (!result)
+		result = dreq->error;
+	if (!result)
+		result = dreq->count;
+
+out:
+	kref_put(&dreq->kref, nfs_direct_req_release);
+	return (ssize_t) result;
+}
+
+/*
+ * We must hold a reference to all the pages in this direct read request
+ * until the RPCs complete.  This could be long *after* we are woken up in
+ * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
  *
+ * In addition, synchronous I/O uses a stack-allocated iocb.  Thus we
+ * can't trust the iocb is still valid here if this is a synchronous
+ * request.  If the waiter is woken prematurely, the iocb is long gone.
+ */
+static void nfs_direct_complete(struct nfs_direct_req *dreq)
+{
+	nfs_free_user_pages(dreq->pages, dreq->npages, 1);
+
+	if (dreq->iocb) {
+		long res = (long) dreq->error;
+		if (!res)
+			res = (long) dreq->count;
+		aio_complete(dreq->iocb, res, 0);
+	}
+	complete_all(&dreq->completion);
+
+	kref_put(&dreq->kref, nfs_direct_req_release);
+}
+
+/*
  * Note we also set the number of requests we have in the dreq when we are
  * done.  This prevents races with I/O completion so we will always wait
  * until all requests have been dispatched and completed.
  */
-static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int rsize)
+static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize)
 {
 	struct list_head *list;
 	struct nfs_direct_req *dreq;
-	unsigned int reads = 0;
 	unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 
-	dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
+	dreq = nfs_direct_req_alloc();
 	if (!dreq)
 		return NULL;
 
-	kref_init(&dreq->kref);
-	init_waitqueue_head(&dreq->wait);
-	INIT_LIST_HEAD(&dreq->list);
-	atomic_set(&dreq->count, 0);
-	atomic_set(&dreq->error, 0);
-
 	list = &dreq->list;
 	for(;;) {
 		struct nfs_read_data *data = nfs_readdata_alloc(rpages);
@@ -196,72 +279,70 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int
 		list_add(&data->pages, list);
 
 		data->req = (struct nfs_page *) dreq;
-		reads++;
+		dreq->outstanding++;
 		if (nbytes <= rsize)
 			break;
 		nbytes -= rsize;
 	}
 	kref_get(&dreq->kref);
-	atomic_set(&dreq->complete, reads);
 	return dreq;
 }
 
-/**
- * nfs_direct_read_result - handle a read reply for a direct read request
- * @data: address of NFS READ operation control block
- * @status: status of this NFS READ operation
- *
- * We must hold a reference to all the pages in this direct read request
- * until the RPCs complete.  This could be long *after* we are woken up in
- * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server).
- */
-static void nfs_direct_read_result(struct nfs_read_data *data, int status)
+static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
 {
+	struct nfs_read_data *data = calldata;
 	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
 
-	if (likely(status >= 0))
-		atomic_add(data->res.count, &dreq->count);
+	if (nfs_readpage_result(task, data) != 0)
+		return;
+
+	spin_lock(&dreq->lock);
+
+	if (likely(task->tk_status >= 0))
+		dreq->count += data->res.count;
 	else
-		atomic_set(&dreq->error, status);
+		dreq->error = task->tk_status;
 
-	if (unlikely(atomic_dec_and_test(&dreq->complete))) {
-		nfs_free_user_pages(dreq->pages, dreq->npages, 1);
-		wake_up(&dreq->wait);
-		kref_put(&dreq->kref, nfs_direct_req_release);
+	if (--dreq->outstanding) {
+		spin_unlock(&dreq->lock);
+		return;
 	}
+
+	spin_unlock(&dreq->lock);
+	nfs_direct_complete(dreq);
 }
 
-/**
- * nfs_direct_read_schedule - dispatch NFS READ operations for a direct read
- * @dreq: address of nfs_direct_req struct for this request
- * @inode: target inode
- * @ctx: target file open context
- * @user_addr: starting address of this segment of user's buffer
- * @count: size of this segment
- * @file_offset: offset in file to begin the operation
- *
+static const struct rpc_call_ops nfs_read_direct_ops = {
+	.rpc_call_done = nfs_direct_read_result,
+	.rpc_release = nfs_readdata_release,
+};
+
+/*
  * For each nfs_read_data struct that was allocated on the list, dispatch
  * an NFS READ operation
  */
-static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
-		struct inode *inode, struct nfs_open_context *ctx,
-		unsigned long user_addr, size_t count, loff_t file_offset)
+static void nfs_direct_read_schedule(struct nfs_direct_req *dreq)
 {
+	struct nfs_open_context *ctx = dreq->ctx;
+	struct inode *inode = ctx->dentry->d_inode;
 	struct list_head *list = &dreq->list;
 	struct page **pages = dreq->pages;
+	size_t count = dreq->user_count;
+	loff_t pos = dreq->pos;
+	size_t rsize = NFS_SERVER(inode)->rsize;
 	unsigned int curpage, pgbase;
-	unsigned int rsize = NFS_SERVER(inode)->rsize;
 
 	curpage = 0;
-	pgbase = user_addr & ~PAGE_MASK;
+	pgbase = dreq->user_addr & ~PAGE_MASK;
 	do {
 		struct nfs_read_data *data;
-		unsigned int bytes;
+		size_t bytes;
 
 		bytes = rsize;
 		if (count < rsize)
 			bytes = count;
 
+		BUG_ON(list_empty(list));
 		data = list_entry(list->next, struct nfs_read_data, pages);
 		list_del_init(&data->pages);
 
@@ -269,7 +350,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
 		data->cred = ctx->cred;
 		data->args.fh = NFS_FH(inode);
 		data->args.context = ctx;
-		data->args.offset = file_offset;
+		data->args.offset = pos;
 		data->args.pgbase = pgbase;
 		data->args.pages = &pages[curpage];
 		data->args.count = bytes;
@@ -277,77 +358,38 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
 		data->res.eof = 0;
 		data->res.count = bytes;
 
+		rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
+				&nfs_read_direct_ops, data);
 		NFS_PROTO(inode)->read_setup(data);
 
 		data->task.tk_cookie = (unsigned long) inode;
-		data->complete = nfs_direct_read_result;
 
 		lock_kernel();
 		rpc_execute(&data->task);
 		unlock_kernel();
 
-		dfprintk(VFS, "NFS: %4d initiated direct read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+		dfprintk(VFS, "NFS: %5u initiated direct read call (req %s/%Ld, %zu bytes @ offset %Lu)\n",
 				data->task.tk_pid,
 				inode->i_sb->s_id,
 				(long long)NFS_FILEID(inode),
 				bytes,
 				(unsigned long long)data->args.offset);
 
-		file_offset += bytes;
+		pos += bytes;
 		pgbase += bytes;
 		curpage += pgbase >> PAGE_SHIFT;
 		pgbase &= ~PAGE_MASK;
 
 		count -= bytes;
 	} while (count != 0);
+	BUG_ON(!list_empty(list));
 }
 
-/**
- * nfs_direct_read_wait - wait for I/O completion for direct reads
- * @dreq: request on which we are to wait
- * @intr: whether or not this wait can be interrupted
- *
- * Collects and returns the final error value/byte-count.
- */
-static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr)
-{
-	int result = 0;
-
-	if (intr) {
-		result = wait_event_interruptible(dreq->wait,
-					(atomic_read(&dreq->complete) == 0));
-	} else {
-		wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0));
-	}
-
-	if (!result)
-		result = atomic_read(&dreq->error);
-	if (!result)
-		result = atomic_read(&dreq->count);
-
-	kref_put(&dreq->kref, nfs_direct_req_release);
-	return (ssize_t) result;
-}
-
-/**
- * nfs_direct_read_seg - Read in one iov segment.  Generate separate
- *                        read RPCs for each "rsize" bytes.
- * @inode: target inode
- * @ctx: target file open context
- * @user_addr: starting address of this segment of user's buffer
- * @count: size of this segment
- * @file_offset: offset in file to begin the operation
- * @pages: array of addresses of page structs defining user's buffer
- * @nr_pages: number of pages in the array
- *
- */
-static ssize_t nfs_direct_read_seg(struct inode *inode,
-		struct nfs_open_context *ctx, unsigned long user_addr,
-		size_t count, loff_t file_offset, struct page **pages,
-		unsigned int nr_pages)
+static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, unsigned int nr_pages)
 {
 	ssize_t result;
 	sigset_t oldset;
+	struct inode *inode = iocb->ki_filp->f_mapping->host;
 	struct rpc_clnt *clnt = NFS_CLIENT(inode);
 	struct nfs_direct_req *dreq;
 
@@ -355,284 +397,350 @@ static ssize_t nfs_direct_read_seg(struct inode *inode,
 	if (!dreq)
 		return -ENOMEM;
 
+	dreq->user_addr = user_addr;
+	dreq->user_count = count;
+	dreq->pos = pos;
 	dreq->pages = pages;
 	dreq->npages = nr_pages;
+	dreq->inode = inode;
+	dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
+	if (!is_sync_kiocb(iocb))
+		dreq->iocb = iocb;
 
+	nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count);
 	rpc_clnt_sigmask(clnt, &oldset);
-	nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count,
-				 file_offset);
-	result = nfs_direct_read_wait(dreq, clnt->cl_intr);
+	nfs_direct_read_schedule(dreq);
+	result = nfs_direct_wait(dreq);
 	rpc_clnt_sigunmask(clnt, &oldset);
 
 	return result;
 }
 
-/**
- * nfs_direct_read - For each iov segment, map the user's buffer
- *                   then generate read RPCs.
- * @inode: target inode
- * @ctx: target file open context
- * @iov: array of vectors that define I/O buffer
- * file_offset: offset in file to begin the operation
- * nr_segs: size of iovec array
- *
- * We've already pushed out any non-direct writes so that this read
- * will see them when we read from the server.
- */
-static ssize_t
-nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx,
-		const struct iovec *iov, loff_t file_offset,
-		unsigned long nr_segs)
+static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
 {
-	ssize_t tot_bytes = 0;
-	unsigned long seg = 0;
-
-	while ((seg < nr_segs) && (tot_bytes >= 0)) {
-		ssize_t result;
-		int page_count;
-		struct page **pages;
-		const struct iovec *vec = &iov[seg++];
-		unsigned long user_addr = (unsigned long) vec->iov_base;
-		size_t size = vec->iov_len;
-
-                page_count = nfs_get_user_pages(READ, user_addr, size, &pages);
-                if (page_count < 0) {
-                        nfs_free_user_pages(pages, 0, 0);
-			if (tot_bytes > 0)
-				break;
-                        return page_count;
-                }
+	list_splice_init(&dreq->rewrite_list, &dreq->list);
+	while (!list_empty(&dreq->list)) {
+		struct nfs_write_data *data = list_entry(dreq->list.next, struct nfs_write_data, pages);
+		list_del(&data->pages);
+		nfs_writedata_release(data);
+	}
+}
 
-		result = nfs_direct_read_seg(inode, ctx, user_addr, size,
-				file_offset, pages, page_count);
+#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
+{
+	struct list_head *pos;
 
-		if (result <= 0) {
-			if (tot_bytes > 0)
-				break;
-			return result;
-		}
-		tot_bytes += result;
-		file_offset += result;
-		if (result < size)
-			break;
+	list_splice_init(&dreq->rewrite_list, &dreq->list);
+	list_for_each(pos, &dreq->list)
+		dreq->outstanding++;
+	dreq->count = 0;
+
+	nfs_direct_write_schedule(dreq, FLUSH_STABLE);
+}
+
+static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
+{
+	struct nfs_write_data *data = calldata;
+	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+
+	/* Call the NFS version-specific code */
+	if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
+		return;
+	if (unlikely(task->tk_status < 0)) {
+		dreq->error = task->tk_status;
+		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+	}
+	if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
+		dprintk("NFS: %5u commit verify failed\n", task->tk_pid);
+		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
 	}
 
-	return tot_bytes;
+	dprintk("NFS: %5u commit returned %d\n", task->tk_pid, task->tk_status);
+	nfs_direct_write_complete(dreq, data->inode);
 }
 
-/**
- * nfs_direct_write_seg - Write out one iov segment.  Generate separate
- *                        write RPCs for each "wsize" bytes, then commit.
- * @inode: target inode
- * @ctx: target file open context
- * user_addr: starting address of this segment of user's buffer
- * count: size of this segment
- * file_offset: offset in file to begin the operation
- * @pages: array of addresses of page structs defining user's buffer
- * nr_pages: size of pages array
- */
-static ssize_t nfs_direct_write_seg(struct inode *inode,
-		struct nfs_open_context *ctx, unsigned long user_addr,
-		size_t count, loff_t file_offset, struct page **pages,
-		int nr_pages)
+static const struct rpc_call_ops nfs_commit_direct_ops = {
+	.rpc_call_done = nfs_direct_commit_result,
+	.rpc_release = nfs_commit_release,
+};
+
+static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 {
-	const unsigned int wsize = NFS_SERVER(inode)->wsize;
-	size_t request;
-	int curpage, need_commit;
-	ssize_t result, tot_bytes;
-	struct nfs_writeverf first_verf;
-	struct nfs_write_data *wdata;
-
-	wdata = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
-	if (!wdata)
-		return -ENOMEM;
+	struct nfs_write_data *data = dreq->commit_data;
+	struct rpc_task *task = &data->task;
 
-	wdata->inode = inode;
-	wdata->cred = ctx->cred;
-	wdata->args.fh = NFS_FH(inode);
-	wdata->args.context = ctx;
-	wdata->args.stable = NFS_UNSTABLE;
-	if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize)
-		wdata->args.stable = NFS_FILE_SYNC;
-	wdata->res.fattr = &wdata->fattr;
-	wdata->res.verf = &wdata->verf;
+	data->inode = dreq->inode;
+	data->cred = dreq->ctx->cred;
 
-	nfs_begin_data_update(inode);
-retry:
-	need_commit = 0;
-	tot_bytes = 0;
-	curpage = 0;
-	request = count;
-	wdata->args.pgbase = user_addr & ~PAGE_MASK;
-	wdata->args.offset = file_offset;
-	do {
-		wdata->args.count = request;
-		if (wdata->args.count > wsize)
-			wdata->args.count = wsize;
-		wdata->args.pages = &pages[curpage];
+	data->args.fh = NFS_FH(data->inode);
+	data->args.offset = dreq->pos;
+	data->args.count = dreq->user_count;
+	data->res.count = 0;
+	data->res.fattr = &data->fattr;
+	data->res.verf = &data->verf;
 
-		dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n",
-			wdata->args.count, (long long) wdata->args.offset,
-			user_addr + tot_bytes, wdata->args.pgbase, curpage);
+	rpc_init_task(&data->task, NFS_CLIENT(dreq->inode), RPC_TASK_ASYNC,
+				&nfs_commit_direct_ops, data);
+	NFS_PROTO(data->inode)->commit_setup(data, 0);
 
-		lock_kernel();
-		result = NFS_PROTO(inode)->write(wdata);
-		unlock_kernel();
+	data->task.tk_priority = RPC_PRIORITY_NORMAL;
+	data->task.tk_cookie = (unsigned long)data->inode;
+	/* Note: task.tk_ops->rpc_release will free dreq->commit_data */
+	dreq->commit_data = NULL;
 
-		if (result <= 0) {
-			if (tot_bytes > 0)
-				break;
-			goto out;
-		}
+	dprintk("NFS: %5u initiated commit call\n", task->tk_pid);
 
-		if (tot_bytes == 0)
-			memcpy(&first_verf.verifier, &wdata->verf.verifier,
-						sizeof(first_verf.verifier));
-		if (wdata->verf.committed != NFS_FILE_SYNC) {
-			need_commit = 1;
-			if (memcmp(&first_verf.verifier, &wdata->verf.verifier,
-					sizeof(first_verf.verifier)))
-				goto sync_retry;
-		}
+	lock_kernel();
+	rpc_execute(&data->task);
+	unlock_kernel();
+}
 
-		tot_bytes += result;
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+{
+	int flags = dreq->flags;
 
-		/* in case of a short write: stop now, let the app recover */
-		if (result < wdata->args.count)
+	dreq->flags = 0;
+	switch (flags) {
+		case NFS_ODIRECT_DO_COMMIT:
+			nfs_direct_commit_schedule(dreq);
 			break;
+		case NFS_ODIRECT_RESCHED_WRITES:
+			nfs_direct_write_reschedule(dreq);
+			break;
+		default:
+			nfs_end_data_update(inode);
+			if (dreq->commit_data != NULL)
+				nfs_commit_free(dreq->commit_data);
+			nfs_direct_free_writedata(dreq);
+			nfs_direct_complete(dreq);
+	}
+}
 
-		wdata->args.offset += result;
-		wdata->args.pgbase += result;
-		curpage += wdata->args.pgbase >> PAGE_SHIFT;
-		wdata->args.pgbase &= ~PAGE_MASK;
-		request -= result;
-	} while (request != 0);
+static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
+{
+	dreq->commit_data = nfs_commit_alloc(0);
+	if (dreq->commit_data != NULL)
+		dreq->commit_data->req = (struct nfs_page *) dreq;
+}
+#else
+static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
+{
+	dreq->commit_data = NULL;
+}
 
-	/*
-	 * Commit data written so far, even in the event of an error
-	 */
-	if (need_commit) {
-		wdata->args.count = tot_bytes;
-		wdata->args.offset = file_offset;
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+{
+	nfs_end_data_update(inode);
+	nfs_direct_free_writedata(dreq);
+	nfs_direct_complete(dreq);
+}
+#endif
 
-		lock_kernel();
-		result = NFS_PROTO(inode)->commit(wdata);
-		unlock_kernel();
+static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize)
+{
+	struct list_head *list;
+	struct nfs_direct_req *dreq;
+	unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+
+	dreq = nfs_direct_req_alloc();
+	if (!dreq)
+		return NULL;
+
+	list = &dreq->list;
+	for(;;) {
+		struct nfs_write_data *data = nfs_writedata_alloc(wpages);
 
-		if (result < 0 || memcmp(&first_verf.verifier,
-					 &wdata->verf.verifier,
-					 sizeof(first_verf.verifier)) != 0)
-			goto sync_retry;
+		if (unlikely(!data)) {
+			while (!list_empty(list)) {
+				data = list_entry(list->next,
+						  struct nfs_write_data, pages);
+				list_del(&data->pages);
+				nfs_writedata_free(data);
+			}
+			kref_put(&dreq->kref, nfs_direct_req_release);
+			return NULL;
+		}
+
+		INIT_LIST_HEAD(&data->pages);
+		list_add(&data->pages, list);
+
+		data->req = (struct nfs_page *) dreq;
+		dreq->outstanding++;
+		if (nbytes <= wsize)
+			break;
+		nbytes -= wsize;
 	}
-	result = tot_bytes;
 
-out:
-	nfs_end_data_update(inode);
-	nfs_writedata_free(wdata);
-	return result;
+	nfs_alloc_commit_data(dreq);
 
-sync_retry:
-	wdata->args.stable = NFS_FILE_SYNC;
-	goto retry;
+	kref_get(&dreq->kref);
+	return dreq;
 }
 
-/**
- * nfs_direct_write - For each iov segment, map the user's buffer
- *                    then generate write and commit RPCs.
- * @inode: target inode
- * @ctx: target file open context
- * @iov: array of vectors that define I/O buffer
- * file_offset: offset in file to begin the operation
- * nr_segs: size of iovec array
- *
- * Upon return, generic_file_direct_IO invalidates any cached pages
- * that non-direct readers might access, so they will pick up these
- * writes immediately.
- */
-static ssize_t nfs_direct_write(struct inode *inode,
-		struct nfs_open_context *ctx, const struct iovec *iov,
-		loff_t file_offset, unsigned long nr_segs)
+static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
 {
-	ssize_t tot_bytes = 0;
-	unsigned long seg = 0;
-
-	while ((seg < nr_segs) && (tot_bytes >= 0)) {
-		ssize_t result;
-		int page_count;
-		struct page **pages;
-		const struct iovec *vec = &iov[seg++];
-		unsigned long user_addr = (unsigned long) vec->iov_base;
-		size_t size = vec->iov_len;
-
-                page_count = nfs_get_user_pages(WRITE, user_addr, size, &pages);
-                if (page_count < 0) {
-                        nfs_free_user_pages(pages, 0, 0);
-			if (tot_bytes > 0)
-				break;
-                        return page_count;
-                }
+	struct nfs_write_data *data = calldata;
+	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+	int status = task->tk_status;
+
+	if (nfs_writeback_done(task, data) != 0)
+		return;
+
+	spin_lock(&dreq->lock);
 
-		result = nfs_direct_write_seg(inode, ctx, user_addr, size,
-				file_offset, pages, page_count);
-		nfs_free_user_pages(pages, page_count, 0);
+	if (likely(status >= 0))
+		dreq->count += data->res.count;
+	else
+		dreq->error = task->tk_status;
 
-		if (result <= 0) {
-			if (tot_bytes > 0)
+	if (data->res.verf->committed != NFS_FILE_SYNC) {
+		switch (dreq->flags) {
+			case 0:
+				memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf));
+				dreq->flags = NFS_ODIRECT_DO_COMMIT;
 				break;
-			return result;
+			case NFS_ODIRECT_DO_COMMIT:
+				if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) {
+					dprintk("NFS: %5u write verify failed\n", task->tk_pid);
+					dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+				}
 		}
-		tot_bytes += result;
-		file_offset += result;
-		if (result < size)
-			break;
 	}
-	return tot_bytes;
+	/* In case we have to resend */
+	data->args.stable = NFS_FILE_SYNC;
+
+	spin_unlock(&dreq->lock);
 }
 
-/**
- * nfs_direct_IO - NFS address space operation for direct I/O
- * rw: direction (read or write)
- * @iocb: target I/O control block
- * @iov: array of vectors that define I/O buffer
- * file_offset: offset in file to begin the operation
- * nr_segs: size of iovec array
- *
+/*
+ * NB: Return the value of the first error return code.  Subsequent
+ *     errors after the first one are ignored.
  */
-ssize_t
-nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-		loff_t file_offset, unsigned long nr_segs)
+static void nfs_direct_write_release(void *calldata)
 {
-	ssize_t result = -EINVAL;
-	struct file *file = iocb->ki_filp;
-	struct nfs_open_context *ctx;
-	struct dentry *dentry = file->f_dentry;
-	struct inode *inode = dentry->d_inode;
+	struct nfs_write_data *data = calldata;
+	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
 
-	/*
-	 * No support for async yet
-	 */
-	if (!is_sync_kiocb(iocb))
-		return result;
-
-	ctx = (struct nfs_open_context *)file->private_data;
-	switch (rw) {
-	case READ:
-		dprintk("NFS: direct_IO(read) (%s) off/no(%Lu/%lu)\n",
-				dentry->d_name.name, file_offset, nr_segs);
-
-		result = nfs_direct_read(inode, ctx, iov,
-						file_offset, nr_segs);
-		break;
-	case WRITE:
-		dprintk("NFS: direct_IO(write) (%s) off/no(%Lu/%lu)\n",
-				dentry->d_name.name, file_offset, nr_segs);
-
-		result = nfs_direct_write(inode, ctx, iov,
-						file_offset, nr_segs);
-		break;
-	default:
-		break;
+	spin_lock(&dreq->lock);
+	if (--dreq->outstanding) {
+		spin_unlock(&dreq->lock);
+		return;
 	}
+	spin_unlock(&dreq->lock);
+
+	nfs_direct_write_complete(dreq, data->inode);
+}
+
+static const struct rpc_call_ops nfs_write_direct_ops = {
+	.rpc_call_done = nfs_direct_write_result,
+	.rpc_release = nfs_direct_write_release,
+};
+
+/*
+ * For each nfs_write_data struct that was allocated on the list, dispatch
+ * an NFS WRITE operation
+ */
+static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync)
+{
+	struct nfs_open_context *ctx = dreq->ctx;
+	struct inode *inode = ctx->dentry->d_inode;
+	struct list_head *list = &dreq->list;
+	struct page **pages = dreq->pages;
+	size_t count = dreq->user_count;
+	loff_t pos = dreq->pos;
+	size_t wsize = NFS_SERVER(inode)->wsize;
+	unsigned int curpage, pgbase;
+
+	curpage = 0;
+	pgbase = dreq->user_addr & ~PAGE_MASK;
+	do {
+		struct nfs_write_data *data;
+		size_t bytes;
+
+		bytes = wsize;
+		if (count < wsize)
+			bytes = count;
+
+		BUG_ON(list_empty(list));
+		data = list_entry(list->next, struct nfs_write_data, pages);
+		list_move_tail(&data->pages, &dreq->rewrite_list);
+
+		data->inode = inode;
+		data->cred = ctx->cred;
+		data->args.fh = NFS_FH(inode);
+		data->args.context = ctx;
+		data->args.offset = pos;
+		data->args.pgbase = pgbase;
+		data->args.pages = &pages[curpage];
+		data->args.count = bytes;
+		data->res.fattr = &data->fattr;
+		data->res.count = bytes;
+		data->res.verf = &data->verf;
+
+		rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
+				&nfs_write_direct_ops, data);
+		NFS_PROTO(inode)->write_setup(data, sync);
+
+		data->task.tk_priority = RPC_PRIORITY_NORMAL;
+		data->task.tk_cookie = (unsigned long) inode;
+
+		lock_kernel();
+		rpc_execute(&data->task);
+		unlock_kernel();
+
+		dfprintk(VFS, "NFS: %5u initiated direct write call (req %s/%Ld, %zu bytes @ offset %Lu)\n",
+				data->task.tk_pid,
+				inode->i_sb->s_id,
+				(long long)NFS_FILEID(inode),
+				bytes,
+				(unsigned long long)data->args.offset);
+
+		pos += bytes;
+		pgbase += bytes;
+		curpage += pgbase >> PAGE_SHIFT;
+		pgbase &= ~PAGE_MASK;
+
+		count -= bytes;
+	} while (count != 0);
+	BUG_ON(!list_empty(list));
+}
+
+static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, int nr_pages)
+{
+	ssize_t result;
+	sigset_t oldset;
+	struct inode *inode = iocb->ki_filp->f_mapping->host;
+	struct rpc_clnt *clnt = NFS_CLIENT(inode);
+	struct nfs_direct_req *dreq;
+	size_t wsize = NFS_SERVER(inode)->wsize;
+	int sync = 0;
+
+	dreq = nfs_direct_write_alloc(count, wsize);
+	if (!dreq)
+		return -ENOMEM;
+	if (dreq->commit_data == NULL || count < wsize)
+		sync = FLUSH_STABLE;
+
+	dreq->user_addr = user_addr;
+	dreq->user_count = count;
+	dreq->pos = pos;
+	dreq->pages = pages;
+	dreq->npages = nr_pages;
+	dreq->inode = inode;
+	dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
+	if (!is_sync_kiocb(iocb))
+		dreq->iocb = iocb;
+
+	nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, count);
+
+	nfs_begin_data_update(inode);
+
+	rpc_clnt_sigmask(clnt, &oldset);
+	nfs_direct_write_schedule(dreq, sync);
+	result = nfs_direct_wait(dreq);
+	rpc_clnt_sigunmask(clnt, &oldset);
+
 	return result;
 }
 
@@ -640,49 +748,40 @@ nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
  * nfs_file_direct_read - file direct read operation for NFS files
  * @iocb: target I/O control block
  * @buf: user's buffer into which to read data
- * count: number of bytes to read
- * pos: byte offset in file where reading starts
+ * @count: number of bytes to read
+ * @pos: byte offset in file where reading starts
  *
  * We use this function for direct reads instead of calling
  * generic_file_aio_read() in order to avoid gfar's check to see if
  * the request starts before the end of the file.  For that check
  * to work, we must generate a GETATTR before each direct read, and
  * even then there is a window between the GETATTR and the subsequent
- * READ where the file size could change.  So our preference is simply
+ * READ where the file size could change.  Our preference is simply
  * to do all reads the application wants, and the server will take
  * care of managing the end of file boundary.
- * 
+ *
  * This function also eliminates unnecessarily updating the file's
  * atime locally, as the NFS server sets the file's atime, and this
  * client must read the updated atime from the server back into its
  * cache.
  */
-ssize_t
-nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
+ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
 {
 	ssize_t retval = -EINVAL;
-	loff_t *ppos = &iocb->ki_pos;
+	int page_count;
+	struct page **pages;
 	struct file *file = iocb->ki_filp;
-	struct nfs_open_context *ctx =
-			(struct nfs_open_context *) file->private_data;
 	struct address_space *mapping = file->f_mapping;
-	struct inode *inode = mapping->host;
-	struct iovec iov = {
-		.iov_base = buf,
-		.iov_len = count,
-	};
 
 	dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n",
 		file->f_dentry->d_parent->d_name.name,
 		file->f_dentry->d_name.name,
 		(unsigned long) count, (long long) pos);
 
-	if (!is_sync_kiocb(iocb))
-		goto out;
 	if (count < 0)
 		goto out;
 	retval = -EFAULT;
-	if (!access_ok(VERIFY_WRITE, iov.iov_base, iov.iov_len))
+	if (!access_ok(VERIFY_WRITE, buf, count))
 		goto out;
 	retval = 0;
 	if (!count)
@@ -692,9 +791,16 @@ nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t
 	if (retval)
 		goto out;
 
-	retval = nfs_direct_read(inode, ctx, &iov, pos, 1);
+	retval = nfs_get_user_pages(READ, (unsigned long) buf,
+						count, &pages);
+	if (retval < 0)
+		goto out;
+	page_count = retval;
+
+	retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos,
+						pages, page_count);
 	if (retval > 0)
-		*ppos = pos + retval;
+		iocb->ki_pos = pos + retval;
 
 out:
 	return retval;
@@ -704,8 +810,8 @@ out:
  * nfs_file_direct_write - file direct write operation for NFS files
  * @iocb: target I/O control block
  * @buf: user's buffer from which to write data
- * count: number of bytes to write
- * pos: byte offset in file where writing starts
+ * @count: number of bytes to write
+ * @pos: byte offset in file where writing starts
  *
  * We use this function for direct writes instead of calling
  * generic_file_aio_write() in order to avoid taking the inode
@@ -725,28 +831,19 @@ out:
  * Note that O_APPEND is not supported for NFS direct writes, as there
  * is no atomic O_APPEND write facility in the NFS protocol.
  */
-ssize_t
-nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
 {
 	ssize_t retval;
+	int page_count;
+	struct page **pages;
 	struct file *file = iocb->ki_filp;
-	struct nfs_open_context *ctx =
-			(struct nfs_open_context *) file->private_data;
 	struct address_space *mapping = file->f_mapping;
-	struct inode *inode = mapping->host;
-	struct iovec iov = {
-		.iov_base = (char __user *)buf,
-	};
 
 	dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n",
 		file->f_dentry->d_parent->d_name.name,
 		file->f_dentry->d_name.name,
 		(unsigned long) count, (long long) pos);
 
-	retval = -EINVAL;
-	if (!is_sync_kiocb(iocb))
-		goto out;
-
 	retval = generic_write_checks(file, &pos, &count, 0);
 	if (retval)
 		goto out;
@@ -757,19 +854,35 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count,
 	retval = 0;
 	if (!count)
 		goto out;
-	iov.iov_len = count,
 
 	retval = -EFAULT;
-	if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len))
+	if (!access_ok(VERIFY_READ, buf, count))
 		goto out;
 
 	retval = nfs_sync_mapping(mapping);
 	if (retval)
 		goto out;
 
-	retval = nfs_direct_write(inode, ctx, &iov, pos, 1);
+	retval = nfs_get_user_pages(WRITE, (unsigned long) buf,
+						count, &pages);
+	if (retval < 0)
+		goto out;
+	page_count = retval;
+
+	retval = nfs_direct_write(iocb, (unsigned long) buf, count,
+					pos, pages, page_count);
+
+	/*
+	 * XXX: nfs_end_data_update() already ensures this file's
+	 *      cached data is subsequently invalidated.  Do we really
+	 *      need to call invalidate_inode_pages2() again here?
+	 *
+	 *      For aio writes, this invalidation will almost certainly
+	 *      occur before the writes complete.  Kind of racey.
+	 */
 	if (mapping->nrpages)
 		invalidate_inode_pages2(mapping);
+
 	if (retval > 0)
 		iocb->ki_pos = pos + retval;
 
@@ -777,6 +890,10 @@ out:
 	return retval;
 }
 
+/**
+ * nfs_init_directcache - create a slab cache for nfs_direct_req structures
+ *
+ */
 int nfs_init_directcache(void)
 {
 	nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
@@ -790,6 +907,10 @@ int nfs_init_directcache(void)
 	return 0;
 }
 
+/**
+ * nfs_init_directcache - destroy the slab cache for nfs_direct_req structures
+ *
+ */
 void nfs_destroy_directcache(void)
 {
 	if (kmem_cache_destroy(nfs_direct_cachep))
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 7a79fbe9f53..5263b2864a4 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -32,6 +32,7 @@
 #include <asm/system.h>
 
 #include "delegation.h"
+#include "iostat.h"
 
 #define NFSDBG_FACILITY		NFSDBG_FILE
 
@@ -102,18 +103,15 @@ static int nfs_check_flags(int flags)
 static int
 nfs_file_open(struct inode *inode, struct file *filp)
 {
-	struct nfs_server *server = NFS_SERVER(inode);
-	int (*open)(struct inode *, struct file *);
 	int res;
 
 	res = nfs_check_flags(filp->f_flags);
 	if (res)
 		return res;
 
+	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
 	lock_kernel();
-	/* Do NFSv4 open() call */
-	if ((open = server->rpc_ops->file_open) != NULL)
-		res = open(inode, filp);
+	res = NFS_SERVER(inode)->rpc_ops->file_open(inode, filp);
 	unlock_kernel();
 	return res;
 }
@@ -124,6 +122,7 @@ nfs_file_release(struct inode *inode, struct file *filp)
 	/* Ensure that dirty pages are flushed out with the right creds */
 	if (filp->f_mode & FMODE_WRITE)
 		filemap_fdatawrite(filp->f_mapping);
+	nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
 	return NFS_PROTO(inode)->file_release(inode, filp);
 }
 
@@ -199,6 +198,7 @@ nfs_file_flush(struct file *file)
 
 	if ((file->f_mode & FMODE_WRITE) == 0)
 		return 0;
+	nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
 	lock_kernel();
 	/* Ensure that data+attribute caches are up to date after close() */
 	status = nfs_wb_all(inode);
@@ -229,6 +229,7 @@ nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos)
 		(unsigned long) count, (unsigned long) pos);
 
 	result = nfs_revalidate_file(inode, iocb->ki_filp);
+	nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count);
 	if (!result)
 		result = generic_file_aio_read(iocb, buf, count, pos);
 	return result;
@@ -282,6 +283,7 @@ nfs_fsync(struct file *file, struct dentry *dentry, int datasync)
 
 	dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
 
+	nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
 	lock_kernel();
 	status = nfs_wb_all(inode);
 	if (!status) {
@@ -316,6 +318,17 @@ static int nfs_commit_write(struct file *file, struct page *page, unsigned offse
 	return status;
 }
 
+static int nfs_invalidate_page(struct page *page, unsigned long offset)
+{
+	/* FIXME: we really should cancel any unstarted writes on this page */
+	return 1;
+}
+
+static int nfs_release_page(struct page *page, gfp_t gfp)
+{
+	return !nfs_wb_page(page->mapping->host, page);
+}
+
 struct address_space_operations nfs_file_aops = {
 	.readpage = nfs_readpage,
 	.readpages = nfs_readpages,
@@ -324,6 +337,8 @@ struct address_space_operations nfs_file_aops = {
 	.writepages = nfs_writepages,
 	.prepare_write = nfs_prepare_write,
 	.commit_write = nfs_commit_write,
+	.invalidatepage = nfs_invalidate_page,
+	.releasepage = nfs_release_page,
 #ifdef CONFIG_NFS_DIRECTIO
 	.direct_IO = nfs_direct_IO,
 #endif
@@ -365,6 +380,7 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t
 	if (!count)
 		goto out;
 
+	nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
 	result = generic_file_aio_write(iocb, buf, count, pos);
 out:
 	return result;
@@ -376,15 +392,17 @@ out_swapfile:
 
 static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
 {
-	struct file_lock *cfl;
+	struct file_lock cfl;
 	struct inode *inode = filp->f_mapping->host;
 	int status = 0;
 
 	lock_kernel();
 	/* Try local locking first */
-	cfl = posix_test_lock(filp, fl);
-	if (cfl != NULL) {
-		locks_copy_lock(fl, cfl);
+	if (posix_test_lock(filp, fl, &cfl)) {
+		fl->fl_start = cfl.fl_start;
+		fl->fl_end = cfl.fl_end;
+		fl->fl_type = cfl.fl_type;
+		fl->fl_pid = cfl.fl_pid;
 		goto out;
 	}
 
@@ -425,10 +443,8 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl)
 static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
 {
 	struct inode *inode = filp->f_mapping->host;
-	sigset_t oldset;
 	int status;
 
-	rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset);
 	/*
 	 * Flush all pending writes before doing anything
 	 * with locks..
@@ -446,17 +462,14 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
 	else
 		status = do_vfs_lock(filp, fl);
 	unlock_kernel();
-	rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
 	return status;
 }
 
 static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
 {
 	struct inode *inode = filp->f_mapping->host;
-	sigset_t oldset;
 	int status;
 
-	rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset);
 	/*
 	 * Flush all pending writes before doing anything
 	 * with locks..
@@ -489,7 +502,6 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
 	nfs_sync_mapping(filp->f_mapping);
 	nfs_zap_caches(inode);
 out:
-	rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
 	return status;
 }
 
@@ -504,9 +516,7 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
 			inode->i_sb->s_id, inode->i_ino,
 			fl->fl_type, fl->fl_flags,
 			(long long)fl->fl_start, (long long)fl->fl_end);
-
-	if (!inode)
-		return -EINVAL;
+	nfs_inc_stats(inode, NFSIOS_VFSLOCK);
 
 	/* No mandatory locks over NFS */
 	if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID &&
@@ -531,9 +541,6 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
 			inode->i_sb->s_id, inode->i_ino,
 			fl->fl_type, fl->fl_flags);
 
-	if (!inode)
-		return -EINVAL;
-
 	/*
 	 * No BSD flocks over NFS allowed.
 	 * Note: we could try to fake a POSIX lock request here by
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 821edd30333..3fab5b0cfc5 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -35,6 +35,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/slab.h>
@@ -74,8 +75,8 @@ struct idmap {
 	struct dentry        *idmap_dentry;
 	wait_queue_head_t     idmap_wq;
 	struct idmap_msg      idmap_im;
-	struct semaphore      idmap_lock;    /* Serializes upcalls */
-	struct semaphore      idmap_im_lock; /* Protects the hashtable */
+	struct mutex          idmap_lock;    /* Serializes upcalls */
+	struct mutex          idmap_im_lock; /* Protects the hashtable */
 	struct idmap_hashtable idmap_user_hash;
 	struct idmap_hashtable idmap_group_hash;
 };
@@ -101,11 +102,9 @@ nfs_idmap_new(struct nfs4_client *clp)
 
 	if (clp->cl_idmap != NULL)
 		return;
-        if ((idmap = kmalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
+        if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
                 return;
 
-	memset(idmap, 0, sizeof(*idmap));
-
 	snprintf(idmap->idmap_path, sizeof(idmap->idmap_path),
 	    "%s/idmap", clp->cl_rpcclient->cl_pathname);
 
@@ -116,8 +115,8 @@ nfs_idmap_new(struct nfs4_client *clp)
 		return;
 	}
 
-        init_MUTEX(&idmap->idmap_lock);
-        init_MUTEX(&idmap->idmap_im_lock);
+        mutex_init(&idmap->idmap_lock);
+        mutex_init(&idmap->idmap_im_lock);
 	init_waitqueue_head(&idmap->idmap_wq);
 	idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER;
 	idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP;
@@ -132,6 +131,8 @@ nfs_idmap_delete(struct nfs4_client *clp)
 
 	if (!idmap)
 		return;
+	dput(idmap->idmap_dentry);
+	idmap->idmap_dentry = NULL;
 	rpc_unlink(idmap->idmap_path);
 	clp->cl_idmap = NULL;
 	kfree(idmap);
@@ -232,8 +233,8 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
 	if (namelen >= IDMAP_NAMESZ)
 		return -EINVAL;
 
-	down(&idmap->idmap_lock);
-	down(&idmap->idmap_im_lock);
+	mutex_lock(&idmap->idmap_lock);
+	mutex_lock(&idmap->idmap_im_lock);
 
 	he = idmap_lookup_name(h, name, namelen);
 	if (he != NULL) {
@@ -259,11 +260,11 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
 	}
 
 	set_current_state(TASK_UNINTERRUPTIBLE);
-	up(&idmap->idmap_im_lock);
+	mutex_unlock(&idmap->idmap_im_lock);
 	schedule();
 	current->state = TASK_RUNNING;
 	remove_wait_queue(&idmap->idmap_wq, &wq);
-	down(&idmap->idmap_im_lock);
+	mutex_lock(&idmap->idmap_im_lock);
 
 	if (im->im_status & IDMAP_STATUS_SUCCESS) {
 		*id = im->im_id;
@@ -272,8 +273,8 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
 
  out:
 	memset(im, 0, sizeof(*im));
-	up(&idmap->idmap_im_lock);
-	up(&idmap->idmap_lock);
+	mutex_unlock(&idmap->idmap_im_lock);
+	mutex_unlock(&idmap->idmap_lock);
 	return (ret);
 }
 
@@ -293,8 +294,8 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
 
 	im = &idmap->idmap_im;
 
-	down(&idmap->idmap_lock);
-	down(&idmap->idmap_im_lock);
+	mutex_lock(&idmap->idmap_lock);
+	mutex_lock(&idmap->idmap_im_lock);
 
 	he = idmap_lookup_id(h, id);
 	if (he != 0) {
@@ -320,11 +321,11 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
 	}
 
 	set_current_state(TASK_UNINTERRUPTIBLE);
-	up(&idmap->idmap_im_lock);
+	mutex_unlock(&idmap->idmap_im_lock);
 	schedule();
 	current->state = TASK_RUNNING;
 	remove_wait_queue(&idmap->idmap_wq, &wq);
-	down(&idmap->idmap_im_lock);
+	mutex_lock(&idmap->idmap_im_lock);
 
 	if (im->im_status & IDMAP_STATUS_SUCCESS) {
 		if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0)
@@ -335,8 +336,8 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
 
  out:
 	memset(im, 0, sizeof(*im));
-	up(&idmap->idmap_im_lock);
-	up(&idmap->idmap_lock);
+	mutex_unlock(&idmap->idmap_im_lock);
+	mutex_unlock(&idmap->idmap_lock);
 	return ret;
 }
 
@@ -380,7 +381,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
         if (copy_from_user(&im_in, src, mlen) != 0)
 		return (-EFAULT);
 
-	down(&idmap->idmap_im_lock);
+	mutex_lock(&idmap->idmap_im_lock);
 
 	ret = mlen;
 	im->im_status = im_in.im_status;
@@ -440,7 +441,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 		idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id);
 	ret = mlen;
 out:
-	up(&idmap->idmap_im_lock);
+	mutex_unlock(&idmap->idmap_im_lock);
 	return ret;
 }
 
@@ -452,10 +453,10 @@ idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
 
 	if (msg->errno >= 0)
 		return;
-	down(&idmap->idmap_im_lock);
+	mutex_lock(&idmap->idmap_im_lock);
 	im->im_status = IDMAP_STATUS_LOOKUPFAIL;
 	wake_up(&idmap->idmap_wq);
-	up(&idmap->idmap_im_lock);
+	mutex_unlock(&idmap->idmap_im_lock);
 }
 
 /* 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 3413996f9a8..2f7656b911b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -26,6 +26,7 @@
 #include <linux/unistd.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/stats.h>
+#include <linux/sunrpc/metrics.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_mount.h>
 #include <linux/nfs4_mount.h>
@@ -42,6 +43,7 @@
 #include "nfs4_fs.h"
 #include "callback.h"
 #include "delegation.h"
+#include "iostat.h"
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 #define NFS_PARANOIA 1
@@ -65,6 +67,7 @@ static void nfs_clear_inode(struct inode *);
 static void nfs_umount_begin(struct super_block *);
 static int  nfs_statfs(struct super_block *, struct kstatfs *);
 static int  nfs_show_options(struct seq_file *, struct vfsmount *);
+static int  nfs_show_stats(struct seq_file *, struct vfsmount *);
 static void nfs_zap_acl_cache(struct inode *);
 
 static struct rpc_program	nfs_program;
@@ -78,6 +81,7 @@ static struct super_operations nfs_sops = {
 	.clear_inode	= nfs_clear_inode,
 	.umount_begin	= nfs_umount_begin,
 	.show_options	= nfs_show_options,
+	.show_stats	= nfs_show_stats,
 };
 
 /*
@@ -133,7 +137,7 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
 static int
 nfs_write_inode(struct inode *inode, int sync)
 {
-	int flags = sync ? FLUSH_WAIT : 0;
+	int flags = sync ? FLUSH_SYNC : 0;
 	int ret;
 
 	ret = nfs_commit_inode(inode, flags);
@@ -237,7 +241,6 @@ static struct inode *
 nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo)
 {
 	struct nfs_server	*server = NFS_SB(sb);
-	struct inode *rooti;
 	int			error;
 
 	error = server->rpc_ops->getroot(server, rootfh, fsinfo);
@@ -246,10 +249,7 @@ nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *f
 		return ERR_PTR(error);
 	}
 
-	rooti = nfs_fhget(sb, rootfh, fsinfo->fattr);
-	if (!rooti)
-		return ERR_PTR(-ENOMEM);
-	return rooti;
+	return nfs_fhget(sb, rootfh, fsinfo->fattr);
 }
 
 /*
@@ -277,6 +277,10 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
 
 	sb->s_magic      = NFS_SUPER_MAGIC;
 
+	server->io_stats = nfs_alloc_iostats();
+	if (server->io_stats == NULL)
+		return -ENOMEM;
+
 	root_inode = nfs_get_root(sb, &server->fh, &fsinfo);
 	/* Did getting the root inode fail? */
 	if (IS_ERR(root_inode)) {
@@ -290,6 +294,9 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
 	}
 	sb->s_root->d_op = server->rpc_ops->dentry_ops;
 
+	/* mount time stamp, in seconds */
+	server->mount_time = jiffies;
+
 	/* Get some general file system info */
 	if (server->namelen == 0 &&
 	    server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
@@ -396,6 +403,9 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 
 	nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans);
 
+	server->retrans_timeo = timeparms.to_initval;
+	server->retrans_count = timeparms.to_retries;
+
 	/* create transport and client */
 	xprt = xprt_create_proto(proto, &server->addr, &timeparms);
 	if (IS_ERR(xprt)) {
@@ -579,7 +589,7 @@ nfs_statfs(struct super_block *sb, struct kstatfs *buf)
 
 }
 
-static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
+static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults)
 {
 	static struct proc_nfs_info {
 		int flag;
@@ -588,28 +598,26 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
 	} nfs_info[] = {
 		{ NFS_MOUNT_SOFT, ",soft", ",hard" },
 		{ NFS_MOUNT_INTR, ",intr", "" },
-		{ NFS_MOUNT_POSIX, ",posix", "" },
 		{ NFS_MOUNT_NOCTO, ",nocto", "" },
 		{ NFS_MOUNT_NOAC, ",noac", "" },
-		{ NFS_MOUNT_NONLM, ",nolock", ",lock" },
+		{ NFS_MOUNT_NONLM, ",nolock", "" },
 		{ NFS_MOUNT_NOACL, ",noacl", "" },
 		{ 0, NULL, NULL }
 	};
 	struct proc_nfs_info *nfs_infop;
-	struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
 	char buf[12];
 	char *proto;
 
-	seq_printf(m, ",v%d", nfss->rpc_ops->version);
+	seq_printf(m, ",vers=%d", nfss->rpc_ops->version);
 	seq_printf(m, ",rsize=%d", nfss->rsize);
 	seq_printf(m, ",wsize=%d", nfss->wsize);
-	if (nfss->acregmin != 3*HZ)
+	if (nfss->acregmin != 3*HZ || showdefaults)
 		seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ);
-	if (nfss->acregmax != 60*HZ)
+	if (nfss->acregmax != 60*HZ || showdefaults)
 		seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ);
-	if (nfss->acdirmin != 30*HZ)
+	if (nfss->acdirmin != 30*HZ || showdefaults)
 		seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ);
-	if (nfss->acdirmax != 60*HZ)
+	if (nfss->acdirmax != 60*HZ || showdefaults)
 		seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ);
 	for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
 		if (nfss->flags & nfs_infop->flag)
@@ -629,8 +637,96 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
 			proto = buf;
 	}
 	seq_printf(m, ",proto=%s", proto);
+	seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ);
+	seq_printf(m, ",retrans=%u", nfss->retrans_count);
+}
+
+static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+	struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
+
+	nfs_show_mount_options(m, nfss, 0);
+
 	seq_puts(m, ",addr=");
 	seq_escape(m, nfss->hostname, " \t\n\\");
+
+	return 0;
+}
+
+static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
+{
+	int i, cpu;
+	struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
+	struct rpc_auth *auth = nfss->client->cl_auth;
+	struct nfs_iostats totals = { };
+
+	seq_printf(m, "statvers=%s", NFS_IOSTAT_VERS);
+
+	/*
+	 * Display all mount option settings
+	 */
+	seq_printf(m, "\n\topts:\t");
+	seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw");
+	seq_puts(m, mnt->mnt_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : "");
+	seq_puts(m, mnt->mnt_sb->s_flags & MS_NOATIME ? ",noatime" : "");
+	seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : "");
+	nfs_show_mount_options(m, nfss, 1);
+
+	seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ);
+
+	seq_printf(m, "\n\tcaps:\t");
+	seq_printf(m, "caps=0x%x", nfss->caps);
+	seq_printf(m, ",wtmult=%d", nfss->wtmult);
+	seq_printf(m, ",dtsize=%d", nfss->dtsize);
+	seq_printf(m, ",bsize=%d", nfss->bsize);
+	seq_printf(m, ",namelen=%d", nfss->namelen);
+
+#ifdef CONFIG_NFS_V4
+	if (nfss->rpc_ops->version == 4) {
+		seq_printf(m, "\n\tnfsv4:\t");
+		seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
+		seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
+		seq_printf(m, ",acl=0x%x", nfss->acl_bitmask);
+	}
+#endif
+
+	/*
+	 * Display security flavor in effect for this mount
+	 */
+	seq_printf(m, "\n\tsec:\tflavor=%d", auth->au_ops->au_flavor);
+	if (auth->au_flavor)
+		seq_printf(m, ",pseudoflavor=%d", auth->au_flavor);
+
+	/*
+	 * Display superblock I/O counters
+	 */
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		struct nfs_iostats *stats;
+
+		if (!cpu_possible(cpu))
+			continue;
+
+		preempt_disable();
+		stats = per_cpu_ptr(nfss->io_stats, cpu);
+
+		for (i = 0; i < __NFSIOS_COUNTSMAX; i++)
+			totals.events[i] += stats->events[i];
+		for (i = 0; i < __NFSIOS_BYTESMAX; i++)
+			totals.bytes[i] += stats->bytes[i];
+
+		preempt_enable();
+	}
+
+	seq_printf(m, "\n\tevents:\t");
+	for (i = 0; i < __NFSIOS_COUNTSMAX; i++)
+		seq_printf(m, "%lu ", totals.events[i]);
+	seq_printf(m, "\n\tbytes:\t");
+	for (i = 0; i < __NFSIOS_BYTESMAX; i++)
+		seq_printf(m, "%Lu ", totals.bytes[i]);
+	seq_printf(m, "\n");
+
+	rpc_print_iostats(m, nfss->client);
+
 	return 0;
 }
 
@@ -660,6 +756,8 @@ static void nfs_zap_caches_locked(struct inode *inode)
 	struct nfs_inode *nfsi = NFS_I(inode);
 	int mode = inode->i_mode;
 
+	nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
+
 	NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
 	NFS_ATTRTIMEO_UPDATE(inode) = jiffies;
 
@@ -751,7 +849,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 		.fh	= fh,
 		.fattr	= fattr
 	};
-	struct inode *inode = NULL;
+	struct inode *inode = ERR_PTR(-ENOENT);
 	unsigned long hash;
 
 	if ((fattr->valid & NFS_ATTR_FATTR) == 0)
@@ -764,8 +862,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 
 	hash = nfs_fattr_to_ino_t(fattr);
 
-	if (!(inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc)))
+	inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc);
+	if (inode == NULL) {
+		inode = ERR_PTR(-ENOMEM);
 		goto out_no_inode;
+	}
 
 	if (inode->i_state & I_NEW) {
 		struct nfs_inode *nfsi = NFS_I(inode);
@@ -834,7 +935,7 @@ out:
 	return inode;
 
 out_no_inode:
-	printk("nfs_fhget: iget failed\n");
+	dprintk("nfs_fhget: iget failed with error %ld\n", PTR_ERR(inode));
 	goto out;
 }
 
@@ -847,6 +948,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
 	struct nfs_fattr fattr;
 	int error;
 
+	nfs_inc_stats(inode, NFSIOS_VFSSETATTR);
+
 	if (attr->ia_valid & ATTR_SIZE) {
 		if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode))
 			attr->ia_valid &= ~ATTR_SIZE;
@@ -859,11 +962,9 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
 
 	lock_kernel();
 	nfs_begin_data_update(inode);
-	/* Write all dirty data if we're changing file permissions or size */
-	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) {
-		filemap_write_and_wait(inode->i_mapping);
-		nfs_wb_all(inode);
-	}
+	/* Write all dirty data */
+	filemap_write_and_wait(inode->i_mapping);
+	nfs_wb_all(inode);
 	/*
 	 * Return any delegations if we're going to change ACLs
 	 */
@@ -902,6 +1003,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
 		spin_unlock(&inode->i_lock);
 	}
 	if ((attr->ia_valid & ATTR_SIZE) != 0) {
+		nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
 		inode->i_size = attr->ia_size;
 		vmtruncate(inode, attr->ia_size);
 	}
@@ -949,7 +1051,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 	int err;
 
 	/* Flush out writes to the server in order to update c/mtime */
-	nfs_sync_inode(inode, 0, 0, FLUSH_WAIT|FLUSH_NOCOMMIT);
+	nfs_sync_inode_wait(inode, 0, 0, FLUSH_NOCOMMIT);
 
 	/*
 	 * We may force a getattr if the user cares about atime.
@@ -973,7 +1075,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 	return err;
 }
 
-struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred)
+static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, struct dentry *dentry, struct rpc_cred *cred)
 {
 	struct nfs_open_context *ctx;
 
@@ -981,6 +1083,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rp
 	if (ctx != NULL) {
 		atomic_set(&ctx->count, 1);
 		ctx->dentry = dget(dentry);
+		ctx->vfsmnt = mntget(mnt);
 		ctx->cred = get_rpccred(cred);
 		ctx->state = NULL;
 		ctx->lockowner = current->files;
@@ -1011,6 +1114,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx)
 		if (ctx->cred != NULL)
 			put_rpccred(ctx->cred);
 		dput(ctx->dentry);
+		mntput(ctx->vfsmnt);
 		kfree(ctx);
 	}
 }
@@ -1019,7 +1123,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx)
  * Ensure that mmap has a recent RPC credential for use when writing out
  * shared pages
  */
-void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
+static void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
 {
 	struct inode *inode = filp->f_dentry->d_inode;
 	struct nfs_inode *nfsi = NFS_I(inode);
@@ -1051,7 +1155,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c
 	return ctx;
 }
 
-void nfs_file_clear_open_context(struct file *filp)
+static void nfs_file_clear_open_context(struct file *filp)
 {
 	struct inode *inode = filp->f_dentry->d_inode;
 	struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data;
@@ -1076,7 +1180,7 @@ int nfs_open(struct inode *inode, struct file *filp)
 	cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
 	if (IS_ERR(cred))
 		return PTR_ERR(cred);
-	ctx = alloc_nfs_open_context(filp->f_dentry, cred);
+	ctx = alloc_nfs_open_context(filp->f_vfsmnt, filp->f_dentry, cred);
 	put_rpccred(cred);
 	if (ctx == NULL)
 		return -ENOMEM;
@@ -1185,6 +1289,7 @@ int nfs_attribute_timeout(struct inode *inode)
  */
 int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 {
+	nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
 	if (!(NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))
 			&& !nfs_attribute_timeout(inode))
 		return NFS_STALE(inode) ? -ESTALE : 0;
@@ -1201,6 +1306,7 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	if (nfsi->cache_validity & NFS_INO_INVALID_DATA) {
+		nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
 		if (S_ISREG(inode->i_mode))
 			nfs_sync_mapping(mapping);
 		invalidate_inode_pages2(mapping);
@@ -1299,39 +1405,37 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
 	if ((fattr->valid & NFS_ATTR_FATTR) == 0)
 		return 0;
 
+	/* Has the inode gone and changed behind our back? */
+	if (nfsi->fileid != fattr->fileid
+			|| (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
+		return -EIO;
+	}
+
 	/* Are we in the process of updating data on the server? */
 	data_unstable = nfs_caches_unstable(inode);
 
 	/* Do atomic weak cache consistency updates */
 	nfs_wcc_update_inode(inode, fattr);
 
-	if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
-			nfsi->change_attr != fattr->change_attr) {
+	if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0) {
+		if (nfsi->change_attr == fattr->change_attr)
+			goto out;
 		nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
 		if (!data_unstable)
 			nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
 	}
 
-	/* Has the inode gone and changed behind our back? */
-	if (nfsi->fileid != fattr->fileid
-			|| (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
-		return -EIO;
-	}
-
-	cur_size = i_size_read(inode);
- 	new_isize = nfs_size_to_loff_t(fattr->size);
-
 	/* Verify a few of the more important attributes */
 	if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
 		nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
 		if (!data_unstable)
 			nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
 	}
-	if (cur_size != new_isize) {
-		nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
-		if (nfsi->npages == 0)
-			nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
-	}
+
+	cur_size = i_size_read(inode);
+ 	new_isize = nfs_size_to_loff_t(fattr->size);
+	if (cur_size != new_isize && nfsi->npages == 0)
+		nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
 
 	/* Have any file permissions changed? */
 	if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
@@ -1343,6 +1447,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
 	if (inode->i_nlink != fattr->nlink)
 		nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
 
+out:
 	if (!timespec_equal(&inode->i_atime, &fattr->atime))
 		nfsi->cache_validity |= NFS_INO_INVALID_ATIME;
 
@@ -1481,15 +1586,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		nfsi->cache_change_attribute = jiffies;
 	}
 
-	if ((fattr->valid & NFS_ATTR_FATTR_V4)
-	    && nfsi->change_attr != fattr->change_attr) {
-		dprintk("NFS: change_attr change on server for file %s/%ld\n",
-		       inode->i_sb->s_id, inode->i_ino);
-		nfsi->change_attr = fattr->change_attr;
-		invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
-		nfsi->cache_change_attribute = jiffies;
-	}
-
 	/* If ctime has changed we should definitely clear access+acl caches */
 	if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
 		invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
@@ -1519,8 +1615,20 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
  		inode->i_blksize = fattr->du.nfs2.blocksize;
  	}
 
+	if ((fattr->valid & NFS_ATTR_FATTR_V4)) {
+		if (nfsi->change_attr != fattr->change_attr) {
+			dprintk("NFS: change_attr change on server for file %s/%ld\n",
+					inode->i_sb->s_id, inode->i_ino);
+			nfsi->change_attr = fattr->change_attr;
+			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+			nfsi->cache_change_attribute = jiffies;
+		} else
+			invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA);
+	}
+
 	/* Update attrtimeo value if we're out of the unstable period */
 	if (invalid & NFS_INO_INVALID_ATTR) {
+		nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
 		nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
 		nfsi->attrtimeo_timestamp = jiffies;
 	} else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) {
@@ -1637,10 +1745,9 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
 #endif /* CONFIG_NFS_V3 */
 
 	s = ERR_PTR(-ENOMEM);
-	server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
+	server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
 	if (!server)
 		goto out_err;
-	memset(server, 0, sizeof(struct nfs_server));
 	/* Zero out the NFS state stuff */
 	init_nfsv4_state(server);
 	server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
@@ -1712,6 +1819,7 @@ static void nfs_kill_super(struct super_block *s)
 
 	rpciod_down();		/* release rpciod */
 
+	nfs_free_iostats(server->io_stats);
 	kfree(server->hostname);
 	kfree(server);
 }
@@ -1738,6 +1846,7 @@ static struct super_operations nfs4_sops = {
 	.clear_inode	= nfs4_clear_inode,
 	.umount_begin	= nfs_umount_begin,
 	.show_options	= nfs_show_options,
+	.show_stats	= nfs_show_stats,
 };
 
 /*
@@ -1800,6 +1909,9 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 
 	nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans);
 
+	server->retrans_timeo = timeparms.to_initval;
+	server->retrans_count = timeparms.to_retries;
+
 	clp = nfs4_get_client(&server->addr.sin_addr);
 	if (!clp) {
 		dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
@@ -1941,10 +2053,9 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
 		return ERR_PTR(-EINVAL);
 	}
 
-	server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
+	server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
 	if (!server)
 		return ERR_PTR(-ENOMEM);
-	memset(server, 0, sizeof(struct nfs_server));
 	/* Zero out the NFS state stuff */
 	init_nfsv4_state(server);
 	server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
@@ -2024,10 +2135,12 @@ static void nfs4_kill_super(struct super_block *sb)
 
 	if (server->client != NULL && !IS_ERR(server->client))
 		rpc_shutdown_client(server->client);
-	rpciod_down();		/* release rpciod */
 
 	destroy_nfsv4_state(server);
 
+	rpciod_down();
+
+	nfs_free_iostats(server->io_stats);
 	kfree(server->hostname);
 	kfree(server);
 }
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h
new file mode 100644
index 00000000000..6350ecbde58
--- /dev/null
+++ b/fs/nfs/iostat.h
@@ -0,0 +1,164 @@
+/*
+ *  linux/fs/nfs/iostat.h
+ *
+ *  Declarations for NFS client per-mount statistics
+ *
+ *  Copyright (C) 2005, 2006 Chuck Lever <cel@netapp.com>
+ *
+ *  NFS client per-mount statistics provide information about the health of
+ *  the NFS client and the health of each NFS mount point.  Generally these
+ *  are not for detailed problem diagnosis, but simply to indicate that there
+ *  is a problem.
+ *
+ *  These counters are not meant to be human-readable, but are meant to be
+ *  integrated into system monitoring tools such as "sar" and "iostat".  As
+ *  such, the counters are sampled by the tools over time, and are never
+ *  zeroed after a file system is mounted.  Moving averages can be computed
+ *  by the tools by taking the difference between two instantaneous samples
+ *  and dividing that by the time between the samples.
+ */
+
+#ifndef _NFS_IOSTAT
+#define _NFS_IOSTAT
+
+#define NFS_IOSTAT_VERS		"1.0"
+
+/*
+ * NFS byte counters
+ *
+ * 1.  SERVER - the number of payload bytes read from or written to the
+ *     server by the NFS client via an NFS READ or WRITE request.
+ *
+ * 2.  NORMAL - the number of bytes read or written by applications via
+ *     the read(2) and write(2) system call interfaces.
+ *
+ * 3.  DIRECT - the number of bytes read or written from files opened
+ *     with the O_DIRECT flag.
+ *
+ * These counters give a view of the data throughput into and out of the NFS
+ * client.  Comparing the number of bytes requested by an application with the
+ * number of bytes the client requests from the server can provide an
+ * indication of client efficiency (per-op, cache hits, etc).
+ *
+ * These counters can also help characterize which access methods are in
+ * use.  DIRECT by itself shows whether there is any O_DIRECT traffic.
+ * NORMAL + DIRECT shows how much data is going through the system call
+ * interface.  A large amount of SERVER traffic without much NORMAL or
+ * DIRECT traffic shows that applications are using mapped files.
+ *
+ * NFS page counters
+ *
+ * These count the number of pages read or written via nfs_readpage(),
+ * nfs_readpages(), or their write equivalents.
+ */
+enum nfs_stat_bytecounters {
+	NFSIOS_NORMALREADBYTES = 0,
+	NFSIOS_NORMALWRITTENBYTES,
+	NFSIOS_DIRECTREADBYTES,
+	NFSIOS_DIRECTWRITTENBYTES,
+	NFSIOS_SERVERREADBYTES,
+	NFSIOS_SERVERWRITTENBYTES,
+	NFSIOS_READPAGES,
+	NFSIOS_WRITEPAGES,
+	__NFSIOS_BYTESMAX,
+};
+
+/*
+ * NFS event counters
+ *
+ * These counters provide a low-overhead way of monitoring client activity
+ * without enabling NFS trace debugging.  The counters show the rate at
+ * which VFS requests are made, and how often the client invalidates its
+ * data and attribute caches.  This allows system administrators to monitor
+ * such things as how close-to-open is working, and answer questions such
+ * as "why are there so many GETATTR requests on the wire?"
+ *
+ * They also count anamolous events such as short reads and writes, silly
+ * renames due to close-after-delete, and operations that change the size
+ * of a file (such operations can often be the source of data corruption
+ * if applications aren't using file locking properly).
+ */
+enum nfs_stat_eventcounters {
+	NFSIOS_INODEREVALIDATE = 0,
+	NFSIOS_DENTRYREVALIDATE,
+	NFSIOS_DATAINVALIDATE,
+	NFSIOS_ATTRINVALIDATE,
+	NFSIOS_VFSOPEN,
+	NFSIOS_VFSLOOKUP,
+	NFSIOS_VFSACCESS,
+	NFSIOS_VFSUPDATEPAGE,
+	NFSIOS_VFSREADPAGE,
+	NFSIOS_VFSREADPAGES,
+	NFSIOS_VFSWRITEPAGE,
+	NFSIOS_VFSWRITEPAGES,
+	NFSIOS_VFSGETDENTS,
+	NFSIOS_VFSSETATTR,
+	NFSIOS_VFSFLUSH,
+	NFSIOS_VFSFSYNC,
+	NFSIOS_VFSLOCK,
+	NFSIOS_VFSRELEASE,
+	NFSIOS_CONGESTIONWAIT,
+	NFSIOS_SETATTRTRUNC,
+	NFSIOS_EXTENDWRITE,
+	NFSIOS_SILLYRENAME,
+	NFSIOS_SHORTREAD,
+	NFSIOS_SHORTWRITE,
+	NFSIOS_DELAY,
+	__NFSIOS_COUNTSMAX,
+};
+
+#ifdef __KERNEL__
+
+#include <linux/percpu.h>
+#include <linux/cache.h>
+
+struct nfs_iostats {
+	unsigned long long	bytes[__NFSIOS_BYTESMAX];
+	unsigned long		events[__NFSIOS_COUNTSMAX];
+} ____cacheline_aligned;
+
+static inline void nfs_inc_server_stats(struct nfs_server *server, enum nfs_stat_eventcounters stat)
+{
+	struct nfs_iostats *iostats;
+	int cpu;
+
+	cpu = get_cpu();
+	iostats = per_cpu_ptr(server->io_stats, cpu);
+	iostats->events[stat] ++;
+	put_cpu_no_resched();
+}
+
+static inline void nfs_inc_stats(struct inode *inode, enum nfs_stat_eventcounters stat)
+{
+	nfs_inc_server_stats(NFS_SERVER(inode), stat);
+}
+
+static inline void nfs_add_server_stats(struct nfs_server *server, enum nfs_stat_bytecounters stat, unsigned long addend)
+{
+	struct nfs_iostats *iostats;
+	int cpu;
+
+	cpu = get_cpu();
+	iostats = per_cpu_ptr(server->io_stats, cpu);
+	iostats->bytes[stat] += addend;
+	put_cpu_no_resched();
+}
+
+static inline void nfs_add_stats(struct inode *inode, enum nfs_stat_bytecounters stat, unsigned long addend)
+{
+	nfs_add_server_stats(NFS_SERVER(inode), stat, addend);
+}
+
+static inline struct nfs_iostats *nfs_alloc_iostats(void)
+{
+	return alloc_percpu(struct nfs_iostats);
+}
+
+static inline void nfs_free_iostats(struct nfs_iostats *stats)
+{
+	if (stats != NULL)
+		free_percpu(stats);
+}
+
+#endif
+#endif
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 0b9a78353d6..445abb4d421 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -49,9 +49,12 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
 	struct mnt_fhstatus	result = {
 		.fh		= fh
 	};
+	struct rpc_message msg	= {
+		.rpc_argp	= path,
+		.rpc_resp	= &result,
+	};
 	char			hostname[32];
 	int			status;
-	int			call;
 
 	dprintk("NFS:      nfs_mount(%08x:%s)\n",
 			(unsigned)ntohl(addr->sin_addr.s_addr), path);
@@ -61,8 +64,12 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
 	if (IS_ERR(mnt_clnt))
 		return PTR_ERR(mnt_clnt);
 
-	call = (version == NFS_MNT3_VERSION) ? MOUNTPROC3_MNT : MNTPROC_MNT;
-	status = rpc_call(mnt_clnt, call, path, &result, 0);
+	if (version == NFS_MNT3_VERSION)
+		msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT];
+	else
+		msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT];
+
+	status = rpc_call_sync(mnt_clnt, &msg, 0);
 	return status < 0? status : (result.status? -EACCES : 0);
 }
 
@@ -137,6 +144,8 @@ static struct rpc_procinfo	mnt_procedures[] = {
 	  .p_encode		= (kxdrproc_t) xdr_encode_dirpath,	
 	  .p_decode		= (kxdrproc_t) xdr_decode_fhstatus,
 	  .p_bufsiz		= MNT_dirpath_sz << 2,
+	  .p_statidx		= MNTPROC_MNT,
+	  .p_name		= "MOUNT",
 	},
 };
 
@@ -146,6 +155,8 @@ static struct rpc_procinfo mnt3_procedures[] = {
 	  .p_encode		= (kxdrproc_t) xdr_encode_dirpath,
 	  .p_decode		= (kxdrproc_t) xdr_decode_fhstatus3,
 	  .p_bufsiz		= MNT_dirpath_sz << 2,
+	  .p_statidx		= MOUNTPROC3_MNT,
+	  .p_name		= "MOUNT",
 	},
 };
 
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 6548a65de94..f0015fa876e 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -682,7 +682,9 @@ nfs_stat_to_errno(int stat)
 	.p_encode   =  (kxdrproc_t) nfs_xdr_##argtype,			\
 	.p_decode   =  (kxdrproc_t) nfs_xdr_##restype,			\
 	.p_bufsiz   =  MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2,	\
-	.p_timer    =  timer						\
+	.p_timer    =  timer,						\
+	.p_statidx  =  NFSPROC_##proc,					\
+	.p_name     =  #proc,						\
 	}
 struct rpc_procinfo	nfs_procedures[] = {
     PROC(GETATTR,	fhandle,	attrstat, 1),
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 6a5bbc0ae94..33287879bd2 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -190,6 +190,10 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
 	struct nfs3_getaclres res = {
 		.fattr =	&fattr,
 	};
+	struct rpc_message msg = {
+		.rpc_argp	= &args,
+		.rpc_resp	= &res,
+	};
 	struct posix_acl *acl;
 	int status, count;
 
@@ -218,8 +222,8 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
 		return NULL;
 
 	dprintk("NFS call getacl\n");
-	status = rpc_call(server->client_acl, ACLPROC3_GETACL,
-			  &args, &res, 0);
+	msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL];
+	status = rpc_call_sync(server->client_acl, &msg, 0);
 	dprintk("NFS reply getacl: %d\n", status);
 
 	/* pages may have been allocated at the xdr layer. */
@@ -286,6 +290,10 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
 		.acl_access = acl,
 		.pages = pages,
 	};
+	struct rpc_message msg = {
+		.rpc_argp	= &args,
+		.rpc_resp	= &fattr,
+	};
 	int status, count;
 
 	status = -EOPNOTSUPP;
@@ -306,8 +314,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
 
 	dprintk("NFS call setacl\n");
 	nfs_begin_data_update(inode);
-	status = rpc_call(server->client_acl, ACLPROC3_SETACL,
-			  &args, &fattr, 0);
+	msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL];
+	status = rpc_call_sync(server->client_acl, &msg, 0);
 	spin_lock(&inode->i_lock);
 	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS;
 	spin_unlock(&inode->i_lock);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index ed67567f055..cf186f0d2b3 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -19,6 +19,8 @@
 #include <linux/smp_lock.h>
 #include <linux/nfs_mount.h>
 
+#include "iostat.h"
+
 #define NFSDBG_FACILITY		NFSDBG_PROC
 
 extern struct rpc_procinfo nfs3_procedures[];
@@ -41,27 +43,14 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
 	return res;
 }
 
-static inline int
-nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)
-{
-	struct rpc_message msg = {
-		.rpc_proc	= &clnt->cl_procinfo[proc],
-		.rpc_argp	= argp,
-		.rpc_resp	= resp,
-	};
-	return nfs3_rpc_wrapper(clnt, &msg, flags);
-}
-
-#define rpc_call(clnt, proc, argp, resp, flags) \
-		nfs3_rpc_call_wrapper(clnt, proc, argp, resp, flags)
-#define rpc_call_sync(clnt, msg, flags) \
-		nfs3_rpc_wrapper(clnt, msg, flags)
+#define rpc_call_sync(clnt, msg, flags)	nfs3_rpc_wrapper(clnt, msg, flags)
 
 static int
-nfs3_async_handle_jukebox(struct rpc_task *task)
+nfs3_async_handle_jukebox(struct rpc_task *task, struct inode *inode)
 {
 	if (task->tk_status != -EJUKEBOX)
 		return 0;
+	nfs_inc_stats(inode, NFSIOS_DELAY);
 	task->tk_status = 0;
 	rpc_restart_call(task);
 	rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
@@ -72,14 +61,21 @@ static int
 do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle,
 		 struct nfs_fsinfo *info)
 {
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_FSINFO],
+		.rpc_argp	= fhandle,
+		.rpc_resp	= info,
+	};
 	int	status;
 
 	dprintk("%s: call  fsinfo\n", __FUNCTION__);
 	nfs_fattr_init(info->fattr);
-	status = rpc_call(client, NFS3PROC_FSINFO, fhandle, info, 0);
+	status = rpc_call_sync(client, &msg, 0);
 	dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status);
 	if (!(info->fattr->valid & NFS_ATTR_FATTR)) {
-		status = rpc_call(client, NFS3PROC_GETATTR, fhandle, info->fattr, 0);
+		msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
+		msg.rpc_resp = info->fattr;
+		status = rpc_call_sync(client, &msg, 0);
 		dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
 	}
 	return status;
@@ -107,12 +103,16 @@ static int
 nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
 		struct nfs_fattr *fattr)
 {
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_GETATTR],
+		.rpc_argp	= fhandle,
+		.rpc_resp	= fattr,
+	};
 	int	status;
 
 	dprintk("NFS call  getattr\n");
 	nfs_fattr_init(fattr);
-	status = rpc_call(server->client, NFS3PROC_GETATTR,
-			  fhandle, fattr, 0);
+	status = rpc_call_sync(server->client, &msg, 0);
 	dprintk("NFS reply getattr: %d\n", status);
 	return status;
 }
@@ -126,11 +126,16 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 		.fh		= NFS_FH(inode),
 		.sattr		= sattr,
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_SETATTR],
+		.rpc_argp	= &arg,
+		.rpc_resp	= fattr,
+	};
 	int	status;
 
 	dprintk("NFS call  setattr\n");
 	nfs_fattr_init(fattr);
-	status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0);
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	if (status == 0)
 		nfs_setattr_update_inode(inode, sattr);
 	dprintk("NFS reply setattr: %d\n", status);
@@ -152,15 +157,23 @@ nfs3_proc_lookup(struct inode *dir, struct qstr *name,
 		.fh		= fhandle,
 		.fattr		= fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_LOOKUP],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	int			status;
 
 	dprintk("NFS call  lookup %s\n", name->name);
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(fattr);
-	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_LOOKUP, &arg, &res, 0);
-	if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR))
-		status = rpc_call(NFS_CLIENT(dir), NFS3PROC_GETATTR,
-			 fhandle, fattr, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) {
+		msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
+		msg.rpc_argp = fhandle;
+		msg.rpc_resp = fattr;
+		status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	}
 	dprintk("NFS reply lookup: %d\n", status);
 	if (status >= 0)
 		status = nfs_refresh_inode(dir, &dir_attr);
@@ -180,7 +193,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
 		.rpc_proc	= &nfs3_procedures[NFS3PROC_ACCESS],
 		.rpc_argp	= &arg,
 		.rpc_resp	= &res,
-		.rpc_cred	= entry->cred
+		.rpc_cred	= entry->cred,
 	};
 	int mode = entry->mask;
 	int status;
@@ -226,12 +239,16 @@ static int nfs3_proc_readlink(struct inode *inode, struct page *page,
 		.pglen		= pglen,
 		.pages		= &page
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_READLINK],
+		.rpc_argp	= &args,
+		.rpc_resp	= &fattr,
+	};
 	int			status;
 
 	dprintk("NFS call  readlink\n");
 	nfs_fattr_init(&fattr);
-	status = rpc_call(NFS_CLIENT(inode), NFS3PROC_READLINK,
-			  &args, &fattr, 0);
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	nfs_refresh_inode(inode, &fattr);
 	dprintk("NFS reply readlink: %d\n", status);
 	return status;
@@ -327,6 +344,11 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		.fh		= &fhandle,
 		.fattr		= &fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_CREATE],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	mode_t mode = sattr->ia_mode;
 	int status;
 
@@ -343,8 +365,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 again:
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(&fattr);
-	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_CREATE, &arg, &res, 0);
-	nfs_post_op_update_inode(dir, &dir_attr);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	nfs_refresh_inode(dir, &dir_attr);
 
 	/* If the server doesn't support the exclusive creation semantics,
 	 * try again with simple 'guarded' mode. */
@@ -447,7 +469,7 @@ nfs3_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
 	struct rpc_message *msg = &task->tk_msg;
 	struct nfs_fattr	*dir_attr;
 
-	if (nfs3_async_handle_jukebox(task))
+	if (nfs3_async_handle_jukebox(task, dir->d_inode))
 		return 1;
 	if (msg->rpc_argp) {
 		dir_attr = (struct nfs_fattr*)msg->rpc_resp;
@@ -474,12 +496,17 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
 		.fromattr	= &old_dir_attr,
 		.toattr		= &new_dir_attr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_RENAME],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	int			status;
 
 	dprintk("NFS call  rename %s -> %s\n", old_name->name, new_name->name);
 	nfs_fattr_init(&old_dir_attr);
 	nfs_fattr_init(&new_dir_attr);
-	status = rpc_call(NFS_CLIENT(old_dir), NFS3PROC_RENAME, &arg, &res, 0);
+	status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
 	nfs_post_op_update_inode(old_dir, &old_dir_attr);
 	nfs_post_op_update_inode(new_dir, &new_dir_attr);
 	dprintk("NFS reply rename: %d\n", status);
@@ -500,12 +527,17 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
 		.dir_attr	= &dir_attr,
 		.fattr		= &fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_LINK],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	int			status;
 
 	dprintk("NFS call  link %s\n", name->name);
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(&fattr);
-	status = rpc_call(NFS_CLIENT(inode), NFS3PROC_LINK, &arg, &res, 0);
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	nfs_post_op_update_inode(dir, &dir_attr);
 	nfs_post_op_update_inode(inode, &fattr);
 	dprintk("NFS reply link: %d\n", status);
@@ -531,6 +563,11 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
 		.fh		= fhandle,
 		.fattr		= fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_SYMLINK],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	int			status;
 
 	if (path->len > NFS3_MAXPATHLEN)
@@ -538,7 +575,7 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
 	dprintk("NFS call  symlink %s -> %s\n", name->name, path->name);
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(fattr);
-	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_SYMLINK, &arg, &res, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_post_op_update_inode(dir, &dir_attr);
 	dprintk("NFS reply symlink: %d\n", status);
 	return status;
@@ -560,6 +597,11 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
 		.fh		= &fhandle,
 		.fattr		= &fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_MKDIR],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	int mode = sattr->ia_mode;
 	int status;
 
@@ -569,7 +611,7 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
 
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(&fattr);
-	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_post_op_update_inode(dir, &dir_attr);
 	if (status != 0)
 		goto out;
@@ -591,11 +633,16 @@ nfs3_proc_rmdir(struct inode *dir, struct qstr *name)
 		.name		= name->name,
 		.len		= name->len
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_RMDIR],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &dir_attr,
+	};
 	int			status;
 
 	dprintk("NFS call  rmdir %s\n", name->name);
 	nfs_fattr_init(&dir_attr);
-	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_RMDIR, &arg, &dir_attr, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_post_op_update_inode(dir, &dir_attr);
 	dprintk("NFS reply rmdir: %d\n", status);
 	return status;
@@ -672,6 +719,11 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		.fh		= &fh,
 		.fattr		= &fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_MKNOD],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	mode_t mode = sattr->ia_mode;
 	int status;
 
@@ -690,7 +742,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(&fattr);
-	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_post_op_update_inode(dir, &dir_attr);
 	if (status != 0)
 		goto out;
@@ -707,11 +759,16 @@ static int
 nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
 		 struct nfs_fsstat *stat)
 {
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_FSSTAT],
+		.rpc_argp	= fhandle,
+		.rpc_resp	= stat,
+	};
 	int	status;
 
 	dprintk("NFS call  fsstat\n");
 	nfs_fattr_init(stat->fattr);
-	status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0);
+	status = rpc_call_sync(server->client, &msg, 0);
 	dprintk("NFS reply statfs: %d\n", status);
 	return status;
 }
@@ -720,11 +777,16 @@ static int
 nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
 		 struct nfs_fsinfo *info)
 {
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_FSINFO],
+		.rpc_argp	= fhandle,
+		.rpc_resp	= info,
+	};
 	int	status;
 
 	dprintk("NFS call  fsinfo\n");
 	nfs_fattr_init(info->fattr);
-	status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
+	status = rpc_call_sync(server->client_sys, &msg, 0);
 	dprintk("NFS reply fsinfo: %d\n", status);
 	return status;
 }
@@ -733,40 +795,34 @@ static int
 nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 		   struct nfs_pathconf *info)
 {
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_PATHCONF],
+		.rpc_argp	= fhandle,
+		.rpc_resp	= info,
+	};
 	int	status;
 
 	dprintk("NFS call  pathconf\n");
 	nfs_fattr_init(info->fattr);
-	status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0);
+	status = rpc_call_sync(server->client, &msg, 0);
 	dprintk("NFS reply pathconf: %d\n", status);
 	return status;
 }
 
 extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
 
-static void nfs3_read_done(struct rpc_task *task, void *calldata)
+static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
 {
-	struct nfs_read_data *data = calldata;
-
-	if (nfs3_async_handle_jukebox(task))
-		return;
+	if (nfs3_async_handle_jukebox(task, data->inode))
+		return -EAGAIN;
 	/* Call back common NFS readpage processing */
 	if (task->tk_status >= 0)
 		nfs_refresh_inode(data->inode, &data->fattr);
-	nfs_readpage_result(task, calldata);
+	return 0;
 }
 
-static const struct rpc_call_ops nfs3_read_ops = {
-	.rpc_call_done = nfs3_read_done,
-	.rpc_release = nfs_readdata_release,
-};
-
-static void
-nfs3_proc_read_setup(struct nfs_read_data *data)
+static void nfs3_proc_read_setup(struct nfs_read_data *data)
 {
-	struct rpc_task		*task = &data->task;
-	struct inode		*inode = data->inode;
-	int			flags;
 	struct rpc_message	msg = {
 		.rpc_proc	= &nfs3_procedures[NFS3PROC_READ],
 		.rpc_argp	= &data->args,
@@ -774,37 +830,20 @@ nfs3_proc_read_setup(struct nfs_read_data *data)
 		.rpc_cred	= data->cred,
 	};
 
-	/* N.B. Do we need to test? Never called for swapfile inode */
-	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
-
-	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_read_ops, data);
-	rpc_call_setup(task, &msg, 0);
+	rpc_call_setup(&data->task, &msg, 0);
 }
 
-static void nfs3_write_done(struct rpc_task *task, void *calldata)
+static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-	struct nfs_write_data *data = calldata;
-
-	if (nfs3_async_handle_jukebox(task))
-		return;
+	if (nfs3_async_handle_jukebox(task, data->inode))
+		return -EAGAIN;
 	if (task->tk_status >= 0)
 		nfs_post_op_update_inode(data->inode, data->res.fattr);
-	nfs_writeback_done(task, calldata);
+	return 0;
 }
 
-static const struct rpc_call_ops nfs3_write_ops = {
-	.rpc_call_done = nfs3_write_done,
-	.rpc_release = nfs_writedata_release,
-};
-
-static void
-nfs3_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs3_proc_write_setup(struct nfs_write_data *data, int how)
 {
-	struct rpc_task		*task = &data->task;
-	struct inode		*inode = data->inode;
-	int			stable;
-	int			flags;
 	struct rpc_message	msg = {
 		.rpc_proc	= &nfs3_procedures[NFS3PROC_WRITE],
 		.rpc_argp	= &data->args,
@@ -812,45 +851,28 @@ nfs3_proc_write_setup(struct nfs_write_data *data, int how)
 		.rpc_cred	= data->cred,
 	};
 
+	data->args.stable = NFS_UNSTABLE;
 	if (how & FLUSH_STABLE) {
-		if (!NFS_I(inode)->ncommit)
-			stable = NFS_FILE_SYNC;
-		else
-			stable = NFS_DATA_SYNC;
-	} else
-		stable = NFS_UNSTABLE;
-	data->args.stable = stable;
-
-	/* Set the initial flags for the task.  */
-	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+		data->args.stable = NFS_FILE_SYNC;
+		if (NFS_I(data->inode)->ncommit)
+			data->args.stable = NFS_DATA_SYNC;
+	}
 
 	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_write_ops, data);
-	rpc_call_setup(task, &msg, 0);
+	rpc_call_setup(&data->task, &msg, 0);
 }
 
-static void nfs3_commit_done(struct rpc_task *task, void *calldata)
+static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-	struct nfs_write_data *data = calldata;
-
-	if (nfs3_async_handle_jukebox(task))
-		return;
+	if (nfs3_async_handle_jukebox(task, data->inode))
+		return -EAGAIN;
 	if (task->tk_status >= 0)
 		nfs_post_op_update_inode(data->inode, data->res.fattr);
-	nfs_commit_done(task, calldata);
+	return 0;
 }
 
-static const struct rpc_call_ops nfs3_commit_ops = {
-	.rpc_call_done = nfs3_commit_done,
-	.rpc_release = nfs_commit_release,
-};
-
-static void
-nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
+static void nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
 {
-	struct rpc_task		*task = &data->task;
-	struct inode		*inode = data->inode;
-	int			flags;
 	struct rpc_message	msg = {
 		.rpc_proc	= &nfs3_procedures[NFS3PROC_COMMIT],
 		.rpc_argp	= &data->args,
@@ -858,12 +880,7 @@ nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
 		.rpc_cred	= data->cred,
 	};
 
-	/* Set the initial flags for the task.  */
-	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
-	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_commit_ops, data);
-	rpc_call_setup(task, &msg, 0);
+	rpc_call_setup(&data->task, &msg, 0);
 }
 
 static int
@@ -902,8 +919,11 @@ struct nfs_rpc_ops	nfs_v3_clientops = {
 	.pathconf	= nfs3_proc_pathconf,
 	.decode_dirent	= nfs3_decode_dirent,
 	.read_setup	= nfs3_proc_read_setup,
+	.read_done	= nfs3_read_done,
 	.write_setup	= nfs3_proc_write_setup,
+	.write_done	= nfs3_write_done,
 	.commit_setup	= nfs3_proc_commit_setup,
+	.commit_done	= nfs3_commit_done,
 	.file_open	= nfs_open,
 	.file_release	= nfs_release,
 	.lock		= nfs3_proc_lock,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 5224a191efb..ec233619687 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1109,7 +1109,9 @@ nfs3_xdr_setaclres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
 	.p_encode    = (kxdrproc_t) nfs3_xdr_##argtype,			\
 	.p_decode    = (kxdrproc_t) nfs3_xdr_##restype,			\
 	.p_bufsiz    = MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2,	\
-	.p_timer     = timer						\
+	.p_timer     = timer,						\
+	.p_statidx   = NFS3PROC_##proc,					\
+	.p_name      = #proc,						\
 	}
 
 struct rpc_procinfo	nfs3_procedures[] = {
@@ -1150,6 +1152,7 @@ static struct rpc_procinfo	nfs3_acl_procedures[] = {
 		.p_decode = (kxdrproc_t) nfs3_xdr_getaclres,
 		.p_bufsiz = MAX(ACL3_getaclargs_sz, ACL3_getaclres_sz) << 2,
 		.p_timer = 1,
+		.p_name = "GETACL",
 	},
 	[ACLPROC3_SETACL] = {
 		.p_proc = ACLPROC3_SETACL,
@@ -1157,6 +1160,7 @@ static struct rpc_procinfo	nfs3_acl_procedures[] = {
 		.p_decode = (kxdrproc_t) nfs3_xdr_setaclres,
 		.p_bufsiz = MAX(ACL3_setaclargs_sz, ACL3_setaclres_sz) << 2,
 		.p_timer = 0,
+		.p_name = "SETACL",
 	},
 };
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f8c0066e02e..47ece1dd3c6 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -51,6 +51,7 @@
 
 #include "nfs4_fs.h"
 #include "delegation.h"
+#include "iostat.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PROC
 
@@ -335,7 +336,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
 	if (!(data->f_attr.valid & NFS_ATTR_FATTR))
 		goto out;
 	inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr);
-	if (inode == NULL)
+	if (IS_ERR(inode))
 		goto out;
 	state = nfs4_get_open_state(inode, data->owner);
 	if (state == NULL)
@@ -604,11 +605,14 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
 	int status;
 
 	atomic_inc(&data->count);
+	/*
+	 * If rpc_run_task() ends up calling ->rpc_release(), we
+	 * want to ensure that it takes the 'error' code path.
+	 */
+	data->rpc_status = -ENOMEM;
 	task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data);
-	if (IS_ERR(task)) {
-		nfs4_opendata_free(data);
+	if (IS_ERR(task))
 		return PTR_ERR(task);
-	}
 	status = nfs4_wait_for_completion_rpc_task(task);
 	if (status != 0) {
 		data->cancelled = 1;
@@ -707,11 +711,14 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 	int status;
 
 	atomic_inc(&data->count);
+	/*
+	 * If rpc_run_task() ends up calling ->rpc_release(), we
+	 * want to ensure that it takes the 'error' code path.
+	 */
+	data->rpc_status = -ENOMEM;
 	task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data);
-	if (IS_ERR(task)) {
-		nfs4_opendata_free(data);
+	if (IS_ERR(task))
 		return PTR_ERR(task);
-	}
 	status = nfs4_wait_for_completion_rpc_task(task);
 	if (status != 0) {
 		data->cancelled = 1;
@@ -908,7 +915,7 @@ out_put_state_owner:
 static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred)
 {
 	struct nfs4_exception exception = { };
-	struct nfs4_state *res;
+	struct nfs4_state *res = ERR_PTR(-EIO);
 	int err;
 
 	do {
@@ -1017,12 +1024,12 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
 	return res;
 }
 
-static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
-                struct nfs_fh *fhandle, struct iattr *sattr,
-                struct nfs4_state *state)
+static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
+                struct iattr *sattr, struct nfs4_state *state)
 {
+	struct nfs_server *server = NFS_SERVER(inode);
         struct nfs_setattrargs  arg = {
-                .fh             = fhandle,
+                .fh             = NFS_FH(inode),
                 .iap            = sattr,
 		.server		= server,
 		.bitmask = server->attr_bitmask,
@@ -1041,7 +1048,9 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
 
 	nfs_fattr_init(fattr);
 
-	if (state != NULL) {
+	if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) {
+		/* Use that stateid */
+	} else if (state != NULL) {
 		msg.rpc_cred = state->owner->so_cred;
 		nfs4_copy_stateid(&arg.stateid, state, current->files);
 	} else
@@ -1053,16 +1062,15 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
 	return status;
 }
 
-static int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
-                struct nfs_fh *fhandle, struct iattr *sattr,
-                struct nfs4_state *state)
+static int nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
+                struct iattr *sattr, struct nfs4_state *state)
 {
+	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs4_exception exception = { };
 	int err;
 	do {
 		err = nfs4_handle_exception(server,
-				_nfs4_do_setattr(server, fattr, fhandle, sattr,
-					state),
+				_nfs4_do_setattr(inode, fattr, sattr, state),
 				&exception);
 	} while (exception.retry);
 	return err;
@@ -1503,8 +1511,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 	if (ctx != NULL)
 		state = ctx->state;
 
-	status = nfs4_do_setattr(NFS_SERVER(inode), fattr,
-			NFS_FH(inode), sattr, state);
+	status = nfs4_do_setattr(inode, fattr, sattr, state);
 	if (status == 0)
 		nfs_setattr_update_inode(inode, sattr);
 	if (ctx != NULL)
@@ -1823,8 +1830,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 	d_instantiate(dentry, igrab(state->inode));
 	if (flags & O_EXCL) {
 		struct nfs_fattr fattr;
-		status = nfs4_do_setattr(NFS_SERVER(dir), &fattr,
-		                     NFS_FH(state->inode), sattr, state);
+		status = nfs4_do_setattr(state->inode, &fattr, sattr, state);
 		if (status == 0)
 			nfs_setattr_update_inode(state->inode, sattr);
 	}
@@ -2344,75 +2350,50 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 	return err;
 }
 
-static void nfs4_read_done(struct rpc_task *task, void *calldata)
+static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
 {
-	struct nfs_read_data *data = calldata;
-	struct inode *inode = data->inode;
+	struct nfs_server *server = NFS_SERVER(data->inode);
 
-	if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
+	if (nfs4_async_handle_error(task, server) == -EAGAIN) {
 		rpc_restart_call(task);
-		return;
+		return -EAGAIN;
 	}
 	if (task->tk_status > 0)
-		renew_lease(NFS_SERVER(inode), data->timestamp);
-	/* Call back common NFS readpage processing */
-	nfs_readpage_result(task, calldata);
+		renew_lease(server, data->timestamp);
+	return 0;
 }
 
-static const struct rpc_call_ops nfs4_read_ops = {
-	.rpc_call_done = nfs4_read_done,
-	.rpc_release = nfs_readdata_release,
-};
-
-static void
-nfs4_proc_read_setup(struct nfs_read_data *data)
+static void nfs4_proc_read_setup(struct nfs_read_data *data)
 {
-	struct rpc_task	*task = &data->task;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ],
 		.rpc_argp = &data->args,
 		.rpc_resp = &data->res,
 		.rpc_cred = data->cred,
 	};
-	struct inode *inode = data->inode;
-	int flags;
 
 	data->timestamp   = jiffies;
 
-	/* N.B. Do we need to test? Never called for swapfile inode */
-	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
-
-	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_read_ops, data);
-	rpc_call_setup(task, &msg, 0);
+	rpc_call_setup(&data->task, &msg, 0);
 }
 
-static void nfs4_write_done(struct rpc_task *task, void *calldata)
+static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-	struct nfs_write_data *data = calldata;
 	struct inode *inode = data->inode;
 	
 	if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
 		rpc_restart_call(task);
-		return;
+		return -EAGAIN;
 	}
 	if (task->tk_status >= 0) {
 		renew_lease(NFS_SERVER(inode), data->timestamp);
 		nfs_post_op_update_inode(inode, data->res.fattr);
 	}
-	/* Call back common NFS writeback processing */
-	nfs_writeback_done(task, calldata);
+	return 0;
 }
 
-static const struct rpc_call_ops nfs4_write_ops = {
-	.rpc_call_done = nfs4_write_done,
-	.rpc_release = nfs_writedata_release,
-};
-
-static void
-nfs4_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs4_proc_write_setup(struct nfs_write_data *data, int how)
 {
-	struct rpc_task	*task = &data->task;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE],
 		.rpc_argp = &data->args,
@@ -2422,7 +2403,6 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how)
 	struct inode *inode = data->inode;
 	struct nfs_server *server = NFS_SERVER(inode);
 	int stable;
-	int flags;
 	
 	if (how & FLUSH_STABLE) {
 		if (!NFS_I(inode)->ncommit)
@@ -2437,57 +2417,37 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how)
 
 	data->timestamp   = jiffies;
 
-	/* Set the initial flags for the task.  */
-	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
 	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_write_ops, data);
-	rpc_call_setup(task, &msg, 0);
+	rpc_call_setup(&data->task, &msg, 0);
 }
 
-static void nfs4_commit_done(struct rpc_task *task, void *calldata)
+static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-	struct nfs_write_data *data = calldata;
 	struct inode *inode = data->inode;
 	
 	if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
 		rpc_restart_call(task);
-		return;
+		return -EAGAIN;
 	}
 	if (task->tk_status >= 0)
 		nfs_post_op_update_inode(inode, data->res.fattr);
-	/* Call back common NFS writeback processing */
-	nfs_commit_done(task, calldata);
+	return 0;
 }
 
-static const struct rpc_call_ops nfs4_commit_ops = {
-	.rpc_call_done = nfs4_commit_done,
-	.rpc_release = nfs_commit_release,
-};
-
-static void
-nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
+static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
 {
-	struct rpc_task	*task = &data->task;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT],
 		.rpc_argp = &data->args,
 		.rpc_resp = &data->res,
 		.rpc_cred = data->cred,
 	};	
-	struct inode *inode = data->inode;
-	struct nfs_server *server = NFS_SERVER(inode);
-	int flags;
+	struct nfs_server *server = NFS_SERVER(data->inode);
 	
 	data->args.bitmask = server->attr_bitmask;
 	data->res.server = server;
 
-	/* Set the initial flags for the task.  */
-	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
-	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_commit_ops, data);
-	rpc_call_setup(task, &msg, 0);	
+	rpc_call_setup(&data->task, &msg, 0);
 }
 
 /*
@@ -2755,8 +2715,10 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
 				rpc_wake_up_task(task);
 			task->tk_status = 0;
 			return -EAGAIN;
-		case -NFS4ERR_GRACE:
 		case -NFS4ERR_DELAY:
+			nfs_inc_server_stats((struct nfs_server *) server,
+						NFSIOS_DELAY);
+		case -NFS4ERR_GRACE:
 			rpc_delay(task, NFS4_POLL_RETRY_MAX);
 			task->tk_status = 0;
 			return -EAGAIN;
@@ -2893,8 +2855,7 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p
 	return status;
 }
 
-int
-nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
+static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
 {
 	struct nfs_fsinfo fsinfo;
 	struct rpc_message msg = {
@@ -2918,6 +2879,24 @@ nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
 	return status;
 }
 
+int nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
+{
+	long timeout;
+	int err;
+	do {
+		err = _nfs4_proc_setclientid_confirm(clp, cred);
+		switch (err) {
+			case 0:
+				return err;
+			case -NFS4ERR_RESOURCE:
+				/* The IBM lawyers misread another document! */
+			case -NFS4ERR_DELAY:
+				err = nfs4_delay(clp->cl_rpcclient, &timeout);
+		}
+	} while (err == 0);
+	return err;
+}
+
 struct nfs4_delegreturndata {
 	struct nfs4_delegreturnargs args;
 	struct nfs4_delegreturnres res;
@@ -2958,7 +2937,7 @@ static void nfs4_delegreturn_release(void *calldata)
 	kfree(calldata);
 }
 
-const static struct rpc_call_ops nfs4_delegreturn_ops = {
+static const struct rpc_call_ops nfs4_delegreturn_ops = {
 	.rpc_call_prepare = nfs4_delegreturn_prepare,
 	.rpc_call_done = nfs4_delegreturn_done,
 	.rpc_release = nfs4_delegreturn_release,
@@ -2986,10 +2965,8 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
 	data->rpc_status = 0;
 
 	task = rpc_run_task(NFS_CLIENT(inode), RPC_TASK_ASYNC, &nfs4_delegreturn_ops, data);
-	if (IS_ERR(task)) {
-		nfs4_delegreturn_release(data);
+	if (IS_ERR(task))
 		return PTR_ERR(task);
-	}
 	status = nfs4_wait_for_completion_rpc_task(task);
 	if (status == 0) {
 		status = data->rpc_status;
@@ -3209,7 +3186,6 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
 		struct nfs_seqid *seqid)
 {
 	struct nfs4_unlockdata *data;
-	struct rpc_task *task;
 
 	data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid);
 	if (data == NULL) {
@@ -3219,10 +3195,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
 
 	/* Unlock _before_ we do the RPC call */
 	do_vfs_lock(fl->fl_file, fl);
-	task = rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data);
-	if (IS_ERR(task))
-		nfs4_locku_release_calldata(data);
-	return task;
+	return rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data);
 }
 
 static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
@@ -3403,10 +3376,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
 		data->arg.reclaim = 1;
 	task = rpc_run_task(NFS_CLIENT(state->inode), RPC_TASK_ASYNC,
 			&nfs4_lock_ops, data);
-	if (IS_ERR(task)) {
-		nfs4_lock_release(data);
+	if (IS_ERR(task))
 		return PTR_ERR(task);
-	}
 	ret = nfs4_wait_for_completion_rpc_task(task);
 	if (ret == 0) {
 		ret = data->rpc_status;
@@ -3588,6 +3559,8 @@ ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen)
 {
 	size_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1;
 
+	if (!nfs4_server_supports_acls(NFS_SERVER(dentry->d_inode)))
+		return 0;
 	if (buf && buflen < len)
 		return -ERANGE;
 	if (buf)
@@ -3644,8 +3617,11 @@ struct nfs_rpc_ops	nfs_v4_clientops = {
 	.pathconf	= nfs4_proc_pathconf,
 	.decode_dirent	= nfs4_decode_dirent,
 	.read_setup	= nfs4_proc_read_setup,
+	.read_done	= nfs4_read_done,
 	.write_setup	= nfs4_proc_write_setup,
+	.write_done	= nfs4_write_done,
 	.commit_setup	= nfs4_proc_commit_setup,
+	.commit_done	= nfs4_commit_done,
 	.file_open      = nfs_open,
 	.file_release   = nfs_release,
 	.lock		= nfs4_proc_lock,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index afad0255e7d..96e5b82c153 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -977,6 +977,7 @@ out:
 out_error:
 	printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n",
 				NIPQUAD(clp->cl_addr.s_addr), -status);
+	set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
 	goto out;
 }
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 0a1bd36a483..7c5d70efe72 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4344,6 +4344,8 @@ nfs_stat_to_errno(int stat)
 	.p_encode = (kxdrproc_t) nfs4_xdr_##argtype,		\
 	.p_decode = (kxdrproc_t) nfs4_xdr_##restype,		\
 	.p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2,	\
+	.p_statidx = NFSPROC4_CLNT_##proc,			\
+	.p_name   = #proc,					\
     }
 
 struct rpc_procinfo	nfs4_procedures[] = {
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d53857b148e..106aca388eb 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -85,6 +85,9 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
 	atomic_set(&req->wb_complete, 0);
 	req->wb_index	= page->index;
 	page_cache_get(page);
+	BUG_ON(PagePrivate(page));
+	BUG_ON(!PageLocked(page));
+	BUG_ON(page->mapping->host != inode);
 	req->wb_offset  = offset;
 	req->wb_pgbase	= offset;
 	req->wb_bytes   = count;
@@ -132,9 +135,11 @@ void nfs_clear_page_writeback(struct nfs_page *req)
 {
 	struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
 
-	spin_lock(&nfsi->req_lock);
-	radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
-	spin_unlock(&nfsi->req_lock);
+	if (req->wb_page != NULL) {
+		spin_lock(&nfsi->req_lock);
+		radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
+		spin_unlock(&nfsi->req_lock);
+	}
 	nfs_unlock_request(req);
 }
 
@@ -147,8 +152,9 @@ void nfs_clear_page_writeback(struct nfs_page *req)
  */
 void nfs_clear_request(struct nfs_page *req)
 {
-	if (req->wb_page) {
-		page_cache_release(req->wb_page);
+	struct page *page = req->wb_page;
+	if (page != NULL) {
+		page_cache_release(page);
 		req->wb_page = NULL;
 	}
 }
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index f5150d71c03..9dd85cac2df 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -58,16 +58,23 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
 {
 	struct nfs_fattr *fattr = info->fattr;
 	struct nfs2_fsstat fsinfo;
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_GETATTR],
+		.rpc_argp	= fhandle,
+		.rpc_resp	= fattr,
+	};
 	int status;
 
 	dprintk("%s: call getattr\n", __FUNCTION__);
 	nfs_fattr_init(fattr);
-	status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0);
+	status = rpc_call_sync(server->client_sys, &msg, 0);
 	dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
 	if (status)
 		return status;
 	dprintk("%s: call statfs\n", __FUNCTION__);
-	status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+	msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS];
+	msg.rpc_resp = &fsinfo;
+	status = rpc_call_sync(server->client_sys, &msg, 0);
 	dprintk("%s: reply statfs: %d\n", __FUNCTION__, status);
 	if (status)
 		return status;
@@ -90,12 +97,16 @@ static int
 nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
 		struct nfs_fattr *fattr)
 {
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_GETATTR],
+		.rpc_argp	= fhandle,
+		.rpc_resp	= fattr,
+	};
 	int	status;
 
 	dprintk("NFS call  getattr\n");
 	nfs_fattr_init(fattr);
-	status = rpc_call(server->client, NFSPROC_GETATTR,
-				fhandle, fattr, 0);
+	status = rpc_call_sync(server->client, &msg, 0);
 	dprintk("NFS reply getattr: %d\n", status);
 	return status;
 }
@@ -109,6 +120,11 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 		.fh	= NFS_FH(inode),
 		.sattr	= sattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_SETATTR],
+		.rpc_argp	= &arg,
+		.rpc_resp	= fattr,
+	};
 	int	status;
 
 	/* Mask out the non-modebit related stuff from attr->ia_mode */
@@ -116,7 +132,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 
 	dprintk("NFS call  setattr\n");
 	nfs_fattr_init(fattr);
-	status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0);
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	if (status == 0)
 		nfs_setattr_update_inode(inode, sattr);
 	dprintk("NFS reply setattr: %d\n", status);
@@ -136,11 +152,16 @@ nfs_proc_lookup(struct inode *dir, struct qstr *name,
 		.fh		= fhandle,
 		.fattr		= fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_LOOKUP],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	int			status;
 
 	dprintk("NFS call  lookup %s\n", name->name);
 	nfs_fattr_init(fattr);
-	status = rpc_call(NFS_CLIENT(dir), NFSPROC_LOOKUP, &arg, &res, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	dprintk("NFS reply lookup: %d\n", status);
 	return status;
 }
@@ -154,10 +175,14 @@ static int nfs_proc_readlink(struct inode *inode, struct page *page,
 		.pglen		= pglen,
 		.pages		= &page
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_READLINK],
+		.rpc_argp	= &args,
+	};
 	int			status;
 
 	dprintk("NFS call  readlink\n");
-	status = rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK, &args, NULL, 0);
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	dprintk("NFS reply readlink: %d\n", status);
 	return status;
 }
@@ -233,11 +258,16 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		.fh		= &fhandle,
 		.fattr		= &fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_CREATE],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	int			status;
 
 	nfs_fattr_init(&fattr);
 	dprintk("NFS call  create %s\n", dentry->d_name.name);
-	status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	if (status == 0)
 		status = nfs_instantiate(dentry, &fhandle, &fattr);
 	dprintk("NFS reply create: %d\n", status);
@@ -263,6 +293,11 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		.fh		= &fhandle,
 		.fattr		= &fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_CREATE],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	int status, mode;
 
 	dprintk("NFS call  mknod %s\n", dentry->d_name.name);
@@ -277,13 +312,13 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 	}
 
 	nfs_fattr_init(&fattr);
-	status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_mark_for_revalidate(dir);
 
 	if (status == -EINVAL && S_ISFIFO(mode)) {
 		sattr->ia_mode = mode;
 		nfs_fattr_init(&fattr);
-		status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+		status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	}
 	if (status == 0)
 		status = nfs_instantiate(dentry, &fhandle, &fattr);
@@ -302,8 +337,6 @@ nfs_proc_remove(struct inode *dir, struct qstr *name)
 	struct rpc_message	msg = { 
 		.rpc_proc	= &nfs_procedures[NFSPROC_REMOVE],
 		.rpc_argp	= &arg,
-		.rpc_resp	= NULL,
-		.rpc_cred	= NULL
 	};
 	int			status;
 
@@ -355,10 +388,14 @@ nfs_proc_rename(struct inode *old_dir, struct qstr *old_name,
 		.toname		= new_name->name,
 		.tolen		= new_name->len
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_RENAME],
+		.rpc_argp	= &arg,
+	};
 	int			status;
 
 	dprintk("NFS call  rename %s -> %s\n", old_name->name, new_name->name);
-	status = rpc_call(NFS_CLIENT(old_dir), NFSPROC_RENAME, &arg, NULL, 0);
+	status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
 	nfs_mark_for_revalidate(old_dir);
 	nfs_mark_for_revalidate(new_dir);
 	dprintk("NFS reply rename: %d\n", status);
@@ -374,10 +411,14 @@ nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
 		.toname		= name->name,
 		.tolen		= name->len
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_LINK],
+		.rpc_argp	= &arg,
+	};
 	int			status;
 
 	dprintk("NFS call  link %s\n", name->name);
-	status = rpc_call(NFS_CLIENT(inode), NFSPROC_LINK, &arg, NULL, 0);
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	nfs_mark_for_revalidate(inode);
 	nfs_mark_for_revalidate(dir);
 	dprintk("NFS reply link: %d\n", status);
@@ -397,6 +438,10 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
 		.tolen		= path->len,
 		.sattr		= sattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_SYMLINK],
+		.rpc_argp	= &arg,
+	};
 	int			status;
 
 	if (path->len > NFS2_MAXPATHLEN)
@@ -404,7 +449,7 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
 	dprintk("NFS call  symlink %s -> %s\n", name->name, path->name);
 	nfs_fattr_init(fattr);
 	fhandle->size = 0;
-	status = rpc_call(NFS_CLIENT(dir), NFSPROC_SYMLINK, &arg, NULL, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_mark_for_revalidate(dir);
 	dprintk("NFS reply symlink: %d\n", status);
 	return status;
@@ -425,11 +470,16 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
 		.fh		= &fhandle,
 		.fattr		= &fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_MKDIR],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	int			status;
 
 	dprintk("NFS call  mkdir %s\n", dentry->d_name.name);
 	nfs_fattr_init(&fattr);
-	status = rpc_call(NFS_CLIENT(dir), NFSPROC_MKDIR, &arg, &res, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_mark_for_revalidate(dir);
 	if (status == 0)
 		status = nfs_instantiate(dentry, &fhandle, &fattr);
@@ -445,10 +495,14 @@ nfs_proc_rmdir(struct inode *dir, struct qstr *name)
 		.name		= name->name,
 		.len		= name->len
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_RMDIR],
+		.rpc_argp	= &arg,
+	};
 	int			status;
 
 	dprintk("NFS call  rmdir %s\n", name->name);
-	status = rpc_call(NFS_CLIENT(dir), NFSPROC_RMDIR, &arg, NULL, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_mark_for_revalidate(dir);
 	dprintk("NFS reply rmdir: %d\n", status);
 	return status;
@@ -470,13 +524,12 @@ nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 		.fh		= NFS_FH(dir),
 		.cookie		= cookie,
 		.count		= count,
-		.pages		= &page
+		.pages		= &page,
 	};
 	struct rpc_message	msg = {
 		.rpc_proc	= &nfs_procedures[NFSPROC_READDIR],
 		.rpc_argp	= &arg,
-		.rpc_resp	= NULL,
-		.rpc_cred	= cred
+		.rpc_cred	= cred,
 	};
 	int			status;
 
@@ -495,11 +548,16 @@ nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
 			struct nfs_fsstat *stat)
 {
 	struct nfs2_fsstat fsinfo;
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_STATFS],
+		.rpc_argp	= fhandle,
+		.rpc_resp	= &fsinfo,
+	};
 	int	status;
 
 	dprintk("NFS call  statfs\n");
 	nfs_fattr_init(stat->fattr);
-	status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+	status = rpc_call_sync(server->client, &msg, 0);
 	dprintk("NFS reply statfs: %d\n", status);
 	if (status)
 		goto out;
@@ -518,11 +576,16 @@ nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
 			struct nfs_fsinfo *info)
 {
 	struct nfs2_fsstat fsinfo;
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_STATFS],
+		.rpc_argp	= fhandle,
+		.rpc_resp	= &fsinfo,
+	};
 	int	status;
 
 	dprintk("NFS call  fsinfo\n");
 	nfs_fattr_init(info->fattr);
-	status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+	status = rpc_call_sync(server->client, &msg, 0);
 	dprintk("NFS reply fsinfo: %d\n", status);
 	if (status)
 		goto out;
@@ -550,10 +613,8 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 
 extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
 
-static void nfs_read_done(struct rpc_task *task, void *calldata)
+static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
 {
-	struct nfs_read_data *data = calldata;
-
 	if (task->tk_status >= 0) {
 		nfs_refresh_inode(data->inode, data->res.fattr);
 		/* Emulate the eof flag, which isn't normally needed in NFSv2
@@ -562,20 +623,11 @@ static void nfs_read_done(struct rpc_task *task, void *calldata)
 		if (data->args.offset + data->args.count >= data->res.fattr->size)
 			data->res.eof = 1;
 	}
-	nfs_readpage_result(task, calldata);
+	return 0;
 }
 
-static const struct rpc_call_ops nfs_read_ops = {
-	.rpc_call_done = nfs_read_done,
-	.rpc_release = nfs_readdata_release,
-};
-
-static void
-nfs_proc_read_setup(struct nfs_read_data *data)
+static void nfs_proc_read_setup(struct nfs_read_data *data)
 {
-	struct rpc_task		*task = &data->task;
-	struct inode		*inode = data->inode;
-	int			flags;
 	struct rpc_message	msg = {
 		.rpc_proc	= &nfs_procedures[NFSPROC_READ],
 		.rpc_argp	= &data->args,
@@ -583,34 +635,18 @@ nfs_proc_read_setup(struct nfs_read_data *data)
 		.rpc_cred	= data->cred,
 	};
 
-	/* N.B. Do we need to test? Never called for swapfile inode */
-	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
-
-	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_read_ops, data);
-	rpc_call_setup(task, &msg, 0);
+	rpc_call_setup(&data->task, &msg, 0);
 }
 
-static void nfs_write_done(struct rpc_task *task, void *calldata)
+static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-	struct nfs_write_data *data = calldata;
-
 	if (task->tk_status >= 0)
 		nfs_post_op_update_inode(data->inode, data->res.fattr);
-	nfs_writeback_done(task, calldata);
+	return 0;
 }
 
-static const struct rpc_call_ops nfs_write_ops = {
-	.rpc_call_done = nfs_write_done,
-	.rpc_release = nfs_writedata_release,
-};
-
-static void
-nfs_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs_proc_write_setup(struct nfs_write_data *data, int how)
 {
-	struct rpc_task		*task = &data->task;
-	struct inode		*inode = data->inode;
-	int			flags;
 	struct rpc_message	msg = {
 		.rpc_proc	= &nfs_procedures[NFSPROC_WRITE],
 		.rpc_argp	= &data->args,
@@ -621,12 +657,8 @@ nfs_proc_write_setup(struct nfs_write_data *data, int how)
 	/* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
 	data->args.stable = NFS_FILE_SYNC;
 
-	/* Set the initial flags for the task.  */
-	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
 	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_write_ops, data);
-	rpc_call_setup(task, &msg, 0);
+	rpc_call_setup(&data->task, &msg, 0);
 }
 
 static void
@@ -672,7 +704,9 @@ struct nfs_rpc_ops	nfs_v2_clientops = {
 	.pathconf	= nfs_proc_pathconf,
 	.decode_dirent	= nfs_decode_dirent,
 	.read_setup	= nfs_proc_read_setup,
+	.read_done	= nfs_read_done,
 	.write_setup	= nfs_proc_write_setup,
+	.write_done	= nfs_write_done,
 	.commit_setup	= nfs_proc_commit_setup,
 	.file_open	= nfs_open,
 	.file_release	= nfs_release,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 05eb43fadf8..3961524fd4a 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -31,17 +31,49 @@
 
 #include <asm/system.h>
 
+#include "iostat.h"
+
 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
 
 static int nfs_pagein_one(struct list_head *, struct inode *);
-static void nfs_readpage_result_partial(struct nfs_read_data *, int);
-static void nfs_readpage_result_full(struct nfs_read_data *, int);
+static const struct rpc_call_ops nfs_read_partial_ops;
+static const struct rpc_call_ops nfs_read_full_ops;
 
 static kmem_cache_t *nfs_rdata_cachep;
-mempool_t *nfs_rdata_mempool;
+static mempool_t *nfs_rdata_mempool;
 
 #define MIN_POOL_READ	(32)
 
+struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
+{
+	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
+
+	if (p) {
+		memset(p, 0, sizeof(*p));
+		INIT_LIST_HEAD(&p->pages);
+		if (pagecount < NFS_PAGEVEC_SIZE)
+			p->pagevec = &p->page_array[0];
+		else {
+			size_t size = ++pagecount * sizeof(struct page *);
+			p->pagevec = kmalloc(size, GFP_NOFS);
+			if (p->pagevec) {
+				memset(p->pagevec, 0, size);
+			} else {
+				mempool_free(p, nfs_rdata_mempool);
+				p = NULL;
+			}
+		}
+	}
+	return p;
+}
+
+void nfs_readdata_free(struct nfs_read_data *p)
+{
+	if (p && (p->pagevec != &p->page_array[0]))
+		kfree(p->pagevec);
+	mempool_free(p, nfs_rdata_mempool);
+}
+
 void nfs_readdata_release(void *data)
 {
         nfs_readdata_free(data);
@@ -133,6 +165,8 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
 		}
 		count -= result;
 		rdata->args.pgbase += result;
+		nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, result);
+
 		/* Note: result == 0 should only happen if we're caching
 		 * a write that extends the file and punches a hole.
 		 */
@@ -196,9 +230,11 @@ static void nfs_readpage_release(struct nfs_page *req)
  * Set up the NFS read request struct
  */
 static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+		const struct rpc_call_ops *call_ops,
 		unsigned int count, unsigned int offset)
 {
 	struct inode		*inode;
+	int flags;
 
 	data->req	  = req;
 	data->inode	  = inode = req->wb_context->dentry->d_inode;
@@ -216,6 +252,9 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 	data->res.eof     = 0;
 	nfs_fattr_init(&data->fattr);
 
+	/* Set up the initial task struct. */
+	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
+	rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
 	NFS_PROTO(inode)->read_setup(data);
 
 	data->task.tk_cookie = (unsigned long)inode;
@@ -303,14 +342,15 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
 		list_del_init(&data->pages);
 
 		data->pagevec[0] = page;
-		data->complete = nfs_readpage_result_partial;
 
 		if (nbytes > rsize) {
-			nfs_read_rpcsetup(req, data, rsize, offset);
+			nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
+					rsize, offset);
 			offset += rsize;
 			nbytes -= rsize;
 		} else {
-			nfs_read_rpcsetup(req, data, nbytes, offset);
+			nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
+					nbytes, offset);
 			nbytes = 0;
 		}
 		nfs_execute_read(data);
@@ -356,8 +396,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode)
 	}
 	req = nfs_list_entry(data->pages.next);
 
-	data->complete = nfs_readpage_result_full;
-	nfs_read_rpcsetup(req, data, count, 0);
+	nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0);
 
 	nfs_execute_read(data);
 	return 0;
@@ -391,12 +430,15 @@ nfs_pagein_list(struct list_head *head, int rpages)
 /*
  * Handle a read reply that fills part of a page.
  */
-static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
+static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
 {
+	struct nfs_read_data *data = calldata;
 	struct nfs_page *req = data->req;
 	struct page *page = req->wb_page;
  
-	if (status >= 0) {
+	if (nfs_readpage_result(task, data) != 0)
+		return;
+	if (task->tk_status >= 0) {
 		unsigned int request = data->args.count;
 		unsigned int result = data->res.count;
 
@@ -415,20 +457,28 @@ static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
 	}
 }
 
+static const struct rpc_call_ops nfs_read_partial_ops = {
+	.rpc_call_done = nfs_readpage_result_partial,
+	.rpc_release = nfs_readdata_release,
+};
+
 /*
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
  */
-static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
+static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
 {
+	struct nfs_read_data *data = calldata;
 	unsigned int count = data->res.count;
 
+	if (nfs_readpage_result(task, data) != 0)
+		return;
 	while (!list_empty(&data->pages)) {
 		struct nfs_page *req = nfs_list_entry(data->pages.next);
 		struct page *page = req->wb_page;
 		nfs_list_remove_request(req);
 
-		if (status >= 0) {
+		if (task->tk_status >= 0) {
 			if (count < PAGE_CACHE_SIZE) {
 				if (count < req->wb_bytes)
 					memclear_highpage_flush(page,
@@ -444,22 +494,33 @@ static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
 	}
 }
 
+static const struct rpc_call_ops nfs_read_full_ops = {
+	.rpc_call_done = nfs_readpage_result_full,
+	.rpc_release = nfs_readdata_release,
+};
+
 /*
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
  */
-void nfs_readpage_result(struct rpc_task *task, void *calldata)
+int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
 {
-	struct nfs_read_data *data = calldata;
 	struct nfs_readargs *argp = &data->args;
 	struct nfs_readres *resp = &data->res;
-	int status = task->tk_status;
+	int status;
 
 	dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
-		task->tk_pid, status);
+		task->tk_pid, task->tk_status);
+
+	status = NFS_PROTO(data->inode)->read_done(task, data);
+	if (status != 0)
+		return status;
+
+	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);
 
 	/* Is this a short read? */
 	if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) {
+		nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
 		/* Has the server at least made some progress? */
 		if (resp->count != 0) {
 			/* Yes, so retry the read at the end of the data */
@@ -467,14 +528,14 @@ void nfs_readpage_result(struct rpc_task *task, void *calldata)
 			argp->pgbase += resp->count;
 			argp->count -= resp->count;
 			rpc_restart_call(task);
-			return;
+			return -EAGAIN;
 		}
 		task->tk_status = -EIO;
 	}
 	spin_lock(&data->inode->i_lock);
 	NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
 	spin_unlock(&data->inode->i_lock);
-	data->complete(data, status);
+	return 0;
 }
 
 /*
@@ -491,6 +552,9 @@ int nfs_readpage(struct file *file, struct page *page)
 
 	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
 		page, PAGE_CACHE_SIZE, page->index);
+	nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
+	nfs_add_stats(inode, NFSIOS_READPAGES, 1);
+
 	/*
 	 * Try to flush any pending writes to the file..
 	 *
@@ -570,6 +634,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
 			inode->i_sb->s_id,
 			(long long)NFS_FILEID(inode),
 			nr_pages);
+	nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
 
 	if (filp == NULL) {
 		desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
@@ -582,6 +647,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
 	if (!list_empty(&head)) {
 		int err = nfs_pagein_list(&head, server->rpages);
 		if (!ret)
+			nfs_add_stats(inode, NFSIOS_READPAGES, err);
 			ret = err;
 	}
 	put_nfs_open_context(desc.ctx);
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index a65c7b53d55..0e28189c215 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -163,10 +163,9 @@ nfs_async_unlink(struct dentry *dentry)
 	struct rpc_clnt	*clnt = NFS_CLIENT(dir->d_inode);
 	int		status = -ENOMEM;
 
-	data = kmalloc(sizeof(*data), GFP_KERNEL);
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
 	if (!data)
 		goto out;
-	memset(data, 0, sizeof(*data));
 
 	data->cred = rpcauth_lookupcred(clnt->cl_auth, 0);
 	if (IS_ERR(data->cred)) {
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9449b683550..3f5225404c9 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -63,6 +63,7 @@
 #include <linux/smp_lock.h>
 
 #include "delegation.h"
+#include "iostat.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
 
@@ -76,20 +77,21 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*,
 					    struct inode *,
 					    struct page *,
 					    unsigned int, unsigned int);
-static void nfs_writeback_done_partial(struct nfs_write_data *, int);
-static void nfs_writeback_done_full(struct nfs_write_data *, int);
 static int nfs_wait_on_write_congestion(struct address_space *, int);
 static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
 static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
 			   unsigned int npages, int how);
+static const struct rpc_call_ops nfs_write_partial_ops;
+static const struct rpc_call_ops nfs_write_full_ops;
+static const struct rpc_call_ops nfs_commit_ops;
 
 static kmem_cache_t *nfs_wdata_cachep;
-mempool_t *nfs_wdata_mempool;
+static mempool_t *nfs_wdata_mempool;
 static mempool_t *nfs_commit_mempool;
 
 static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
 
-static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
+struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
 {
 	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
 
@@ -100,11 +102,39 @@ static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
 			p->pagevec = &p->page_array[0];
 		else {
 			size_t size = ++pagecount * sizeof(struct page *);
+			p->pagevec = kzalloc(size, GFP_NOFS);
+			if (!p->pagevec) {
+				mempool_free(p, nfs_commit_mempool);
+				p = NULL;
+			}
+		}
+	}
+	return p;
+}
+
+void nfs_commit_free(struct nfs_write_data *p)
+{
+	if (p && (p->pagevec != &p->page_array[0]))
+		kfree(p->pagevec);
+	mempool_free(p, nfs_commit_mempool);
+}
+
+struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
+{
+	struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
+
+	if (p) {
+		memset(p, 0, sizeof(*p));
+		INIT_LIST_HEAD(&p->pages);
+		if (pagecount < NFS_PAGEVEC_SIZE)
+			p->pagevec = &p->page_array[0];
+		else {
+			size_t size = ++pagecount * sizeof(struct page *);
 			p->pagevec = kmalloc(size, GFP_NOFS);
 			if (p->pagevec) {
 				memset(p->pagevec, 0, size);
 			} else {
-				mempool_free(p, nfs_commit_mempool);
+				mempool_free(p, nfs_wdata_mempool);
 				p = NULL;
 			}
 		}
@@ -112,11 +142,11 @@ static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
 	return p;
 }
 
-static inline void nfs_commit_free(struct nfs_write_data *p)
+void nfs_writedata_free(struct nfs_write_data *p)
 {
 	if (p && (p->pagevec != &p->page_array[0]))
 		kfree(p->pagevec);
-	mempool_free(p, nfs_commit_mempool);
+	mempool_free(p, nfs_wdata_mempool);
 }
 
 void nfs_writedata_release(void *wdata)
@@ -136,6 +166,7 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c
 	end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
 	if (i_size >= end)
 		return;
+	nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
 	i_size_write(inode, end);
 }
 
@@ -225,6 +256,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
 	        wdata->args.pgbase += result;
 		written += result;
 		count -= result;
+		nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result);
 	} while (count);
 	/* Update file length */
 	nfs_grow_file(page, offset, written);
@@ -281,6 +313,9 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
 	int priority = wb_priority(wbc);
 	int err;
 
+	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
+	nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
+
 	/*
 	 * Note: We need to ensure that we have a reference to the inode
 	 *       if we are to do asynchronous writes. If not, waiting
@@ -345,6 +380,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 	struct inode *inode = mapping->host;
 	int err;
 
+	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
+
 	err = generic_writepages(mapping, wbc);
 	if (err)
 		return err;
@@ -356,6 +393,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 	err = nfs_flush_inode(inode, 0, 0, wb_priority(wbc));
 	if (err < 0)
 		goto out;
+	nfs_add_stats(inode, NFSIOS_WRITEPAGES, err);
 	wbc->nr_to_write -= err;
 	if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) {
 		err = nfs_wait_on_requests(inode, 0, 0);
@@ -391,6 +429,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 		if (nfs_have_delegation(inode, FMODE_WRITE))
 			nfsi->change_attr++;
 	}
+	SetPagePrivate(req->wb_page);
 	nfsi->npages++;
 	atomic_inc(&req->wb_count);
 	return 0;
@@ -407,6 +446,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
 	BUG_ON (!NFS_WBACK_BUSY(req));
 
 	spin_lock(&nfsi->req_lock);
+	ClearPagePrivate(req->wb_page);
 	radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
 	nfsi->npages--;
 	if (!nfsi->npages) {
@@ -499,8 +539,7 @@ nfs_mark_request_commit(struct nfs_page *req)
  *
  * Interruptible by signals only if mounted with intr flag.
  */
-static int
-nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
+static int nfs_wait_on_requests_locked(struct inode *inode, unsigned long idx_start, unsigned int npages)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_page *req;
@@ -513,7 +552,6 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int
 	else
 		idx_end = idx_start + npages - 1;
 
-	spin_lock(&nfsi->req_lock);
 	next = idx_start;
 	while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) {
 		if (req->wb_index > idx_end)
@@ -526,15 +564,25 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int
 		spin_unlock(&nfsi->req_lock);
 		error = nfs_wait_on_request(req);
 		nfs_release_request(req);
+		spin_lock(&nfsi->req_lock);
 		if (error < 0)
 			return error;
-		spin_lock(&nfsi->req_lock);
 		res++;
 	}
-	spin_unlock(&nfsi->req_lock);
 	return res;
 }
 
+static int nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	int ret;
+
+	spin_lock(&nfsi->req_lock);
+	ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
+	spin_unlock(&nfsi->req_lock);
+	return ret;
+}
+
 /*
  * nfs_scan_dirty - Scan an inode for dirty requests
  * @inode: NFS inode to scan
@@ -586,6 +634,11 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_st
 	}
 	return res;
 }
+#else
+static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
+{
+	return 0;
+}
 #endif
 
 static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
@@ -598,6 +651,9 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
 
 	if (!bdi_write_congested(bdi))
 		return 0;
+
+	nfs_inc_stats(mapping->host, NFSIOS_CONGESTIONWAIT);
+
 	if (intr) {
 		struct rpc_clnt *clnt = NFS_CLIENT(mapping->host);
 		sigset_t oldset;
@@ -653,8 +709,11 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
 				spin_unlock(&nfsi->req_lock);
 				error = nfs_wait_on_request(req);
 				nfs_release_request(req);
-				if (error < 0)
+				if (error < 0) {
+					if (new)
+						nfs_release_request(new);
 					return ERR_PTR(error);
+				}
 				continue;
 			}
 			spin_unlock(&nfsi->req_lock);
@@ -748,6 +807,8 @@ int nfs_updatepage(struct file *file, struct page *page,
 	struct nfs_page	*req;
 	int		status = 0;
 
+	nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
+
 	dprintk("NFS:      nfs_updatepage(%s/%s %d@%Ld)\n",
 		file->f_dentry->d_parent->d_name.name,
 		file->f_dentry->d_name.name, count,
@@ -857,10 +918,12 @@ static inline int flush_task_priority(int how)
  */
 static void nfs_write_rpcsetup(struct nfs_page *req,
 		struct nfs_write_data *data,
+		const struct rpc_call_ops *call_ops,
 		unsigned int count, unsigned int offset,
 		int how)
 {
 	struct inode		*inode;
+	int flags;
 
 	/* Set up the RPC argument and reply structs
 	 * NB: take care not to mess about with data->commit et al. */
@@ -881,6 +944,9 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 	data->res.verf    = &data->verf;
 	nfs_fattr_init(&data->fattr);
 
+	/* Set up the initial task struct.  */
+	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+	rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
 	NFS_PROTO(inode)->write_setup(data, how);
 
 	data->task.tk_priority = flush_task_priority(how);
@@ -910,7 +976,7 @@ static void nfs_execute_write(struct nfs_write_data *data)
  * Generate multiple small requests to write out a single
  * contiguous dirty area on one page.
  */
-static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how)
+static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how)
 {
 	struct nfs_page *req = nfs_list_entry(head->next);
 	struct page *page = req->wb_page;
@@ -944,14 +1010,15 @@ static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how)
 		list_del_init(&data->pages);
 
 		data->pagevec[0] = page;
-		data->complete = nfs_writeback_done_partial;
 
 		if (nbytes > wsize) {
-			nfs_write_rpcsetup(req, data, wsize, offset, how);
+			nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
+					wsize, offset, how);
 			offset += wsize;
 			nbytes -= wsize;
 		} else {
-			nfs_write_rpcsetup(req, data, nbytes, offset, how);
+			nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
+					nbytes, offset, how);
 			nbytes = 0;
 		}
 		nfs_execute_write(data);
@@ -978,16 +1045,13 @@ out_bad:
  * This is the case if nfs_updatepage detects a conflicting request
  * that has been written but not committed.
  */
-static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
+static int nfs_flush_one(struct inode *inode, struct list_head *head, int how)
 {
 	struct nfs_page		*req;
 	struct page		**pages;
 	struct nfs_write_data	*data;
 	unsigned int		count;
 
-	if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE)
-		return nfs_flush_multi(head, inode, how);
-
 	data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
 	if (!data)
 		goto out_bad;
@@ -1005,9 +1069,8 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
 	}
 	req = nfs_list_entry(data->pages.next);
 
-	data->complete = nfs_writeback_done_full;
 	/* Set up the argument struct */
-	nfs_write_rpcsetup(req, data, count, 0, how);
+	nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
 
 	nfs_execute_write(data);
 	return 0;
@@ -1021,24 +1084,32 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
 	return -ENOMEM;
 }
 
-static int
-nfs_flush_list(struct list_head *head, int wpages, int how)
+static int nfs_flush_list(struct inode *inode, struct list_head *head, int npages, int how)
 {
 	LIST_HEAD(one_request);
-	struct nfs_page		*req;
-	int			error = 0;
-	unsigned int		pages = 0;
+	int (*flush_one)(struct inode *, struct list_head *, int);
+	struct nfs_page	*req;
+	int wpages = NFS_SERVER(inode)->wpages;
+	int wsize = NFS_SERVER(inode)->wsize;
+	int error;
 
-	while (!list_empty(head)) {
-		pages += nfs_coalesce_requests(head, &one_request, wpages);
+	flush_one = nfs_flush_one;
+	if (wsize < PAGE_CACHE_SIZE)
+		flush_one = nfs_flush_multi;
+	/* For single writes, FLUSH_STABLE is more efficient */
+	if (npages <= wpages && npages == NFS_I(inode)->npages
+			&& nfs_list_entry(head->next)->wb_bytes <= wsize)
+		how |= FLUSH_STABLE;
+
+	do {
+		nfs_coalesce_requests(head, &one_request, wpages);
 		req = nfs_list_entry(one_request.next);
-		error = nfs_flush_one(&one_request, req->wb_context->dentry->d_inode, how);
+		error = flush_one(inode, &one_request, how);
 		if (error < 0)
-			break;
-	}
-	if (error >= 0)
-		return pages;
-
+			goto out_err;
+	} while (!list_empty(head));
+	return 0;
+out_err:
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
@@ -1051,8 +1122,9 @@ nfs_flush_list(struct list_head *head, int wpages, int how)
 /*
  * Handle a write reply that flushed part of a page.
  */
-static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
+static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
 {
+	struct nfs_write_data	*data = calldata;
 	struct nfs_page		*req = data->req;
 	struct page		*page = req->wb_page;
 
@@ -1062,11 +1134,14 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
 		req->wb_bytes,
 		(long long)req_offset(req));
 
-	if (status < 0) {
+	if (nfs_writeback_done(task, data) != 0)
+		return;
+
+	if (task->tk_status < 0) {
 		ClearPageUptodate(page);
 		SetPageError(page);
-		req->wb_context->error = status;
-		dprintk(", error = %d\n", status);
+		req->wb_context->error = task->tk_status;
+		dprintk(", error = %d\n", task->tk_status);
 	} else {
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 		if (data->verf.committed < NFS_FILE_SYNC) {
@@ -1087,6 +1162,11 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
 		nfs_writepage_release(req);
 }
 
+static const struct rpc_call_ops nfs_write_partial_ops = {
+	.rpc_call_done = nfs_writeback_done_partial,
+	.rpc_release = nfs_writedata_release,
+};
+
 /*
  * Handle a write reply that flushes a whole page.
  *
@@ -1094,11 +1174,15 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
  *	  writebacks since the page->count is kept > 1 for as long
  *	  as the page has a write request pending.
  */
-static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
+static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
 {
+	struct nfs_write_data	*data = calldata;
 	struct nfs_page		*req;
 	struct page		*page;
 
+	if (nfs_writeback_done(task, data) != 0)
+		return;
+
 	/* Update attributes as result of writeback. */
 	while (!list_empty(&data->pages)) {
 		req = nfs_list_entry(data->pages.next);
@@ -1111,13 +1195,13 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
 			req->wb_bytes,
 			(long long)req_offset(req));
 
-		if (status < 0) {
+		if (task->tk_status < 0) {
 			ClearPageUptodate(page);
 			SetPageError(page);
-			req->wb_context->error = status;
+			req->wb_context->error = task->tk_status;
 			end_page_writeback(page);
 			nfs_inode_remove_request(req);
-			dprintk(", error = %d\n", status);
+			dprintk(", error = %d\n", task->tk_status);
 			goto next;
 		}
 		end_page_writeback(page);
@@ -1139,18 +1223,30 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
 	}
 }
 
+static const struct rpc_call_ops nfs_write_full_ops = {
+	.rpc_call_done = nfs_writeback_done_full,
+	.rpc_release = nfs_writedata_release,
+};
+
+
 /*
  * This function is called when the WRITE call is complete.
  */
-void nfs_writeback_done(struct rpc_task *task, void *calldata)
+int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-	struct nfs_write_data	*data = calldata;
 	struct nfs_writeargs	*argp = &data->args;
 	struct nfs_writeres	*resp = &data->res;
+	int status;
 
 	dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
 		task->tk_pid, task->tk_status);
 
+	/* Call the NFS version-specific code */
+	status = NFS_PROTO(data->inode)->write_done(task, data);
+	if (status != 0)
+		return status;
+	nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
+
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 	if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
 		/* We tried a write call, but the server did not
@@ -1176,6 +1272,8 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata)
 	if (task->tk_status >= 0 && resp->count < argp->count) {
 		static unsigned long    complain;
 
+		nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);
+
 		/* Has the server at least made some progress? */
 		if (resp->count != 0) {
 			/* Was this an NFSv2 write or an NFSv3 stable write? */
@@ -1191,7 +1289,7 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata)
 				argp->stable = NFS_FILE_SYNC;
 			}
 			rpc_restart_call(task);
-			return;
+			return -EAGAIN;
 		}
 		if (time_before(complain, jiffies)) {
 			printk(KERN_WARNING
@@ -1202,11 +1300,7 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata)
 		/* Can't do anything about it except throw an error. */
 		task->tk_status = -EIO;
 	}
-
-	/*
-	 * Process the nfs_page list
-	 */
-	data->complete(data, task->tk_status);
+	return 0;
 }
 
 
@@ -1220,10 +1314,12 @@ void nfs_commit_release(void *wdata)
  * Set up the argument/result storage required for the RPC call.
  */
 static void nfs_commit_rpcsetup(struct list_head *head,
-		struct nfs_write_data *data, int how)
+		struct nfs_write_data *data,
+		int how)
 {
 	struct nfs_page		*first;
 	struct inode		*inode;
+	int flags;
 
 	/* Set up the RPC argument and reply structs
 	 * NB: take care not to mess about with data->commit et al. */
@@ -1243,7 +1339,10 @@ static void nfs_commit_rpcsetup(struct list_head *head,
 	data->res.fattr   = &data->fattr;
 	data->res.verf    = &data->verf;
 	nfs_fattr_init(&data->fattr);
-	
+
+	/* Set up the initial task struct.  */
+	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+	rpc_init_task(&data->task, NFS_CLIENT(inode), flags, &nfs_commit_ops, data);
 	NFS_PROTO(inode)->commit_setup(data, how);
 
 	data->task.tk_priority = flush_task_priority(how);
@@ -1284,7 +1383,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 /*
  * COMMIT call returned
  */
-void nfs_commit_done(struct rpc_task *task, void *calldata)
+static void nfs_commit_done(struct rpc_task *task, void *calldata)
 {
 	struct nfs_write_data	*data = calldata;
 	struct nfs_page		*req;
@@ -1293,6 +1392,10 @@ void nfs_commit_done(struct rpc_task *task, void *calldata)
         dprintk("NFS: %4d nfs_commit_done (status %d)\n",
                                 task->tk_pid, task->tk_status);
 
+	/* Call the NFS version-specific code */
+	if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
+		return;
+
 	while (!list_empty(&data->pages)) {
 		req = nfs_list_entry(data->pages.next);
 		nfs_list_remove_request(req);
@@ -1326,6 +1429,16 @@ void nfs_commit_done(struct rpc_task *task, void *calldata)
 	}
 	sub_page_state(nr_unstable,res);
 }
+
+static const struct rpc_call_ops nfs_commit_ops = {
+	.rpc_call_done = nfs_commit_done,
+	.rpc_release = nfs_commit_release,
+};
+#else
+static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how)
+{
+	return 0;
+}
 #endif
 
 static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
@@ -1333,24 +1446,16 @@ static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	LIST_HEAD(head);
-	int			res,
-				error = 0;
+	int res;
 
 	spin_lock(&nfsi->req_lock);
 	res = nfs_scan_dirty(inode, &head, idx_start, npages);
 	spin_unlock(&nfsi->req_lock);
 	if (res) {
-		struct nfs_server *server = NFS_SERVER(inode);
-
-		/* For single writes, FLUSH_STABLE is more efficient */
-		if (res == nfsi->npages && nfsi->npages <= server->wpages) {
-			if (res > 1 || nfs_list_entry(head.next)->wb_bytes <= server->wsize)
-				how |= FLUSH_STABLE;
-		}
-		error = nfs_flush_list(&head, server->wpages, how);
+		int error = nfs_flush_list(inode, &head, res, how);
+		if (error < 0)
+			return error;
 	}
-	if (error < 0)
-		return error;
 	return res;
 }
 
@@ -1359,14 +1464,13 @@ int nfs_commit_inode(struct inode *inode, int how)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	LIST_HEAD(head);
-	int			res,
-				error = 0;
+	int res;
 
 	spin_lock(&nfsi->req_lock);
 	res = nfs_scan_commit(inode, &head, 0, 0);
 	spin_unlock(&nfsi->req_lock);
 	if (res) {
-		error = nfs_commit_list(inode, &head, how);
+		int error = nfs_commit_list(inode, &head, how);
 		if (error < 0)
 			return error;
 	}
@@ -1374,28 +1478,38 @@ int nfs_commit_inode(struct inode *inode, int how)
 }
 #endif
 
-int nfs_sync_inode(struct inode *inode, unsigned long idx_start,
-		  unsigned int npages, int how)
+int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start,
+		unsigned int npages, int how)
 {
+	struct nfs_inode *nfsi = NFS_I(inode);
+	LIST_HEAD(head);
 	int nocommit = how & FLUSH_NOCOMMIT;
-	int wait = how & FLUSH_WAIT;
-	int error;
-
-	how &= ~(FLUSH_WAIT|FLUSH_NOCOMMIT);
+	int pages, ret;
 
+	how &= ~FLUSH_NOCOMMIT;
+	spin_lock(&nfsi->req_lock);
 	do {
-		if (wait) {
-			error = nfs_wait_on_requests(inode, idx_start, npages);
-			if (error != 0)
-				continue;
-		}
-		error = nfs_flush_inode(inode, idx_start, npages, how);
-		if (error != 0)
+		ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
+		if (ret != 0)
 			continue;
-		if (!nocommit)
-			error = nfs_commit_inode(inode, how);
-	} while (error > 0);
-	return error;
+		pages = nfs_scan_dirty(inode, &head, idx_start, npages);
+		if (pages != 0) {
+			spin_unlock(&nfsi->req_lock);
+			ret = nfs_flush_list(inode, &head, pages, how);
+			spin_lock(&nfsi->req_lock);
+			continue;
+		}
+		if (nocommit)
+			break;
+		pages = nfs_scan_commit(inode, &head, 0, 0);
+		if (pages == 0)
+			break;
+		spin_unlock(&nfsi->req_lock);
+		ret = nfs_commit_list(inode, &head, how);
+		spin_lock(&nfsi->req_lock);
+	} while (ret >= 0);
+	spin_unlock(&nfsi->req_lock);
+	return ret;
 }
 
 int nfs_init_writepagecache(void)
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 8d3d23c8a4d..c872bd07fc1 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -326,6 +326,8 @@ out:
         .p_encode = (kxdrproc_t) nfs4_xdr_##argtype,                    \
         .p_decode = (kxdrproc_t) nfs4_xdr_##restype,                    \
         .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2,  \
+        .p_statidx = NFSPROC4_CB_##call,				\
+	.p_name   = #proc,                                              \
 }
 
 static struct rpc_procinfo     nfs4_cb_procedures[] = {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1143cfb6454..f6ab762bea9 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2639,7 +2639,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 	struct nfs4_stateid *lock_stp;
 	struct file *filp;
 	struct file_lock file_lock;
-	struct file_lock *conflock;
+	struct file_lock conflock;
 	int status = 0;
 	unsigned int strhashval;
 
@@ -2775,11 +2775,11 @@ conflicting_lock:
 	/* XXX There is a race here. Future patch needed to provide 
 	 * an atomic posix_lock_and_test_file
 	 */
-	if (!(conflock = posix_test_lock(filp, &file_lock))) {
+	if (!posix_test_lock(filp, &file_lock, &conflock)) {
 		status = nfserr_serverfault;
 		goto out;
 	}
-	nfs4_set_lock_denied(conflock, &lock->lk_denied);
+	nfs4_set_lock_denied(&conflock, &lock->lk_denied);
 out:
 	if (status && lock->lk_is_new && lock_sop)
 		release_stateowner(lock_sop);
@@ -2800,7 +2800,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 	struct inode *inode;
 	struct file file;
 	struct file_lock file_lock;
-	struct file_lock *conflicting_lock;
+	struct file_lock conflock;
 	int status;
 
 	if (nfs4_in_grace())
@@ -2864,10 +2864,9 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 	file.f_dentry = current_fh->fh_dentry;
 
 	status = nfs_ok;
-	conflicting_lock = posix_test_lock(&file, &file_lock);
-	if (conflicting_lock) {
+	if (posix_test_lock(&file, &file_lock, &conflock)) {
 		status = nfserr_denied;
-		nfs4_set_lock_denied(conflicting_lock, &lockt->lt_denied);
+		nfs4_set_lock_denied(&conflock, &lockt->lt_denied);
 	}
 out:
 	nfs4_unlock_state();
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 6b9812db377..edaab05a93e 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -566,9 +566,8 @@ static int ocfs2_do_insert_extent(struct ocfs2_super *osb,
 			next_free = le16_to_cpu(el->l_next_free_rec);
 			if (next_free == 0) {
 				ocfs2_error(inode->i_sb,
-					    "Dinode %"MLFu64" has a bad "
-					    "extent list",
-					    OCFS2_I(inode)->ip_blkno);
+					    "Dinode %llu has a bad extent list",
+					    (unsigned long long)OCFS2_I(inode)->ip_blkno);
 				status = -EIO;
 				goto bail;
 			}
@@ -611,9 +610,8 @@ static int ocfs2_do_insert_extent(struct ocfs2_super *osb,
 		next_free = le16_to_cpu(el->l_next_free_rec);
 		if (next_free == 0) {
 			ocfs2_error(inode->i_sb,
-				    "Dinode %"MLFu64" has a bad "
-				    "extent list",
-				    OCFS2_I(inode)->ip_blkno);
+				    "Dinode %llu has a bad extent list",
+				    (unsigned long long)OCFS2_I(inode)->ip_blkno);
 			status = -EIO;
 			goto bail;
 		}
@@ -652,8 +650,9 @@ static int ocfs2_do_insert_extent(struct ocfs2_super *osb,
 		/* having an empty extent at eof is legal. */
 		if (el->l_recs[i].e_cpos != fe->i_clusters) {
 			ocfs2_error(inode->i_sb,
-				    "Dinode %"MLFu64" trailing extent is bad: "
+				    "Dinode %llu trailing extent is bad: "
 				    "cpos (%u) != number of clusters (%u)",
+				    (unsigned long long)OCFS2_I(inode)->ip_blkno,
 				    le32_to_cpu(el->l_recs[i].e_cpos),
 				    le32_to_cpu(fe->i_clusters));
 			status = -EIO;
@@ -747,19 +746,19 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
 
 	while(le16_to_cpu(el->l_tree_depth) > 1) {
 		if (le16_to_cpu(el->l_next_free_rec) == 0) {
-			ocfs2_error(inode->i_sb, "Dinode %"MLFu64" has empty "
+			ocfs2_error(inode->i_sb, "Dinode %llu has empty "
 				    "extent list (next_free_rec == 0)",
-				    OCFS2_I(inode)->ip_blkno);
+				    (unsigned long long)OCFS2_I(inode)->ip_blkno);
 			status = -EIO;
 			goto bail;
 		}
 		i = le16_to_cpu(el->l_next_free_rec) - 1;
 		blkno = le64_to_cpu(el->l_recs[i].e_blkno);
 		if (!blkno) {
-			ocfs2_error(inode->i_sb, "Dinode %"MLFu64" has extent "
+			ocfs2_error(inode->i_sb, "Dinode %llu has extent "
 				    "list where extent # %d has no physical "
 				    "block start",
-				    OCFS2_I(inode)->ip_blkno, i);
+				    (unsigned long long)OCFS2_I(inode)->ip_blkno, i);
 			status = -EIO;
 			goto bail;
 		}
@@ -826,9 +825,9 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
 
 	mlog_entry_void();
 
-	mlog(0, "add %u clusters starting at block %"MLFu64" to "
-		"inode %"MLFu64"\n",
-	     new_clusters, start_blk, OCFS2_I(inode)->ip_blkno);
+	mlog(0, "add %u clusters starting at block %llu to inode %llu\n",
+	     new_clusters, (unsigned long long)start_blk,
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 
 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
 	el = &fe->id2.i_list;
@@ -963,8 +962,8 @@ static int ocfs2_truncate_log_append(struct ocfs2_super *osb,
 	struct ocfs2_dinode *di;
 	struct ocfs2_truncate_log *tl;
 
-	mlog_entry("start_blk = %"MLFu64", num_clusters = %u\n", start_blk,
-		   num_clusters);
+	mlog_entry("start_blk = %llu, num_clusters = %u\n",
+		   (unsigned long long)start_blk, num_clusters);
 
 	BUG_ON(mutex_trylock(&tl_inode->i_mutex));
 
@@ -981,8 +980,9 @@ static int ocfs2_truncate_log_append(struct ocfs2_super *osb,
 	tl_count = le16_to_cpu(tl->tl_count);
 	mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) ||
 			tl_count == 0,
-			"Truncate record count on #%"MLFu64" invalid ("
-			"wanted %u, actual %u\n", OCFS2_I(tl_inode)->ip_blkno,
+			"Truncate record count on #%llu invalid "
+			"wanted %u, actual %u\n",
+			(unsigned long long)OCFS2_I(tl_inode)->ip_blkno,
 			ocfs2_truncate_recs_per_inode(osb->sb),
 			le16_to_cpu(tl->tl_count));
 
@@ -1002,8 +1002,8 @@ static int ocfs2_truncate_log_append(struct ocfs2_super *osb,
 	}
 
 	mlog(0, "Log truncate of %u clusters starting at cluster %u to "
-	     "%"MLFu64" (index = %d)\n", num_clusters, start_cluster,
-	     OCFS2_I(tl_inode)->ip_blkno, index);
+	     "%llu (index = %d)\n", num_clusters, start_cluster,
+	     (unsigned long long)OCFS2_I(tl_inode)->ip_blkno, index);
 
 	if (ocfs2_truncate_log_can_coalesce(tl, start_cluster)) {
 		/*
@@ -1134,8 +1134,8 @@ static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
 	}
 
 	num_to_flush = le16_to_cpu(tl->tl_used);
-	mlog(0, "Flush %u records from truncate log #%"MLFu64"\n",
-	     num_to_flush, OCFS2_I(tl_inode)->ip_blkno);
+	mlog(0, "Flush %u records from truncate log #%llu\n",
+	     num_to_flush, (unsigned long long)OCFS2_I(tl_inode)->ip_blkno);
 	if (!num_to_flush) {
 		status = 0;
 		goto bail;
@@ -1360,8 +1360,8 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
 
 	tl = &tl_copy->id2.i_dealloc;
 	num_recs = le16_to_cpu(tl->tl_used);
-	mlog(0, "cleanup %u records from %"MLFu64"\n", num_recs,
-	     tl_copy->i_blkno);
+	mlog(0, "cleanup %u records from %llu\n", num_recs,
+	     (unsigned long long)tl_copy->i_blkno);
 
 	mutex_lock(&tl_inode->i_mutex);
 	for(i = 0; i < num_recs; i++) {
@@ -1529,7 +1529,8 @@ static int ocfs2_find_new_last_ext_blk(struct ocfs2_super *osb,
 
 	*new_last_eb = bh;
 	get_bh(*new_last_eb);
-	mlog(0, "returning block %"MLFu64"\n", le64_to_cpu(eb->h_blkno));
+	mlog(0, "returning block %llu\n",
+	     (unsigned long long)le64_to_cpu(eb->h_blkno));
 bail:
 	if (bh)
 		brelse(bh);
@@ -1646,8 +1647,8 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
 
 	/* if our tree depth > 0, update all the tree blocks below us. */
 	while (depth) {
-		mlog(0, "traveling tree (depth = %d, next_eb = %"MLFu64")\n",
-		     depth,  next_eb);
+		mlog(0, "traveling tree (depth = %d, next_eb = %llu)\n",
+		     depth,  (unsigned long long)next_eb);
 		status = ocfs2_read_block(osb, next_eb, &eb_bh,
 					  OCFS2_BH_CACHED, inode);
 		if (status < 0) {
@@ -1674,12 +1675,12 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
 
 		i = le16_to_cpu(el->l_next_free_rec) - 1;
 
-		mlog(0, "extent block %"MLFu64", before: record %d: "
-		     "(%u, %u, %"MLFu64"), next = %u\n",
-		     le64_to_cpu(eb->h_blkno), i,
+		mlog(0, "extent block %llu, before: record %d: "
+		     "(%u, %u, %llu), next = %u\n",
+		     (unsigned long long)le64_to_cpu(eb->h_blkno), i,
 		     le32_to_cpu(el->l_recs[i].e_cpos),
 		     le32_to_cpu(el->l_recs[i].e_clusters),
-		     le64_to_cpu(el->l_recs[i].e_blkno),
+		     (unsigned long long)le64_to_cpu(el->l_recs[i].e_blkno),
 		     le16_to_cpu(el->l_next_free_rec));
 
 		BUG_ON(le32_to_cpu(el->l_recs[i].e_clusters) < clusters_to_del);
@@ -1697,12 +1698,12 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
 			BUG_ON(!el->l_next_free_rec);
 			le16_add_cpu(&el->l_next_free_rec, -1);
 		}
-		mlog(0, "extent block %"MLFu64", after: record %d: "
-		     "(%u, %u, %"MLFu64"), next = %u\n",
-		     le64_to_cpu(eb->h_blkno), i,
+		mlog(0, "extent block %llu, after: record %d: "
+		     "(%u, %u, %llu), next = %u\n",
+		     (unsigned long long)le64_to_cpu(eb->h_blkno), i,
 		     le32_to_cpu(el->l_recs[i].e_cpos),
 		     le32_to_cpu(el->l_recs[i].e_clusters),
-		     le64_to_cpu(el->l_recs[i].e_blkno),
+		     (unsigned long long)le64_to_cpu(el->l_recs[i].e_blkno),
 		     le16_to_cpu(el->l_next_free_rec));
 
 		status = ocfs2_journal_dirty(handle, eb_bh);
@@ -1792,10 +1793,10 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
 	last_eb = le64_to_cpu(fe->i_last_eb_blk);
 start:
 	mlog(0, "ocfs2_commit_truncate: fe->i_clusters = %u, "
-	     "last_eb = %"MLFu64", fe->i_last_eb_blk = %"MLFu64", "
+	     "last_eb = %llu, fe->i_last_eb_blk = %llu, "
 	     "fe->id2.i_list.l_tree_depth = %u last_eb_bh = %p\n",
-	     le32_to_cpu(fe->i_clusters), last_eb,
-	     le64_to_cpu(fe->i_last_eb_blk),
+	     le32_to_cpu(fe->i_clusters), (unsigned long long)last_eb,
+	     (unsigned long long)le64_to_cpu(fe->i_last_eb_blk),
 	     le16_to_cpu(fe->id2.i_list.l_tree_depth), last_eb_bh);
 
 	if (last_eb != le64_to_cpu(fe->i_last_eb_blk)) {
@@ -1934,16 +1935,17 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
 
 	mlog(0, "fe->i_clusters = %u, new_i_clusters = %u, fe->i_size ="
-	     "%"MLFu64"\n", fe->i_clusters, new_i_clusters, fe->i_size);
+	     "%llu\n", fe->i_clusters, new_i_clusters,
+	     (unsigned long long)fe->i_size);
 
 	if (le32_to_cpu(fe->i_clusters) <= new_i_clusters) {
-		ocfs2_error(inode->i_sb, "Dinode %"MLFu64" has cluster count "
-			    "%u and size %"MLFu64" whereas struct inode has "
+		ocfs2_error(inode->i_sb, "Dinode %llu has cluster count "
+			    "%u and size %llu whereas struct inode has "
 			    "cluster count %u and size %llu which caused an "
 			    "invalid truncate to %u clusters.",
-			    le64_to_cpu(fe->i_blkno),
+			    (unsigned long long)le64_to_cpu(fe->i_blkno),
 			    le32_to_cpu(fe->i_clusters),
-			    le64_to_cpu(fe->i_size),
+			    (unsigned long long)le64_to_cpu(fe->i_size),
 			    OCFS2_I(inode)->ip_clusters, i_size_read(inode),
 			    new_i_clusters);
 		mlog_meta_lvb(ML_ERROR, &OCFS2_I(inode)->ip_meta_lockres);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 8f4467a930a..bf931ba1d36 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -74,8 +74,8 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
 	fe = (struct ocfs2_dinode *) bh->b_data;
 
 	if (!OCFS2_IS_VALID_DINODE(fe)) {
-		mlog(ML_ERROR, "Invalid dinode #%"MLFu64": signature = %.*s\n",
-		     fe->i_blkno, 7, fe->i_signature);
+		mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
+		     (unsigned long long)fe->i_blkno, 7, fe->i_signature);
 		goto bail;
 	}
 
@@ -162,8 +162,8 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
 					  NULL);
 	if (err) {
 		mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, "
-		     "%"MLFu64", NULL)\n", err, inode,
-		     (unsigned long long)iblock, p_blkno);
+		     "%llu, NULL)\n", err, inode, (unsigned long long)iblock,
+		     (unsigned long long)p_blkno);
 		goto bail;
 	}
 
@@ -171,13 +171,15 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
 
 	if (bh_result->b_blocknr == 0) {
 		err = -EIO;
-		mlog(ML_ERROR, "iblock = %llu p_blkno = %"MLFu64" "
-		     "blkno=(%"MLFu64")\n", (unsigned long long)iblock,
-		     p_blkno, OCFS2_I(inode)->ip_blkno);
+		mlog(ML_ERROR, "iblock = %llu p_blkno = %llu blkno=(%llu)\n",
+		     (unsigned long long)iblock,
+		     (unsigned long long)p_blkno,
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 	}
 
 	past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
-	mlog(0, "Inode %lu, past_eof = %"MLFu64"\n", inode->i_ino, past_eof);
+	mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
+	     (unsigned long long)past_eof);
 
 	if (create && (iblock >= past_eof))
 		set_buffer_new(bh_result);
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index bae3d7548be..9a24adf9be6 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -97,8 +97,8 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 	int i, ignore_cache = 0;
 	struct buffer_head *bh;
 
-	mlog_entry("(block=(%"MLFu64"), nr=(%d), flags=%d, inode=%p)\n",
-		   block, nr, flags, inode);
+	mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n",
+		   (unsigned long long)block, nr, flags, inode);
 
 	if (osb == NULL || osb->sb == NULL || bhs == NULL) {
 		status = -EINVAL;
@@ -143,9 +143,9 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 		if (flags & OCFS2_BH_CACHED &&
 		    !ocfs2_buffer_uptodate(inode, bh)) {
 			mlog(ML_UPTODATE,
-			     "bh (%llu), inode %"MLFu64" not uptodate\n",
+			     "bh (%llu), inode %llu not uptodate\n",
 			     (unsigned long long)bh->b_blocknr,
-			     OCFS2_I(inode)->ip_blkno);
+			     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 			ignore_cache = 1;
 		}
 
@@ -222,7 +222,8 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 	if (inode)
 		mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
 
-	mlog(ML_BH_IO, "block=(%"MLFu64"), nr=(%d), cached=%s\n", block, nr,
+	mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s\n", 
+	     (unsigned long long)block, nr,
 	     (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes");
 
 bail:
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index d08971d29b6..bff0f0d0686 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -449,11 +449,11 @@ static u32 o2hb_compute_block_crc_le(struct o2hb_region *reg,
 
 static void o2hb_dump_slot(struct o2hb_disk_heartbeat_block *hb_block)
 {
-	mlog(ML_ERROR, "Dump slot information: seq = 0x%"MLFx64", node = %u, "
-	     "cksum = 0x%x, generation 0x%"MLFx64"\n",
-	     le64_to_cpu(hb_block->hb_seq), hb_block->hb_node,
-	     le32_to_cpu(hb_block->hb_cksum),
-	     le64_to_cpu(hb_block->hb_generation));
+	mlog(ML_ERROR, "Dump slot information: seq = 0x%llx, node = %u, "
+	     "cksum = 0x%x, generation 0x%llx\n",
+	     (long long)le64_to_cpu(hb_block->hb_seq),
+	     hb_block->hb_node, le32_to_cpu(hb_block->hb_cksum),
+	     (long long)le64_to_cpu(hb_block->hb_generation));
 }
 
 static int o2hb_verify_crc(struct o2hb_region *reg,
@@ -516,8 +516,9 @@ static inline void o2hb_prepare_block(struct o2hb_region *reg,
 	hb_block->hb_cksum = cpu_to_le32(o2hb_compute_block_crc_le(reg,
 								   hb_block));
 
-	mlog(ML_HB_BIO, "our node generation = 0x%"MLFx64", cksum = 0x%x\n",
-	     cpu_to_le64(generation), le32_to_cpu(hb_block->hb_cksum));
+	mlog(ML_HB_BIO, "our node generation = 0x%llx, cksum = 0x%x\n",
+	     (long long)cpu_to_le64(generation),
+	     le32_to_cpu(hb_block->hb_cksum));
 }
 
 static void o2hb_fire_callbacks(struct o2hb_callback *hbcall,
@@ -686,19 +687,20 @@ static int o2hb_check_slot(struct o2hb_region *reg,
 	if (slot->ds_last_generation != le64_to_cpu(hb_block->hb_generation)) {
 		gen_changed = 1;
 		slot->ds_equal_samples = 0;
-		mlog(ML_HEARTBEAT, "Node %d changed generation (0x%"MLFx64" "
-		     "to 0x%"MLFx64")\n", slot->ds_node_num,
-		     slot->ds_last_generation,
-		     le64_to_cpu(hb_block->hb_generation));
+		mlog(ML_HEARTBEAT, "Node %d changed generation (0x%llx "
+		     "to 0x%llx)\n", slot->ds_node_num,
+		     (long long)slot->ds_last_generation,
+		     (long long)le64_to_cpu(hb_block->hb_generation));
 	}
 
 	slot->ds_last_generation = le64_to_cpu(hb_block->hb_generation);
 
-	mlog(ML_HEARTBEAT, "Slot %d gen 0x%"MLFx64" cksum 0x%x "
-	     "seq %"MLFu64" last %"MLFu64" changed %u equal %u\n",
-	     slot->ds_node_num, slot->ds_last_generation,
-	     le32_to_cpu(hb_block->hb_cksum), le64_to_cpu(hb_block->hb_seq), 
-	     slot->ds_last_time, slot->ds_changed_samples,
+	mlog(ML_HEARTBEAT, "Slot %d gen 0x%llx cksum 0x%x "
+	     "seq %llu last %llu changed %u equal %u\n",
+	     slot->ds_node_num, (long long)slot->ds_last_generation,
+	     le32_to_cpu(hb_block->hb_cksum),
+	     (unsigned long long)le64_to_cpu(hb_block->hb_seq), 
+	     (unsigned long long)slot->ds_last_time, slot->ds_changed_samples,
 	     slot->ds_equal_samples);
 
 	spin_lock(&o2hb_live_lock);
@@ -708,8 +710,8 @@ fire_callbacks:
 	 * changes at any time during their dead time */
 	if (list_empty(&slot->ds_live_item) &&
 	    slot->ds_changed_samples >= O2HB_LIVE_THRESHOLD) {
-		mlog(ML_HEARTBEAT, "Node %d (id 0x%"MLFx64") joined my "
-		     "region\n", slot->ds_node_num, slot->ds_last_generation);
+		mlog(ML_HEARTBEAT, "Node %d (id 0x%llx) joined my region\n",
+		     slot->ds_node_num, (long long)slot->ds_last_generation);
 
 		/* first on the list generates a callback */
 		if (list_empty(&o2hb_live_slots[slot->ds_node_num])) {
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 2cadc3009c8..73edad78253 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -256,16 +256,6 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
 	}								\
 } while (0)
 
-#if (BITS_PER_LONG == 32) || defined(CONFIG_X86_64) || (defined(CONFIG_UML_X86) && defined(CONFIG_64BIT))
-#define MLFi64 "lld"
-#define MLFu64 "llu"
-#define MLFx64 "llx"
-#else
-#define MLFi64 "ld"
-#define MLFu64 "lu"
-#define MLFx64 "lx"
-#endif
-
 #include <linux/kobject.h>
 #include <linux/sysfs.h>
 int mlog_sys_init(struct subsystem *o2cb_subsys);
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index bd85182e97b..1a01380e387 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -64,15 +64,16 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
 		/* did we or someone else delete this inode? */
 		if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
 			spin_unlock(&OCFS2_I(inode)->ip_lock);
-			mlog(0, "inode (%"MLFu64") deleted, returning false\n",
-			     OCFS2_I(inode)->ip_blkno);
+			mlog(0, "inode (%llu) deleted, returning false\n",
+			     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 			goto bail;
 		}
 		spin_unlock(&OCFS2_I(inode)->ip_lock);
 
 		if (!inode->i_nlink) {
-			mlog(0, "Inode %"MLFu64" orphaned, returning false "
-			     "dir = %d\n", OCFS2_I(inode)->ip_blkno,
+			mlog(0, "Inode %llu orphaned, returning false "
+			     "dir = %d\n",
+			     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 			     S_ISDIR(inode->i_mode));
 			goto bail;
 		}
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 57158fa75d9..ae47f450792 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -83,7 +83,8 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
 	struct super_block * sb = inode->i_sb;
 	int have_disk_lock = 0;
 
-	mlog_entry("dirino=%"MLFu64"\n", OCFS2_I(inode)->ip_blkno);
+	mlog_entry("dirino=%llu\n",
+		   (unsigned long long)OCFS2_I(inode)->ip_blkno);
 
 	stored = 0;
 	bh = NULL;
@@ -104,9 +105,9 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
 		blk = (filp->f_pos) >> sb->s_blocksize_bits;
 		bh = ocfs2_bread(inode, blk, &err, 0);
 		if (!bh) {
-			mlog(ML_ERROR, "directory #%"MLFu64" contains a hole "
-				       "at offset %lld\n",
-			     OCFS2_I(inode)->ip_blkno,
+			mlog(ML_ERROR,
+			     "directory #%llu contains a hole at offset %lld\n",
+			     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 			     filp->f_pos);
 			filp->f_pos += sb->s_blocksize - offset;
 			continue;
@@ -214,9 +215,9 @@ int ocfs2_find_files_on_disk(const char *name,
 	int status = -ENOENT;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 
-	mlog_entry("(osb=%p, parent=%"MLFu64", name='%.*s', blkno=%p, "
-		   "inode=%p)\n",
-		   osb, OCFS2_I(inode)->ip_blkno, namelen, name, blkno, inode);
+	mlog_entry("(osb=%p, parent=%llu, name='%.*s', blkno=%p, inode=%p)\n",
+		   osb, (unsigned long long)OCFS2_I(inode)->ip_blkno,
+		   namelen, name, blkno, inode);
 
 	*dirent_bh = ocfs2_find_entry(name, namelen, inode, dirent);
 	if (!*dirent_bh || !*dirent) {
@@ -255,8 +256,8 @@ int ocfs2_check_dir_for_entry(struct inode *dir,
 	struct buffer_head *dirent_bh = NULL;
 	struct ocfs2_dir_entry *dirent = NULL;
 
-	mlog_entry("dir %"MLFu64", name '%.*s'\n", OCFS2_I(dir)->ip_blkno,
-		   namelen, name);
+	mlog_entry("dir %llu, name '%.*s'\n",
+		   (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name);
 
 	ret = -EEXIST;
 	dirent_bh = ocfs2_find_entry(name, namelen, dir, &dirent);
@@ -287,9 +288,8 @@ int ocfs2_empty_dir(struct inode *inode)
 	if ((i_size_read(inode) <
 	     (OCFS2_DIR_REC_LEN(1) + OCFS2_DIR_REC_LEN(2))) ||
 	    !(bh = ocfs2_bread(inode, 0, &err, 0))) {
-	    	mlog(ML_ERROR, "bad directory (dir #%"MLFu64") - "
-			       "no data block\n",
-		     OCFS2_I(inode)->ip_blkno);
+	    	mlog(ML_ERROR, "bad directory (dir #%llu) - no data block\n",
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 		return 1;
 	}
 
@@ -300,9 +300,8 @@ int ocfs2_empty_dir(struct inode *inode)
 			!le64_to_cpu(de1->inode) ||
 			strcmp(".", de->name) ||
 			strcmp("..", de1->name)) {
-	    	mlog(ML_ERROR, "bad directory (dir #%"MLFu64") - "
-			       "no `.' or `..'\n",
-		     OCFS2_I(inode)->ip_blkno);
+	    	mlog(ML_ERROR, "bad directory (dir #%llu) - no `.' or `..'\n",
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 		brelse(bh);
 		return 1;
 	}
@@ -314,9 +313,8 @@ int ocfs2_empty_dir(struct inode *inode)
 			bh = ocfs2_bread(inode,
 					 offset >> sb->s_blocksize_bits, &err, 0);
 			if (!bh) {
-				mlog(ML_ERROR, "directory #%"MLFu64" contains "
-					       "a hole at offset %lu\n",
-				     OCFS2_I(inode)->ip_blkno, offset);
+				mlog(ML_ERROR, "dir %llu has a hole at %lu\n",
+				     (unsigned long long)OCFS2_I(inode)->ip_blkno, offset);
 				offset += sb->s_blocksize;
 				continue;
 			}
@@ -406,8 +404,8 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
 	mlog_entry_void();
 
 	dir_i_size = i_size_read(dir);
-	mlog(0, "extending dir %"MLFu64" (i_size = %lld)\n",
-	     OCFS2_I(dir)->ip_blkno, dir_i_size);
+	mlog(0, "extending dir %llu (i_size = %lld)\n",
+	     (unsigned long long)OCFS2_I(dir)->ip_blkno, dir_i_size);
 
 	handle = ocfs2_alloc_handle(osb);
 	if (handle == NULL) {
@@ -531,8 +529,8 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
 
 	mlog_entry_void();
 
-	mlog(0, "getting ready to insert namelen %d into dir %"MLFu64"\n",
-	     namelen, OCFS2_I(dir)->ip_blkno);
+	mlog(0, "getting ready to insert namelen %d into dir %llu\n",
+	     namelen, (unsigned long long)OCFS2_I(dir)->ip_blkno);
 
 	BUG_ON(!S_ISDIR(dir->i_mode));
 	fe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 8d17d28ef91..355593dd8ef 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -307,8 +307,11 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data)
 
 	if (past->type != DLM_AST &&
 	    past->type != DLM_BAST) {
-		mlog(ML_ERROR, "Unknown ast type! %d, cookie=%"MLFu64", "
-		     "name=%.*s\n", past->type, cookie, locklen, name);
+		mlog(ML_ERROR, "Unknown ast type! %d, cookie=%u:%llu"
+		     "name=%.*s\n", past->type, 
+		     dlm_get_lock_cookie_node(cookie),
+		     dlm_get_lock_cookie_seq(cookie),
+		     locklen, name);
 		ret = DLM_IVLOCKID;
 		goto leave;
 	}
@@ -316,9 +319,11 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data)
 	res = dlm_lookup_lockres(dlm, name, locklen);
 	if (!res) {
 		mlog(ML_ERROR, "got %sast for unknown lockres! "
-			       "cookie=%"MLFu64", name=%.*s, namelen=%u\n",
+			       "cookie=%u:%llu, name=%.*s, namelen=%u\n",
 		     past->type == DLM_AST ? "" : "b",
-		     cookie, locklen, name, locklen);
+		     dlm_get_lock_cookie_node(cookie),
+		     dlm_get_lock_cookie_seq(cookie),
+		     locklen, name, locklen);
 		ret = DLM_IVLOCKID;
 		goto leave;
 	}
@@ -360,9 +365,12 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data)
 			goto do_ast;
 	}
 
-	mlog(ML_ERROR, "got %sast for unknown lock!  cookie=%"MLFu64", "
-		       "name=%.*s, namelen=%u\n",
-             past->type == DLM_AST ? "" : "b", cookie, locklen, name, locklen);
+	mlog(ML_ERROR, "got %sast for unknown lock!  cookie=%u:%llu, "
+		       "name=%.*s, namelen=%u\n", 
+		       past->type == DLM_AST ? "" : "b", 
+		       dlm_get_lock_cookie_node(cookie),
+		       dlm_get_lock_cookie_seq(cookie),
+		       locklen, name, locklen);
 
 	ret = DLM_NORMAL;
 unlock_out:
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 9c772583744..88cc43df18f 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -630,6 +630,21 @@ __dlm_lockres_state_to_status(struct dlm_lock_resource *res)
 	return status;
 }
 
+static inline u8 dlm_get_lock_cookie_node(u64 cookie)
+{
+	u8 ret;
+	cookie >>= 56;
+	ret = (u8)(cookie & 0xffULL);
+	return ret;
+}
+
+static inline unsigned long long dlm_get_lock_cookie_seq(u64 cookie)
+{
+	unsigned long long ret;
+	ret = ((unsigned long long)cookie) & 0x00ffffffffffffffULL;
+	return ret;
+}
+
 struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie,
 			       struct dlm_lockstatus *lksb);
 void dlm_lock_get(struct dlm_lock *lock);
@@ -658,6 +673,7 @@ void dlm_complete_thread(struct dlm_ctxt *dlm);
 int dlm_launch_recovery_thread(struct dlm_ctxt *dlm);
 void dlm_complete_recovery_thread(struct dlm_ctxt *dlm);
 void dlm_wait_for_recovery(struct dlm_ctxt *dlm);
+void dlm_kick_recovery_thread(struct dlm_ctxt *dlm);
 int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node);
 int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout);
 
@@ -762,6 +778,11 @@ int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data);
 int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data);
 int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data);
 int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data);
+int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
+			  u8 nodenum, u8 *real_master);
+int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
+			       struct dlm_lock_resource *res, u8 *real_master);
+
 
 int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
 			       struct dlm_lock_resource *res,
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index f66e2d818cc..8285228d9e3 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -284,8 +284,10 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
 	if (lock->ml.convert_type != LKM_IVMODE) {
 		__dlm_print_one_lock_resource(res);
 		mlog(ML_ERROR, "converting a remote lock that is already "
-		     "converting! (cookie=%"MLFu64", conv=%d)\n",
-		     lock->ml.cookie, lock->ml.convert_type);
+		     "converting! (cookie=%u:%llu, conv=%d)\n",
+		     dlm_get_lock_cookie_node(lock->ml.cookie),
+		     dlm_get_lock_cookie_seq(lock->ml.cookie),
+		     lock->ml.convert_type);
 		status = DLM_DENIED;
 		goto bail;
 	}
@@ -513,8 +515,9 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data)
 leave:
 	if (!lock)
 		mlog(ML_ERROR, "did not find lock to convert on grant queue! "
-			       "cookie=%"MLFu64"\n",
-		     cnv->cookie);
+			       "cookie=%u:%llu\n",
+			       dlm_get_lock_cookie_node(cnv->cookie),
+			       dlm_get_lock_cookie_seq(cnv->cookie));
 	else
 		dlm_lock_put(lock);
 
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 54f61b76ab5..c7eae5d3324 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -72,8 +72,10 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
 		lock = list_entry(iter2, struct dlm_lock, list);
 		spin_lock(&lock->spinlock);
 		mlog(ML_NOTICE, "    type=%d, conv=%d, node=%u, "
-		       "cookie=%"MLFu64", ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", 
-		       lock->ml.type, lock->ml.convert_type, lock->ml.node, lock->ml.cookie, 
+		       "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", 
+		       lock->ml.type, lock->ml.convert_type, lock->ml.node, 
+		       dlm_get_lock_cookie_node(lock->ml.cookie), 
+		       dlm_get_lock_cookie_seq(lock->ml.cookie), 
 		       list_empty(&lock->ast_list) ? 'y' : 'n',
 		       lock->ast_pending ? 'y' : 'n',
 		       list_empty(&lock->bast_list) ? 'y' : 'n',
@@ -85,8 +87,10 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
 		lock = list_entry(iter2, struct dlm_lock, list);
 		spin_lock(&lock->spinlock);
 		mlog(ML_NOTICE, "    type=%d, conv=%d, node=%u, "
-		       "cookie=%"MLFu64", ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", 
-		       lock->ml.type, lock->ml.convert_type, lock->ml.node, lock->ml.cookie, 
+		       "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", 
+		       lock->ml.type, lock->ml.convert_type, lock->ml.node, 
+		       dlm_get_lock_cookie_node(lock->ml.cookie), 
+		       dlm_get_lock_cookie_seq(lock->ml.cookie), 
 		       list_empty(&lock->ast_list) ? 'y' : 'n',
 		       lock->ast_pending ? 'y' : 'n',
 		       list_empty(&lock->bast_list) ? 'y' : 'n',
@@ -98,8 +102,10 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
 		lock = list_entry(iter2, struct dlm_lock, list);
 		spin_lock(&lock->spinlock);
 		mlog(ML_NOTICE, "    type=%d, conv=%d, node=%u, "
-		       "cookie=%"MLFu64", ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", 
-		       lock->ml.type, lock->ml.convert_type, lock->ml.node, lock->ml.cookie, 
+		       "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", 
+		       lock->ml.type, lock->ml.convert_type, lock->ml.node, 
+		       dlm_get_lock_cookie_node(lock->ml.cookie), 
+		       dlm_get_lock_cookie_seq(lock->ml.cookie), 
 		       list_empty(&lock->ast_list) ? 'y' : 'n',
 		       lock->ast_pending ? 'y' : 'n',
 		       list_empty(&lock->bast_list) ? 'y' : 'n',
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 671d4ff222c..6fea28318d6 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -141,13 +141,23 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm,
 					  res->lockname.len)) {
 			kick_thread = 1;
 			call_ast = 1;
+		} else {
+			mlog(0, "%s: returning DLM_NORMAL to "
+			     "node %u for reco lock\n", dlm->name,
+			     lock->ml.node);
 		}
 	} else {
 		/* for NOQUEUE request, unless we get the
 		 * lock right away, return DLM_NOTQUEUED */
-		if (flags & LKM_NOQUEUE)
+		if (flags & LKM_NOQUEUE) {
 			status = DLM_NOTQUEUED;
-		else {
+			if (dlm_is_recovery_lock(res->lockname.name,
+						 res->lockname.len)) {
+				mlog(0, "%s: returning NOTQUEUED to "
+				     "node %u for reco lock\n", dlm->name,
+				     lock->ml.node);
+			}
+		} else {
 			dlm_lock_get(lock);
 			list_add_tail(&lock->list, &res->blocked);
 			kick_thread = 1;
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 847dd3cc4cf..940be4c13b1 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -239,6 +239,8 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
 static int dlm_mark_lockres_migrating(struct dlm_ctxt *dlm,
 				       struct dlm_lock_resource *res,
 				       u8 target);
+static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
+				       struct dlm_lock_resource *res);
 
 
 int dlm_is_host_down(int errno)
@@ -677,6 +679,7 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
 	struct dlm_node_iter iter;
 	unsigned int namelen;
 	int tries = 0;
+	int bit, wait_on_recovery = 0;
 
 	BUG_ON(!lockid);
 
@@ -762,6 +765,18 @@ lookup:
 		dlm_init_mle(mle, DLM_MLE_MASTER, dlm, res, NULL, 0);
 		set_bit(dlm->node_num, mle->maybe_map);
 		list_add(&mle->list, &dlm->master_list);
+
+		/* still holding the dlm spinlock, check the recovery map
+		 * to see if there are any nodes that still need to be 
+		 * considered.  these will not appear in the mle nodemap
+		 * but they might own this lockres.  wait on them. */
+		bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
+		if (bit < O2NM_MAX_NODES) {
+			mlog(ML_NOTICE, "%s:%.*s: at least one node (%d) to"
+			     "recover before lock mastery can begin\n",
+			     dlm->name, namelen, (char *)lockid, bit);
+			wait_on_recovery = 1;
+		}
 	}
 
 	/* at this point there is either a DLM_MLE_BLOCK or a
@@ -779,6 +794,39 @@ lookup:
 	spin_unlock(&dlm->master_lock);
 	spin_unlock(&dlm->spinlock);
 
+	while (wait_on_recovery) {
+		/* any cluster changes that occurred after dropping the
+		 * dlm spinlock would be detectable be a change on the mle,
+		 * so we only need to clear out the recovery map once. */
+		if (dlm_is_recovery_lock(lockid, namelen)) {
+			mlog(ML_NOTICE, "%s: recovery map is not empty, but "
+			     "must master $RECOVERY lock now\n", dlm->name);
+			if (!dlm_pre_master_reco_lockres(dlm, res))
+				wait_on_recovery = 0;
+			else {
+				mlog(0, "%s: waiting 500ms for heartbeat state "
+				    "change\n", dlm->name);
+				msleep(500);
+			}
+			continue;
+		} 
+
+		dlm_kick_recovery_thread(dlm);
+		msleep(100);
+		dlm_wait_for_recovery(dlm);
+
+		spin_lock(&dlm->spinlock);
+		bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
+		if (bit < O2NM_MAX_NODES) {
+			mlog(ML_NOTICE, "%s:%.*s: at least one node (%d) to"
+			     "recover before lock mastery can begin\n",
+			     dlm->name, namelen, (char *)lockid, bit);
+			wait_on_recovery = 1;
+		} else
+			wait_on_recovery = 0;
+		spin_unlock(&dlm->spinlock);
+	}
+
 	/* must wait for lock to be mastered elsewhere */
 	if (blocked)
 		goto wait;
@@ -792,7 +840,15 @@ redo_request:
 			mlog_errno(ret);
 		if (mle->master != O2NM_MAX_NODES) {
 			/* found a master ! */
-			break;
+			if (mle->master <= nodenum)
+				break;
+			/* if our master request has not reached the master
+			 * yet, keep going until it does.  this is how the
+			 * master will know that asserts are needed back to
+			 * the lower nodes. */
+			mlog(0, "%s:%.*s: requests only up to %u but master "
+			     "is %u, keep going\n", dlm->name, namelen,
+			     lockid, nodenum, mle->master);
 		}
 	}
 
@@ -860,7 +916,19 @@ recheck:
 	/* check if another node has already become the owner */
 	spin_lock(&res->spinlock);
 	if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) {
+		mlog(0, "%s:%.*s: owner is suddenly %u\n", dlm->name,
+		     res->lockname.len, res->lockname.name, res->owner);
 		spin_unlock(&res->spinlock);
+		/* this will cause the master to re-assert across
+		 * the whole cluster, freeing up mles */
+		ret = dlm_do_master_request(mle, res->owner);
+		if (ret < 0) {
+			/* give recovery a chance to run */
+			mlog(ML_ERROR, "link to %u went down?: %d\n", res->owner, ret);
+			msleep(500);
+			goto recheck;
+		}
+		ret = 0;
 		goto leave;
 	}
 	spin_unlock(&res->spinlock);
@@ -1244,13 +1312,14 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data)
 {
 	u8 response = DLM_MASTER_RESP_MAYBE;
 	struct dlm_ctxt *dlm = data;
-	struct dlm_lock_resource *res;
+	struct dlm_lock_resource *res = NULL;
 	struct dlm_master_request *request = (struct dlm_master_request *) msg->buf;
 	struct dlm_master_list_entry *mle = NULL, *tmpmle = NULL;
 	char *name;
 	unsigned int namelen;
 	int found, ret;
 	int set_maybe;
+	int dispatch_assert = 0;
 
 	if (!dlm_grab(dlm))
 		return DLM_MASTER_RESP_NO;
@@ -1287,7 +1356,6 @@ way_up_top:
 		}
 
 		if (res->owner == dlm->node_num) {
-			u32 flags = DLM_ASSERT_MASTER_MLE_CLEANUP;
 			spin_unlock(&res->spinlock);
 			// mlog(0, "this node is the master\n");
 			response = DLM_MASTER_RESP_YES;
@@ -1300,16 +1368,7 @@ way_up_top:
 			 * caused all nodes up to this one to
 			 * create mles.  this node now needs to
 			 * go back and clean those up. */
-			mlog(0, "%u is the owner of %.*s, cleaning everyone else\n",
-			     dlm->node_num, res->lockname.len, res->lockname.name);
-			ret = dlm_dispatch_assert_master(dlm, res, 1,
-							 request->node_idx,
-							 flags);
-			if (ret < 0) {
-				mlog(ML_ERROR, "failed to dispatch assert "
-				     "master work\n");
-				response = DLM_MASTER_RESP_ERROR;
-			}
+			dispatch_assert = 1;
 			goto send_response;
 		} else if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) {
 			spin_unlock(&res->spinlock);
@@ -1357,9 +1416,13 @@ way_up_top:
 			}
 		} else if (tmpmle->master != DLM_LOCK_RES_OWNER_UNKNOWN) {
 			set_maybe = 0;
-			if (tmpmle->master == dlm->node_num)
+			if (tmpmle->master == dlm->node_num) {
 				response = DLM_MASTER_RESP_YES;
-			else
+				/* this node will be the owner.
+				 * go back and clean the mles on any
+				 * other nodes */
+				dispatch_assert = 1;
+			} else
 				response = DLM_MASTER_RESP_NO;
 		} else {
 			// mlog(0, "this node is attempting to "
@@ -1398,8 +1461,8 @@ way_up_top:
 			mle = (struct dlm_master_list_entry *)
 				kmem_cache_alloc(dlm_mle_cache, GFP_KERNEL);
 			if (!mle) {
-				// bad bad bad... this sucks.
 				response = DLM_MASTER_RESP_ERROR;
+				mlog_errno(-ENOMEM);
 				goto send_response;
 			}
 			spin_lock(&dlm->spinlock);
@@ -1418,25 +1481,19 @@ way_up_top:
 		// mlog(0, "mle was found\n");
 		set_maybe = 1;
 		spin_lock(&tmpmle->spinlock);
+		if (tmpmle->master == dlm->node_num) {
+			mlog(ML_ERROR, "no lockres, but an mle with this node as master!\n");
+			BUG();
+		}
 		if (tmpmle->type == DLM_MLE_BLOCK)
 			response = DLM_MASTER_RESP_NO;
 		else if (tmpmle->type == DLM_MLE_MIGRATION) {
 			mlog(0, "migration mle was found (%u->%u)\n",
 			     tmpmle->master, tmpmle->new_master);
-			if (tmpmle->master == dlm->node_num) {
-				mlog(ML_ERROR, "no lockres, but migration mle "
-				     "says that this node is master!\n");
-				BUG();
-			}
 			/* real master can respond on its own */
 			response = DLM_MASTER_RESP_NO;
-		} else {
-			if (tmpmle->master == dlm->node_num) {
-				response = DLM_MASTER_RESP_YES;
-				set_maybe = 0;
-			} else
-				response = DLM_MASTER_RESP_MAYBE;
-		}
+		} else
+			response = DLM_MASTER_RESP_MAYBE;
 		if (set_maybe)
 			set_bit(request->node_idx, tmpmle->maybe_map);
 		spin_unlock(&tmpmle->spinlock);
@@ -1449,6 +1506,24 @@ way_up_top:
 		dlm_put_mle(tmpmle);
 	}
 send_response:
+
+	if (dispatch_assert) {
+		if (response != DLM_MASTER_RESP_YES)
+			mlog(ML_ERROR, "invalid response %d\n", response);
+		if (!res) {
+			mlog(ML_ERROR, "bad lockres while trying to assert!\n");
+			BUG();
+		}
+		mlog(0, "%u is the owner of %.*s, cleaning everyone else\n",
+			     dlm->node_num, res->lockname.len, res->lockname.name);
+		ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx, 
+						 DLM_ASSERT_MASTER_MLE_CLEANUP);
+		if (ret < 0) {
+			mlog(ML_ERROR, "failed to dispatch assert master work\n");
+			response = DLM_MASTER_RESP_ERROR;
+		}
+	}
+
 	dlm_put(dlm);
 	return response;
 }
@@ -1471,8 +1546,11 @@ static int dlm_do_assert_master(struct dlm_ctxt *dlm, const char *lockname,
 	int to, tmpret;
 	struct dlm_node_iter iter;
 	int ret = 0;
+	int reassert;
 
 	BUG_ON(namelen > O2NM_MAX_NAME_LEN);
+again:
+	reassert = 0;
 
 	/* note that if this nodemap is empty, it returns 0 */
 	dlm_node_iter_init(nodemap, &iter);
@@ -1504,9 +1582,17 @@ static int dlm_do_assert_master(struct dlm_ctxt *dlm, const char *lockname,
 			     "got %d.\n", namelen, lockname, to, r);
 			dlm_dump_lock_resources(dlm);
 			BUG();
+		} else if (r == EAGAIN) {
+			mlog(0, "%.*s: node %u create mles on other "
+			     "nodes and requests a re-assert\n", 
+			     namelen, lockname, to);
+			reassert = 1;
 		}
 	}
 
+	if (reassert)
+		goto again;
+
 	return ret;
 }
 
@@ -1528,6 +1614,8 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data)
 	char *name;
 	unsigned int namelen;
 	u32 flags;
+	int master_request = 0;
+	int ret = 0;
 
 	if (!dlm_grab(dlm))
 		return 0;
@@ -1642,11 +1730,22 @@ ok:
 	// mlog(0, "woo!  got an assert_master from node %u!\n",
 	// 	     assert->node_idx);
 	if (mle) {
-		int extra_ref;
+		int extra_ref = 0;
+		int nn = -1;
 		
 		spin_lock(&mle->spinlock);
-		extra_ref = !!(mle->type == DLM_MLE_BLOCK
-			       || mle->type == DLM_MLE_MIGRATION);
+		if (mle->type == DLM_MLE_BLOCK || mle->type == DLM_MLE_MIGRATION)
+			extra_ref = 1;
+		else {
+			/* MASTER mle: if any bits set in the response map
+			 * then the calling node needs to re-assert to clear
+			 * up nodes that this node contacted */
+			while ((nn = find_next_bit (mle->response_map, O2NM_MAX_NODES, 
+						    nn+1)) < O2NM_MAX_NODES) {
+				if (nn != dlm->node_num && nn != assert->node_idx)
+					master_request = 1;
+			}
+		}
 		mle->master = assert->node_idx;
 		atomic_set(&mle->woken, 1);
 		wake_up(&mle->wq);
@@ -1677,10 +1776,15 @@ ok:
 	}
 
 done:
+	ret = 0;
 	if (res)
 		dlm_lockres_put(res);
 	dlm_put(dlm);
-	return 0;
+	if (master_request) {
+		mlog(0, "need to tell master to reassert\n");
+		ret = EAGAIN;  // positive. negative would shoot down the node.
+	}
+	return ret;
 
 kill:
 	/* kill the caller! */
@@ -1713,6 +1817,10 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
 	item->u.am.request_from = request_from;
 	item->u.am.flags = flags;
 
+	if (ignore_higher) 
+		mlog(0, "IGNORE HIGHER: %.*s\n", res->lockname.len, 
+		     res->lockname.name);
+		
 	spin_lock(&dlm->work_lock);
 	list_add_tail(&item->list, &dlm->work_list);
 	spin_unlock(&dlm->work_lock);
@@ -1775,6 +1883,61 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data)
 	mlog(0, "finished with dlm_assert_master_worker\n");
 }
 
+/* SPECIAL CASE for the $RECOVERY lock used by the recovery thread.
+ * We cannot wait for node recovery to complete to begin mastering this
+ * lockres because this lockres is used to kick off recovery! ;-)
+ * So, do a pre-check on all living nodes to see if any of those nodes
+ * think that $RECOVERY is currently mastered by a dead node.  If so,
+ * we wait a short time to allow that node to get notified by its own
+ * heartbeat stack, then check again.  All $RECOVERY lock resources
+ * mastered by dead nodes are purged when the hearbeat callback is 
+ * fired, so we can know for sure that it is safe to continue once
+ * the node returns a live node or no node.  */
+static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
+				       struct dlm_lock_resource *res)
+{
+	struct dlm_node_iter iter;
+	int nodenum;
+	int ret = 0;
+	u8 master = DLM_LOCK_RES_OWNER_UNKNOWN;
+
+	spin_lock(&dlm->spinlock);
+	dlm_node_iter_init(dlm->domain_map, &iter);
+	spin_unlock(&dlm->spinlock);
+
+	while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
+		/* do not send to self */
+		if (nodenum == dlm->node_num)
+			continue;
+		ret = dlm_do_master_requery(dlm, res, nodenum, &master);
+		if (ret < 0) {
+			mlog_errno(ret);
+			if (!dlm_is_host_down(ret))
+				BUG();
+			/* host is down, so answer for that node would be
+			 * DLM_LOCK_RES_OWNER_UNKNOWN.  continue. */
+		}
+
+		if (master != DLM_LOCK_RES_OWNER_UNKNOWN) {
+			/* check to see if this master is in the recovery map */
+			spin_lock(&dlm->spinlock);
+			if (test_bit(master, dlm->recovery_map)) {
+				mlog(ML_NOTICE, "%s: node %u has not seen "
+				     "node %u go down yet, and thinks the "
+				     "dead node is mastering the recovery "
+				     "lock.  must wait.\n", dlm->name,
+				     nodenum, master);
+				ret = -EAGAIN;
+			}
+			spin_unlock(&dlm->spinlock);
+			mlog(0, "%s: reco lock master is %u\n", dlm->name, 
+			     master);
+			break;
+		}
+	}
+	return ret;
+}
+
 
 /*
  * DLM_MIGRATE_LOCKRES
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 1e232000f3f..805cbabac05 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -58,7 +58,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node);
 static int dlm_recovery_thread(void *data);
 void dlm_complete_recovery_thread(struct dlm_ctxt *dlm);
 int dlm_launch_recovery_thread(struct dlm_ctxt *dlm);
-static void dlm_kick_recovery_thread(struct dlm_ctxt *dlm);
+void dlm_kick_recovery_thread(struct dlm_ctxt *dlm);
 static int dlm_do_recovery(struct dlm_ctxt *dlm);
 
 static int dlm_pick_recovery_master(struct dlm_ctxt *dlm);
@@ -78,15 +78,9 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
 				    u8 send_to,
 				    struct dlm_lock_resource *res,
 				    int total_locks);
-static int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
-				      struct dlm_lock_resource *res,
-				      u8 *real_master);
 static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
 				     struct dlm_lock_resource *res,
 				     struct dlm_migratable_lockres *mres);
-static int dlm_do_master_requery(struct dlm_ctxt *dlm,
-				 struct dlm_lock_resource *res,
-				 u8 nodenum, u8 *real_master);
 static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm);
 static int dlm_send_all_done_msg(struct dlm_ctxt *dlm,
 				 u8 dead_node, u8 send_to);
@@ -165,7 +159,7 @@ void dlm_dispatch_work(void *data)
  * RECOVERY THREAD
  */
 
-static void dlm_kick_recovery_thread(struct dlm_ctxt *dlm)
+void dlm_kick_recovery_thread(struct dlm_ctxt *dlm)
 {
 	/* wake the recovery thread
 	 * this will wake the reco thread in one of three places
@@ -750,10 +744,12 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data)
 		     dlm->name, dlm->reco.dead_node, dlm->reco.new_master,
 		     dead_node, reco_master);
 		mlog(ML_ERROR, "%s: name=%.*s master=%u locks=%u/%u flags=%u "
-		     "entry[0]={c=%"MLFu64",l=%u,f=%u,t=%d,ct=%d,hb=%d,n=%u}\n",
+		     "entry[0]={c=%u:%llu,l=%u,f=%u,t=%d,ct=%d,hb=%d,n=%u}\n",
 		     dlm->name, mres->lockname_len, mres->lockname, mres->master,
 		     mres->num_locks, mres->total_locks, mres->flags,
-		     mres->ml[0].cookie, mres->ml[0].list, mres->ml[0].flags,
+		     dlm_get_lock_cookie_node(mres->ml[0].cookie),
+		     dlm_get_lock_cookie_seq(mres->ml[0].cookie),
+		     mres->ml[0].list, mres->ml[0].flags,
 		     mres->ml[0].type, mres->ml[0].convert_type,
 		     mres->ml[0].highest_blocked, mres->ml[0].node);
 		BUG();
@@ -1316,9 +1312,8 @@ leave:
 
 
 
-static int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
-				      struct dlm_lock_resource *res,
-				      u8 *real_master)
+int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
+			       struct dlm_lock_resource *res, u8 *real_master)
 {
 	struct dlm_node_iter iter;
 	int nodenum;
@@ -1360,8 +1355,10 @@ static int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
 		ret = dlm_do_master_requery(dlm, res, nodenum, real_master);
 		if (ret < 0) {
 			mlog_errno(ret);
-			BUG();
-			/* TODO: need to figure a way to restart this */
+			if (!dlm_is_host_down(ret))
+				BUG();
+			/* host is down, so answer for that node would be
+			 * DLM_LOCK_RES_OWNER_UNKNOWN.  continue. */
 		}
 		if (*real_master != DLM_LOCK_RES_OWNER_UNKNOWN) {
 			mlog(0, "lock master is %u\n", *real_master);
@@ -1372,9 +1369,8 @@ static int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
 }
 
 
-static int dlm_do_master_requery(struct dlm_ctxt *dlm,
-				 struct dlm_lock_resource *res,
-				 u8 nodenum, u8 *real_master)
+int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
+			  u8 nodenum, u8 *real_master)
 {
 	int ret = -EINVAL;
 	struct dlm_master_requery req;
@@ -1519,9 +1515,11 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
 			/* lock is always created locally first, and
 			 * destroyed locally last.  it must be on the list */
 			if (!lock) {
+				u64 c = ml->cookie;
 				mlog(ML_ERROR, "could not find local lock "
-					       "with cookie %"MLFu64"!\n",
-				     ml->cookie);
+					       "with cookie %u:%llu!\n",
+					       dlm_get_lock_cookie_node(c),
+					       dlm_get_lock_cookie_seq(c));
 				BUG();
 			}
 			BUG_ON(lock->ml.node != ml->node);
@@ -1739,6 +1737,13 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
 				} else
 					continue;
 
+				if (!list_empty(&res->recovering)) {
+					mlog(0, "%s:%.*s: lockres was "
+					     "marked RECOVERING, owner=%u\n",
+					     dlm->name, res->lockname.len,
+					     res->lockname.name, res->owner);
+					list_del_init(&res->recovering);
+				}
 				spin_lock(&res->spinlock);
 				dlm_change_lockres_owner(dlm, res, new_master);
 				res->state &= ~DLM_LOCK_RES_RECOVERING;
@@ -2258,7 +2263,10 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data)
 			mlog(0, "%u not in domain/live_nodes map "
 			     "so setting it in reco map manually\n",
 			     br->dead_node);
-		set_bit(br->dead_node, dlm->recovery_map);
+		/* force the recovery cleanup in __dlm_hb_node_down
+		 * both of these will be cleared in a moment */
+		set_bit(br->dead_node, dlm->domain_map);
+		set_bit(br->dead_node, dlm->live_nodes_map);
 		__dlm_hb_node_down(dlm, br->dead_node);
 	}
 	spin_unlock(&dlm->spinlock);
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index c95f08d2e92..7b1a2754267 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -244,8 +244,10 @@ leave:
 	if (actions & DLM_UNLOCK_FREE_LOCK) {
 		/* this should always be coupled with list removal */
 		BUG_ON(!(actions & DLM_UNLOCK_REMOVE_LOCK));
-		mlog(0, "lock %"MLFu64" should be gone now! refs=%d\n",
-		     lock->ml.cookie, atomic_read(&lock->lock_refs.refcount)-1);
+		mlog(0, "lock %u:%llu should be gone now! refs=%d\n",
+		     dlm_get_lock_cookie_node(lock->ml.cookie),
+		     dlm_get_lock_cookie_seq(lock->ml.cookie),
+		     atomic_read(&lock->lock_refs.refcount)-1);
 		dlm_lock_put(lock);
 	}
 	if (actions & DLM_UNLOCK_CALL_AST)
@@ -493,8 +495,9 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data)
 not_found:
 	if (!found)
 		mlog(ML_ERROR, "failed to find lock to unlock! "
-			       "cookie=%"MLFu64"\n",
-		     unlock->cookie);
+			       "cookie=%u:%llu\n",
+			       dlm_get_lock_cookie_node(unlock->cookie),
+			       dlm_get_lock_cookie_seq(unlock->cookie));
 	else {
 		/* send the lksb->status back to the other node */
 		status = lksb->status;
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index e971ec2f840..84f153aca69 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -231,9 +231,9 @@ static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
 
 	BUG_ON(type >= OCFS2_NUM_LOCK_TYPES);
 
-	len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016"MLFx64"%08x",
-		       ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD, blkno,
-		       generation);
+	len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x",
+		       ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD,
+		       (long long)blkno, generation);
 
 	BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1));
 
@@ -533,8 +533,8 @@ static void ocfs2_inode_ast_func(void *opaque)
 
 	inode = ocfs2_lock_res_inode(lockres);
 
-	mlog(0, "AST fired for inode %"MLFu64", l_action = %u, type = %s\n",
-	     OCFS2_I(inode)->ip_blkno, lockres->l_action,
+	mlog(0, "AST fired for inode %llu, l_action = %u, type = %s\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno, lockres->l_action,
 	     ocfs2_lock_type_string(lockres->l_type));
 
 	BUG_ON(!ocfs2_is_inode_lock(lockres));
@@ -544,8 +544,8 @@ static void ocfs2_inode_ast_func(void *opaque)
 	lksb = &(lockres->l_lksb);
 	if (lksb->status != DLM_NORMAL) {
 		mlog(ML_ERROR, "ocfs2_inode_ast_func: lksb status value of %u "
-		     "on inode %"MLFu64"\n", lksb->status,
-		     OCFS2_I(inode)->ip_blkno);
+		     "on inode %llu\n", lksb->status,
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 		spin_unlock_irqrestore(&lockres->l_lock, flags);
 		mlog_exit_void();
 		return;
@@ -646,10 +646,9 @@ static void ocfs2_inode_bast_func(void *opaque, int level)
 	inode = ocfs2_lock_res_inode(lockres);
 	osb = OCFS2_SB(inode->i_sb);
 
-	mlog(0, "BAST fired for inode %"MLFu64", blocking = %d, level = %d "
-	     "type = %s\n", OCFS2_I(inode)->ip_blkno, level,
-	     lockres->l_level,
-	     ocfs2_lock_type_string(lockres->l_type));
+	mlog(0, "BAST fired for inode %llu, blocking %d, level %d type %s\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno, level,
+	     lockres->l_level, ocfs2_lock_type_string(lockres->l_type));
 
 	ocfs2_generic_bast_func(osb, lockres, level);
 
@@ -1104,7 +1103,7 @@ int ocfs2_create_new_inode_locks(struct inode *inode)
 
 	mlog_entry_void();
 
-	mlog(0, "Inode %"MLFu64"\n", OCFS2_I(inode)->ip_blkno);
+	mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);
 
 	/* NOTE: That we don't increment any of the holder counts, nor
 	 * do we add anything to a journal handle. Since this is
@@ -1149,8 +1148,8 @@ int ocfs2_rw_lock(struct inode *inode, int write)
 
 	mlog_entry_void();
 
-	mlog(0, "inode %"MLFu64" take %s RW lock\n",
-	     OCFS2_I(inode)->ip_blkno,
+	mlog(0, "inode %llu take %s RW lock\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 	     write ? "EXMODE" : "PRMODE");
 
 	lockres = &OCFS2_I(inode)->ip_rw_lockres;
@@ -1173,8 +1172,8 @@ void ocfs2_rw_unlock(struct inode *inode, int write)
 
 	mlog_entry_void();
 
-	mlog(0, "inode %"MLFu64" drop %s RW lock\n",
-	     OCFS2_I(inode)->ip_blkno,
+	mlog(0, "inode %llu drop %s RW lock\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 	     write ? "EXMODE" : "PRMODE");
 
 	ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
@@ -1193,8 +1192,8 @@ int ocfs2_data_lock_full(struct inode *inode,
 
 	mlog_entry_void();
 
-	mlog(0, "inode %"MLFu64" take %s DATA lock\n",
-	     OCFS2_I(inode)->ip_blkno,
+	mlog(0, "inode %llu take %s DATA lock\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 	     write ? "EXMODE" : "PRMODE");
 
 	/* We'll allow faking a readonly data lock for
@@ -1278,8 +1277,8 @@ void ocfs2_data_unlock(struct inode *inode,
 
 	mlog_entry_void();
 
-	mlog(0, "inode %"MLFu64" drop %s DATA lock\n",
-	     OCFS2_I(inode)->ip_blkno,
+	mlog(0, "inode %llu drop %s DATA lock\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 	     write ? "EXMODE" : "PRMODE");
 
 	if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)))
@@ -1462,9 +1461,9 @@ static int ocfs2_meta_lock_update(struct inode *inode,
 
 	spin_lock(&oi->ip_lock);
 	if (oi->ip_flags & OCFS2_INODE_DELETED) {
-		mlog(0, "Orphaned inode %"MLFu64" was deleted while we "
+		mlog(0, "Orphaned inode %llu was deleted while we "
 		     "were waiting on a lock. ip_flags = 0x%x\n",
-		     oi->ip_blkno, oi->ip_flags);
+		     (unsigned long long)oi->ip_blkno, oi->ip_flags);
 		spin_unlock(&oi->ip_lock);
 		status = -ENOENT;
 		goto bail;
@@ -1485,8 +1484,8 @@ static int ocfs2_meta_lock_update(struct inode *inode,
 	ocfs2_extent_map_trunc(inode, 0);
 
 	if (ocfs2_meta_lvb_is_trustable(lockres)) {
-		mlog(0, "Trusting LVB on inode %"MLFu64"\n",
-		     oi->ip_blkno);
+		mlog(0, "Trusting LVB on inode %llu\n",
+		     (unsigned long long)oi->ip_blkno);
 		ocfs2_refresh_inode_from_lvb(inode);
 	} else {
 		/* Boo, we have to go to disk. */
@@ -1514,15 +1513,16 @@ static int ocfs2_meta_lock_update(struct inode *inode,
 		}
 		mlog_bug_on_msg(inode->i_generation !=
 				le32_to_cpu(fe->i_generation),
-				"Invalid dinode %"MLFu64" disk generation: %u "
+				"Invalid dinode %llu disk generation: %u "
 				"inode->i_generation: %u\n",
-				oi->ip_blkno, le32_to_cpu(fe->i_generation),
+				(unsigned long long)oi->ip_blkno,
+				le32_to_cpu(fe->i_generation),
 				inode->i_generation);
 		mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) ||
 				!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)),
-				"Stale dinode %"MLFu64" dtime: %"MLFu64" "
-				"flags: 0x%x\n", oi->ip_blkno,
-				le64_to_cpu(fe->i_dtime),
+				"Stale dinode %llu dtime: %llu flags: 0x%x\n",
+				(unsigned long long)oi->ip_blkno,
+				(unsigned long long)le64_to_cpu(fe->i_dtime),
 				le32_to_cpu(fe->i_flags));
 
 		ocfs2_refresh_inode(inode, fe);
@@ -1581,8 +1581,8 @@ int ocfs2_meta_lock_full(struct inode *inode,
 
 	mlog_entry_void();
 
-	mlog(0, "inode %"MLFu64", take %s META lock\n",
-	     OCFS2_I(inode)->ip_blkno,
+	mlog(0, "inode %llu, take %s META lock\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 	     ex ? "EXMODE" : "PRMODE");
 
 	status = 0;
@@ -1716,8 +1716,8 @@ void ocfs2_meta_unlock(struct inode *inode,
 
 	mlog_entry_void();
 
-	mlog(0, "inode %"MLFu64" drop %s META lock\n",
-	     OCFS2_I(inode)->ip_blkno,
+	mlog(0, "inode %llu drop %s META lock\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 	     ex ? "EXMODE" : "PRMODE");
 
 	if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)))
@@ -2686,8 +2686,8 @@ static void ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
 	mapping = inode->i_mapping;
 
 	if (filemap_fdatawrite(mapping)) {
-		mlog(ML_ERROR, "Could not sync inode %"MLFu64" for downconvert!",
-		     OCFS2_I(inode)->ip_blkno);
+		mlog(ML_ERROR, "Could not sync inode %llu for downconvert!",
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 	}
 	sync_mapping_buffers(mapping);
 	if (blocking == LKM_EXMODE) {
@@ -2717,7 +2717,8 @@ int ocfs2_unblock_data(struct ocfs2_lock_res *lockres,
 	inode = ocfs2_lock_res_inode(lockres);
 	osb = OCFS2_SB(inode->i_sb);
 
-	mlog(0, "unblock inode %"MLFu64"\n", OCFS2_I(inode)->ip_blkno);
+	mlog(0, "unblock inode %llu\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 
 	status = ocfs2_generic_unblock_lock(osb,
 					    lockres,
@@ -2726,8 +2727,8 @@ int ocfs2_unblock_data(struct ocfs2_lock_res *lockres,
 	if (status < 0)
 		mlog_errno(status);
 
-	mlog(0, "inode %"MLFu64", requeue = %d\n",
-	     OCFS2_I(inode)->ip_blkno, *requeue);
+	mlog(0, "inode %llu, requeue = %d\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno, *requeue);
 
 	mlog_exit(status);
 	return status;
@@ -2767,14 +2768,15 @@ int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres,
 
        	inode = ocfs2_lock_res_inode(lockres);
 
-	mlog(0, "unblock inode %"MLFu64"\n", OCFS2_I(inode)->ip_blkno);
+	mlog(0, "unblock inode %llu\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 
 	status = ocfs2_do_unblock_meta(inode, requeue);
 	if (status < 0)
 		mlog_errno(status);
 
-	mlog(0, "inode %"MLFu64", requeue = %d\n",
-	     OCFS2_I(inode)->ip_blkno, *requeue);
+	mlog(0, "inode %llu, requeue = %d\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno, *requeue);
 
 	mlog_exit(status);
 	return status;
@@ -2893,12 +2895,13 @@ void ocfs2_dump_meta_lvb_info(u64 level,
 	     lockres->l_name, function, line);
 	mlog(level, "version: %u, clusters: %u\n",
 	     be32_to_cpu(lvb->lvb_version), be32_to_cpu(lvb->lvb_iclusters));
-	mlog(level, "size: %"MLFu64", uid %u, gid %u, mode 0x%x\n",
-	     be64_to_cpu(lvb->lvb_isize), be32_to_cpu(lvb->lvb_iuid),
-	     be32_to_cpu(lvb->lvb_igid), be16_to_cpu(lvb->lvb_imode));
-	mlog(level, "nlink %u, atime_packed 0x%"MLFx64", "
-	     "ctime_packed 0x%"MLFx64", mtime_packed 0x%"MLFx64"\n",
-	     be16_to_cpu(lvb->lvb_inlink), be64_to_cpu(lvb->lvb_iatime_packed),
-	     be64_to_cpu(lvb->lvb_ictime_packed),
-	     be64_to_cpu(lvb->lvb_imtime_packed));
+	mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n",
+	     (unsigned long long)be64_to_cpu(lvb->lvb_isize),
+	     be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
+	     be16_to_cpu(lvb->lvb_imode));
+	mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, "
+	     "mtime_packed 0x%llx\n", be16_to_cpu(lvb->lvb_inlink),
+	     (long long)be64_to_cpu(lvb->lvb_iatime_packed),
+	     (long long)be64_to_cpu(lvb->lvb_ictime_packed),
+	     (long long)be64_to_cpu(lvb->lvb_imtime_packed));
 }
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 5810160d92a..ec55ab3c121 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -95,8 +95,8 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
 	mlog_entry("(0x%p, '%.*s')\n", child,
 		   child->d_name.len, child->d_name.name);
 
-	mlog(0, "find parent of directory %"MLFu64"\n",
-	     OCFS2_I(dir)->ip_blkno);
+	mlog(0, "find parent of directory %llu\n",
+	     (unsigned long long)OCFS2_I(dir)->ip_blkno);
 
 	status = ocfs2_meta_lock(dir, NULL, NULL, 0);
 	if (status < 0) {
@@ -115,7 +115,8 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
 
 	inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno);
 	if (IS_ERR(inode)) {
-		mlog(ML_ERROR, "Unable to create inode %"MLFu64"\n", blkno);
+		mlog(ML_ERROR, "Unable to create inode %llu\n",
+		     (unsigned long long)blkno);
 		parent = ERR_PTR(-EACCES);
 		goto bail_unlock;
 	}
@@ -160,8 +161,8 @@ static int ocfs2_encode_fh(struct dentry *dentry, __be32 *fh, int *max_len,
 	blkno = OCFS2_I(inode)->ip_blkno;
 	generation = inode->i_generation;
 
-	mlog(0, "Encoding fh: blkno: %"MLFu64", generation: %u\n",
-	     blkno, generation);
+	mlog(0, "Encoding fh: blkno: %llu, generation: %u\n",
+	     (unsigned long long)blkno, generation);
 
 	len = 3;
 	fh[0] = cpu_to_le32((u32)(blkno >> 32));
@@ -186,8 +187,8 @@ static int ocfs2_encode_fh(struct dentry *dentry, __be32 *fh, int *max_len,
 		len = 6;
 		type = 2;
 
-		mlog(0, "Encoding parent: blkno: %"MLFu64", generation: %u\n",
-		     blkno, generation);
+		mlog(0, "Encoding parent: blkno: %llu, generation: %u\n",
+		     (unsigned long long)blkno, generation);
 	}
 	
 	*max_len = len;
@@ -220,16 +221,17 @@ static struct dentry *ocfs2_decode_fh(struct super_block *sb, __be32 *fh,
 		parent.ih_blkno |= (u64)le32_to_cpu(fh[4]);
 		parent.ih_generation = le32_to_cpu(fh[5]);
 
-		mlog(0, "Decoding parent: blkno: %"MLFu64", generation: %u\n",
-		     parent.ih_blkno, parent.ih_generation);
+		mlog(0, "Decoding parent: blkno: %llu, generation: %u\n",
+		     (unsigned long long)parent.ih_blkno,
+		     parent.ih_generation);
 	}
 
 	handle.ih_blkno = (u64)le32_to_cpu(fh[0]) << 32;
 	handle.ih_blkno |= (u64)le32_to_cpu(fh[1]);
 	handle.ih_generation = le32_to_cpu(fh[2]);
 
-	mlog(0, "Encoding fh: blkno: %"MLFu64", generation: %u\n",
-	     handle.ih_blkno, handle.ih_generation);
+	mlog(0, "Encoding fh: blkno: %llu, generation: %u\n",
+	     (unsigned long long)handle.ih_blkno, handle.ih_generation);
 
 	ret = ocfs2_export_ops.find_exported_dentry(sb, &handle, &parent,
 						    acceptable, context);
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index e6f207eebab..4601fc256f1 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -182,10 +182,10 @@ static int ocfs2_extent_map_find_leaf(struct inode *inode,
 			if (rec_end > OCFS2_I(inode)->ip_clusters) {
 				mlog_errno(ret);
 				ocfs2_error(inode->i_sb,
-					    "Extent %d at e_blkno %"MLFu64" of inode %"MLFu64" goes past ip_clusters of %u\n",
+					    "Extent %d at e_blkno %llu of inode %llu goes past ip_clusters of %u\n",
 					    i,
-					    le64_to_cpu(rec->e_blkno),
-					    OCFS2_I(inode)->ip_blkno,
+					    (unsigned long long)le64_to_cpu(rec->e_blkno),
+					    (unsigned long long)OCFS2_I(inode)->ip_blkno,
 					    OCFS2_I(inode)->ip_clusters);
 				goto out_free;
 			}
@@ -233,11 +233,11 @@ static int ocfs2_extent_map_find_leaf(struct inode *inode,
 			if (blkno) {
 				mlog_errno(ret);
 				ocfs2_error(inode->i_sb,
-					    "Multiple extents for (cpos = %u, clusters = %u) on inode %"MLFu64"; e_blkno %"MLFu64" and rec %d at e_blkno %"MLFu64"\n",
+					    "Multiple extents for (cpos = %u, clusters = %u) on inode %llu; e_blkno %llu and rec %d at e_blkno %llu\n",
 					    cpos, clusters,
-					    OCFS2_I(inode)->ip_blkno,
-					    blkno, i,
-					    le64_to_cpu(rec->e_blkno));
+					    (unsigned long long)OCFS2_I(inode)->ip_blkno,
+					    (unsigned long long)blkno, i,
+					    (unsigned long long)le64_to_cpu(rec->e_blkno));
 				goto out_free;
 			}
 
@@ -251,9 +251,9 @@ static int ocfs2_extent_map_find_leaf(struct inode *inode,
 		ret = -EBADR;
 		if (!blkno) {
 			ocfs2_error(inode->i_sb,
-				    "No record found for (cpos = %u, clusters = %u) on inode %"MLFu64"\n",
+				    "No record found for (cpos = %u, clusters = %u) on inode %llu\n",
 				    cpos, clusters,
-				    OCFS2_I(inode)->ip_blkno);
+				    (unsigned long long)OCFS2_I(inode)->ip_blkno);
 			mlog_errno(ret);
 			goto out_free;
 		}
@@ -288,10 +288,10 @@ static int ocfs2_extent_map_find_leaf(struct inode *inode,
 			ret = -EBADR;
 			mlog_errno(ret);
 			ocfs2_error(inode->i_sb,
-				    "Extent %d at e_blkno %"MLFu64" of inode %"MLFu64" goes past ip_clusters of %u\n",
+				    "Extent %d at e_blkno %llu of inode %llu goes past ip_clusters of %u\n",
 				    i,
-				    le64_to_cpu(rec->e_blkno),
-				    OCFS2_I(inode)->ip_blkno,
+				    (unsigned long long)le64_to_cpu(rec->e_blkno),
+				    (unsigned long long)OCFS2_I(inode)->ip_blkno,
 				    OCFS2_I(inode)->ip_clusters);
 			return ret;
 		}
@@ -557,9 +557,9 @@ static int ocfs2_extent_map_insert(struct inode *inode,
 			ret = -EBADR;
 			mlog_errno(ret);
 			ocfs2_error(inode->i_sb,
-				    "Zero e_clusters on non-tail extent record at e_blkno %"MLFu64" on inode %"MLFu64"\n",
-				    le64_to_cpu(rec->e_blkno),
-				    OCFS2_I(inode)->ip_blkno);
+				    "Zero e_clusters on non-tail extent record at e_blkno %llu on inode %llu\n",
+				    (unsigned long long)le64_to_cpu(rec->e_blkno),
+				    (unsigned long long)OCFS2_I(inode)->ip_blkno);
 			return ret;
 		}
 
@@ -660,10 +660,10 @@ int ocfs2_extent_map_append(struct inode *inode,
 	mlog_bug_on_msg((le32_to_cpu(rec->e_cpos) +
 			 le32_to_cpu(rec->e_clusters)) !=
 			(em->em_clusters + new_clusters),
-			"Inode %"MLFu64":\n"
+			"Inode %llu:\n"
 			"rec->e_cpos = %u + rec->e_clusters = %u = %u\n"
 			"em->em_clusters = %u + new_clusters = %u = %u\n",
-			OCFS2_I(inode)->ip_blkno,
+			(unsigned long long)OCFS2_I(inode)->ip_blkno,
 			le32_to_cpu(rec->e_cpos), le32_to_cpu(rec->e_clusters),
 			le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters),
 			em->em_clusters, new_clusters,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 8a4048b55fd..4b4cbadd583 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -220,8 +220,9 @@ static int ocfs2_truncate_file(struct inode *inode,
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct ocfs2_truncate_context *tc = NULL;
 
-	mlog_entry("(inode = %"MLFu64", new_i_size = %"MLFu64"\n",
-		   OCFS2_I(inode)->ip_blkno, new_i_size);
+	mlog_entry("(inode = %llu, new_i_size = %llu\n",
+		   (unsigned long long)OCFS2_I(inode)->ip_blkno,
+		   (unsigned long long)new_i_size);
 
 	truncate_inode_pages(inode->i_mapping, new_i_size);
 
@@ -233,23 +234,26 @@ static int ocfs2_truncate_file(struct inode *inode,
 	}
 
 	mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode),
-			"Inode %"MLFu64", inode i_size = %lld != di "
-			"i_size = %"MLFu64", i_flags = 0x%x\n",
-			OCFS2_I(inode)->ip_blkno,
+			"Inode %llu, inode i_size = %lld != di "
+			"i_size = %llu, i_flags = 0x%x\n",
+			(unsigned long long)OCFS2_I(inode)->ip_blkno,
 			i_size_read(inode),
-			le64_to_cpu(fe->i_size), le32_to_cpu(fe->i_flags));
+			(unsigned long long)le64_to_cpu(fe->i_size),
+			le32_to_cpu(fe->i_flags));
 
 	if (new_i_size > le64_to_cpu(fe->i_size)) {
-		mlog(0, "asked to truncate file with size (%"MLFu64") "
-		     "to size (%"MLFu64")!\n",
-		     le64_to_cpu(fe->i_size), new_i_size);
+		mlog(0, "asked to truncate file with size (%llu) to size (%llu)!\n",
+		     (unsigned long long)le64_to_cpu(fe->i_size),
+		     (unsigned long long)new_i_size);
 		status = -EINVAL;
 		mlog_errno(status);
 		goto bail;
 	}
 
-	mlog(0, "inode %"MLFu64", i_size = %"MLFu64", new_i_size = %"MLFu64"\n",
-	     le64_to_cpu(fe->i_blkno), le64_to_cpu(fe->i_size), new_i_size);
+	mlog(0, "inode %llu, i_size = %llu, new_i_size = %llu\n",
+	     (unsigned long long)le64_to_cpu(fe->i_blkno),
+	     (unsigned long long)le64_to_cpu(fe->i_size),
+	     (unsigned long long)new_i_size);
 
 	/* lets handle the simple truncate cases before doing any more
 	 * cluster locking. */
@@ -378,8 +382,8 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
 	}
 
 	block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
-	mlog(0, "Allocating %u clusters at block %u for inode %"MLFu64"\n",
-	     num_bits, bit_off, OCFS2_I(inode)->ip_blkno);
+	mlog(0, "Allocating %u clusters at block %u for inode %llu\n",
+	     num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
 	status = ocfs2_insert_extent(osb, handle, inode, fe_bh, block,
 				     num_bits, meta_ac);
 	if (status < 0) {
@@ -449,9 +453,9 @@ static int ocfs2_extend_allocation(struct inode *inode,
 restart_all:
 	BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
 
-	mlog(0, "extend inode %"MLFu64", i_size = %lld, fe->i_clusters = %u, "
+	mlog(0, "extend inode %llu, i_size = %lld, fe->i_clusters = %u, "
 	     "clusters_to_add = %u\n",
-	     OCFS2_I(inode)->ip_blkno, i_size_read(inode),
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode),
 	     fe->i_clusters, clusters_to_add);
 
 	handle = ocfs2_alloc_handle(osb);
@@ -569,8 +573,8 @@ restarted_transaction:
 		}
 	}
 
-	mlog(0, "fe: i_clusters = %u, i_size=%"MLFu64"\n",
-	     fe->i_clusters, fe->i_size);
+	mlog(0, "fe: i_clusters = %u, i_size=%llu\n",
+	     fe->i_clusters, (unsigned long long)fe->i_size);
 	mlog(0, "inode: ip_clusters=%u, i_size=%lld\n",
 	     OCFS2_I(inode)->ip_clusters, i_size_read(inode));
 
@@ -865,8 +869,8 @@ static int ocfs2_write_remove_suid(struct inode *inode)
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct ocfs2_dinode *di;
 
-	mlog_entry("(Inode %"MLFu64", mode 0%o)\n", oi->ip_blkno,
-		   inode->i_mode);
+	mlog_entry("(Inode %llu, mode 0%o)\n",
+		   (unsigned long long)oi->ip_blkno, inode->i_mode);
 
 	handle = ocfs2_start_trans(osb, NULL, OCFS2_INODE_UPDATE_CREDITS);
 	if (handle == NULL) {
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 315472a5c19..327a5b7b86e 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -95,7 +95,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno)
 	struct super_block *sb = osb->sb;
 	struct ocfs2_find_inode_args args;
 
-	mlog_entry("(blkno = %"MLFu64")\n", blkno);
+	mlog_entry("(blkno = %llu)\n", (unsigned long long)blkno);
 
 	/* Ok. By now we've either got the offsets passed to us by the
 	 * caller, or we just pulled them off the bh. Lets do some
@@ -134,8 +134,8 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno)
 
 bail:
 	if (!IS_ERR(inode)) {
-		mlog(0, "returning inode with number %"MLFu64"\n",
-		     OCFS2_I(inode)->ip_blkno);
+		mlog(0, "returning inode with number %llu\n",
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 		mlog_exit_ptr(inode);
 	} else
 		mlog_errno(PTR_ERR(inode));
@@ -219,7 +219,8 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 	struct ocfs2_super *osb;
 	int status = -EINVAL;
 
-	mlog_entry("(0x%p, size:%"MLFu64")\n", inode, fe->i_size);
+	mlog_entry("(0x%p, size:%llu)\n", inode,
+		   (unsigned long long)fe->i_size);
 
 	sb = inode->i_sb;
 	osb = OCFS2_SB(sb);
@@ -228,9 +229,10 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 	 * today.  change if needed. */
 	if (!OCFS2_IS_VALID_DINODE(fe) ||
 	    !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL))) {
-		mlog(ML_ERROR, "Invalid dinode: i_ino=%lu, i_blkno=%"MLFu64", "
+		mlog(ML_ERROR, "Invalid dinode: i_ino=%lu, i_blkno=%llu, "
 		     "signature = %.*s, flags = 0x%x\n",
-		     inode->i_ino, le64_to_cpu(fe->i_blkno), 7,
+		     inode->i_ino,
+		     (unsigned long long)le64_to_cpu(fe->i_blkno), 7,
 		     fe->i_signature, le32_to_cpu(fe->i_flags));
 		goto bail;
 	}
@@ -268,8 +270,9 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 
 	if (OCFS2_I(inode)->ip_blkno != le64_to_cpu(fe->i_blkno))
 		mlog(ML_ERROR,
-		     "ip_blkno %"MLFu64" != i_blkno %"MLFu64"!\n",
-		     OCFS2_I(inode)->ip_blkno, fe->i_blkno);
+		     "ip_blkno %llu != i_blkno %llu!\n",
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno,
+		     (unsigned long long)fe->i_blkno);
 
 	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
 	OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT;
@@ -278,8 +281,8 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 		inode->i_ino = ino_from_blkno(inode->i_sb,
 			       le64_to_cpu(fe->i_blkno));
 
-	mlog(0, "blkno = %"MLFu64", ino = %lu, create_ino = %s\n",
-	     fe->i_blkno, inode->i_ino, create_ino ? "true" : "false");
+	mlog(0, "blkno = %llu, ino = %lu, create_ino = %s\n",
+	     (unsigned long long)fe->i_blkno, inode->i_ino, create_ino ? "true" : "false");
 
 	inode->i_nlink = le16_to_cpu(fe->i_links_count);
 
@@ -371,8 +374,8 @@ static int ocfs2_read_locked_inode(struct inode *inode,
 
 	fe = (struct ocfs2_dinode *) bh->b_data;
 	if (!OCFS2_IS_VALID_DINODE(fe)) {
-		mlog(ML_ERROR, "Invalid dinode #%"MLFu64": signature = %.*s\n",
-		     fe->i_blkno, 7, fe->i_signature);
+		mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
+		     (unsigned long long)fe->i_blkno, 7, fe->i_signature);
 		make_bad_inode(inode);
 		goto bail;
 	}
@@ -386,8 +389,8 @@ static int ocfs2_read_locked_inode(struct inode *inode,
 
 	status = -EINVAL;
 	if (ocfs2_populate_inode(inode, fe, 0) < 0) {
-		mlog(ML_ERROR, "populate inode failed! i_blkno=%"MLFu64", "
-		     "i_ino=%lu\n", fe->i_blkno, inode->i_ino);
+		mlog(ML_ERROR, "populate failed! i_blkno=%llu, i_ino=%lu\n",
+		     (unsigned long long)fe->i_blkno, inode->i_ino);
 		make_bad_inode(inode);
 		goto bail;
 	}
@@ -675,8 +678,8 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode)
 	 * never get here as system file inodes should always have a
 	 * positive link count. */
 	if (oi->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
-		mlog(ML_ERROR, "Skipping delete of system file %"MLFu64".\n",
-		     oi->ip_blkno);
+		mlog(ML_ERROR, "Skipping delete of system file %llu\n",
+		     (unsigned long long)oi->ip_blkno);
 		goto bail_unlock;
 	}
 
@@ -715,16 +718,16 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
 	 * ocfs2_delete_inode, another node might have asked to delete
 	 * the inode. Recheck our flags to catch this. */
 	if (!ocfs2_inode_is_valid_to_delete(inode)) {
-		mlog(0, "Skipping delete of %"MLFu64" because flags changed\n",
-		     oi->ip_blkno);
+		mlog(0, "Skipping delete of %llu because flags changed\n",
+		     (unsigned long long)oi->ip_blkno);
 		goto bail;
 	}
 
 	/* Now that we have an up to date inode, we can double check
 	 * the link count. */
 	if (inode->i_nlink) {
-		mlog(0, "Skipping delete of %"MLFu64" because nlink = %u\n",
-		     oi->ip_blkno, inode->i_nlink);
+		mlog(0, "Skipping delete of %llu because nlink = %u\n",
+		     (unsigned long long)oi->ip_blkno, inode->i_nlink);
 		goto bail;
 	}
 
@@ -734,9 +737,11 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
 		/* for lack of a better error? */
 		status = -EEXIST;
 		mlog(ML_ERROR,
-		     "Inode %"MLFu64" (on-disk %"MLFu64") not orphaned! "
+		     "Inode %llu (on-disk %llu) not orphaned! "
 		     "Disk flags  0x%x, inode flags 0x%x\n",
-		     oi->ip_blkno, di->i_blkno, di->i_flags, oi->ip_flags);
+		     (unsigned long long)oi->ip_blkno,
+		     (unsigned long long)di->i_blkno, di->i_flags,
+		     oi->ip_flags);
 		goto bail;
 	}
 
@@ -753,8 +758,8 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
 	 * disk and let them worry about deleting it. */
 	if (status == -EBUSY) {
 		status = 0;
-		mlog(0, "Skipping delete of %"MLFu64" because it is in use on"
-		     "other nodes\n", oi->ip_blkno);
+		mlog(0, "Skipping delete of %llu because it is in use on"
+		     "other nodes\n", (unsigned long long)oi->ip_blkno);
 		goto bail;
 	}
 	if (status < 0) {
@@ -768,13 +773,13 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
 		 * into. This may happen during node death and
 		 * recovery knows how to clean it up so we can safely
 		 * ignore this inode for now on. */
-		mlog(0, "Nobody knew where inode %"MLFu64" was orphaned!\n",
-		     oi->ip_blkno);
+		mlog(0, "Nobody knew where inode %llu was orphaned!\n",
+		     (unsigned long long)oi->ip_blkno);
 	} else {
 		*wipe = 1;
 
-		mlog(0, "Inode %"MLFu64" is ok to wipe from orphan dir %d\n",
-		     oi->ip_blkno, oi->ip_orphaned_slot);
+		mlog(0, "Inode %llu is ok to wipe from orphan dir %d\n",
+		     (unsigned long long)oi->ip_blkno, oi->ip_orphaned_slot);
 	}
 	spin_unlock(&oi->ip_lock);
 
@@ -788,8 +793,8 @@ bail:
 static void ocfs2_cleanup_delete_inode(struct inode *inode,
 				       int sync_data)
 {
-	mlog(0, "Cleanup inode %"MLFu64", sync = %d\n",
-	     OCFS2_I(inode)->ip_blkno, sync_data);
+	mlog(0, "Cleanup inode %llu, sync = %d\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno, sync_data);
 	if (sync_data)
 		write_inode_now(inode, 1);
 	truncate_inode_pages(&inode->i_data, 0);
@@ -897,8 +902,8 @@ void ocfs2_clear_inode(struct inode *inode)
 	if (!inode)
 		goto bail;
 
-	mlog(0, "Clearing inode: %"MLFu64", nlink = %u\n",
-	     OCFS2_I(inode)->ip_blkno, inode->i_nlink);
+	mlog(0, "Clearing inode: %llu, nlink = %u\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_nlink);
 
 	mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL,
 			"Inode=%lu\n", inode->i_ino);
@@ -919,8 +924,8 @@ void ocfs2_clear_inode(struct inode *inode)
 		ocfs2_checkpoint_inode(inode);
 
 	mlog_bug_on_msg(!list_empty(&oi->ip_io_markers),
-			"Clear inode of %"MLFu64", inode has io markers\n",
-			oi->ip_blkno);
+			"Clear inode of %llu, inode has io markers\n",
+			(unsigned long long)oi->ip_blkno);
 
 	ocfs2_extent_map_drop(inode, 0);
 	ocfs2_extent_map_init(inode);
@@ -936,20 +941,20 @@ void ocfs2_clear_inode(struct inode *inode)
 	ocfs2_metadata_cache_purge(inode);
 
 	mlog_bug_on_msg(oi->ip_metadata_cache.ci_num_cached,
-			"Clear inode of %"MLFu64", inode has %u cache items\n",
-			oi->ip_blkno, oi->ip_metadata_cache.ci_num_cached);
+			"Clear inode of %llu, inode has %u cache items\n",
+			(unsigned long long)oi->ip_blkno, oi->ip_metadata_cache.ci_num_cached);
 
 	mlog_bug_on_msg(!(oi->ip_flags & OCFS2_INODE_CACHE_INLINE),
-			"Clear inode of %"MLFu64", inode has a bad flag\n",
-			oi->ip_blkno);
+			"Clear inode of %llu, inode has a bad flag\n",
+			(unsigned long long)oi->ip_blkno);
 
 	mlog_bug_on_msg(spin_is_locked(&oi->ip_lock),
-			"Clear inode of %"MLFu64", inode is locked\n",
-			oi->ip_blkno);
+			"Clear inode of %llu, inode is locked\n",
+			(unsigned long long)oi->ip_blkno);
 
 	mlog_bug_on_msg(!mutex_trylock(&oi->ip_io_mutex),
-			"Clear inode of %"MLFu64", io_mutex is locked\n",
-			oi->ip_blkno);
+			"Clear inode of %llu, io_mutex is locked\n",
+			(unsigned long long)oi->ip_blkno);
 	mutex_unlock(&oi->ip_io_mutex);
 
 	/*
@@ -957,19 +962,19 @@ void ocfs2_clear_inode(struct inode *inode)
 	 * kernel 1, world 0
 	 */
 	mlog_bug_on_msg(!down_write_trylock(&oi->ip_alloc_sem),
-			"Clear inode of %"MLFu64", alloc_sem is locked\n",
-			oi->ip_blkno);
+			"Clear inode of %llu, alloc_sem is locked\n",
+			(unsigned long long)oi->ip_blkno);
 	up_write(&oi->ip_alloc_sem);
 
 	mlog_bug_on_msg(oi->ip_open_count,
-			"Clear inode of %"MLFu64" has open count %d\n",
-			oi->ip_blkno, oi->ip_open_count);
+			"Clear inode of %llu has open count %d\n",
+			(unsigned long long)oi->ip_blkno, oi->ip_open_count);
 	mlog_bug_on_msg(!list_empty(&oi->ip_handle_list),
-			"Clear inode of %"MLFu64" has non empty handle list\n",
-			oi->ip_blkno);
+			"Clear inode of %llu has non empty handle list\n",
+			(unsigned long long)oi->ip_blkno);
 	mlog_bug_on_msg(oi->ip_handle,
-			"Clear inode of %"MLFu64" has non empty handle pointer\n",
-			oi->ip_blkno);
+			"Clear inode of %llu has non empty handle pointer\n",
+			(unsigned long long)oi->ip_blkno);
 
 	/* Clear all other flags. */
 	oi->ip_flags = OCFS2_INODE_CACHE_INLINE;
@@ -991,8 +996,8 @@ void ocfs2_drop_inode(struct inode *inode)
 
 	mlog_entry_void();
 
-	mlog(0, "Drop inode %"MLFu64", nlink = %u, ip_flags = 0x%x\n",
-	     oi->ip_blkno, inode->i_nlink, oi->ip_flags);
+	mlog(0, "Drop inode %llu, nlink = %u, ip_flags = 0x%x\n",
+	     (unsigned long long)oi->ip_blkno, inode->i_nlink, oi->ip_flags);
 
 	/* Testing ip_orphaned_slot here wouldn't work because we may
 	 * not have gotten a delete_inode vote from any other nodes
@@ -1069,8 +1074,8 @@ int ocfs2_inode_revalidate(struct dentry *dentry)
 	struct inode *inode = dentry->d_inode;
 	int status = 0;
 
-	mlog_entry("(inode = 0x%p, ino = %"MLFu64")\n", inode,
-		   inode ? OCFS2_I(inode)->ip_blkno : 0ULL);
+	mlog_entry("(inode = 0x%p, ino = %llu)\n", inode,
+		   inode ? (unsigned long long)OCFS2_I(inode)->ip_blkno : 0ULL);
 
 	if (!inode) {
 		mlog(0, "eep, no inode!\n");
@@ -1114,7 +1119,8 @@ int ocfs2_mark_inode_dirty(struct ocfs2_journal_handle *handle,
 	int status;
 	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
 
-	mlog_entry("(inode %"MLFu64")\n", OCFS2_I(inode)->ip_blkno);
+	mlog_entry("(inode %llu)\n",
+		   (unsigned long long)OCFS2_I(inode)->ip_blkno);
 
 	status = ocfs2_journal_access(handle, inode, bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 4be801f4559..ae3440ca083 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -503,8 +503,8 @@ static void ocfs2_handle_cleanup_locks(struct ocfs2_journal *journal,
 		ocfs2_meta_unlock(inode, 1);
 		if (atomic_read(&inode->i_count) == 1)
 			mlog(ML_ERROR,
-			     "Inode %"MLFu64", I'm doing a last iput for!",
-			     OCFS2_I(inode)->ip_blkno);
+			     "Inode %llu, I'm doing a last iput for!",
+			     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 		iput(inode);
 		kmem_cache_free(ocfs2_lock_cache, lock);
 	}
@@ -640,8 +640,9 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
 		/* This is called from startup/shutdown which will
 		 * handle the errors in a specific manner, so no need
 		 * to call ocfs2_error() here. */
-		mlog(ML_ERROR, "Journal dinode %"MLFu64"  has invalid "
-		     "signature: %.*s", fe->i_blkno, 7, fe->i_signature);
+		mlog(ML_ERROR, "Journal dinode %llu  has invalid "
+		     "signature: %.*s", (unsigned long long)fe->i_blkno, 7,
+		     fe->i_signature);
 		status = -EIO;
 		goto out;
 	}
@@ -934,8 +935,8 @@ void ocfs2_complete_recovery(void *data)
 
 		la_dinode = item->lri_la_dinode;
 		if (la_dinode) {
-			mlog(0, "Clean up local alloc %"MLFu64"\n",
-			     la_dinode->i_blkno);
+			mlog(0, "Clean up local alloc %llu\n",
+			     (unsigned long long)la_dinode->i_blkno);
 
 			ret = ocfs2_complete_local_alloc_recovery(osb,
 								  la_dinode);
@@ -947,8 +948,8 @@ void ocfs2_complete_recovery(void *data)
 
 		tl_dinode = item->lri_tl_dinode;
 		if (tl_dinode) {
-			mlog(0, "Clean up truncate log %"MLFu64"\n",
-			     tl_dinode->i_blkno);
+			mlog(0, "Clean up truncate log %llu\n",
+			     (unsigned long long)tl_dinode->i_blkno);
 
 			ret = ocfs2_complete_truncate_log_recovery(osb,
 								   tl_dinode);
@@ -1473,11 +1474,11 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
 			if (de->file_type > OCFS2_FT_MAX) {
 				mlog(ML_ERROR,
 				     "block %llu contains invalid de: "
-				     "inode = %"MLFu64", rec_len = %u, "
+				     "inode = %llu, rec_len = %u, "
 				     "name_len = %u, file_type = %u, "
 				     "name='%.*s'\n",
 				     (unsigned long long)bh->b_blocknr,
-				     le64_to_cpu(de->inode),
+				     (unsigned long long)le64_to_cpu(de->inode),
 				     le16_to_cpu(de->rec_len),
 				     de->name_len,
 				     de->file_type,
@@ -1494,8 +1495,8 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
 			if (IS_ERR(iter))
 				continue;
 
-			mlog(0, "queue orphan %"MLFu64"\n",
-			     OCFS2_I(iter)->ip_blkno);
+			mlog(0, "queue orphan %llu\n",
+			     (unsigned long long)OCFS2_I(iter)->ip_blkno);
 			/* No locking is required for the next_orphan
 			 * queue as there is only ever a single
 			 * process doing orphan recovery. */
@@ -1588,7 +1589,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
 
 	while (inode) {
 		oi = OCFS2_I(inode);
-		mlog(0, "iput orphan %"MLFu64"\n", oi->ip_blkno);
+		mlog(0, "iput orphan %llu\n", (unsigned long long)oi->ip_blkno);
 
 		iter = oi->ip_next_orphan;
 
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 149b3518166..0d1973ea32b 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -143,8 +143,8 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
 
 	if (!(le32_to_cpu(alloc->i_flags) &
 	    (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) {
-		mlog(ML_ERROR, "Invalid local alloc inode, %"MLFu64"\n",
-		     OCFS2_I(inode)->ip_blkno);
+		mlog(ML_ERROR, "Invalid local alloc inode, %llu\n",
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 		status = -EINVAL;
 		goto bail;
 	}
@@ -493,9 +493,9 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
 
 	if (le32_to_cpu(alloc->id1.bitmap1.i_used) !=
 	    ocfs2_local_alloc_count_bits(alloc)) {
-		ocfs2_error(osb->sb, "local alloc inode %"MLFu64" says it has "
+		ocfs2_error(osb->sb, "local alloc inode %llu says it has "
 			    "%u free bits, but a count shows %u",
-			    le64_to_cpu(alloc->i_blkno),
+			    (unsigned long long)le64_to_cpu(alloc->i_blkno),
 			    le32_to_cpu(alloc->id1.bitmap1.i_used),
 			    ocfs2_local_alloc_count_bits(alloc));
 		status = -EIO;
@@ -753,10 +753,11 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
 				ocfs2_clusters_to_blocks(osb->sb,
 							 start - count);
 
-			mlog(0, "freeing %u bits starting at local "
-			     "alloc bit %u (la_start_blk = %"MLFu64", "
-			     "blkno = %"MLFu64")\n", count, start - count,
-			     la_start_blk, blkno);
+			mlog(0, "freeing %u bits starting at local alloc bit "
+			     "%u (la_start_blk = %llu, blkno = %llu)\n",
+			     count, start - count,
+			     (unsigned long long)la_start_blk,
+			     (unsigned long long)blkno);
 
 			status = ocfs2_free_clusters(handle, main_bm_inode,
 						     main_bm_bh, blkno, count);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index f6b77ff1d2b..274f61d0cda 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -161,8 +161,8 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
 		goto bail;
 	}
 
-	mlog(0, "find name %.*s in directory %"MLFu64"\n", dentry->d_name.len,
-	     dentry->d_name.name, OCFS2_I(dir)->ip_blkno);
+	mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len,
+	     dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno);
 
 	status = ocfs2_meta_lock(dir, NULL, NULL, 0);
 	if (status < 0) {
@@ -180,7 +180,8 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
 
 	inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno);
 	if (IS_ERR(inode)) {
-		mlog(ML_ERROR, "Unable to create inode %"MLFu64"\n", blkno);
+		mlog(ML_ERROR, "Unable to create inode %llu\n",
+		     (unsigned long long)blkno);
 		ret = ERR_PTR(-EACCES);
 		goto bail_unlock;
 	}
@@ -310,8 +311,8 @@ static int ocfs2_mknod(struct inode *dir,
 	osb = OCFS2_SB(dir->i_sb);
 
 	if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) {
-		mlog(ML_ERROR, "inode %"MLFu64" has i_nlink of %u\n",
-		     OCFS2_I(dir)->ip_blkno, dir->i_nlink);
+		mlog(ML_ERROR, "inode %llu has i_nlink of %u\n",
+		     (unsigned long long)OCFS2_I(dir)->ip_blkno, dir->i_nlink);
 		status = -EMLINK;
 		goto leave;
 	}
@@ -562,9 +563,9 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 
 	if (ocfs2_populate_inode(inode, fe, 1) < 0) {
 		mlog(ML_ERROR, "populate inode failed! bh->b_blocknr=%llu, "
-		     "i_blkno=%"MLFu64", i_ino=%lu\n",
+		     "i_blkno=%llu, i_ino=%lu\n",
 		     (unsigned long long) (*new_fe_bh)->b_blocknr,
-		     fe->i_blkno, inode->i_ino);
+		     (unsigned long long)fe->i_blkno, inode->i_ino);
 		BUG();
 	}
 
@@ -765,7 +766,7 @@ static int ocfs2_unlink(struct inode *dir,
 
 	BUG_ON(dentry->d_parent->d_inode != dir);
 
-	mlog(0, "ino = %"MLFu64"\n", OCFS2_I(inode)->ip_blkno);
+	mlog(0, "ino = %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);
 
 	if (inode == osb->root_inode) {
 		mlog(0, "Cannot delete the root directory\n");
@@ -799,9 +800,9 @@ static int ocfs2_unlink(struct inode *dir,
 	if (OCFS2_I(inode)->ip_blkno != blkno) {
 		status = -ENOENT;
 
-		mlog(0, "ip_blkno (%"MLFu64") != dirent blkno (%"MLFu64") "
-		     "ip_flags = %x\n", OCFS2_I(inode)->ip_blkno, blkno,
-		     OCFS2_I(inode)->ip_flags);
+		mlog(0, "ip_blkno %llu != dirent blkno %llu ip_flags = %x\n",
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno,
+		     (unsigned long long)blkno, OCFS2_I(inode)->ip_flags);
 		goto leave;
 	}
 
@@ -946,8 +947,9 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
 	struct buffer_head **tmpbh;
 	struct inode *tmpinode;
 
-	mlog_entry("(inode1 = %"MLFu64", inode2 = %"MLFu64")\n",
-		   oi1->ip_blkno, oi2->ip_blkno);
+	mlog_entry("(inode1 = %llu, inode2 = %llu)\n",
+		   (unsigned long long)oi1->ip_blkno,
+		   (unsigned long long)oi2->ip_blkno);
 
 	BUG_ON(!handle);
 
@@ -1187,9 +1189,9 @@ static int ocfs2_rename(struct inode *old_dir,
 		if (OCFS2_I(new_inode)->ip_blkno != newfe_blkno) {
 			status = -EACCES;
 
-			mlog(0, "Inode blkno (%"MLFu64") and dir (%"MLFu64") "
-			     "disagree. ip_flags = %x\n",
-			     OCFS2_I(new_inode)->ip_blkno, newfe_blkno,
+			mlog(0, "Inode %llu and dir %llu disagree. flags = %x\n",
+			     (unsigned long long)OCFS2_I(new_inode)->ip_blkno,
+			     (unsigned long long)newfe_blkno,
 			     OCFS2_I(new_inode)->ip_flags);
 			goto bail;
 		}
@@ -1215,9 +1217,9 @@ static int ocfs2_rename(struct inode *old_dir,
 
 		newfe = (struct ocfs2_dinode *) newfe_bh->b_data;
 
-		mlog(0, "aha rename over existing... new_de=%p "
-		     "new_blkno=%"MLFu64" newfebh=%p bhblocknr=%llu\n",
-		     new_de, newfe_blkno, newfe_bh, newfe_bh ?
+		mlog(0, "aha rename over existing... new_de=%p new_blkno=%llu "
+		     "newfebh=%p bhblocknr=%llu\n", new_de,
+		     (unsigned long long)newfe_blkno, newfe_bh, newfe_bh ?
 		     (unsigned long long)newfe_bh->b_blocknr : 0ULL);
 
 		if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) {
@@ -1354,8 +1356,8 @@ static int ocfs2_rename(struct inode *old_dir,
 		if (new_dir_nlink != new_dir->i_nlink) {
 			if (!new_dir_bh) {
 				mlog(ML_ERROR, "need to change nlink for new "
-				     "dir %"MLFu64" from %d to %d but bh is "
-				     "NULL\n", OCFS2_I(new_dir)->ip_blkno,
+				     "dir %llu from %d to %d but bh is NULL\n",
+				     (unsigned long long)OCFS2_I(new_dir)->ip_blkno,
 				     (int)new_dir_nlink, new_dir->i_nlink);
 			} else {
 				struct ocfs2_dinode *fe;
@@ -1372,10 +1374,9 @@ static int ocfs2_rename(struct inode *old_dir,
 	if (old_dir_nlink != old_dir->i_nlink) {
 		if (!old_dir_bh) {
 			mlog(ML_ERROR, "need to change nlink for old dir "
-			     "%"MLFu64" from %d to %d but bh is NULL!\n",
-			     OCFS2_I(old_dir)->ip_blkno,
-			     (int)old_dir_nlink,
-			     old_dir->i_nlink);
+			     "%llu from %d to %d but bh is NULL!\n",
+			     (unsigned long long)OCFS2_I(old_dir)->ip_blkno,
+			     (int)old_dir_nlink, old_dir->i_nlink);
 		} else {
 			struct ocfs2_dinode *fe;
 			status = ocfs2_journal_access(handle, old_dir,
@@ -1634,9 +1635,9 @@ static int ocfs2_symlink(struct inode *dir,
 						    NULL);
 		if (status < 0) {
 			if (status != -ENOSPC && status != -EINTR) {
-				mlog(ML_ERROR, "Failed to extend file to "
-					       "%"MLFu64"\n",
-				     newsize);
+				mlog(ML_ERROR,
+				     "Failed to extend file to %llu\n",
+				     (unsigned long long)newsize);
 				mlog_errno(status);
 				status = -ENOSPC;
 			}
@@ -1716,10 +1717,11 @@ int ocfs2_check_dir_entry(struct inode * dir,
 		error_msg = "directory entry across blocks";
 
 	if (error_msg != NULL)
-		mlog(ML_ERROR, "bad entry in directory #%"MLFu64": %s - "
-		     "offset=%lu, inode=%"MLFu64", rec_len=%d, name_len=%d\n",
-		     OCFS2_I(dir)->ip_blkno, error_msg, offset,
-		     le64_to_cpu(de->inode), rlen, de->name_len);
+		mlog(ML_ERROR, "bad entry in directory #%llu: %s - "
+		     "offset=%lu, inode=%llu, rec_len=%d, name_len=%d\n",
+		     (unsigned long long)OCFS2_I(dir)->ip_blkno, error_msg,
+		     offset, (unsigned long long)le64_to_cpu(de->inode), rlen,
+		     de->name_len);
 	return error_msg == NULL ? 1 : 0;
 }
 
@@ -2021,8 +2023,8 @@ static int ocfs2_blkno_stringify(u64 blkno, char *name)
 
 	mlog_entry_void();
 
-	namelen = snprintf(name, OCFS2_ORPHAN_NAMELEN + 1, "%016"MLFx64,
-			   blkno);
+	namelen = snprintf(name, OCFS2_ORPHAN_NAMELEN + 1, "%016llx",
+			   (long long)blkno);
 	if (namelen <= 0) {
 		if (namelen)
 			status = namelen;
@@ -2167,8 +2169,8 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
 	OCFS2_I(inode)->ip_orphaned_slot = osb->slot_num;
 	spin_unlock(&OCFS2_I(inode)->ip_lock);
 
-	mlog(0, "Inode %"MLFu64" orphaned in slot %d\n",
-	     OCFS2_I(inode)->ip_blkno, osb->slot_num);
+	mlog(0, "Inode %llu orphaned in slot %d\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num);
 
 leave:
 	if (orphan_dir_inode)
@@ -2202,8 +2204,9 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
 		goto leave;
 	}
 
-	mlog(0, "removing '%s' from orphan dir %"MLFu64" (namelen=%d)\n",
-	     name, OCFS2_I(orphan_dir_inode)->ip_blkno, OCFS2_ORPHAN_NAMELEN);
+	mlog(0, "removing '%s' from orphan dir %llu (namelen=%d)\n",
+	     name, (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno,
+	     OCFS2_ORPHAN_NAMELEN);
 
 	/* find it's spot in the orphan directory */
 	target_de_bh = ocfs2_find_entry(name, OCFS2_ORPHAN_NAMELEN,
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index e89de9b6e49..da1093039c0 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -357,8 +357,8 @@ static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb)
 #define OCFS2_RO_ON_INVALID_DINODE(__sb, __di)	do {			\
 	typeof(__di) ____di = (__di);					\
 	ocfs2_error((__sb), 						\
-		"Dinode # %"MLFu64" has bad signature %.*s",		\
-		(____di)->i_blkno, 7,					\
+		"Dinode # %llu has bad signature %.*s",			\
+		(unsigned long long)(____di)->i_blkno, 7,		\
 		(____di)->i_signature);					\
 } while (0);
 
@@ -368,8 +368,8 @@ static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb)
 #define OCFS2_RO_ON_INVALID_EXTENT_BLOCK(__sb, __eb)	do {		\
 	typeof(__eb) ____eb = (__eb);					\
 	ocfs2_error((__sb), 						\
-		"Extent Block # %"MLFu64" has bad signature %.*s",	\
-		(____eb)->h_blkno, 7,					\
+		"Extent Block # %llu has bad signature %.*s",		\
+		(unsigned long long)(____eb)->h_blkno, 7,		\
 		(____eb)->h_signature);					\
 } while (0);
 
@@ -379,8 +379,8 @@ static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb)
 #define OCFS2_RO_ON_INVALID_GROUP_DESC(__sb, __gd)	do {		\
 	typeof(__gd) ____gd = (__gd);					\
 		ocfs2_error((__sb),					\
-		"Group Descriptor # %"MLFu64" has bad signature %.*s",	\
-		(____gd)->bg_blkno, 7,					\
+		"Group Descriptor # %llu has bad signature %.*s",	\
+		(unsigned long long)(____gd)->bg_blkno, 7,		\
 		(____gd)->bg_signature);				\
 } while (0);
 
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index c46c164aefb..195523090c8 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -157,8 +157,9 @@ static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle,
 	mlog_entry_void();
 
 	if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) {
-		ocfs2_error(alloc_inode->i_sb, "group block (%"MLFu64") "
-			    "!= b_blocknr (%llu)", group_blkno,
+		ocfs2_error(alloc_inode->i_sb, "group block (%llu) != "
+			    "b_blocknr (%llu)",
+			    (unsigned long long)group_blkno,
 			    (unsigned long long) bg_bh->b_blocknr);
 		status = -EIO;
 		goto bail;
@@ -280,8 +281,8 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 
 	/* setup the group */
 	bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
-	mlog(0, "new descriptor, record %u, at block %"MLFu64"\n",
-	     alloc_rec, bg_blkno);
+	mlog(0, "new descriptor, record %u, at block %llu\n",
+	     alloc_rec, (unsigned long long)bg_blkno);
 
 	bg_bh = sb_getblk(osb->sb, bg_blkno);
 	if (!bg_bh) {
@@ -382,8 +383,8 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
 		goto bail;
 	}
 	if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) {
-		ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator "
-			    "# %"MLFu64, le64_to_cpu(fe->i_blkno));
+		ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu",
+			    (unsigned long long)le64_to_cpu(fe->i_blkno));
 		status = -EIO;
 		goto bail;
 	}
@@ -829,9 +830,10 @@ static int ocfs2_relink_block_group(struct ocfs2_journal_handle *handle,
 		goto out;
 	}
 
-	mlog(0, "In suballoc %"MLFu64", chain %u, move group %"MLFu64" to "
-	     "top, prev = %"MLFu64"\n",
-	     fe->i_blkno, chain, bg->bg_blkno, prev_bg->bg_blkno);
+	mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n",
+	     (unsigned long long)fe->i_blkno, chain,
+	     (unsigned long long)bg->bg_blkno,
+	     (unsigned long long)prev_bg->bg_blkno);
 
 	fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno);
 	bg_ptr = le64_to_cpu(bg->bg_next_group);
@@ -974,8 +976,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
 	struct ocfs2_group_desc *bg;
 
 	chain = ac->ac_chain;
-	mlog(0, "trying to alloc %u bits from chain %u, inode %"MLFu64"\n",
-	     bits_wanted, chain, OCFS2_I(alloc_inode)->ip_blkno);
+	mlog(0, "trying to alloc %u bits from chain %u, inode %llu\n",
+	     bits_wanted, chain,
+	     (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno);
 
 	status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb),
 				  le64_to_cpu(cl->cl_recs[chain].c_blkno),
@@ -1027,8 +1030,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
 		goto bail;
 	}
 
-	mlog(0, "alloc succeeds: we give %u bits from block group %"MLFu64"\n",
-	     tmp_bits, bg->bg_blkno);
+	mlog(0, "alloc succeeds: we give %u bits from block group %llu\n",
+	     tmp_bits, (unsigned long long)bg->bg_blkno);
 
 	*num_bits = tmp_bits;
 
@@ -1092,8 +1095,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
 		goto bail;
 	}
 
-	mlog(0, "Allocated %u bits from suballocator %"MLFu64"\n",
-	     *num_bits, fe->i_blkno);
+	mlog(0, "Allocated %u bits from suballocator %llu\n", *num_bits,
+	     (unsigned long long)fe->i_blkno);
 
 	*bg_blkno = le64_to_cpu(bg->bg_blkno);
 bail:
@@ -1134,9 +1137,9 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
 	}
 	if (le32_to_cpu(fe->id1.bitmap1.i_used) >=
 	    le32_to_cpu(fe->id1.bitmap1.i_total)) {
-		ocfs2_error(osb->sb, "Chain allocator dinode %"MLFu64" has %u"
-			    "used bits but only %u total.",
-			    le64_to_cpu(fe->i_blkno),
+		ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used "
+			    "bits but only %u total.",
+			    (unsigned long long)le64_to_cpu(fe->i_blkno),
 			    le32_to_cpu(fe->id1.bitmap1.i_used),
 			    le32_to_cpu(fe->id1.bitmap1.i_total));
 		status = -EIO;
@@ -1479,10 +1482,9 @@ static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle,
 	}
 	BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl));
 
-	mlog(0, "suballocator %"MLFu64": freeing %u bits from group %"MLFu64
-	     ", starting at %u\n",
-	     OCFS2_I(alloc_inode)->ip_blkno, count, bg_blkno,
-	     start_bit);
+	mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n",
+	     (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count,
+	     (unsigned long long)bg_blkno, start_bit);
 
 	status = ocfs2_read_block(osb, bg_blkno, &group_bh, OCFS2_BH_CACHED,
 				  alloc_inode);
@@ -1592,10 +1594,10 @@ int ocfs2_free_clusters(struct ocfs2_journal_handle *handle,
 	ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno,
 				     &bg_start_bit);
 
-	mlog(0, "want to free %u clusters starting at block %"MLFu64"\n",
-	     num_clusters, start_blk);
-	mlog(0, "bg_blkno = %"MLFu64", bg_start_bit = %u\n",
-	     bg_blkno, bg_start_bit);
+	mlog(0, "want to free %u clusters starting at block %llu\n",
+	     num_clusters, (unsigned long long)start_blk);
+	mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n",
+	     (unsigned long long)bg_blkno, bg_start_bit);
 
 	status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
 					  bg_start_bit, bg_blkno,
@@ -1616,18 +1618,22 @@ static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg)
 	printk("bg_free_bits_count: %u\n", bg->bg_free_bits_count);
 	printk("bg_chain:           %u\n", bg->bg_chain);
 	printk("bg_generation:      %u\n", le32_to_cpu(bg->bg_generation));
-	printk("bg_next_group:      %"MLFu64"\n", bg->bg_next_group);
-	printk("bg_parent_dinode:   %"MLFu64"\n", bg->bg_parent_dinode);
-	printk("bg_blkno:           %"MLFu64"\n", bg->bg_blkno);
+	printk("bg_next_group:      %llu\n",
+	       (unsigned long long)bg->bg_next_group);
+	printk("bg_parent_dinode:   %llu\n",
+	       (unsigned long long)bg->bg_parent_dinode);
+	printk("bg_blkno:           %llu\n",
+	       (unsigned long long)bg->bg_blkno);
 }
 
 static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe)
 {
 	int i;
 
-	printk("Suballoc Inode %"MLFu64":\n", fe->i_blkno);
+	printk("Suballoc Inode %llu:\n", (unsigned long long)fe->i_blkno);
 	printk("i_signature:                  %s\n", fe->i_signature);
-	printk("i_size:                       %"MLFu64"\n", fe->i_size);
+	printk("i_size:                       %llu\n",
+	       (unsigned long long)fe->i_size);
 	printk("i_clusters:                   %u\n", fe->i_clusters);
 	printk("i_generation:                 %u\n",
 	       le32_to_cpu(fe->i_generation));
@@ -1645,7 +1651,7 @@ static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe)
 		       fe->id2.i_chain.cl_recs[i].c_free);
 		printk("fe->id2.i_chain.cl_recs[%d].c_total: %u\n", i,
 		       fe->id2.i_chain.cl_recs[i].c_total);
-		printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %"MLFu64"\n", i,
-		       fe->id2.i_chain.cl_recs[i].c_blkno);
+		printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %llu\n", i,
+		       (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno);
 	}
 }
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 44d8b524823..949b3dac30f 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1428,8 +1428,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	osb->fs_generation = le32_to_cpu(di->i_fs_generation);
 	mlog(0, "vol_label: %s\n", osb->vol_label);
 	mlog(0, "uuid: %s\n", osb->uuid_str);
-	mlog(0, "root_blkno=%"MLFu64", system_dir_blkno=%"MLFu64"\n",
-	     osb->root_blkno, osb->system_dir_blkno);
+	mlog(0, "root_blkno=%llu, system_dir_blkno=%llu\n",
+	     (unsigned long long)osb->root_blkno,
+	     (unsigned long long)osb->system_dir_blkno);
 
 	osb->osb_dlm_debug = ocfs2_new_dlm_debug();
 	if (!osb->osb_dlm_debug) {
@@ -1472,8 +1473,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	osb->bitmap_cpg = le16_to_cpu(di->id2.i_chain.cl_cpg);
 	osb->num_clusters = le32_to_cpu(di->id1.bitmap1.i_total);
 	brelse(bitmap_bh);
-	mlog(0, "cluster bitmap inode: %"MLFu64", clusters per group: %u\n",
-	     osb->bitmap_blkno, osb->bitmap_cpg);
+	mlog(0, "cluster bitmap inode: %llu, clusters per group: %u\n",
+	     (unsigned long long)osb->bitmap_blkno, osb->bitmap_cpg);
 
 	status = ocfs2_init_slot_info(osb);
 	if (status < 0) {
@@ -1531,8 +1532,9 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
 			     OCFS2_MINOR_REV_LEVEL);
 		} else if (bh->b_blocknr != le64_to_cpu(di->i_blkno)) {
 			mlog(ML_ERROR, "bad block number on superblock: "
-			     "found %"MLFu64", should be %llu\n",
-			     di->i_blkno, (unsigned long long)bh->b_blocknr);
+			     "found %llu, should be %llu\n",
+			     (unsigned long long)di->i_blkno,
+			     (unsigned long long)bh->b_blocknr);
 		} else if (le32_to_cpu(di->id2.i_super.s_clustersize_bits) < 12 ||
 			    le32_to_cpu(di->id2.i_super.s_clustersize_bits) > 20) {
 			mlog(ML_ERROR, "bad cluster size found: %u\n",
diff --git a/fs/ocfs2/super.h b/fs/ocfs2/super.h
index c564177dfbd..783f5270f2a 100644
--- a/fs/ocfs2/super.h
+++ b/fs/ocfs2/super.h
@@ -33,12 +33,16 @@ int ocfs2_publish_get_mount_state(struct ocfs2_super *osb,
 
 void __ocfs2_error(struct super_block *sb,
 		   const char *function,
-		   const char *fmt, ...);
+		   const char *fmt, ...)
+	__attribute__ ((format (printf, 3, 4)));
+
 #define ocfs2_error(sb, fmt, args...) __ocfs2_error(sb, __PRETTY_FUNCTION__, fmt, ##args)
 
 void __ocfs2_abort(struct super_block *sb,
 		   const char *function,
-		   const char *fmt, ...);
+		   const char *fmt, ...)
+	__attribute__ ((format (printf, 3, 4)));
+
 #define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args)
 
 #endif /* OCFS2_SUPER_H */
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index 300b5bedfb2..04a684dfdd9 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -119,8 +119,8 @@ void ocfs2_metadata_cache_purge(struct inode *inode)
 	tree = !(oi->ip_flags & OCFS2_INODE_CACHE_INLINE);
 	to_purge = ci->ci_num_cached;
 
-	mlog(0, "Purge %u %s items from Inode %"MLFu64"\n", to_purge,
-	     tree ? "array" : "tree", oi->ip_blkno);
+	mlog(0, "Purge %u %s items from Inode %llu\n", to_purge,
+	     tree ? "array" : "tree", (unsigned long long)oi->ip_blkno);
 
 	/* If we're a tree, save off the root so that we can safely
 	 * initialize the cache. We do the work to free tree members
@@ -136,8 +136,8 @@ void ocfs2_metadata_cache_purge(struct inode *inode)
 	 * easily detect counting errors. Unfortunately, this is only
 	 * meaningful for trees. */
 	if (tree && purged != to_purge)
-		mlog(ML_ERROR, "Inode %"MLFu64", count = %u, purged = %u\n",
-		     oi->ip_blkno, to_purge, purged);
+		mlog(ML_ERROR, "Inode %llu, count = %u, purged = %u\n",
+		     (unsigned long long)oi->ip_blkno, to_purge, purged);
 }
 
 /* Returns the index in the cache array, -1 if not found.
@@ -186,8 +186,9 @@ static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi,
 
 	spin_lock(&oi->ip_lock);
 
-	mlog(0, "Inode %"MLFu64", query block %llu (inline = %u)\n",
-	     oi->ip_blkno, (unsigned long long) bh->b_blocknr,
+	mlog(0, "Inode %llu, query block %llu (inline = %u)\n",
+	     (unsigned long long)oi->ip_blkno,
+	     (unsigned long long) bh->b_blocknr,
 	     !!(oi->ip_flags & OCFS2_INODE_CACHE_INLINE));
 
 	if (oi->ip_flags & OCFS2_INODE_CACHE_INLINE)
@@ -293,12 +294,12 @@ static void ocfs2_expand_cache(struct ocfs2_inode_info *oi,
 	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 
 	mlog_bug_on_msg(ci->ci_num_cached != OCFS2_INODE_MAX_CACHE_ARRAY,
-			"Inode %"MLFu64", num cached = %u, should be %u\n",
-			oi->ip_blkno, ci->ci_num_cached,
+			"Inode %llu, num cached = %u, should be %u\n",
+			(unsigned long long)oi->ip_blkno, ci->ci_num_cached,
 			OCFS2_INODE_MAX_CACHE_ARRAY);
 	mlog_bug_on_msg(!(oi->ip_flags & OCFS2_INODE_CACHE_INLINE),
-			"Inode %"MLFu64" not marked as inline anymore!\n",
-			oi->ip_blkno);
+			"Inode %llu not marked as inline anymore!\n",
+			(unsigned long long)oi->ip_blkno);
 	assert_spin_locked(&oi->ip_lock);
 
 	/* Be careful to initialize the tree members *first* because
@@ -316,8 +317,8 @@ static void ocfs2_expand_cache(struct ocfs2_inode_info *oi,
 		tree[i] = NULL;
 	}
 
-	mlog(0, "Expanded %"MLFu64" to a tree cache: flags 0x%x, num = %u\n",
-	     oi->ip_blkno, oi->ip_flags, ci->ci_num_cached);
+	mlog(0, "Expanded %llu to a tree cache: flags 0x%x, num = %u\n",
+	     (unsigned long long)oi->ip_blkno, oi->ip_flags, ci->ci_num_cached);
 }
 
 /* Slow path function - memory allocation is necessary. See the
@@ -332,8 +333,9 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
 	struct ocfs2_meta_cache_item *tree[OCFS2_INODE_MAX_CACHE_ARRAY] =
 		{ NULL, };
 
-	mlog(0, "Inode %"MLFu64", block %llu, expand = %d\n",
-	     oi->ip_blkno, (unsigned long long) block, expand_tree);
+	mlog(0, "Inode %llu, block %llu, expand = %d\n",
+	     (unsigned long long)oi->ip_blkno,
+	     (unsigned long long)block, expand_tree);
 
 	new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_KERNEL);
 	if (!new) {
@@ -414,8 +416,9 @@ void ocfs2_set_buffer_uptodate(struct inode *inode,
 	if (ocfs2_buffer_cached(oi, bh))
 		return;
 
-	mlog(0, "Inode %"MLFu64", inserting block %llu\n", oi->ip_blkno,
-	     (unsigned long long) bh->b_blocknr);
+	mlog(0, "Inode %llu, inserting block %llu\n",
+	     (unsigned long long)oi->ip_blkno,
+	     (unsigned long long)bh->b_blocknr);
 
 	/* No need to recheck under spinlock - insertion is guarded by
 	 * ip_io_mutex */
@@ -504,8 +507,9 @@ void ocfs2_remove_from_cache(struct inode *inode,
 	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 
 	spin_lock(&oi->ip_lock);
-	mlog(0, "Inode %"MLFu64", remove %llu, items = %u, array = %u\n",
-	     oi->ip_blkno, (unsigned long long) block, ci->ci_num_cached,
+	mlog(0, "Inode %llu, remove %llu, items = %u, array = %u\n",
+	     (unsigned long long)oi->ip_blkno,
+	     (unsigned long long) block, ci->ci_num_cached,
 	     oi->ip_flags & OCFS2_INODE_CACHE_INLINE);
 
 	if (oi->ip_flags & OCFS2_INODE_CACHE_INLINE) {
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c
index 021978e0576..53049a20419 100644
--- a/fs/ocfs2/vote.c
+++ b/fs/ocfs2/vote.c
@@ -190,20 +190,21 @@ static int ocfs2_process_delete_request(struct inode *inode,
 				OCFS2_INVALID_SLOT &&
 				OCFS2_I(inode)->ip_orphaned_slot !=
 				(*orphaned_slot),
-				"Inode %"MLFu64": This node thinks it's "
+				"Inode %llu: This node thinks it's "
 				"orphaned in slot %d, messaged it's in %d\n",
-				OCFS2_I(inode)->ip_blkno,
+				(unsigned long long)OCFS2_I(inode)->ip_blkno,
 				OCFS2_I(inode)->ip_orphaned_slot,
 				*orphaned_slot);
 
-		mlog(0, "Setting orphaned slot for inode %"MLFu64" to %d\n",
-		     OCFS2_I(inode)->ip_blkno, *orphaned_slot);
+		mlog(0, "Setting orphaned slot for inode %llu to %d\n",
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno,
+		     *orphaned_slot);
 
 		OCFS2_I(inode)->ip_orphaned_slot = *orphaned_slot;
 	} else {
-		mlog(0, "Sending back orphaned slot %d for inode %"MLFu64"\n",
+		mlog(0, "Sending back orphaned slot %d for inode %llu\n",
 		     OCFS2_I(inode)->ip_orphaned_slot,
-		     OCFS2_I(inode)->ip_blkno);
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 
 		*orphaned_slot = OCFS2_I(inode)->ip_orphaned_slot;
 	}
@@ -226,8 +227,8 @@ static int ocfs2_process_delete_request(struct inode *inode,
 	}
 
 	if (filemap_fdatawrite(inode->i_mapping)) {
-		mlog(ML_ERROR, "Could not sync inode %"MLFu64" for delete!\n",
-		     OCFS2_I(inode)->ip_blkno);
+		mlog(ML_ERROR, "Could not sync inode %llu for delete!\n",
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 		goto done;
 	}
 	sync_mapping_buffers(inode->i_mapping);
@@ -302,8 +303,8 @@ static void ocfs2_process_dentry_request(struct inode *inode,
 	struct list_head *p;
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 
-	mlog(0, "parent %"MLFu64", namelen = %u, name = %.*s\n", parent_blkno,
-	     namelen, namelen, name);
+	mlog(0, "parent %llu, namelen = %u, name = %.*s\n",
+	     (unsigned long long)parent_blkno, namelen, namelen, name);
 
 	spin_lock(&dcache_lock);
 
@@ -370,9 +371,10 @@ static void ocfs2_process_vote(struct ocfs2_super *osb,
 	if (request == OCFS2_VOTE_REQ_DELETE)
 		orphaned_slot = be32_to_cpu(msg->md1.v_orphaned_slot);
 
-	mlog(0, "processing vote: request = %u, blkno = %"MLFu64", "
+	mlog(0, "processing vote: request = %u, blkno = %llu, "
 	     "generation = %u, node_num = %u, priv1 = %u\n", request,
-	     blkno, generation, node_num, be32_to_cpu(msg->md1.v_generic1));
+	     (unsigned long long)blkno, generation, node_num,
+	     be32_to_cpu(msg->md1.v_generic1));
 
 	if (!ocfs2_is_valid_vote_request(request)) {
 		mlog(ML_ERROR, "Invalid vote request %d from node %u\n",
@@ -419,11 +421,12 @@ static void ocfs2_process_vote(struct ocfs2_super *osb,
 	 * we had not found an inode in the first place. */
 	if (inode->i_generation != generation) {
 		mlog(0, "generation passed %u != inode generation = %u, "
-		     "ip_flags = %x, ip_blkno = %"MLFu64", msg %"MLFu64", "
-		     "i_count = %u, message type = %u\n",
-		     generation, inode->i_generation, OCFS2_I(inode)->ip_flags,
-		     OCFS2_I(inode)->ip_blkno, blkno,
-		     atomic_read(&inode->i_count), request);
+		     "ip_flags = %x, ip_blkno = %llu, msg %llu, i_count = %u, "
+		     "message type = %u\n", generation, inode->i_generation,
+		     OCFS2_I(inode)->ip_flags,
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno,
+		     (unsigned long long)blkno, atomic_read(&inode->i_count),
+		     request);
 		iput(inode);
 		inode = NULL;
 		goto respond;
@@ -830,8 +833,9 @@ static void ocfs2_delete_response_cb(void *priv,
 
 	orphaned_slot = be32_to_cpu(resp->r_orphaned_slot);
 	node = be32_to_cpu(resp->r_hdr.h_node_num);
-	mlog(0, "node %d tells us that inode %"MLFu64" is orphaned in slot "
-	     "%d\n", node, OCFS2_I(inode)->ip_blkno, orphaned_slot);
+	mlog(0, "node %d tells us that inode %llu is orphaned in slot %d\n",
+	     node, (unsigned long long)OCFS2_I(inode)->ip_blkno,
+	     orphaned_slot);
 
 	/* The other node may not actually know which slot the inode
 	 * is orphaned in. */
@@ -845,9 +849,9 @@ static void ocfs2_delete_response_cb(void *priv,
 	spin_lock(&OCFS2_I(inode)->ip_lock);
 	mlog_bug_on_msg(OCFS2_I(inode)->ip_orphaned_slot != orphaned_slot &&
 			OCFS2_I(inode)->ip_orphaned_slot
-			!= OCFS2_INVALID_SLOT, "Inode %"MLFu64": Node %d "
-			"says it's orphaned in slot %d, we think it's in %d\n",
-			OCFS2_I(inode)->ip_blkno,
+			!= OCFS2_INVALID_SLOT, "Inode %llu: Node %d says it's "
+			"orphaned in slot %d, we think it's in %d\n",
+			(unsigned long long)OCFS2_I(inode)->ip_blkno,
 			be32_to_cpu(resp->r_hdr.h_node_num),
 			orphaned_slot, OCFS2_I(inode)->ip_orphaned_slot);
 
@@ -869,8 +873,8 @@ int ocfs2_request_delete_vote(struct inode *inode)
 	delete_cb.rc_cb = ocfs2_delete_response_cb;
 	delete_cb.rc_priv = inode;
 
-	mlog(0, "Inode %"MLFu64", we start thinking orphaned slot is %d\n",
-	     OCFS2_I(inode)->ip_blkno, orphaned_slot);
+	mlog(0, "Inode %llu, we start thinking orphaned slot is %d\n",
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno, orphaned_slot);
 
 	status = -ENOMEM;
 	request = ocfs2_new_vote_request(osb, OCFS2_I(inode)->ip_blkno,
@@ -895,8 +899,8 @@ static void ocfs2_setup_unlink_vote(struct ocfs2_vote_msg *request,
 	 * d_delete against it. Parent directory block and full name
 	 * should suffice. */
 
-	mlog(0, "unlink/rename request: parent: %"MLFu64" name: %.*s\n",
-	     OCFS2_I(parent)->ip_blkno, dentry->d_name.len,
+	mlog(0, "unlink/rename request: parent: %llu name: %.*s\n",
+	     (unsigned long long)OCFS2_I(parent)->ip_blkno, dentry->d_name.len,
 	     dentry->d_name.name);
 
 	request->v_unlink_parent = cpu_to_be64(OCFS2_I(parent)->ip_blkno);
@@ -1082,7 +1086,8 @@ static int ocfs2_handle_response_message(struct o2net_msg *msg,
 	mlog(0, "received response message:\n");
 	mlog(0, "h_response_id = %u\n", response_id);
 	mlog(0, "h_request = %u\n", be32_to_cpu(resp->r_hdr.h_request));
-	mlog(0, "h_blkno = %"MLFu64"\n", be64_to_cpu(resp->r_hdr.h_blkno));
+	mlog(0, "h_blkno = %llu\n",
+	     (unsigned long long)be64_to_cpu(resp->r_hdr.h_blkno));
 	mlog(0, "h_generation = %u\n", be32_to_cpu(resp->r_hdr.h_generation));
 	mlog(0, "h_node_num = %u\n", node_num);
 	mlog(0, "r_response = %d\n", response_status);
@@ -1138,8 +1143,8 @@ static int ocfs2_handle_vote_message(struct o2net_msg *msg,
 	mlog(0, "h_response_id = %u\n",
 	     be32_to_cpu(work->w_msg.v_hdr.h_response_id));
 	mlog(0, "h_request = %u\n", be32_to_cpu(work->w_msg.v_hdr.h_request));
-	mlog(0, "h_blkno = %"MLFu64"\n",
-	     be64_to_cpu(work->w_msg.v_hdr.h_blkno));
+	mlog(0, "h_blkno = %llu\n",
+	     (unsigned long long)be64_to_cpu(work->w_msg.v_hdr.h_blkno));
 	mlog(0, "h_generation = %u\n",
 	     be32_to_cpu(work->w_msg.v_hdr.h_generation));
 	mlog(0, "h_node_num = %u\n",
diff --git a/fs/open.c b/fs/open.c
index 1091dadd6c3..c32c89d6d8d 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -27,6 +27,7 @@
 #include <linux/pagemap.h>
 #include <linux/syscalls.h>
 #include <linux/rcupdate.h>
+#include <linux/audit.h>
 
 #include <asm/unistd.h>
 
@@ -626,6 +627,8 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
 	dentry = file->f_dentry;
 	inode = dentry->d_inode;
 
+	audit_inode(NULL, inode, 0);
+
 	err = -EROFS;
 	if (IS_RDONLY(inode))
 		goto out_putf;
@@ -775,7 +778,10 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group)
 
 	file = fget(fd);
 	if (file) {
-		error = chown_common(file->f_dentry, user, group);
+		struct dentry * dentry;
+		dentry = file->f_dentry;
+		audit_inode(NULL, dentry->d_inode, 0);
+		error = chown_common(dentry, user, group);
 		fput(file);
 	}
 	return error;
@@ -890,6 +896,10 @@ EXPORT_SYMBOL(filp_open);
  * a fully instantiated struct file to the caller.
  * This function is meant to be called from within a filesystem's
  * lookup method.
+ * Beware of calling it for non-regular files! Those ->open methods might block
+ * (e.g. in fifo_open), leaving you with parent locked (and in case of fifo,
+ * leading to a deadlock, as nobody can open that fifo anymore, because
+ * another process to open fifo will block on locked parent when doing lookup).
  * Note that in case of error, nd->intent.open.file is destroyed, but the
  * path information remains valid.
  * If the open callback is set to NULL, then the standard f_op->open()
diff --git a/fs/pipe.c b/fs/pipe.c
index 8aada8e426f..d976866a115 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -662,10 +662,9 @@ struct inode* pipe_new(struct inode* inode)
 {
 	struct pipe_inode_info *info;
 
-	info = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
+	info = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
 	if (!info)
 		goto fail_page;
-	memset(info, 0, sizeof(*info));
 	inode->i_pipe = info;
 
 	init_waitqueue_head(PIPE_WAIT(*inode));
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 20feb7568de..8f1f49ceebe 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -104,6 +104,7 @@ enum pid_directory_inos {
 	PROC_TGID_MAPS,
 	PROC_TGID_NUMA_MAPS,
 	PROC_TGID_MOUNTS,
+	PROC_TGID_MOUNTSTATS,
 	PROC_TGID_WCHAN,
 #ifdef CONFIG_MMU
 	PROC_TGID_SMAPS,
@@ -144,6 +145,7 @@ enum pid_directory_inos {
 	PROC_TID_MAPS,
 	PROC_TID_NUMA_MAPS,
 	PROC_TID_MOUNTS,
+	PROC_TID_MOUNTSTATS,
 	PROC_TID_WCHAN,
 #ifdef CONFIG_MMU
 	PROC_TID_SMAPS,
@@ -201,6 +203,7 @@ static struct pid_entry tgid_base_stuff[] = {
 	E(PROC_TGID_ROOT,      "root",    S_IFLNK|S_IRWXUGO),
 	E(PROC_TGID_EXE,       "exe",     S_IFLNK|S_IRWXUGO),
 	E(PROC_TGID_MOUNTS,    "mounts",  S_IFREG|S_IRUGO),
+	E(PROC_TGID_MOUNTSTATS, "mountstats", S_IFREG|S_IRUSR),
 #ifdef CONFIG_MMU
 	E(PROC_TGID_SMAPS,     "smaps",   S_IFREG|S_IRUGO),
 #endif
@@ -732,6 +735,38 @@ static struct file_operations proc_mounts_operations = {
 	.poll		= mounts_poll,
 };
 
+extern struct seq_operations mountstats_op;
+static int mountstats_open(struct inode *inode, struct file *file)
+{
+	struct task_struct *task = proc_task(inode);
+	int ret = seq_open(file, &mountstats_op);
+
+	if (!ret) {
+		struct seq_file *m = file->private_data;
+		struct namespace *namespace;
+		task_lock(task);
+		namespace = task->namespace;
+		if (namespace)
+			get_namespace(namespace);
+		task_unlock(task);
+
+		if (namespace)
+			m->private = namespace;
+		else {
+			seq_release(inode, file);
+			ret = -EINVAL;
+		}
+	}
+	return ret;
+}
+
+static struct file_operations proc_mountstats_operations = {
+	.open		= mountstats_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= mounts_release,
+};
+
 #define PROC_BLOCK_SIZE	(3*1024)		/* 4K page size but our output routines use some slack for overruns */
 
 static ssize_t proc_info_read(struct file * file, char __user * buf,
@@ -1730,6 +1765,10 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
 			inode->i_fop = &proc_smaps_operations;
 			break;
 #endif
+		case PROC_TID_MOUNTSTATS:
+		case PROC_TGID_MOUNTSTATS:
+			inode->i_fop = &proc_mountstats_operations;
+			break;
 #ifdef CONFIG_SECURITY
 		case PROC_TID_ATTR:
 			inode->i_nlink = 2;
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 826c131994c..1e9ea37d457 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -485,6 +485,40 @@ static struct file_operations proc_slabinfo_operations = {
 	.llseek		= seq_lseek,
 	.release	= seq_release,
 };
+
+#ifdef CONFIG_DEBUG_SLAB_LEAK
+extern struct seq_operations slabstats_op;
+static int slabstats_open(struct inode *inode, struct file *file)
+{
+	unsigned long *n = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	int ret = -ENOMEM;
+	if (n) {
+		ret = seq_open(file, &slabstats_op);
+		if (!ret) {
+			struct seq_file *m = file->private_data;
+			*n = PAGE_SIZE / (2 * sizeof(unsigned long));
+			m->private = n;
+			n = NULL;
+		}
+		kfree(n);
+	}
+	return ret;
+}
+
+static int slabstats_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *m = file->private_data;
+	kfree(m->private);
+	return seq_release(inode, file);
+}
+
+static struct file_operations proc_slabstats_operations = {
+	.open		= slabstats_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= slabstats_release,
+};
+#endif
 #endif
 
 static int show_stat(struct seq_file *p, void *v)
@@ -744,6 +778,9 @@ void __init proc_misc_init(void)
 	create_seq_entry("interrupts", 0, &proc_interrupts_operations);
 #ifdef CONFIG_SLAB
 	create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations);
+#ifdef CONFIG_DEBUG_SLAB_LEAK
+	create_seq_entry("slab_allocators", 0 ,&proc_slabstats_operations);
+#endif
 #endif
 	create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations);
 	create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations);
diff --git a/fs/read_write.c b/fs/read_write.c
index 3f7a1a62165..34b1bf259ef 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -470,7 +470,7 @@ static ssize_t do_readv_writev(int type, struct file *file,
 	 * verify all the pointers
 	 */
 	ret = -EINVAL;
-	if ((nr_segs > UIO_MAXIOV) || (nr_segs <= 0))
+	if (nr_segs > UIO_MAXIOV)
 		goto out;
 	if (!file->f_op)
 		goto out;
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index be12879bb17..d0c1e865963 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -1532,7 +1532,7 @@ static ssize_t reiserfs_file_write(struct file *file,	/* the file we are going t
 		buf += write_bytes;
 		*ppos = pos += write_bytes;
 		count -= write_bytes;
-		balance_dirty_pages_ratelimited(inode->i_mapping);
+		balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages);
 	}
 
 	/* this is only true on error */
@@ -1546,10 +1546,10 @@ static ssize_t reiserfs_file_write(struct file *file,	/* the file we are going t
 		}
 	}
 
-	if ((file->f_flags & O_SYNC) || IS_SYNC(inode))
-		res =
-		    generic_osync_inode(inode, file->f_mapping,
-					OSYNC_METADATA | OSYNC_DATA);
+	if (likely(res >= 0) &&
+	    (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode))))
+		res = generic_osync_inode(inode, file->f_mapping,
+		                          OSYNC_METADATA | OSYNC_DATA);
 
 	mutex_unlock(&inode->i_mutex);
 	reiserfs_async_progress_wait(inode->i_sb);
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index aa22588019e..5600d3d60cf 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -191,9 +191,7 @@ static void create_virtual_node(struct tree_balance *tb, int h)
 					       "vs-8045: create_virtual_node: rdkey %k, affected item==%d (mode==%c) Must be %c",
 					       key, vn->vn_affected_item_num,
 					       vn->vn_mode, M_DELETE);
-			} else
-				/* we can delete directory item, that has only one directory entry in it */
-				;
+			}
 		}
 #endif
 
diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c
index e237cd668e5..7a88adbceef 100644
--- a/fs/reiserfs/item_ops.c
+++ b/fs/reiserfs/item_ops.c
@@ -275,7 +275,7 @@ static void indirect_print_item(struct item_head *ih, char *item)
 	int j;
 	__le32 *unp;
 	__u32 prev = INT_MAX;
-	int num;
+	int num = 0;
 
 	unp = (__le32 *) item;
 
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 5a9d2722fa0..1b73529b809 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2227,6 +2227,9 @@ static int journal_read_transaction(struct super_block *p_s_sb,
 	journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb);
 	journal->j_last_flush_trans_id = trans_id;
 	journal->j_trans_id = trans_id + 1;
+	/* check for trans_id overflow */
+	if (journal->j_trans_id == 0)
+		journal->j_trans_id = 10;
 	brelse(c_bh);
 	brelse(d_bh);
 	kfree(log_blocks);
@@ -2450,6 +2453,9 @@ static int journal_read(struct super_block *p_s_sb)
 		journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset);
 		journal->j_trans_id =
 		    le32_to_cpu(jh->j_last_flush_trans_id) + 1;
+		/* check for trans_id overflow */
+		if (journal->j_trans_id == 0)
+			journal->j_trans_id = 10;
 		journal->j_last_flush_trans_id =
 		    le32_to_cpu(jh->j_last_flush_trans_id);
 		journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1;
@@ -3873,8 +3879,8 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
 	int cur_write_start = 0;	/* start index of current log write */
 	int old_start;
 	int i;
-	int flush = flags & FLUSH_ALL;
-	int wait_on_commit = flags & WAIT;
+	int flush;
+	int wait_on_commit;
 	struct reiserfs_journal_list *jl, *temp_jl;
 	struct list_head *entry, *safe;
 	unsigned long jindex;
@@ -3884,6 +3890,13 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
 	BUG_ON(th->t_refcount > 1);
 	BUG_ON(!th->t_trans_id);
 
+	/* protect flush_older_commits from doing mistakes if the
+           transaction ID counter gets overflowed.  */
+	if (th->t_trans_id == ~0UL)
+		flags |= FLUSH_ALL | COMMIT_NOW | WAIT;
+	flush = flags & FLUSH_ALL;
+	wait_on_commit = flags & WAIT;
+
 	put_fs_excl();
 	current->journal_info = th->t_handle_save;
 	reiserfs_check_lock_depth(p_s_sb, "journal end");
@@ -4105,7 +4118,9 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
 	journal->j_first = NULL;
 	journal->j_len = 0;
 	journal->j_trans_start_time = 0;
-	journal->j_trans_id++;
+	/* check for trans_id overflow */
+	if (++journal->j_trans_id == 0)
+		journal->j_trans_id = 10;
 	journal->j_current_jl->j_trans_id = journal->j_trans_id;
 	journal->j_must_wait = 0;
 	journal->j_len_alloc = 0;
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index e2d08d7bcff..d2b25e1ba6e 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -981,6 +981,8 @@ static inline int prepare_for_direntry_item(struct path *path,
 	return M_CUT;
 }
 
+#define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1)
+
 /*  If the path points to a directory or direct item, calculate mode and the size cut, for balance.
     If the path points to an indirect item, remove some number of its unformatted nodes.
     In case of file truncate calculate whether this item must be deleted/truncated or last
@@ -1020,148 +1022,79 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st
 
 	/* Case of an indirect item. */
 	{
-		int n_unfm_number,	/* Number of the item unformatted nodes. */
-		 n_counter, n_blk_size;
-		__le32 *p_n_unfm_pointer;	/* Pointer to the unformatted node number. */
-		__u32 tmp;
-		struct item_head s_ih;	/* Item header. */
-		char c_mode;	/* Returned mode of the balance. */
-		int need_research;
-
-		n_blk_size = p_s_sb->s_blocksize;
-
-		/* Search for the needed object indirect item until there are no unformatted nodes to be removed. */
-		do {
-			need_research = 0;
-			p_s_bh = PATH_PLAST_BUFFER(p_s_path);
-			/* Copy indirect item header to a temp variable. */
-			copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
-			/* Calculate number of unformatted nodes in this item. */
-			n_unfm_number = I_UNFM_NUM(&s_ih);
-
-			RFALSE(!is_indirect_le_ih(&s_ih) || !n_unfm_number ||
-			       pos_in_item(p_s_path) + 1 != n_unfm_number,
-			       "PAP-5240: invalid item %h "
-			       "n_unfm_number = %d *p_n_pos_in_item = %d",
-			       &s_ih, n_unfm_number, pos_in_item(p_s_path));
-
-			/* Calculate balance mode and position in the item to remove unformatted nodes. */
-			if (n_new_file_length == max_reiserfs_offset(inode)) {	/* Case of delete. */
-				pos_in_item(p_s_path) = 0;
-				*p_n_cut_size = -(IH_SIZE + ih_item_len(&s_ih));
-				c_mode = M_DELETE;
-			} else {	/* Case of truncate. */
-				if (n_new_file_length < le_ih_k_offset(&s_ih)) {
-					pos_in_item(p_s_path) = 0;
-					*p_n_cut_size =
-					    -(IH_SIZE + ih_item_len(&s_ih));
-					c_mode = M_DELETE;	/* Delete this item. */
-				} else {
-					/* indirect item must be truncated starting from *p_n_pos_in_item-th position */
-					pos_in_item(p_s_path) =
-					    (n_new_file_length + n_blk_size -
-					     le_ih_k_offset(&s_ih)) >> p_s_sb->
-					    s_blocksize_bits;
-
-					RFALSE(pos_in_item(p_s_path) >
-					       n_unfm_number,
-					       "PAP-5250: invalid position in the item");
-
-					/* Either convert last unformatted node of indirect item to direct item or increase
-					   its free space.  */
-					if (pos_in_item(p_s_path) ==
-					    n_unfm_number) {
-						*p_n_cut_size = 0;	/* Nothing to cut. */
-						return M_CONVERT;	/* Maybe convert last unformatted node to the direct item. */
-					}
-					/* Calculate size to cut. */
-					*p_n_cut_size =
-					    -(ih_item_len(&s_ih) -
-					      pos_in_item(p_s_path) *
-					      UNFM_P_SIZE);
-
-					c_mode = M_CUT;	/* Cut from this indirect item. */
-				}
-			}
+	    int blk_size = p_s_sb->s_blocksize;
+	    struct item_head s_ih;
+	    int need_re_search;
+	    int delete = 0;
+	    int result = M_CUT;
+	    int pos = 0;
+
+	    if ( n_new_file_length == max_reiserfs_offset (inode) ) {
+		/* prepare_for_delete_or_cut() is called by
+		 * reiserfs_delete_item() */
+		n_new_file_length = 0;
+		delete = 1;
+	    }
+
+	    do {
+		need_re_search = 0;
+		*p_n_cut_size = 0;
+		p_s_bh = PATH_PLAST_BUFFER(p_s_path);
+		copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
+		pos = I_UNFM_NUM(&s_ih);
 
-			RFALSE(n_unfm_number <= pos_in_item(p_s_path),
-			       "PAP-5260: invalid position in the indirect item");
-
-			/* pointers to be cut */
-			n_unfm_number -= pos_in_item(p_s_path);
-			/* Set pointer to the last unformatted node pointer that is to be cut. */
-			p_n_unfm_pointer =
-			    (__le32 *) B_I_PITEM(p_s_bh,
-						 &s_ih) + I_UNFM_NUM(&s_ih) -
-			    1 - *p_n_removed;
-
-			/* We go through the unformatted nodes pointers of the indirect
-			   item and look for the unformatted nodes in the cache. If we
-			   found some of them we free it, zero corresponding indirect item
-			   entry and log buffer containing that indirect item. For this we
-			   need to prepare last path element for logging. If some
-			   unformatted node has b_count > 1 we must not free this
-			   unformatted node since it is in use. */
-			reiserfs_prepare_for_journal(p_s_sb, p_s_bh, 1);
-			// note: path could be changed, first line in for loop takes care
-			// of it
+		while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > n_new_file_length) {
+		    __u32 *unfm, block;
 
-			for (n_counter = *p_n_removed;
-			     n_counter < n_unfm_number;
-			     n_counter++, p_n_unfm_pointer--) {
+		    /* Each unformatted block deletion may involve one additional
+		     * bitmap block into the transaction, thereby the initial
+		     * journal space reservation might not be enough. */
+		    if (!delete && (*p_n_cut_size) != 0 &&
+			reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
+			break;
+		    }
 
-				cond_resched();
-				if (item_moved(&s_ih, p_s_path)) {
-					need_research = 1;
-					break;
-				}
-				RFALSE(p_n_unfm_pointer <
-				       (__le32 *) B_I_PITEM(p_s_bh, &s_ih)
-				       || p_n_unfm_pointer >
-				       (__le32 *) B_I_PITEM(p_s_bh,
-							    &s_ih) +
-				       I_UNFM_NUM(&s_ih) - 1,
-				       "vs-5265: pointer out of range");
-
-				/* Hole, nothing to remove. */
-				if (!get_block_num(p_n_unfm_pointer, 0)) {
-					(*p_n_removed)++;
-					continue;
-				}
+		    unfm = (__u32 *)B_I_PITEM(p_s_bh, &s_ih) + pos - 1;
+		    block = get_block_num(unfm, 0);
 
-				(*p_n_removed)++;
+		    if (block != 0) {
+			reiserfs_prepare_for_journal(p_s_sb, p_s_bh, 1);
+			put_block_num(unfm, 0, 0);
+			journal_mark_dirty (th, p_s_sb, p_s_bh);
+			reiserfs_free_block(th, inode, block, 1);
+		    }
 
-				tmp = get_block_num(p_n_unfm_pointer, 0);
-				put_block_num(p_n_unfm_pointer, 0, 0);
-				journal_mark_dirty(th, p_s_sb, p_s_bh);
-				reiserfs_free_block(th, inode, tmp, 1);
-				if (item_moved(&s_ih, p_s_path)) {
-					need_research = 1;
-					break;
-				}
-			}
+		    cond_resched();
 
-			/* a trick.  If the buffer has been logged, this
-			 ** will do nothing.  If we've broken the loop without
-			 ** logging it, it will restore the buffer
-			 **
-			 */
-			reiserfs_restore_prepared_buffer(p_s_sb, p_s_bh);
-
-			/* This loop can be optimized. */
-		} while ((*p_n_removed < n_unfm_number || need_research) &&
-			 search_for_position_by_key(p_s_sb, p_s_item_key,
-						    p_s_path) ==
-			 POSITION_FOUND);
-
-		RFALSE(*p_n_removed < n_unfm_number,
-		       "PAP-5310: indirect item is not found");
-		RFALSE(item_moved(&s_ih, p_s_path),
-		       "after while, comp failed, retry");
-
-		if (c_mode == M_CUT)
-			pos_in_item(p_s_path) *= UNFM_P_SIZE;
-		return c_mode;
+		    if (item_moved (&s_ih, p_s_path))  {
+			need_re_search = 1;
+			break;
+		    }
+
+		    pos --;
+		    (*p_n_removed) ++;
+		    (*p_n_cut_size) -= UNFM_P_SIZE;
+
+		    if (pos == 0) {
+			(*p_n_cut_size) -= IH_SIZE;
+			result = M_DELETE;
+			break;
+		    }
+		}
+		/* a trick.  If the buffer has been logged, this will do nothing.  If
+		** we've broken the loop without logging it, it will restore the
+		** buffer */
+		reiserfs_restore_prepared_buffer(p_s_sb, p_s_bh);
+	    } while (need_re_search &&
+		     search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_FOUND);
+	    pos_in_item(p_s_path) = pos * UNFM_P_SIZE;
+
+	    if (*p_n_cut_size == 0) {
+		/* Nothing were cut. maybe convert last unformatted node to the
+		 * direct item? */
+		result = M_CONVERT;
+	    }
+	    return result;
 	}
 }
 
@@ -1948,7 +1881,8 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, struct inode *p
 		 ** sure the file is consistent before ending the current trans
 		 ** and starting a new one
 		 */
-		if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
+		if (journal_transaction_should_end(th, 0) ||
+		    reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
 			int orig_len_alloc = th->t_blocks_allocated;
 			decrement_counters_in_path(&s_search_path);
 
@@ -1962,7 +1896,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, struct inode *p
 			if (err)
 				goto out;
 			err = journal_begin(th, p_s_inode->i_sb,
-					    JOURNAL_PER_BALANCE_CNT * 6);
+					    JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD + JOURNAL_PER_BALANCE_CNT * 4) ;
 			if (err)
 				goto out;
 			reiserfs_update_inode_transaction(p_s_inode);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 93e6ef9360e..cae2abbc0c7 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -685,14 +685,14 @@ static const arg_desc_t logging_mode[] = {
 	 (1 << REISERFS_DATA_ORDERED | 1 << REISERFS_DATA_WRITEBACK)},
 	{"writeback", 1 << REISERFS_DATA_WRITEBACK,
 	 (1 << REISERFS_DATA_ORDERED | 1 << REISERFS_DATA_LOG)},
-	{NULL, 0}
+	{.value = NULL}
 };
 
 /* possible values for -o barrier= */
 static const arg_desc_t barrier_mode[] = {
 	{"none", 1 << REISERFS_BARRIER_NONE, 1 << REISERFS_BARRIER_FLUSH},
 	{"flush", 1 << REISERFS_BARRIER_FLUSH, 1 << REISERFS_BARRIER_NONE},
-	{NULL, 0}
+	{.value = NULL}
 };
 
 /* possible values for "-o block-allocator=" and bits which are to be set in
@@ -890,7 +890,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options,	/* strin
 		{"acl",.setmask = 1 << REISERFS_UNSUPPORTED_OPT},
 		{"noacl",.clrmask = 1 << REISERFS_UNSUPPORTED_OPT},
 #endif
-		{"nolog",},	/* This is unsupported */
+		{.option_name = "nolog"},
 		{"replayonly",.setmask = 1 << REPLAYONLY},
 		{"block-allocator",.arg_required = 'a',.values = balloc},
 		{"data",.arg_required = 'd',.values = logging_mode},
@@ -908,7 +908,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options,	/* strin
 		{"grpjquota",.arg_required =
 		 'g' | (1 << REISERFS_OPT_ALLOWEMPTY),.values = NULL},
 		{"jqfmt",.arg_required = 'f',.values = NULL},
-		{NULL,}
+		{.option_name = NULL}
 	};
 
 	*blocks = 0;
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index ab8894c3b9e..58c418fbca2 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -182,7 +182,7 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
 {
 	char *name, *value;
 	struct posix_acl *acl, **p_acl;
-	size_t size;
+	int size;
 	int retval;
 	struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
 
@@ -206,7 +206,7 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
 		return posix_acl_dup(*p_acl);
 
 	size = reiserfs_xattr_get(inode, name, NULL, 0);
-	if ((int)size < 0) {
+	if (size < 0) {
 		if (size == -ENODATA || size == -ENOSYS) {
 			*p_acl = ERR_PTR(-ENODATA);
 			return NULL;
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 44ed1d418b4..fdeabc0a34f 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -217,7 +217,7 @@ smb_set_inode_attr(struct inode *inode, struct smb_fattr *fattr)
 	if (inode->i_mtime.tv_sec != last_time || inode->i_size != last_sz) {
 		VERBOSE("%ld changed, old=%ld, new=%ld, oz=%ld, nz=%ld\n",
 			inode->i_ino,
-			(long) last_time, (long) inode->i_mtime,
+			(long) last_time, (long) inode->i_mtime.tv_sec,
 			(long) last_sz, (long) inode->i_size);
 
 		if (!S_ISDIR(inode->i_mode))
diff --git a/fs/super.c b/fs/super.c
index 37554b87618..8743e9bbb29 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -55,11 +55,10 @@ DEFINE_SPINLOCK(sb_lock);
  */
 static struct super_block *alloc_super(void)
 {
-	struct super_block *s = kmalloc(sizeof(struct super_block),  GFP_USER);
+	struct super_block *s = kzalloc(sizeof(struct super_block),  GFP_USER);
 	static struct super_operations default_op;
 
 	if (s) {
-		memset(s, 0, sizeof(struct super_block));
 		if (security_sb_alloc(s)) {
 			kfree(s);
 			s = NULL;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index d04cff2273b..81e0e8459af 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1341,13 +1341,11 @@ udf_update_inode(struct inode *inode, int do_sync)
 
 	if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_FORGET))
 		fe->uid = cpu_to_le32(-1);
-	else if (inode->i_uid != UDF_SB(inode->i_sb)->s_uid)
-		fe->uid = cpu_to_le32(inode->i_uid);
+	else fe->uid = cpu_to_le32(inode->i_uid);
 
 	if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_FORGET))
 		fe->gid = cpu_to_le32(-1);
-	else if (inode->i_gid != UDF_SB(inode->i_sb)->s_gid)
-		fe->gid = cpu_to_le32(inode->i_gid);
+	else fe->gid = cpu_to_le32(inode->i_gid);
 
 	udfperms =	((inode->i_mode & S_IRWXO)     ) |
 			((inode->i_mode & S_IRWXG) << 2) |
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index d257644a1ae..db98a4c71e6 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -575,7 +575,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
 		if (!silent)
 			printk("You didn't specify the type of your ufs filesystem\n\n"
 			"mount -t ufs -o ufstype="
-			"sun|sunx86|44bsd|ufs2|5xbsd|old|hp|nextstep|netxstep-cd|openstep ...\n\n"
+			"sun|sunx86|44bsd|ufs2|5xbsd|old|hp|nextstep|nextstep-cd|openstep ...\n\n"
 			">>>WARNING<<< Wrong ufstype may corrupt your filesystem, "
 			"default is ufstype=old\n");
 		ufs_set_opt (sbi->s_mount_opt, UFSTYPE_OLD);
diff --git a/fs/xattr.c b/fs/xattr.c
index 80eca7d3d69..e416190f5e9 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -17,6 +17,7 @@
 #include <linux/syscalls.h>
 #include <linux/module.h>
 #include <linux/fsnotify.h>
+#include <linux/audit.h>
 #include <asm/uaccess.h>
 
 
@@ -234,12 +235,15 @@ sys_fsetxattr(int fd, char __user *name, void __user *value,
 	      size_t size, int flags)
 {
 	struct file *f;
+	struct dentry *dentry;
 	int error = -EBADF;
 
 	f = fget(fd);
 	if (!f)
 		return error;
-	error = setxattr(f->f_dentry, name, value, size, flags);
+	dentry = f->f_dentry;
+	audit_inode(NULL, dentry->d_inode, 0);
+	error = setxattr(dentry, name, value, size, flags);
 	fput(f);
 	return error;
 }
@@ -458,12 +462,15 @@ asmlinkage long
 sys_fremovexattr(int fd, char __user *name)
 {
 	struct file *f;
+	struct dentry *dentry;
 	int error = -EBADF;
 
 	f = fget(fd);
 	if (!f)
 		return error;
-	error = removexattr(f->f_dentry, name);
+	dentry = f->f_dentry;
+	audit_inode(NULL, dentry->d_inode, 0);
+	error = removexattr(dentry, name);
 	fput(f);
 	return error;
 }