339 files changed, 14492 insertions, 11898 deletions
diff --git a/fs/9p/9p.h b/fs/9p/9p.h
deleted file mode 100644
index 94e2f92ab2e..00000000000
--- a/fs/9p/9p.h
+++ /dev/null
@@ -1,375 +0,0 @@
-/*
- * linux/fs/9p/9p.h
- *
- * 9P protocol definitions.
- *
- *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
- *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-
-/* Message Types */
-enum {
-	TVERSION = 100,
-	RVERSION,
-	TAUTH = 102,
-	RAUTH,
-	TATTACH = 104,
-	RATTACH,
-	TERROR = 106,
-	RERROR,
-	TFLUSH = 108,
-	RFLUSH,
-	TWALK = 110,
-	RWALK,
-	TOPEN = 112,
-	ROPEN,
-	TCREATE = 114,
-	RCREATE,
-	TREAD = 116,
-	RREAD,
-	TWRITE = 118,
-	RWRITE,
-	TCLUNK = 120,
-	RCLUNK,
-	TREMOVE = 122,
-	RREMOVE,
-	TSTAT = 124,
-	RSTAT,
-	TWSTAT = 126,
-	RWSTAT,
-};
-
-/* modes */
-enum {
-	V9FS_OREAD = 0x00,
-	V9FS_OWRITE = 0x01,
-	V9FS_ORDWR = 0x02,
-	V9FS_OEXEC = 0x03,
-	V9FS_OEXCL = 0x04,
-	V9FS_OTRUNC = 0x10,
-	V9FS_OREXEC = 0x20,
-	V9FS_ORCLOSE = 0x40,
-	V9FS_OAPPEND = 0x80,
-};
-
-/* permissions */
-enum {
-	V9FS_DMDIR = 0x80000000,
-	V9FS_DMAPPEND = 0x40000000,
-	V9FS_DMEXCL = 0x20000000,
-	V9FS_DMMOUNT = 0x10000000,
-	V9FS_DMAUTH = 0x08000000,
-	V9FS_DMTMP = 0x04000000,
-	V9FS_DMSYMLINK = 0x02000000,
-	V9FS_DMLINK = 0x01000000,
-	/* 9P2000.u extensions */
-	V9FS_DMDEVICE = 0x00800000,
-	V9FS_DMNAMEDPIPE = 0x00200000,
-	V9FS_DMSOCKET = 0x00100000,
-	V9FS_DMSETUID = 0x00080000,
-	V9FS_DMSETGID = 0x00040000,
-};
-
-/* qid.types */
-enum {
-	V9FS_QTDIR = 0x80,
-	V9FS_QTAPPEND = 0x40,
-	V9FS_QTEXCL = 0x20,
-	V9FS_QTMOUNT = 0x10,
-	V9FS_QTAUTH = 0x08,
-	V9FS_QTTMP = 0x04,
-	V9FS_QTSYMLINK = 0x02,
-	V9FS_QTLINK = 0x01,
-	V9FS_QTFILE = 0x00,
-};
-
-#define V9FS_NOTAG	(u16)(~0)
-#define V9FS_NOFID	(u32)(~0)
-#define V9FS_MAXWELEM	16
-
-/* ample room for Twrite/Rread header (iounit) */
-#define V9FS_IOHDRSZ	24
-
-struct v9fs_str {
-	u16 len;
-	char *str;
-};
-
-/* qids are the unique ID for a file (like an inode */
-struct v9fs_qid {
-	u8 type;
-	u32 version;
-	u64 path;
-};
-
-/* Plan 9 file metadata (stat) structure */
-struct v9fs_stat {
-	u16 size;
-	u16 type;
-	u32 dev;
-	struct v9fs_qid qid;
-	u32 mode;
-	u32 atime;
-	u32 mtime;
-	u64 length;
-	struct v9fs_str name;
-	struct v9fs_str uid;
-	struct v9fs_str gid;
-	struct v9fs_str muid;
-	struct v9fs_str extension;	/* 9p2000.u extensions */
-	u32 n_uid;		/* 9p2000.u extensions */
-	u32 n_gid;		/* 9p2000.u extensions */
-	u32 n_muid;		/* 9p2000.u extensions */
-};
-
-/* file metadata (stat) structure used to create Twstat message
-   The is similar to v9fs_stat, but the strings don't point to
-   the same memory block and should be freed separately
-*/
-struct v9fs_wstat {
-	u16 size;
-	u16 type;
-	u32 dev;
-	struct v9fs_qid qid;
-	u32 mode;
-	u32 atime;
-	u32 mtime;
-	u64 length;
-	char *name;
-	char *uid;
-	char *gid;
-	char *muid;
-	char *extension;	/* 9p2000.u extensions */
-	u32 n_uid;		/* 9p2000.u extensions */
-	u32 n_gid;		/* 9p2000.u extensions */
-	u32 n_muid;		/* 9p2000.u extensions */
-};
-
-/* Structures for Protocol Operations */
-
-struct Tversion {
-	u32 msize;
-	struct v9fs_str version;
-};
-
-struct Rversion {
-	u32 msize;
-	struct v9fs_str version;
-};
-
-struct Tauth {
-	u32 afid;
-	struct v9fs_str uname;
-	struct v9fs_str aname;
-};
-
-struct Rauth {
-	struct v9fs_qid qid;
-};
-
-struct Rerror {
-	struct v9fs_str error;
-	u32 errno;		/* 9p2000.u extension */
-};
-
-struct Tflush {
-	u16 oldtag;
-};
-
-struct Rflush {
-};
-
-struct Tattach {
-	u32 fid;
-	u32 afid;
-	struct v9fs_str uname;
-	struct v9fs_str aname;
-};
-
-struct Rattach {
-	struct v9fs_qid qid;
-};
-
-struct Twalk {
-	u32 fid;
-	u32 newfid;
-	u16 nwname;
-	struct v9fs_str wnames[16];
-};
-
-struct Rwalk {
-	u16 nwqid;
-	struct v9fs_qid wqids[16];
-};
-
-struct Topen {
-	u32 fid;
-	u8 mode;
-};
-
-struct Ropen {
-	struct v9fs_qid qid;
-	u32 iounit;
-};
-
-struct Tcreate {
-	u32 fid;
-	struct v9fs_str name;
-	u32 perm;
-	u8 mode;
-	struct v9fs_str extension;
-};
-
-struct Rcreate {
-	struct v9fs_qid qid;
-	u32 iounit;
-};
-
-struct Tread {
-	u32 fid;
-	u64 offset;
-	u32 count;
-};
-
-struct Rread {
-	u32 count;
-	u8 *data;
-};
-
-struct Twrite {
-	u32 fid;
-	u64 offset;
-	u32 count;
-	u8 *data;
-};
-
-struct Rwrite {
-	u32 count;
-};
-
-struct Tclunk {
-	u32 fid;
-};
-
-struct Rclunk {
-};
-
-struct Tremove {
-	u32 fid;
-};
-
-struct Rremove {
-};
-
-struct Tstat {
-	u32 fid;
-};
-
-struct Rstat {
-	struct v9fs_stat stat;
-};
-
-struct Twstat {
-	u32 fid;
-	struct v9fs_stat stat;
-};
-
-struct Rwstat {
-};
-
-/*
-  * fcall is the primary packet structure
-  *
-  */
-
-struct v9fs_fcall {
-	u32 size;
-	u8 id;
-	u16 tag;
-	void *sdata;
-
-	union {
-		struct Tversion tversion;
-		struct Rversion rversion;
-		struct Tauth tauth;
-		struct Rauth rauth;
-		struct Rerror rerror;
-		struct Tflush tflush;
-		struct Rflush rflush;
-		struct Tattach tattach;
-		struct Rattach rattach;
-		struct Twalk twalk;
-		struct Rwalk rwalk;
-		struct Topen topen;
-		struct Ropen ropen;
-		struct Tcreate tcreate;
-		struct Rcreate rcreate;
-		struct Tread tread;
-		struct Rread rread;
-		struct Twrite twrite;
-		struct Rwrite rwrite;
-		struct Tclunk tclunk;
-		struct Rclunk rclunk;
-		struct Tremove tremove;
-		struct Rremove rremove;
-		struct Tstat tstat;
-		struct Rstat rstat;
-		struct Twstat twstat;
-		struct Rwstat rwstat;
-	} params;
-};
-
-#define PRINT_FCALL_ERROR(s, fcall) dprintk(DEBUG_ERROR, "%s: %.*s\n", s, \
-	fcall?fcall->params.rerror.error.len:0, \
-	fcall?fcall->params.rerror.error.str:"");
-
-int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
-		   char *version, struct v9fs_fcall **rcall);
-
-int v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
-		  u32 fid, u32 afid, struct v9fs_fcall **rcall);
-
-int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid);
-
-int v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid,
-		struct v9fs_fcall **rcall);
-
-int v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
-		 struct v9fs_wstat *wstat, struct v9fs_fcall **rcall);
-
-int v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
-		char *name, struct v9fs_fcall **rcall);
-
-int v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
-		struct v9fs_fcall **rcall);
-
-int v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
-		  struct v9fs_fcall **rcall);
-
-int v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
-	u32 perm, u8 mode, char *extension, struct v9fs_fcall **rcall);
-
-int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid,
-		u64 offset, u32 count, struct v9fs_fcall **rcall);
-
-int v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
-		 u32 count, const char __user * data,
-		 struct v9fs_fcall **rcall);
-int v9fs_printfcall(char *, int, struct v9fs_fcall *, int);
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index 87897f84dfb..bc7f0d1551e 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -1,18 +1,12 @@
 obj-$(CONFIG_9P_FS) := 9p.o
 
 9p-objs := \
-	trans_fd.o \
-	mux.o \
-	fcall.o \
-	conv.o \
 	vfs_super.o \
 	vfs_inode.o \
 	vfs_addr.o \
 	vfs_file.o \
 	vfs_dir.o \
 	vfs_dentry.o \
-	error.o \
 	v9fs.o \
 	fid.o \
-	fcprint.o
 
diff --git a/fs/9p/conv.c b/fs/9p/conv.c
deleted file mode 100644
index a3ed571eee3..00000000000
--- a/fs/9p/conv.c
+++ /dev/null
@@ -1,845 +0,0 @@
-/*
- * linux/fs/9p/conv.c
- *
- * 9P protocol conversion functions
- *
- *  Copyright (C) 2004, 2005 by Latchesar Ionkov <lucho@ionkov.net>
- *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/idr.h>
-#include <asm/uaccess.h>
-#include "debug.h"
-#include "v9fs.h"
-#include "9p.h"
-#include "conv.h"
-
-/*
- * Buffer to help with string parsing
- */
-struct cbuf {
-	unsigned char *sp;
-	unsigned char *p;
-	unsigned char *ep;
-};
-
-static inline void buf_init(struct cbuf *buf, void *data, int datalen)
-{
-	buf->sp = buf->p = data;
-	buf->ep = data + datalen;
-}
-
-static inline int buf_check_overflow(struct cbuf *buf)
-{
-	return buf->p > buf->ep;
-}
-
-static int buf_check_size(struct cbuf *buf, int len)
-{
-	if (buf->p + len > buf->ep) {
-		if (buf->p < buf->ep) {
-			eprintk(KERN_ERR, "buffer overflow: want %d has %d\n",
-				len, (int)(buf->ep - buf->p));
-			dump_stack();
-			buf->p = buf->ep + 1;
-		}
-
-		return 0;
-	}
-
-	return 1;
-}
-
-static void *buf_alloc(struct cbuf *buf, int len)
-{
-	void *ret = NULL;
-
-	if (buf_check_size(buf, len)) {
-		ret = buf->p;
-		buf->p += len;
-	}
-
-	return ret;
-}
-
-static void buf_put_int8(struct cbuf *buf, u8 val)
-{
-	if (buf_check_size(buf, 1)) {
-		buf->p[0] = val;
-		buf->p++;
-	}
-}
-
-static void buf_put_int16(struct cbuf *buf, u16 val)
-{
-	if (buf_check_size(buf, 2)) {
-		*(__le16 *) buf->p = cpu_to_le16(val);
-		buf->p += 2;
-	}
-}
-
-static void buf_put_int32(struct cbuf *buf, u32 val)
-{
-	if (buf_check_size(buf, 4)) {
-		*(__le32 *)buf->p = cpu_to_le32(val);
-		buf->p += 4;
-	}
-}
-
-static void buf_put_int64(struct cbuf *buf, u64 val)
-{
-	if (buf_check_size(buf, 8)) {
-		*(__le64 *)buf->p = cpu_to_le64(val);
-		buf->p += 8;
-	}
-}
-
-static char *buf_put_stringn(struct cbuf *buf, const char *s, u16 slen)
-{
-	char *ret;
-
-	ret = NULL;
-	if (buf_check_size(buf, slen + 2)) {
-		buf_put_int16(buf, slen);
-		ret = buf->p;
-		memcpy(buf->p, s, slen);
-		buf->p += slen;
-	}
-
-	return ret;
-}
-
-static inline void buf_put_string(struct cbuf *buf, const char *s)
-{
-	buf_put_stringn(buf, s, strlen(s));
-}
-
-static u8 buf_get_int8(struct cbuf *buf)
-{
-	u8 ret = 0;
-
-	if (buf_check_size(buf, 1)) {
-		ret = buf->p[0];
-		buf->p++;
-	}
-
-	return ret;
-}
-
-static u16 buf_get_int16(struct cbuf *buf)
-{
-	u16 ret = 0;
-
-	if (buf_check_size(buf, 2)) {
-		ret = le16_to_cpu(*(__le16 *)buf->p);
-		buf->p += 2;
-	}
-
-	return ret;
-}
-
-static u32 buf_get_int32(struct cbuf *buf)
-{
-	u32 ret = 0;
-
-	if (buf_check_size(buf, 4)) {
-		ret = le32_to_cpu(*(__le32 *)buf->p);
-		buf->p += 4;
-	}
-
-	return ret;
-}
-
-static u64 buf_get_int64(struct cbuf *buf)
-{
-	u64 ret = 0;
-
-	if (buf_check_size(buf, 8)) {
-		ret = le64_to_cpu(*(__le64 *)buf->p);
-		buf->p += 8;
-	}
-
-	return ret;
-}
-
-static void buf_get_str(struct cbuf *buf, struct v9fs_str *vstr)
-{
-	vstr->len = buf_get_int16(buf);
-	if (!buf_check_overflow(buf) && buf_check_size(buf, vstr->len)) {
-		vstr->str = buf->p;
-		buf->p += vstr->len;
-	} else {
-		vstr->len = 0;
-		vstr->str = NULL;
-	}
-}
-
-static void buf_get_qid(struct cbuf *bufp, struct v9fs_qid *qid)
-{
-	qid->type = buf_get_int8(bufp);
-	qid->version = buf_get_int32(bufp);
-	qid->path = buf_get_int64(bufp);
-}
-
-/**
- * v9fs_size_wstat - calculate the size of a variable length stat struct
- * @stat: metadata (stat) structure
- * @extended: non-zero if 9P2000.u
- *
- */
-
-static int v9fs_size_wstat(struct v9fs_wstat *wstat, int extended)
-{
-	int size = 0;
-
-	if (wstat == NULL) {
-		eprintk(KERN_ERR, "v9fs_size_stat: got a NULL stat pointer\n");
-		return 0;
-	}
-
-	size =			/* 2 + *//* size[2] */
-	    2 +			/* type[2] */
-	    4 +			/* dev[4] */
-	    1 +			/* qid.type[1] */
-	    4 +			/* qid.vers[4] */
-	    8 +			/* qid.path[8] */
-	    4 +			/* mode[4] */
-	    4 +			/* atime[4] */
-	    4 +			/* mtime[4] */
-	    8 +			/* length[8] */
-	    8;			/* minimum sum of string lengths */
-
-	if (wstat->name)
-		size += strlen(wstat->name);
-	if (wstat->uid)
-		size += strlen(wstat->uid);
-	if (wstat->gid)
-		size += strlen(wstat->gid);
-	if (wstat->muid)
-		size += strlen(wstat->muid);
-
-	if (extended) {
-		size += 4 +	/* n_uid[4] */
-		    4 +		/* n_gid[4] */
-		    4 +		/* n_muid[4] */
-		    2;		/* string length of extension[4] */
-		if (wstat->extension)
-			size += strlen(wstat->extension);
-	}
-
-	return size;
-}
-
-/**
- * buf_get_stat - safely decode a recieved metadata (stat) structure
- * @bufp: buffer to deserialize
- * @stat: metadata (stat) structure
- * @extended: non-zero if 9P2000.u
- *
- */
-
-static void
-buf_get_stat(struct cbuf *bufp, struct v9fs_stat *stat, int extended)
-{
-	stat->size = buf_get_int16(bufp);
-	stat->type = buf_get_int16(bufp);
-	stat->dev = buf_get_int32(bufp);
-	stat->qid.type = buf_get_int8(bufp);
-	stat->qid.version = buf_get_int32(bufp);
-	stat->qid.path = buf_get_int64(bufp);
-	stat->mode = buf_get_int32(bufp);
-	stat->atime = buf_get_int32(bufp);
-	stat->mtime = buf_get_int32(bufp);
-	stat->length = buf_get_int64(bufp);
-	buf_get_str(bufp, &stat->name);
-	buf_get_str(bufp, &stat->uid);
-	buf_get_str(bufp, &stat->gid);
-	buf_get_str(bufp, &stat->muid);
-
-	if (extended) {
-		buf_get_str(bufp, &stat->extension);
-		stat->n_uid = buf_get_int32(bufp);
-		stat->n_gid = buf_get_int32(bufp);
-		stat->n_muid = buf_get_int32(bufp);
-	}
-}
-
-/**
- * v9fs_deserialize_stat - decode a received metadata structure
- * @buf: buffer to deserialize
- * @buflen: length of received buffer
- * @stat: metadata structure to decode into
- * @extended: non-zero if 9P2000.u
- *
- * Note: stat will point to the buf region.
- */
-
-int
-v9fs_deserialize_stat(void *buf, u32 buflen, struct v9fs_stat *stat,
-		int extended)
-{
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-	unsigned char *p;
-
-	buf_init(bufp, buf, buflen);
-	p = bufp->p;
-	buf_get_stat(bufp, stat, extended);
-
-	if (buf_check_overflow(bufp))
-		return 0;
-	else
-		return bufp->p - p;
-}
-
-/**
- * deserialize_fcall - unmarshal a response
- * @buf: recieved buffer
- * @buflen: length of received buffer
- * @rcall: fcall structure to populate
- * @rcalllen: length of fcall structure to populate
- * @extended: non-zero if 9P2000.u
- *
- */
-
-int
-v9fs_deserialize_fcall(void *buf, u32 buflen, struct v9fs_fcall *rcall,
-		       int extended)
-{
-
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-	int i = 0;
-
-	buf_init(bufp, buf, buflen);
-
-	rcall->size = buf_get_int32(bufp);
-	rcall->id = buf_get_int8(bufp);
-	rcall->tag = buf_get_int16(bufp);
-
-	dprintk(DEBUG_CONV, "size %d id %d tag %d\n", rcall->size, rcall->id,
-		rcall->tag);
-
-	switch (rcall->id) {
-	default:
-		eprintk(KERN_ERR, "unknown message type: %d\n", rcall->id);
-		return -EPROTO;
-	case RVERSION:
-		rcall->params.rversion.msize = buf_get_int32(bufp);
-		buf_get_str(bufp, &rcall->params.rversion.version);
-		break;
-	case RFLUSH:
-		break;
-	case RATTACH:
-		rcall->params.rattach.qid.type = buf_get_int8(bufp);
-		rcall->params.rattach.qid.version = buf_get_int32(bufp);
-		rcall->params.rattach.qid.path = buf_get_int64(bufp);
-		break;
-	case RWALK:
-		rcall->params.rwalk.nwqid = buf_get_int16(bufp);
-		if (rcall->params.rwalk.nwqid > V9FS_MAXWELEM) {
-			eprintk(KERN_ERR, "Rwalk with more than %d qids: %d\n",
-				V9FS_MAXWELEM, rcall->params.rwalk.nwqid);
-			return -EPROTO;
-		}
-
-		for (i = 0; i < rcall->params.rwalk.nwqid; i++)
-			buf_get_qid(bufp, &rcall->params.rwalk.wqids[i]);
-		break;
-	case ROPEN:
-		buf_get_qid(bufp, &rcall->params.ropen.qid);
-		rcall->params.ropen.iounit = buf_get_int32(bufp);
-		break;
-	case RCREATE:
-		buf_get_qid(bufp, &rcall->params.rcreate.qid);
-		rcall->params.rcreate.iounit = buf_get_int32(bufp);
-		break;
-	case RREAD:
-		rcall->params.rread.count = buf_get_int32(bufp);
-		rcall->params.rread.data = bufp->p;
-		buf_check_size(bufp, rcall->params.rread.count);
-		break;
-	case RWRITE:
-		rcall->params.rwrite.count = buf_get_int32(bufp);
-		break;
-	case RCLUNK:
-		break;
-	case RREMOVE:
-		break;
-	case RSTAT:
-		buf_get_int16(bufp);
-		buf_get_stat(bufp, &rcall->params.rstat.stat, extended);
-		break;
-	case RWSTAT:
-		break;
-	case RERROR:
-		buf_get_str(bufp, &rcall->params.rerror.error);
-		if (extended)
-			rcall->params.rerror.errno = buf_get_int16(bufp);
-		break;
-	}
-
-	if (buf_check_overflow(bufp)) {
-		dprintk(DEBUG_ERROR, "buffer overflow\n");
-		return -EIO;
-	}
-
-	return bufp->p - bufp->sp;
-}
-
-static inline void v9fs_put_int8(struct cbuf *bufp, u8 val, u8 * p)
-{
-	*p = val;
-	buf_put_int8(bufp, val);
-}
-
-static inline void v9fs_put_int16(struct cbuf *bufp, u16 val, u16 * p)
-{
-	*p = val;
-	buf_put_int16(bufp, val);
-}
-
-static inline void v9fs_put_int32(struct cbuf *bufp, u32 val, u32 * p)
-{
-	*p = val;
-	buf_put_int32(bufp, val);
-}
-
-static inline void v9fs_put_int64(struct cbuf *bufp, u64 val, u64 * p)
-{
-	*p = val;
-	buf_put_int64(bufp, val);
-}
-
-static void
-v9fs_put_str(struct cbuf *bufp, char *data, struct v9fs_str *str)
-{
-	int len;
-	char *s;
-
-	if (data)
-		len = strlen(data);
-	else
-		len = 0;
-
-	s = buf_put_stringn(bufp, data, len);
-	if (str) {
-		str->len = len;
-		str->str = s;
-	}
-}
-
-static int
-v9fs_put_user_data(struct cbuf *bufp, const char __user * data, int count,
-		   unsigned char **pdata)
-{
-	*pdata = buf_alloc(bufp, count);
-	return copy_from_user(*pdata, data, count);
-}
-
-static void
-v9fs_put_wstat(struct cbuf *bufp, struct v9fs_wstat *wstat,
-	       struct v9fs_stat *stat, int statsz, int extended)
-{
-	v9fs_put_int16(bufp, statsz, &stat->size);
-	v9fs_put_int16(bufp, wstat->type, &stat->type);
-	v9fs_put_int32(bufp, wstat->dev, &stat->dev);
-	v9fs_put_int8(bufp, wstat->qid.type, &stat->qid.type);
-	v9fs_put_int32(bufp, wstat->qid.version, &stat->qid.version);
-	v9fs_put_int64(bufp, wstat->qid.path, &stat->qid.path);
-	v9fs_put_int32(bufp, wstat->mode, &stat->mode);
-	v9fs_put_int32(bufp, wstat->atime, &stat->atime);
-	v9fs_put_int32(bufp, wstat->mtime, &stat->mtime);
-	v9fs_put_int64(bufp, wstat->length, &stat->length);
-
-	v9fs_put_str(bufp, wstat->name, &stat->name);
-	v9fs_put_str(bufp, wstat->uid, &stat->uid);
-	v9fs_put_str(bufp, wstat->gid, &stat->gid);
-	v9fs_put_str(bufp, wstat->muid, &stat->muid);
-
-	if (extended) {
-		v9fs_put_str(bufp, wstat->extension, &stat->extension);
-		v9fs_put_int32(bufp, wstat->n_uid, &stat->n_uid);
-		v9fs_put_int32(bufp, wstat->n_gid, &stat->n_gid);
-		v9fs_put_int32(bufp, wstat->n_muid, &stat->n_muid);
-	}
-}
-
-static struct v9fs_fcall *
-v9fs_create_common(struct cbuf *bufp, u32 size, u8 id)
-{
-	struct v9fs_fcall *fc;
-
-	size += 4 + 1 + 2;	/* size[4] id[1] tag[2] */
-	fc = kmalloc(sizeof(struct v9fs_fcall) + size, GFP_KERNEL);
-	if (!fc)
-		return ERR_PTR(-ENOMEM);
-
-	fc->sdata = (char *)fc + sizeof(*fc);
-
-	buf_init(bufp, (char *)fc->sdata, size);
-	v9fs_put_int32(bufp, size, &fc->size);
-	v9fs_put_int8(bufp, id, &fc->id);
-	v9fs_put_int16(bufp, V9FS_NOTAG, &fc->tag);
-
-	return fc;
-}
-
-void v9fs_set_tag(struct v9fs_fcall *fc, u16 tag)
-{
-	fc->tag = tag;
-	*(__le16 *) (fc->sdata + 5) = cpu_to_le16(tag);
-}
-
-struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version)
-{
-	int size;
-	struct v9fs_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 4 + 2 + strlen(version);	/* msize[4] version[s] */
-	fc = v9fs_create_common(bufp, size, TVERSION);
-	if (IS_ERR(fc))
-		goto error;
-
-	v9fs_put_int32(bufp, msize, &fc->params.tversion.msize);
-	v9fs_put_str(bufp, version, &fc->params.tversion.version);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-      error:
-	return fc;
-}
-
-#if 0
-struct v9fs_fcall *v9fs_create_tauth(u32 afid, char *uname, char *aname)
-{
-	int size;
-	struct v9fs_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 4 + 2 + strlen(uname) + 2 + strlen(aname);	/* afid[4] uname[s] aname[s] */
-	fc = v9fs_create_common(bufp, size, TAUTH);
-	if (IS_ERR(fc))
-		goto error;
-
-	v9fs_put_int32(bufp, afid, &fc->params.tauth.afid);
-	v9fs_put_str(bufp, uname, &fc->params.tauth.uname);
-	v9fs_put_str(bufp, aname, &fc->params.tauth.aname);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-      error:
-	return fc;
-}
-#endif  /*  0  */
-
-struct v9fs_fcall *
-v9fs_create_tattach(u32 fid, u32 afid, char *uname, char *aname)
-{
-	int size;
-	struct v9fs_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 4 + 4 + 2 + strlen(uname) + 2 + strlen(aname);	/* fid[4] afid[4] uname[s] aname[s] */
-	fc = v9fs_create_common(bufp, size, TATTACH);
-	if (IS_ERR(fc))
-		goto error;
-
-	v9fs_put_int32(bufp, fid, &fc->params.tattach.fid);
-	v9fs_put_int32(bufp, afid, &fc->params.tattach.afid);
-	v9fs_put_str(bufp, uname, &fc->params.tattach.uname);
-	v9fs_put_str(bufp, aname, &fc->params.tattach.aname);
-
-      error:
-	return fc;
-}
-
-struct v9fs_fcall *v9fs_create_tflush(u16 oldtag)
-{
-	int size;
-	struct v9fs_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 2;		/* oldtag[2] */
-	fc = v9fs_create_common(bufp, size, TFLUSH);
-	if (IS_ERR(fc))
-		goto error;
-
-	v9fs_put_int16(bufp, oldtag, &fc->params.tflush.oldtag);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-      error:
-	return fc;
-}
-
-struct v9fs_fcall *v9fs_create_twalk(u32 fid, u32 newfid, u16 nwname,
-				     char **wnames)
-{
-	int i, size;
-	struct v9fs_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	if (nwname > V9FS_MAXWELEM) {
-		dprintk(DEBUG_ERROR, "nwname > %d\n", V9FS_MAXWELEM);
-		return NULL;
-	}
-
-	size = 4 + 4 + 2;	/* fid[4] newfid[4] nwname[2] ... */
-	for (i = 0; i < nwname; i++) {
-		size += 2 + strlen(wnames[i]);	/* wname[s] */
-	}
-
-	fc = v9fs_create_common(bufp, size, TWALK);
-	if (IS_ERR(fc))
-		goto error;
-
-	v9fs_put_int32(bufp, fid, &fc->params.twalk.fid);
-	v9fs_put_int32(bufp, newfid, &fc->params.twalk.newfid);
-	v9fs_put_int16(bufp, nwname, &fc->params.twalk.nwname);
-	for (i = 0; i < nwname; i++) {
-		v9fs_put_str(bufp, wnames[i], &fc->params.twalk.wnames[i]);
-	}
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-      error:
-	return fc;
-}
-
-struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode)
-{
-	int size;
-	struct v9fs_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 4 + 1;		/* fid[4] mode[1] */
-	fc = v9fs_create_common(bufp, size, TOPEN);
-	if (IS_ERR(fc))
-		goto error;
-
-	v9fs_put_int32(bufp, fid, &fc->params.topen.fid);
-	v9fs_put_int8(bufp, mode, &fc->params.topen.mode);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-      error:
-	return fc;
-}
-
-struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode,
-	char *extension, int extended)
-{
-	int size;
-	struct v9fs_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 4 + 2 + strlen(name) + 4 + 1;	/* fid[4] name[s] perm[4] mode[1] */
-	if (extended) {
-		size += 2 +			/* extension[s] */
-		    (extension == NULL ? 0 : strlen(extension));
-	}
-
-	fc = v9fs_create_common(bufp, size, TCREATE);
-	if (IS_ERR(fc))
-		goto error;
-
-	v9fs_put_int32(bufp, fid, &fc->params.tcreate.fid);
-	v9fs_put_str(bufp, name, &fc->params.tcreate.name);
-	v9fs_put_int32(bufp, perm, &fc->params.tcreate.perm);
-	v9fs_put_int8(bufp, mode, &fc->params.tcreate.mode);
-	if (extended)
-		v9fs_put_str(bufp, extension, &fc->params.tcreate.extension);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-      error:
-	return fc;
-}
-
-struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count)
-{
-	int size;
-	struct v9fs_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 4 + 8 + 4;	/* fid[4] offset[8] count[4] */
-	fc = v9fs_create_common(bufp, size, TREAD);
-	if (IS_ERR(fc))
-		goto error;
-
-	v9fs_put_int32(bufp, fid, &fc->params.tread.fid);
-	v9fs_put_int64(bufp, offset, &fc->params.tread.offset);
-	v9fs_put_int32(bufp, count, &fc->params.tread.count);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-      error:
-	return fc;
-}
-
-struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count,
-				      const char __user * data)
-{
-	int size, err;
-	struct v9fs_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 4 + 8 + 4 + count;	/* fid[4] offset[8] count[4] data[count] */
-	fc = v9fs_create_common(bufp, size, TWRITE);
-	if (IS_ERR(fc))
-		goto error;
-
-	v9fs_put_int32(bufp, fid, &fc->params.twrite.fid);
-	v9fs_put_int64(bufp, offset, &fc->params.twrite.offset);
-	v9fs_put_int32(bufp, count, &fc->params.twrite.count);
-	err = v9fs_put_user_data(bufp, data, count, &fc->params.twrite.data);
-	if (err) {
-		kfree(fc);
-		fc = ERR_PTR(err);
-	}
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-      error:
-	return fc;
-}
-
-struct v9fs_fcall *v9fs_create_tclunk(u32 fid)
-{
-	int size;
-	struct v9fs_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 4;		/* fid[4] */
-	fc = v9fs_create_common(bufp, size, TCLUNK);
-	if (IS_ERR(fc))
-		goto error;
-
-	v9fs_put_int32(bufp, fid, &fc->params.tclunk.fid);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-      error:
-	return fc;
-}
-
-struct v9fs_fcall *v9fs_create_tremove(u32 fid)
-{
-	int size;
-	struct v9fs_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 4;		/* fid[4] */
-	fc = v9fs_create_common(bufp, size, TREMOVE);
-	if (IS_ERR(fc))
-		goto error;
-
-	v9fs_put_int32(bufp, fid, &fc->params.tremove.fid);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-      error:
-	return fc;
-}
-
-struct v9fs_fcall *v9fs_create_tstat(u32 fid)
-{
-	int size;
-	struct v9fs_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 4;		/* fid[4] */
-	fc = v9fs_create_common(bufp, size, TSTAT);
-	if (IS_ERR(fc))
-		goto error;
-
-	v9fs_put_int32(bufp, fid, &fc->params.tstat.fid);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-      error:
-	return fc;
-}
-
-struct v9fs_fcall *v9fs_create_twstat(u32 fid, struct v9fs_wstat *wstat,
-				      int extended)
-{
-	int size, statsz;
-	struct v9fs_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	statsz = v9fs_size_wstat(wstat, extended);
-	size = 4 + 2 + 2 + statsz;	/* fid[4] stat[n] */
-	fc = v9fs_create_common(bufp, size, TWSTAT);
-	if (IS_ERR(fc))
-		goto error;
-
-	v9fs_put_int32(bufp, fid, &fc->params.twstat.fid);
-	buf_put_int16(bufp, statsz + 2);
-	v9fs_put_wstat(bufp, wstat, &fc->params.twstat.stat, statsz, extended);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-      error:
-	return fc;
-}
diff --git a/fs/9p/conv.h b/fs/9p/conv.h
deleted file mode 100644
index dd5b6b1b610..00000000000
--- a/fs/9p/conv.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * linux/fs/9p/conv.h
- *
- * 9P protocol conversion definitions.
- *
- *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
- *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-
-int v9fs_deserialize_stat(void *buf, u32 buflen, struct v9fs_stat *stat,
-	int extended);
-int v9fs_deserialize_fcall(void *buf, u32 buflen, struct v9fs_fcall *rcall,
-	int extended);
-
-void v9fs_set_tag(struct v9fs_fcall *fc, u16 tag);
-
-struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version);
-struct v9fs_fcall *v9fs_create_tattach(u32 fid, u32 afid, char *uname,
-	char *aname);
-struct v9fs_fcall *v9fs_create_tflush(u16 oldtag);
-struct v9fs_fcall *v9fs_create_twalk(u32 fid, u32 newfid, u16 nwname,
-	char **wnames);
-struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode);
-struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode,
-	char *extension, int extended);
-struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count);
-struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count,
-	const char __user *data);
-struct v9fs_fcall *v9fs_create_tclunk(u32 fid);
-struct v9fs_fcall *v9fs_create_tremove(u32 fid);
-struct v9fs_fcall *v9fs_create_tstat(u32 fid);
-struct v9fs_fcall *v9fs_create_twstat(u32 fid, struct v9fs_wstat *wstat,
-	int extended);
diff --git a/fs/9p/debug.h b/fs/9p/debug.h
deleted file mode 100644
index 4228c0bb3c3..00000000000
--- a/fs/9p/debug.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- *  linux/fs/9p/debug.h - V9FS Debug Definitions
- *
- *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-
-#define DEBUG_ERROR		(1<<0)
-#define DEBUG_CURRENT		(1<<1)
-#define DEBUG_9P	        (1<<2)
-#define DEBUG_VFS	        (1<<3)
-#define DEBUG_CONV		(1<<4)
-#define DEBUG_MUX		(1<<5)
-#define DEBUG_TRANS		(1<<6)
-#define DEBUG_SLABS	      	(1<<7)
-#define DEBUG_FCALL		(1<<8)
-
-#define DEBUG_DUMP_PKT		0
-
-extern int v9fs_debug_level;
-
-#define dprintk(level, format, arg...) \
-do {  \
-	if((v9fs_debug_level & level)==level) \
-		printk(KERN_NOTICE "-- %s (%d): " \
-		format , __FUNCTION__, current->pid , ## arg); \
-} while(0)
-
-#define eprintk(level, format, arg...) \
-do { \
-	printk(level "v9fs: %s (%d): " \
-		format , __FUNCTION__, current->pid , ## arg); \
-} while(0)
-
-#if DEBUG_DUMP_PKT
-static inline void dump_data(const unsigned char *data, unsigned int datalen)
-{
-	int i, n;
-	char buf[5*8];
-
-	n = 0;
-	i = 0;
-	while (i < datalen) {
-		n += snprintf(buf+n, sizeof(buf)-n, "%02x", data[i++]);
-		if (i%4 == 0)
-			n += snprintf(buf+n, sizeof(buf)-n, " ");
-
-		if (i%16 == 0) {
-			dprintk(DEBUG_ERROR, "%s\n", buf);
-			n = 0;
-		}
-	}
-
-	dprintk(DEBUG_ERROR, "%s\n", buf);
-}
-#else				/* DEBUG_DUMP_PKT */
-static inline void dump_data(const unsigned char *data, unsigned int datalen)
-{
-
-}
-#endif				/* DEBUG_DUMP_PKT */
diff --git a/fs/9p/error.c b/fs/9p/error.c
deleted file mode 100644
index 0d7fa4e0881..00000000000
--- a/fs/9p/error.c
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * linux/fs/9p/error.c
- *
- * Error string handling
- *
- * Plan 9 uses error strings, Unix uses error numbers.  These functions
- * try to help manage that and provide for dynamically adding error
- * mappings.
- *
- *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-
-#include <linux/module.h>
-
-#include <linux/list.h>
-#include <linux/jhash.h>
-
-#include "debug.h"
-#include "error.h"
-
-/**
- * v9fs_error_init - preload
- * @errstr: error string
- *
- */
-
-int v9fs_error_init(void)
-{
-	struct errormap *c;
-	int bucket;
-
-	/* initialize hash table */
-	for (bucket = 0; bucket < ERRHASHSZ; bucket++)
-		INIT_HLIST_HEAD(&hash_errmap[bucket]);
-
-	/* load initial error map into hash table */
-	for (c = errmap; c->name != NULL; c++) {
-		c->namelen = strlen(c->name);
-		bucket = jhash(c->name, c->namelen, 0) % ERRHASHSZ;
-		INIT_HLIST_NODE(&c->list);
-		hlist_add_head(&c->list, &hash_errmap[bucket]);
-	}
-
-	return 1;
-}
-
-/**
- * errstr2errno - convert error string to error number
- * @errstr: error string
- *
- */
-
-int v9fs_errstr2errno(char *errstr, int len)
-{
-	int errno = 0;
-	struct hlist_node *p = NULL;
-	struct errormap *c = NULL;
-	int bucket = jhash(errstr, len, 0) % ERRHASHSZ;
-
-	hlist_for_each_entry(c, p, &hash_errmap[bucket], list) {
-		if (c->namelen==len && !memcmp(c->name, errstr, len)) {
-			errno = c->val;
-			break;
-		}
-	}
-
-	if (errno == 0) {
-		/* TODO: if error isn't found, add it dynamically */
-		errstr[len] = 0;
-		printk(KERN_ERR "%s: errstr :%s: not found\n", __FUNCTION__,
-		       errstr);
-		errno = 1;
-	}
-
-	return -errno;
-}
diff --git a/fs/9p/error.h b/fs/9p/error.h
deleted file mode 100644
index 5f3ca522b31..00000000000
--- a/fs/9p/error.h
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * linux/fs/9p/error.h
- *
- * Huge Nasty Error Table
- *
- * Plan 9 uses error strings, Unix uses error numbers.  This table tries to
- * match UNIX strings and Plan 9 strings to unix error numbers.  It is used
- * to preload the dynamic error table which can also track user-specific error
- * strings.
- *
- *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-
-#include <linux/errno.h>
-#include <asm/errno.h>
-
-struct errormap {
-	char *name;
-	int val;
-
-	int namelen;
-	struct hlist_node list;
-};
-
-#define ERRHASHSZ		32
-static struct hlist_head hash_errmap[ERRHASHSZ];
-
-/* FixMe - reduce to a reasonable size */
-static struct errormap errmap[] = {
-	{"Operation not permitted", EPERM},
-	{"wstat prohibited", EPERM},
-	{"No such file or directory", ENOENT},
-	{"directory entry not found", ENOENT},
-	{"file not found", ENOENT},
-	{"Interrupted system call", EINTR},
-	{"Input/output error", EIO},
-	{"No such device or address", ENXIO},
-	{"Argument list too long", E2BIG},
-	{"Bad file descriptor", EBADF},
-	{"Resource temporarily unavailable", EAGAIN},
-	{"Cannot allocate memory", ENOMEM},
-	{"Permission denied", EACCES},
-	{"Bad address", EFAULT},
-	{"Block device required", ENOTBLK},
-	{"Device or resource busy", EBUSY},
-	{"File exists", EEXIST},
-	{"Invalid cross-device link", EXDEV},
-	{"No such device", ENODEV},
-	{"Not a directory", ENOTDIR},
-	{"Is a directory", EISDIR},
-	{"Invalid argument", EINVAL},
-	{"Too many open files in system", ENFILE},
-	{"Too many open files", EMFILE},
-	{"Text file busy", ETXTBSY},
-	{"File too large", EFBIG},
-	{"No space left on device", ENOSPC},
-	{"Illegal seek", ESPIPE},
-	{"Read-only file system", EROFS},
-	{"Too many links", EMLINK},
-	{"Broken pipe", EPIPE},
-	{"Numerical argument out of domain", EDOM},
-	{"Numerical result out of range", ERANGE},
-	{"Resource deadlock avoided", EDEADLK},
-	{"File name too long", ENAMETOOLONG},
-	{"No locks available", ENOLCK},
-	{"Function not implemented", ENOSYS},
-	{"Directory not empty", ENOTEMPTY},
-	{"Too many levels of symbolic links", ELOOP},
-	{"No message of desired type", ENOMSG},
-	{"Identifier removed", EIDRM},
-	{"No data available", ENODATA},
-	{"Machine is not on the network", ENONET},
-	{"Package not installed", ENOPKG},
-	{"Object is remote", EREMOTE},
-	{"Link has been severed", ENOLINK},
-	{"Communication error on send", ECOMM},
-	{"Protocol error", EPROTO},
-	{"Bad message", EBADMSG},
-	{"File descriptor in bad state", EBADFD},
-	{"Streams pipe error", ESTRPIPE},
-	{"Too many users", EUSERS},
-	{"Socket operation on non-socket", ENOTSOCK},
-	{"Message too long", EMSGSIZE},
-	{"Protocol not available", ENOPROTOOPT},
-	{"Protocol not supported", EPROTONOSUPPORT},
-	{"Socket type not supported", ESOCKTNOSUPPORT},
-	{"Operation not supported", EOPNOTSUPP},
-	{"Protocol family not supported", EPFNOSUPPORT},
-	{"Network is down", ENETDOWN},
-	{"Network is unreachable", ENETUNREACH},
-	{"Network dropped connection on reset", ENETRESET},
-	{"Software caused connection abort", ECONNABORTED},
-	{"Connection reset by peer", ECONNRESET},
-	{"No buffer space available", ENOBUFS},
-	{"Transport endpoint is already connected", EISCONN},
-	{"Transport endpoint is not connected", ENOTCONN},
-	{"Cannot send after transport endpoint shutdown", ESHUTDOWN},
-	{"Connection timed out", ETIMEDOUT},
-	{"Connection refused", ECONNREFUSED},
-	{"Host is down", EHOSTDOWN},
-	{"No route to host", EHOSTUNREACH},
-	{"Operation already in progress", EALREADY},
-	{"Operation now in progress", EINPROGRESS},
-	{"Is a named type file", EISNAM},
-	{"Remote I/O error", EREMOTEIO},
-	{"Disk quota exceeded", EDQUOT},
-/* errors from fossil, vacfs, and u9fs */
-	{"fid unknown or out of range", EBADF},
-	{"permission denied", EACCES},
-	{"file does not exist", ENOENT},
-	{"authentication failed", ECONNREFUSED},
-	{"bad offset in directory read", ESPIPE},
-	{"bad use of fid", EBADF},
-	{"wstat can't convert between files and directories", EPERM},
-	{"directory is not empty", ENOTEMPTY},
-	{"file exists", EEXIST},
-	{"file already exists", EEXIST},
-	{"file or directory already exists", EEXIST},
-	{"fid already in use", EBADF},
-	{"file in use", ETXTBSY},
-	{"i/o error", EIO},
-	{"file already open for I/O", ETXTBSY},
-	{"illegal mode", EINVAL},
-	{"illegal name", ENAMETOOLONG},
-	{"not a directory", ENOTDIR},
-	{"not a member of proposed group", EPERM},
-	{"not owner", EACCES},
-	{"only owner can change group in wstat", EACCES},
-	{"read only file system", EROFS},
-	{"no access to special file", EPERM},
-	{"i/o count too large", EIO},
-	{"unknown group", EINVAL},
-	{"unknown user", EINVAL},
-	{"bogus wstat buffer", EPROTO},
-	{"exclusive use file already open", EAGAIN},
-	{"corrupted directory entry", EIO},
-	{"corrupted file entry", EIO},
-	{"corrupted block label", EIO},
-	{"corrupted meta data", EIO},
-	{"illegal offset", EINVAL},
-	{"illegal path element", ENOENT},
-	{"root of file system is corrupted", EIO},
-	{"corrupted super block", EIO},
-	{"protocol botch", EPROTO},
-	{"file system is full", ENOSPC},
-	{"file is in use", EAGAIN},
-	{"directory entry is not allocated", ENOENT},
-	{"file is read only", EROFS},
-	{"file has been removed", EIDRM},
-	{"only support truncation to zero length", EPERM},
-	{"cannot remove root", EPERM},
-	{"file too big", EFBIG},
-	{"venti i/o error", EIO},
-	/* these are not errors */
-	{"u9fs rhostsauth: no authentication required", 0},
-	{"u9fs authnone: no authentication required", 0},
-	{NULL, -1}
-};
-
-extern int v9fs_error_init(void);
diff --git a/fs/9p/fcall.c b/fs/9p/fcall.c
deleted file mode 100644
index dc336a67592..00000000000
--- a/fs/9p/fcall.c
+++ /dev/null
@@ -1,427 +0,0 @@
-/*
- *  linux/fs/9p/fcall.c
- *
- *  This file contains functions to perform synchronous 9P calls
- *
- *  Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net>
- *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/idr.h>
-
-#include "debug.h"
-#include "v9fs.h"
-#include "9p.h"
-#include "conv.h"
-#include "mux.h"
-
-/**
- * v9fs_t_version - negotiate protocol parameters with sever
- * @v9ses: 9P2000 session information
- * @msize: requested max size packet
- * @version: requested version.extension string
- * @fcall: pointer to response fcall pointer
- *
- */
-
-int
-v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
-	       char *version, struct v9fs_fcall **rcp)
-{
-	int ret;
-	struct v9fs_fcall *tc;
-
-	dprintk(DEBUG_9P, "msize: %d version: %s\n", msize, version);
-	tc = v9fs_create_tversion(msize, version);
-
-	if (!IS_ERR(tc)) {
-		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
-		kfree(tc);
-	} else
-		ret = PTR_ERR(tc);
-
-	return ret;
-}
-
-/**
- * v9fs_t_attach - mount the server
- * @v9ses: 9P2000 session information
- * @uname: user name doing the attach
- * @aname: remote name being attached to
- * @fid: mount fid to attatch to root node
- * @afid: authentication fid (in this case result key)
- * @fcall: pointer to response fcall pointer
- *
- */
-
-int
-v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
-	      u32 fid, u32 afid, struct v9fs_fcall **rcp)
-{
-	int ret;
-	struct v9fs_fcall* tc;
-
-	dprintk(DEBUG_9P, "uname '%s' aname '%s' fid %d afid %d\n", uname,
-		aname, fid, afid);
-
-	tc = v9fs_create_tattach(fid, afid, uname, aname);
-	if (!IS_ERR(tc)) {
-		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
-		kfree(tc);
-	} else
-		ret = PTR_ERR(tc);
-
-	return ret;
-}
-
-static void v9fs_t_clunk_cb(void *a, struct v9fs_fcall *tc,
-	struct v9fs_fcall *rc, int err)
-{
-	int fid, id;
-	struct v9fs_session_info *v9ses;
-
-	id = 0;
-	fid = tc->params.tclunk.fid;
-	if (rc)
-		id = rc->id;
-
-	kfree(tc);
-	kfree(rc);
-	if (id == RCLUNK) {
-		v9ses = a;
-		v9fs_put_idpool(fid, &v9ses->fidpool);
-	}
-}
-
-/**
- * v9fs_t_clunk - release a fid (finish a transaction)
- * @v9ses: 9P2000 session information
- * @fid: fid to release
- * @fcall: pointer to response fcall pointer
- *
- */
-
-int
-v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid)
-{
-	int ret;
-	struct v9fs_fcall *tc, *rc;
-
-	dprintk(DEBUG_9P, "fid %d\n", fid);
-
-	rc = NULL;
-	tc = v9fs_create_tclunk(fid);
-	if (!IS_ERR(tc))
-		ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
-	else
-		ret = PTR_ERR(tc);
-
-	if (ret)
-		dprintk(DEBUG_ERROR, "failed fid %d err %d\n", fid, ret);
-
-	v9fs_t_clunk_cb(v9ses, tc, rc, ret);
-	return ret;
-}
-
-#if 0
-/**
- * v9fs_v9fs_t_flush - flush a pending transaction
- * @v9ses: 9P2000 session information
- * @tag: tag to release
- *
- */
-int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag)
-{
-	int ret;
-	struct v9fs_fcall *tc;
-
-	dprintk(DEBUG_9P, "oldtag %d\n", oldtag);
-
-	tc = v9fs_create_tflush(oldtag);
-	if (!IS_ERR(tc)) {
-		ret = v9fs_mux_rpc(v9ses->mux, tc, NULL);
-		kfree(tc);
-	} else
-		ret = PTR_ERR(tc);
-
-	return ret;
-}
-#endif
-
-/**
- * v9fs_t_stat - read a file's meta-data
- * @v9ses: 9P2000 session information
- * @fid: fid pointing to file or directory to get info about
- * @fcall: pointer to response fcall
- *
- */
-
-int
-v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **rcp)
-{
-	int ret;
-	struct v9fs_fcall *tc;
-
-	dprintk(DEBUG_9P, "fid %d\n", fid);
-
-	ret = -ENOMEM;
-	tc = v9fs_create_tstat(fid);
-	if (!IS_ERR(tc)) {
-		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
-		kfree(tc);
-	} else
-		ret = PTR_ERR(tc);
-
-	return ret;
-}
-
-/**
- * v9fs_t_wstat - write a file's meta-data
- * @v9ses: 9P2000 session information
- * @fid: fid pointing to file or directory to write info about
- * @stat: metadata
- * @fcall: pointer to response fcall
- *
- */
-
-int
-v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
-	     struct v9fs_wstat *wstat, struct v9fs_fcall **rcp)
-{
-	int ret;
-	struct v9fs_fcall *tc;
-
-	dprintk(DEBUG_9P, "fid %d\n", fid);
-
-	tc = v9fs_create_twstat(fid, wstat, v9ses->extended);
-	if (!IS_ERR(tc)) {
-		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
-		kfree(tc);
-	} else
-		ret = PTR_ERR(tc);
-
-	return ret;
-}
-
-/**
- * v9fs_t_walk - walk a fid to a new file or directory
- * @v9ses: 9P2000 session information
- * @fid: fid to walk
- * @newfid: new fid (for clone operations)
- * @name: path to walk fid to
- * @fcall: pointer to response fcall
- *
- */
-
-/* TODO: support multiple walk */
-
-int
-v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
-	    char *name, struct v9fs_fcall **rcp)
-{
-	int ret;
-	struct v9fs_fcall *tc;
-	int nwname;
-
-	dprintk(DEBUG_9P, "fid %d newfid %d wname '%s'\n", fid, newfid, name);
-
-	if (name)
-		nwname = 1;
-	else
-		nwname = 0;
-
-	tc = v9fs_create_twalk(fid, newfid, nwname, &name);
-	if (!IS_ERR(tc)) {
-		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
-		kfree(tc);
-	} else
-		ret = PTR_ERR(tc);
-
-	return ret;
-}
-
-/**
- * v9fs_t_open - open a file
- *
- * @v9ses - 9P2000 session information
- * @fid - fid to open
- * @mode - mode to open file (R, RW, etc)
- * @fcall - pointer to response fcall
- *
- */
-
-int
-v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
-	    struct v9fs_fcall **rcp)
-{
-	int ret;
-	struct v9fs_fcall *tc;
-
-	dprintk(DEBUG_9P, "fid %d mode %d\n", fid, mode);
-
-	tc = v9fs_create_topen(fid, mode);
-	if (!IS_ERR(tc)) {
-		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
-		kfree(tc);
-	} else
-		ret = PTR_ERR(tc);
-
-	return ret;
-}
-
-/**
- * v9fs_t_remove - remove a file or directory
- * @v9ses: 9P2000 session information
- * @fid: fid to remove
- * @fcall: pointer to response fcall
- *
- */
-
-int
-v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
-	      struct v9fs_fcall **rcp)
-{
-	int ret;
-	struct v9fs_fcall *tc;
-
-	dprintk(DEBUG_9P, "fid %d\n", fid);
-
-	tc = v9fs_create_tremove(fid);
-	if (!IS_ERR(tc)) {
-		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
-		kfree(tc);
-	} else
-		ret = PTR_ERR(tc);
-
-	return ret;
-}
-
-/**
- * v9fs_t_create - create a file or directory
- * @v9ses: 9P2000 session information
- * @fid: fid to create
- * @name: name of the file or directory to create
- * @perm: permissions to create with
- * @mode: mode to open file (R, RW, etc)
- * @fcall: pointer to response fcall
- *
- */
-
-int
-v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name, u32 perm,
-	u8 mode, char *extension, struct v9fs_fcall **rcp)
-{
-	int ret;
-	struct v9fs_fcall *tc;
-
-	dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n",
-		fid, name, perm, mode);
-
-	tc = v9fs_create_tcreate(fid, name, perm, mode, extension,
-		v9ses->extended);
-
-	if (!IS_ERR(tc)) {
-		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
-		kfree(tc);
-	} else
-		ret = PTR_ERR(tc);
-
-	return ret;
-}
-
-/**
- * v9fs_t_read - read data
- * @v9ses: 9P2000 session information
- * @fid: fid to read from
- * @offset: offset to start read at
- * @count: how many bytes to read
- * @fcall: pointer to response fcall (with data)
- *
- */
-
-int
-v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
-	    u32 count, struct v9fs_fcall **rcp)
-{
-	int ret;
-	struct v9fs_fcall *tc, *rc;
-
-	dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid,
-		(long long unsigned) offset, count);
-
-	tc = v9fs_create_tread(fid, offset, count);
-	if (!IS_ERR(tc)) {
-		ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
-		if (!ret)
-			ret = rc->params.rread.count;
-		if (rcp)
-			*rcp = rc;
-		else
-			kfree(rc);
-
-		kfree(tc);
-	} else
-		ret = PTR_ERR(tc);
-
-	return ret;
-}
-
-/**
- * v9fs_t_write - write data
- * @v9ses: 9P2000 session information
- * @fid: fid to write to
- * @offset: offset to start write at
- * @count: how many bytes to write
- * @fcall: pointer to response fcall
- *
- */
-
-int
-v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset, u32 count,
-	const char __user *data, struct v9fs_fcall **rcp)
-{
-	int ret;
-	struct v9fs_fcall *tc, *rc;
-
-	dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid,
-		(long long unsigned) offset, count);
-
-	tc = v9fs_create_twrite(fid, offset, count, data);
-	if (!IS_ERR(tc)) {
-		ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
-
-		if (!ret)
-			ret = rc->params.rwrite.count;
-		if (rcp)
-			*rcp = rc;
-		else
-			kfree(rc);
-
-		kfree(tc);
-	} else
-		ret = PTR_ERR(tc);
-
-	return ret;
-}
-
diff --git a/fs/9p/fcprint.c b/fs/9p/fcprint.c
deleted file mode 100644
index 34b96114a28..00000000000
--- a/fs/9p/fcprint.c
+++ /dev/null
@@ -1,345 +0,0 @@
-/*
- *  linux/fs/9p/fcprint.c
- *
- *  Print 9P call.
- *
- *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/idr.h>
-
-#include "debug.h"
-#include "v9fs.h"
-#include "9p.h"
-#include "mux.h"
-
-static int
-v9fs_printqid(char *buf, int buflen, struct v9fs_qid *q)
-{
-	int n;
-	char b[10];
-
-	n = 0;
-	if (q->type & V9FS_QTDIR)
-		b[n++] = 'd';
-	if (q->type & V9FS_QTAPPEND)
-		b[n++] = 'a';
-	if (q->type & V9FS_QTAUTH)
-		b[n++] = 'A';
-	if (q->type & V9FS_QTEXCL)
-		b[n++] = 'l';
-	if (q->type & V9FS_QTTMP)
-		b[n++] = 't';
-	if (q->type & V9FS_QTSYMLINK)
-		b[n++] = 'L';
-	b[n] = '\0';
-
-	return scnprintf(buf, buflen, "(%.16llx %x %s)", (long long int) q->path,
-		q->version, b);
-}
-
-static int
-v9fs_printperm(char *buf, int buflen, int perm)
-{
-	int n;
-	char b[15];
-
-	n = 0;
-	if (perm & V9FS_DMDIR)
-		b[n++] = 'd';
-	if (perm & V9FS_DMAPPEND)
-		b[n++] = 'a';
-	if (perm & V9FS_DMAUTH)
-		b[n++] = 'A';
-	if (perm & V9FS_DMEXCL)
-		b[n++] = 'l';
-	if (perm & V9FS_DMTMP)
-		b[n++] = 't';
-	if (perm & V9FS_DMDEVICE)
-		b[n++] = 'D';
-	if (perm & V9FS_DMSOCKET)
-		b[n++] = 'S';
-	if (perm & V9FS_DMNAMEDPIPE)
-		b[n++] = 'P';
-	if (perm & V9FS_DMSYMLINK)
-		b[n++] = 'L';
-	b[n] = '\0';
-
-	return scnprintf(buf, buflen, "%s%03o", b, perm&077);
-}
-
-static int
-v9fs_printstat(char *buf, int buflen, struct v9fs_stat *st, int extended)
-{
-	int n;
-
-	n = scnprintf(buf, buflen, "'%.*s' '%.*s'", st->name.len,
-		st->name.str, st->uid.len, st->uid.str);
-	if (extended)
-		n += scnprintf(buf+n, buflen-n, "(%d)", st->n_uid);
-
-	n += scnprintf(buf+n, buflen-n, " '%.*s'", st->gid.len, st->gid.str);
-	if (extended)
-		n += scnprintf(buf+n, buflen-n, "(%d)", st->n_gid);
-
-	n += scnprintf(buf+n, buflen-n, " '%.*s'", st->muid.len, st->muid.str);
-	if (extended)
-		n += scnprintf(buf+n, buflen-n, "(%d)", st->n_muid);
-
-	n += scnprintf(buf+n, buflen-n, " q ");
-	n += v9fs_printqid(buf+n, buflen-n, &st->qid);
-	n += scnprintf(buf+n, buflen-n, " m ");
-	n += v9fs_printperm(buf+n, buflen-n, st->mode);
-	n += scnprintf(buf+n, buflen-n, " at %d mt %d l %lld",
-		st->atime, st->mtime, (long long int) st->length);
-
-	if (extended)
-		n += scnprintf(buf+n, buflen-n, " ext '%.*s'",
-			st->extension.len, st->extension.str);
-
-	return n;
-}
-
-static int
-v9fs_dumpdata(char *buf, int buflen, u8 *data, int datalen)
-{
-	int i, n;
-
-	i = n = 0;
-	while (i < datalen) {
-		n += scnprintf(buf + n, buflen - n, "%02x", data[i]);
-		if (i%4 == 3)
-			n += scnprintf(buf + n, buflen - n, " ");
-		if (i%32 == 31)
-			n += scnprintf(buf + n, buflen - n, "\n");
-
-		i++;
-	}
-	n += scnprintf(buf + n, buflen - n, "\n");
-
-	return n;
-}
-
-static int
-v9fs_printdata(char *buf, int buflen, u8 *data, int datalen)
-{
-	return v9fs_dumpdata(buf, buflen, data, datalen<16?datalen:16);
-}
-
-int
-v9fs_printfcall(char *buf, int buflen, struct v9fs_fcall *fc, int extended)
-{
-	int i, ret, type, tag;
-
-	if (!fc)
-		return scnprintf(buf, buflen, "<NULL>");
-
-	type = fc->id;
-	tag = fc->tag;
-
-	ret = 0;
-	switch (type) {
-	case TVERSION:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Tversion tag %u msize %u version '%.*s'", tag,
-			fc->params.tversion.msize, fc->params.tversion.version.len,
-			fc->params.tversion.version.str);
-		break;
-
-	case RVERSION:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Rversion tag %u msize %u version '%.*s'", tag,
-			fc->params.rversion.msize, fc->params.rversion.version.len,
-			fc->params.rversion.version.str);
-		break;
-
-	case TAUTH:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Tauth tag %u afid %d uname '%.*s' aname '%.*s'", tag,
-			fc->params.tauth.afid, fc->params.tauth.uname.len,
-			fc->params.tauth.uname.str, fc->params.tauth.aname.len,
-			fc->params.tauth.aname.str);
-		break;
-
-	case RAUTH:
-		ret += scnprintf(buf+ret, buflen-ret, "Rauth tag %u qid ", tag);
-		v9fs_printqid(buf+ret, buflen-ret, &fc->params.rauth.qid);
-		break;
-
-	case TATTACH:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Tattach tag %u fid %d afid %d uname '%.*s' aname '%.*s'",
-			tag, fc->params.tattach.fid, fc->params.tattach.afid,
-			fc->params.tattach.uname.len, fc->params.tattach.uname.str,
-			fc->params.tattach.aname.len, fc->params.tattach.aname.str);
-		break;
-
-	case RATTACH:
-		ret += scnprintf(buf+ret, buflen-ret, "Rattach tag %u qid ", tag);
-		v9fs_printqid(buf+ret, buflen-ret, &fc->params.rattach.qid);
-		break;
-
-	case RERROR:
-		ret += scnprintf(buf+ret, buflen-ret, "Rerror tag %u ename '%.*s'",
-			tag, fc->params.rerror.error.len,
-			fc->params.rerror.error.str);
-		if (extended)
-			ret += scnprintf(buf+ret, buflen-ret, " ecode %d\n",
-				fc->params.rerror.errno);
-		break;
-
-	case TFLUSH:
-		ret += scnprintf(buf+ret, buflen-ret, "Tflush tag %u oldtag %u",
-			tag, fc->params.tflush.oldtag);
-		break;
-
-	case RFLUSH:
-		ret += scnprintf(buf+ret, buflen-ret, "Rflush tag %u", tag);
-		break;
-
-	case TWALK:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Twalk tag %u fid %d newfid %d nwname %d", tag,
-			fc->params.twalk.fid, fc->params.twalk.newfid,
-			fc->params.twalk.nwname);
-		for(i = 0; i < fc->params.twalk.nwname; i++)
-			ret += scnprintf(buf+ret, buflen-ret," '%.*s'",
-				fc->params.twalk.wnames[i].len,
-				fc->params.twalk.wnames[i].str);
-		break;
-
-	case RWALK:
-		ret += scnprintf(buf+ret, buflen-ret, "Rwalk tag %u nwqid %d",
-			tag, fc->params.rwalk.nwqid);
-		for(i = 0; i < fc->params.rwalk.nwqid; i++)
-			ret += v9fs_printqid(buf+ret, buflen-ret,
-				&fc->params.rwalk.wqids[i]);
-		break;
-
-	case TOPEN:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Topen tag %u fid %d mode %d", tag,
-			fc->params.topen.fid, fc->params.topen.mode);
-		break;
-
-	case ROPEN:
-		ret += scnprintf(buf+ret, buflen-ret, "Ropen tag %u", tag);
-		ret += v9fs_printqid(buf+ret, buflen-ret, &fc->params.ropen.qid);
-		ret += scnprintf(buf+ret, buflen-ret," iounit %d",
-			fc->params.ropen.iounit);
-		break;
-
-	case TCREATE:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Tcreate tag %u fid %d name '%.*s' perm ", tag,
-			fc->params.tcreate.fid, fc->params.tcreate.name.len,
-			fc->params.tcreate.name.str);
-
-		ret += v9fs_printperm(buf+ret, buflen-ret, fc->params.tcreate.perm);
-		ret += scnprintf(buf+ret, buflen-ret, " mode %d",
-			fc->params.tcreate.mode);
-		break;
-
-	case RCREATE:
-		ret += scnprintf(buf+ret, buflen-ret, "Rcreate tag %u", tag);
-		ret += v9fs_printqid(buf+ret, buflen-ret, &fc->params.rcreate.qid);
-		ret += scnprintf(buf+ret, buflen-ret, " iounit %d",
-			fc->params.rcreate.iounit);
-		break;
-
-	case TREAD:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Tread tag %u fid %d offset %lld count %u", tag,
-			fc->params.tread.fid,
-			(long long int) fc->params.tread.offset,
-			fc->params.tread.count);
-		break;
-
-	case RREAD:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Rread tag %u count %u data ", tag,
-			fc->params.rread.count);
-		ret += v9fs_printdata(buf+ret, buflen-ret, fc->params.rread.data,
-			fc->params.rread.count);
-		break;
-
-	case TWRITE:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Twrite tag %u fid %d offset %lld count %u data ",
-			tag, fc->params.twrite.fid,
-			(long long int) fc->params.twrite.offset,
-			fc->params.twrite.count);
-		ret += v9fs_printdata(buf+ret, buflen-ret, fc->params.twrite.data,
-			fc->params.twrite.count);
-		break;
-
-	case RWRITE:
-		ret += scnprintf(buf+ret, buflen-ret, "Rwrite tag %u count %u",
-			tag, fc->params.rwrite.count);
-		break;
-
-	case TCLUNK:
-		ret += scnprintf(buf+ret, buflen-ret, "Tclunk tag %u fid %d",
-			tag, fc->params.tclunk.fid);
-		break;
-
-	case RCLUNK:
-		ret += scnprintf(buf+ret, buflen-ret, "Rclunk tag %u", tag);
-		break;
-
-	case TREMOVE:
-		ret += scnprintf(buf+ret, buflen-ret, "Tremove tag %u fid %d",
-			tag, fc->params.tremove.fid);
-		break;
-
-	case RREMOVE:
-		ret += scnprintf(buf+ret, buflen-ret, "Rremove tag %u", tag);
-		break;
-
-	case TSTAT:
-		ret += scnprintf(buf+ret, buflen-ret, "Tstat tag %u fid %d",
-			tag, fc->params.tstat.fid);
-		break;
-
-	case RSTAT:
-		ret += scnprintf(buf+ret, buflen-ret, "Rstat tag %u ", tag);
-		ret += v9fs_printstat(buf+ret, buflen-ret, &fc->params.rstat.stat,
-			extended);
-		break;
-
-	case TWSTAT:
-		ret += scnprintf(buf+ret, buflen-ret, "Twstat tag %u fid %d ",
-			tag, fc->params.twstat.fid);
-		ret += v9fs_printstat(buf+ret, buflen-ret, &fc->params.twstat.stat,
-			extended);
-		break;
-
-	case RWSTAT:
-		ret += scnprintf(buf+ret, buflen-ret, "Rwstat tag %u", tag);
-		break;
-
-	default:
-		ret += scnprintf(buf+ret, buflen-ret, "unknown type %d", type);
-		break;
-	}
-
-	return ret;
-}
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 90419715c7e..08fa320b7e6 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -26,10 +26,10 @@
 #include <linux/sched.h>
 #include <linux/idr.h>
 #include <asm/semaphore.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
 
-#include "debug.h"
 #include "v9fs.h"
-#include "9p.h"
 #include "v9fs_vfs.h"
 #include "fid.h"
 
@@ -40,67 +40,29 @@
  *
  */
 
-int v9fs_fid_insert(struct v9fs_fid *fid, struct dentry *dentry)
+int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid)
 {
-	struct list_head *fid_list = (struct list_head *)dentry->d_fsdata;
-	dprintk(DEBUG_9P, "fid %d (%p) dentry %s (%p)\n", fid->fid, fid,
-		dentry->d_iname, dentry);
-	if (dentry->d_fsdata == NULL) {
-		dentry->d_fsdata =
-		    kmalloc(sizeof(struct list_head), GFP_KERNEL);
-		if (dentry->d_fsdata == NULL) {
-			dprintk(DEBUG_ERROR, "Out of memory\n");
-			return -ENOMEM;
-		}
-		fid_list = (struct list_head *)dentry->d_fsdata;
-		INIT_LIST_HEAD(fid_list);	/* Initialize list head */
-	}
+	struct v9fs_dentry *dent;
 
-	fid->uid = current->uid;
-	list_add(&fid->list, fid_list);
-	return 0;
-}
+	P9_DPRINTK(P9_DEBUG_VFS, "fid %d dentry %s\n",
+					fid->fid, dentry->d_iname);
 
-/**
- * v9fs_fid_create - allocate a FID structure
- * @dentry - dentry to link newly created fid to
- *
- */
-
-struct v9fs_fid *v9fs_fid_create(struct v9fs_session_info *v9ses, int fid)
-{
-	struct v9fs_fid *new;
+	dent = dentry->d_fsdata;
+	if (!dent) {
+		dent = kmalloc(sizeof(struct v9fs_dentry), GFP_KERNEL);
+		if (!dent)
+			return -ENOMEM;
 
-	dprintk(DEBUG_9P, "fid create fid %d\n", fid);
-	new = kmalloc(sizeof(struct v9fs_fid), GFP_KERNEL);
-	if (new == NULL) {
-		dprintk(DEBUG_ERROR, "Out of Memory\n");
-		return ERR_PTR(-ENOMEM);
+		spin_lock_init(&dent->lock);
+		INIT_LIST_HEAD(&dent->fidlist);
+		dentry->d_fsdata = dent;
 	}
 
-	new->fid = fid;
-	new->v9ses = v9ses;
-	new->fidopen = 0;
-	new->fidclunked = 0;
-	new->iounit = 0;
-	new->rdir_pos = 0;
-	new->rdir_fcall = NULL;
-	init_MUTEX(&new->lock);
-	INIT_LIST_HEAD(&new->list);
-
-	return new;
-}
-
-/**
- * v9fs_fid_destroy - deallocate a FID structure
- * @fid: fid to destroy
- *
- */
+	spin_lock(&dent->lock);
+	list_add(&fid->dlist, &dent->fidlist);
+	spin_unlock(&dent->lock);
 
-void v9fs_fid_destroy(struct v9fs_fid *fid)
-{
-	list_del(&fid->list);
-	kfree(fid);
+	return 0;
 }
 
 /**
@@ -114,30 +76,42 @@ void v9fs_fid_destroy(struct v9fs_fid *fid)
  *
  */
 
-struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry)
+struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
 {
-	struct list_head *fid_list = (struct list_head *)dentry->d_fsdata;
-	struct v9fs_fid *return_fid = NULL;
-
-	dprintk(DEBUG_9P, " dentry: %s (%p)\n", dentry->d_iname, dentry);
-
-	if (fid_list)
-		return_fid = list_entry(fid_list->next, struct v9fs_fid, list);
+	struct v9fs_dentry *dent;
+	struct p9_fid *fid;
+
+	P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
+	dent = dentry->d_fsdata;
+	if (dent)
+		fid = list_entry(dent->fidlist.next, struct p9_fid, dlist);
+	else
+		fid = ERR_PTR(-EBADF);
+
+	P9_DPRINTK(P9_DEBUG_VFS, " fid: %p\n", fid);
+	return fid;
+}
 
-	if (!return_fid) {
-		dprintk(DEBUG_ERROR, "Couldn't find a fid in dentry\n");
-		return_fid = ERR_PTR(-EBADF);
+struct p9_fid *v9fs_fid_lookup_remove(struct dentry *dentry)
+{
+	struct p9_fid *fid;
+	struct v9fs_dentry *dent;
+
+	dent = dentry->d_fsdata;
+	fid = v9fs_fid_lookup(dentry);
+	if (!IS_ERR(fid)) {
+		spin_lock(&dent->lock);
+		list_del(&fid->dlist);
+		spin_unlock(&dent->lock);
 	}
 
-	if(down_interruptible(&return_fid->lock))
-		return ERR_PTR(-EINTR);
-
-	return return_fid;
+	return fid;
 }
 
+
 /**
  * v9fs_fid_clone - lookup the fid for a dentry, clone a private copy and
- * 			release it
+ * 	release it
  * @dentry: dentry to look for fid in
  *
  * find a fid in the dentry and then clone to a new private fid
@@ -146,49 +120,15 @@ struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry)
  *
  */
 
-struct v9fs_fid *v9fs_fid_clone(struct dentry *dentry)
+struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
 {
-	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
-	struct v9fs_fid *base_fid, *new_fid = ERR_PTR(-EBADF);
-	struct v9fs_fcall *fcall = NULL;
-	int fid, err;
-
-	base_fid = v9fs_fid_lookup(dentry);
-
-	if(IS_ERR(base_fid))
-		return base_fid;
-
-	if(base_fid) {  /* clone fid */
-		fid = v9fs_get_idpool(&v9ses->fidpool);
-		if (fid < 0) {
-			eprintk(KERN_WARNING, "newfid fails!\n");
-			new_fid = ERR_PTR(-ENOSPC);
-			goto Release_Fid;
-		}
-
-		err = v9fs_t_walk(v9ses, base_fid->fid, fid, NULL, &fcall);
-		if (err < 0) {
-			dprintk(DEBUG_ERROR, "clone walk didn't work\n");
-			v9fs_put_idpool(fid, &v9ses->fidpool);
-			new_fid = ERR_PTR(err);
-			goto Free_Fcall;
-		}
-		new_fid = v9fs_fid_create(v9ses, fid);
-		if (new_fid == NULL) {
-			dprintk(DEBUG_ERROR, "out of memory\n");
-			new_fid = ERR_PTR(-ENOMEM);
-		}
-Free_Fcall:
-		kfree(fcall);
-	}
+	struct p9_fid *ofid, *fid;
 
-Release_Fid:
-	up(&base_fid->lock);
-	return new_fid;
-}
+	P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
+	ofid = v9fs_fid_lookup(dentry);
+	if (IS_ERR(ofid))
+		return ofid;
 
-void v9fs_fid_clunk(struct v9fs_session_info *v9ses, struct v9fs_fid *fid)
-{
-	v9fs_t_clunk(v9ses, fid->fid);
-	v9fs_fid_destroy(fid);
+	fid = p9_client_walk(ofid, 0, NULL, 1);
+	return fid;
 }
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
index 48fc170c26c..47a0ba74287 100644
--- a/fs/9p/fid.h
+++ b/fs/9p/fid.h
@@ -22,41 +22,12 @@
 
 #include <linux/list.h>
 
-#define FID_OP   0
-#define FID_WALK 1
-#define FID_CREATE 2
-
-struct v9fs_fid {
-	struct list_head list;	 /* list of fids associated with a dentry */
-	struct list_head active; /* XXX - debug */
-
-	struct semaphore lock;
-
-	u32 fid;
-	unsigned char fidopen;	  /* set when fid is opened */
-	unsigned char fidclunked; /* set when fid has already been clunked */
-
-	struct v9fs_qid qid;
-	u32 iounit;
-
-	/* readdir stuff */
-	int rdir_fpos;
-	loff_t rdir_pos;
-	struct v9fs_fcall *rdir_fcall;
-
-	/* management stuff */
-	uid_t uid;		/* user associated with this fid */
-
-	/* private data */
-	struct file *filp;	/* backpointer to File struct for open files */
-	struct v9fs_session_info *v9ses;	/* session info for this FID */
+struct v9fs_dentry {
+	spinlock_t lock; /* protect fidlist */
+	struct list_head fidlist;
 };
 
-struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry);
-struct v9fs_fid *v9fs_fid_get_created(struct dentry *);
-void v9fs_fid_destroy(struct v9fs_fid *fid);
-struct v9fs_fid *v9fs_fid_create(struct v9fs_session_info *, int fid);
-int v9fs_fid_insert(struct v9fs_fid *fid, struct dentry *dentry);
-struct v9fs_fid *v9fs_fid_clone(struct dentry *dentry);
-void v9fs_fid_clunk(struct v9fs_session_info *v9ses, struct v9fs_fid *fid);
-
+struct p9_fid *v9fs_fid_lookup(struct dentry *dentry);
+struct p9_fid *v9fs_fid_lookup_remove(struct dentry *dentry);
+struct p9_fid *v9fs_fid_clone(struct dentry *dentry);
+int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid);
diff --git a/fs/9p/mux.c b/fs/9p/mux.c
deleted file mode 100644
index c783874a9ca..00000000000
--- a/fs/9p/mux.c
+++ /dev/null
@@ -1,1033 +0,0 @@
-/*
- * linux/fs/9p/mux.c
- *
- * Protocol Multiplexer
- *
- *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/poll.h>
-#include <linux/kthread.h>
-#include <linux/idr.h>
-#include <linux/mutex.h>
-
-#include "debug.h"
-#include "v9fs.h"
-#include "9p.h"
-#include "conv.h"
-#include "transport.h"
-#include "mux.h"
-
-#define ERREQFLUSH	1
-#define SCHED_TIMEOUT	10
-#define MAXPOLLWADDR	2
-
-enum {
-	Rworksched = 1,		/* read work scheduled or running */
-	Rpending = 2,		/* can read */
-	Wworksched = 4,		/* write work scheduled or running */
-	Wpending = 8,		/* can write */
-};
-
-enum {
-	None,
-	Flushing,
-	Flushed,
-};
-
-struct v9fs_mux_poll_task;
-
-struct v9fs_req {
-	spinlock_t lock;
-	int tag;
-	struct v9fs_fcall *tcall;
-	struct v9fs_fcall *rcall;
-	int err;
-	v9fs_mux_req_callback cb;
-	void *cba;
-	int flush;
-	struct list_head req_list;
-};
-
-struct v9fs_mux_data {
-	spinlock_t lock;
-	struct list_head mux_list;
-	struct v9fs_mux_poll_task *poll_task;
-	int msize;
-	unsigned char *extended;
-	struct v9fs_transport *trans;
-	struct v9fs_idpool tagpool;
-	int err;
-	wait_queue_head_t equeue;
-	struct list_head req_list;
-	struct list_head unsent_req_list;
-	struct v9fs_fcall *rcall;
-	int rpos;
-	char *rbuf;
-	int wpos;
-	int wsize;
-	char *wbuf;
-	wait_queue_t poll_wait[MAXPOLLWADDR];
-	wait_queue_head_t *poll_waddr[MAXPOLLWADDR];
-	poll_table pt;
-	struct work_struct rq;
-	struct work_struct wq;
-	unsigned long wsched;
-};
-
-struct v9fs_mux_poll_task {
-	struct task_struct *task;
-	struct list_head mux_list;
-	int muxnum;
-};
-
-struct v9fs_mux_rpc {
-	struct v9fs_mux_data *m;
-	int err;
-	struct v9fs_fcall *tcall;
-	struct v9fs_fcall *rcall;
-	wait_queue_head_t wqueue;
-};
-
-static int v9fs_poll_proc(void *);
-static void v9fs_read_work(struct work_struct *work);
-static void v9fs_write_work(struct work_struct *work);
-static void v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address,
-			  poll_table * p);
-static u16 v9fs_mux_get_tag(struct v9fs_mux_data *);
-static void v9fs_mux_put_tag(struct v9fs_mux_data *, u16);
-
-static DEFINE_MUTEX(v9fs_mux_task_lock);
-static struct workqueue_struct *v9fs_mux_wq;
-
-static int v9fs_mux_num;
-static int v9fs_mux_poll_task_num;
-static struct v9fs_mux_poll_task v9fs_mux_poll_tasks[100];
-
-int v9fs_mux_global_init(void)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++)
-		v9fs_mux_poll_tasks[i].task = NULL;
-
-	v9fs_mux_wq = create_workqueue("v9fs");
-	if (!v9fs_mux_wq) {
-		printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n");
-		return -ENOMEM;
-	}
-
-	return 0;
-}
-
-void v9fs_mux_global_exit(void)
-{
-	destroy_workqueue(v9fs_mux_wq);
-}
-
-/**
- * v9fs_mux_calc_poll_procs - calculates the number of polling procs
- * based on the number of mounted v9fs filesystems.
- *
- * The current implementation returns sqrt of the number of mounts.
- */
-static int v9fs_mux_calc_poll_procs(int muxnum)
-{
-	int n;
-
-	if (v9fs_mux_poll_task_num)
-		n = muxnum / v9fs_mux_poll_task_num +
-		    (muxnum % v9fs_mux_poll_task_num ? 1 : 0);
-	else
-		n = 1;
-
-	if (n > ARRAY_SIZE(v9fs_mux_poll_tasks))
-		n = ARRAY_SIZE(v9fs_mux_poll_tasks);
-
-	return n;
-}
-
-static int v9fs_mux_poll_start(struct v9fs_mux_data *m)
-{
-	int i, n;
-	struct v9fs_mux_poll_task *vpt, *vptlast;
-	struct task_struct *pproc;
-
-	dprintk(DEBUG_MUX, "mux %p muxnum %d procnum %d\n", m, v9fs_mux_num,
-		v9fs_mux_poll_task_num);
-	mutex_lock(&v9fs_mux_task_lock);
-
-	n = v9fs_mux_calc_poll_procs(v9fs_mux_num + 1);
-	if (n > v9fs_mux_poll_task_num) {
-		for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) {
-			if (v9fs_mux_poll_tasks[i].task == NULL) {
-				vpt = &v9fs_mux_poll_tasks[i];
-				dprintk(DEBUG_MUX, "create proc %p\n", vpt);
-				pproc = kthread_create(v9fs_poll_proc, vpt,
-						   "v9fs-poll");
-
-				if (!IS_ERR(pproc)) {
-					vpt->task = pproc;
-					INIT_LIST_HEAD(&vpt->mux_list);
-					vpt->muxnum = 0;
-					v9fs_mux_poll_task_num++;
-					wake_up_process(vpt->task);
-				}
-				break;
-			}
-		}
-
-		if (i >= ARRAY_SIZE(v9fs_mux_poll_tasks))
-			dprintk(DEBUG_ERROR, "warning: no free poll slots\n");
-	}
-
-	n = (v9fs_mux_num + 1) / v9fs_mux_poll_task_num +
-	    ((v9fs_mux_num + 1) % v9fs_mux_poll_task_num ? 1 : 0);
-
-	vptlast = NULL;
-	for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) {
-		vpt = &v9fs_mux_poll_tasks[i];
-		if (vpt->task != NULL) {
-			vptlast = vpt;
-			if (vpt->muxnum < n) {
-				dprintk(DEBUG_MUX, "put in proc %d\n", i);
-				list_add(&m->mux_list, &vpt->mux_list);
-				vpt->muxnum++;
-				m->poll_task = vpt;
-				memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
-				init_poll_funcptr(&m->pt, v9fs_pollwait);
-				break;
-			}
-		}
-	}
-
-	if (i >= ARRAY_SIZE(v9fs_mux_poll_tasks)) {
-		if (vptlast == NULL)
-			return -ENOMEM;
-
-		dprintk(DEBUG_MUX, "put in proc %d\n", i);
-		list_add(&m->mux_list, &vptlast->mux_list);
-		vptlast->muxnum++;
-		m->poll_task = vptlast;
-		memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
-		init_poll_funcptr(&m->pt, v9fs_pollwait);
-	}
-
-	v9fs_mux_num++;
-	mutex_unlock(&v9fs_mux_task_lock);
-
-	return 0;
-}
-
-static void v9fs_mux_poll_stop(struct v9fs_mux_data *m)
-{
-	int i;
-	struct v9fs_mux_poll_task *vpt;
-
-	mutex_lock(&v9fs_mux_task_lock);
-	vpt = m->poll_task;
-	list_del(&m->mux_list);
-	for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
-		if (m->poll_waddr[i] != NULL) {
-			remove_wait_queue(m->poll_waddr[i], &m->poll_wait[i]);
-			m->poll_waddr[i] = NULL;
-		}
-	}
-	vpt->muxnum--;
-	if (!vpt->muxnum) {
-		dprintk(DEBUG_MUX, "destroy proc %p\n", vpt);
-		kthread_stop(vpt->task);
-		vpt->task = NULL;
-		v9fs_mux_poll_task_num--;
-	}
-	v9fs_mux_num--;
-	mutex_unlock(&v9fs_mux_task_lock);
-}
-
-/**
- * v9fs_mux_init - allocate and initialize the per-session mux data
- * Creates the polling task if this is the first session.
- *
- * @trans - transport structure
- * @msize - maximum message size
- * @extended - pointer to the extended flag
- */
-struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize,
-				    unsigned char *extended)
-{
-	int i, n;
-	struct v9fs_mux_data *m, *mtmp;
-
-	dprintk(DEBUG_MUX, "transport %p msize %d\n", trans, msize);
-	m = kmalloc(sizeof(struct v9fs_mux_data), GFP_KERNEL);
-	if (!m)
-		return ERR_PTR(-ENOMEM);
-
-	spin_lock_init(&m->lock);
-	INIT_LIST_HEAD(&m->mux_list);
-	m->msize = msize;
-	m->extended = extended;
-	m->trans = trans;
-	idr_init(&m->tagpool.pool);
-	init_MUTEX(&m->tagpool.lock);
-	m->err = 0;
-	init_waitqueue_head(&m->equeue);
-	INIT_LIST_HEAD(&m->req_list);
-	INIT_LIST_HEAD(&m->unsent_req_list);
-	m->rcall = NULL;
-	m->rpos = 0;
-	m->rbuf = NULL;
-	m->wpos = m->wsize = 0;
-	m->wbuf = NULL;
-	INIT_WORK(&m->rq, v9fs_read_work);
-	INIT_WORK(&m->wq, v9fs_write_work);
-	m->wsched = 0;
-	memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
-	m->poll_task = NULL;
-	n = v9fs_mux_poll_start(m);
-	if (n)
-		return ERR_PTR(n);
-
-	n = trans->poll(trans, &m->pt);
-	if (n & POLLIN) {
-		dprintk(DEBUG_MUX, "mux %p can read\n", m);
-		set_bit(Rpending, &m->wsched);
-	}
-
-	if (n & POLLOUT) {
-		dprintk(DEBUG_MUX, "mux %p can write\n", m);
-		set_bit(Wpending, &m->wsched);
-	}
-
-	for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
-		if (IS_ERR(m->poll_waddr[i])) {
-			v9fs_mux_poll_stop(m);
-			mtmp = (void *)m->poll_waddr;	/* the error code */
-			kfree(m);
-			m = mtmp;
-			break;
-		}
-	}
-
-	return m;
-}
-
-/**
- * v9fs_mux_destroy - cancels all pending requests and frees mux resources
- */
-void v9fs_mux_destroy(struct v9fs_mux_data *m)
-{
-	dprintk(DEBUG_MUX, "mux %p prev %p next %p\n", m,
-		m->mux_list.prev, m->mux_list.next);
-	v9fs_mux_cancel(m, -ECONNRESET);
-
-	if (!list_empty(&m->req_list)) {
-		/* wait until all processes waiting on this session exit */
-		dprintk(DEBUG_MUX, "mux %p waiting for empty request queue\n",
-			m);
-		wait_event_timeout(m->equeue, (list_empty(&m->req_list)), 5000);
-		dprintk(DEBUG_MUX, "mux %p request queue empty: %d\n", m,
-			list_empty(&m->req_list));
-	}
-
-	v9fs_mux_poll_stop(m);
-	m->trans = NULL;
-
-	kfree(m);
-}
-
-/**
- * v9fs_pollwait - called by files poll operation to add v9fs-poll task
- * 	to files wait queue
- */
-static void
-v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address,
-	      poll_table * p)
-{
-	int i;
-	struct v9fs_mux_data *m;
-
-	m = container_of(p, struct v9fs_mux_data, pt);
-	for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++)
-		if (m->poll_waddr[i] == NULL)
-			break;
-
-	if (i >= ARRAY_SIZE(m->poll_waddr)) {
-		dprintk(DEBUG_ERROR, "not enough wait_address slots\n");
-		return;
-	}
-
-	m->poll_waddr[i] = wait_address;
-
-	if (!wait_address) {
-		dprintk(DEBUG_ERROR, "no wait_address\n");
-		m->poll_waddr[i] = ERR_PTR(-EIO);
-		return;
-	}
-
-	init_waitqueue_entry(&m->poll_wait[i], m->poll_task->task);
-	add_wait_queue(wait_address, &m->poll_wait[i]);
-}
-
-/**
- * v9fs_poll_mux - polls a mux and schedules read or write works if necessary
- */
-static void v9fs_poll_mux(struct v9fs_mux_data *m)
-{
-	int n;
-
-	if (m->err < 0)
-		return;
-
-	n = m->trans->poll(m->trans, NULL);
-	if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) {
-		dprintk(DEBUG_MUX, "error mux %p err %d\n", m, n);
-		if (n >= 0)
-			n = -ECONNRESET;
-		v9fs_mux_cancel(m, n);
-	}
-
-	if (n & POLLIN) {
-		set_bit(Rpending, &m->wsched);
-		dprintk(DEBUG_MUX, "mux %p can read\n", m);
-		if (!test_and_set_bit(Rworksched, &m->wsched)) {
-			dprintk(DEBUG_MUX, "schedule read work mux %p\n", m);
-			queue_work(v9fs_mux_wq, &m->rq);
-		}
-	}
-
-	if (n & POLLOUT) {
-		set_bit(Wpending, &m->wsched);
-		dprintk(DEBUG_MUX, "mux %p can write\n", m);
-		if ((m->wsize || !list_empty(&m->unsent_req_list))
-		    && !test_and_set_bit(Wworksched, &m->wsched)) {
-			dprintk(DEBUG_MUX, "schedule write work mux %p\n", m);
-			queue_work(v9fs_mux_wq, &m->wq);
-		}
-	}
-}
-
-/**
- * v9fs_poll_proc - polls all v9fs transports for new events and queues
- * 	the appropriate work to the work queue
- */
-static int v9fs_poll_proc(void *a)
-{
-	struct v9fs_mux_data *m, *mtmp;
-	struct v9fs_mux_poll_task *vpt;
-
-	vpt = a;
-	dprintk(DEBUG_MUX, "start %p %p\n", current, vpt);
-	while (!kthread_should_stop()) {
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		list_for_each_entry_safe(m, mtmp, &vpt->mux_list, mux_list) {
-			v9fs_poll_mux(m);
-		}
-
-		dprintk(DEBUG_MUX, "sleeping...\n");
-		schedule_timeout(SCHED_TIMEOUT * HZ);
-	}
-
-	__set_current_state(TASK_RUNNING);
-	dprintk(DEBUG_MUX, "finish\n");
-	return 0;
-}
-
-/**
- * v9fs_write_work - called when a transport can send some data
- */
-static void v9fs_write_work(struct work_struct *work)
-{
-	int n, err;
-	struct v9fs_mux_data *m;
-	struct v9fs_req *req;
-
-	m = container_of(work, struct v9fs_mux_data, wq);
-
-	if (m->err < 0) {
-		clear_bit(Wworksched, &m->wsched);
-		return;
-	}
-
-	if (!m->wsize) {
-		if (list_empty(&m->unsent_req_list)) {
-			clear_bit(Wworksched, &m->wsched);
-			return;
-		}
-
-		spin_lock(&m->lock);
-again:
-		req = list_entry(m->unsent_req_list.next, struct v9fs_req,
-			       req_list);
-		list_move_tail(&req->req_list, &m->req_list);
-		if (req->err == ERREQFLUSH)
-			goto again;
-
-		m->wbuf = req->tcall->sdata;
-		m->wsize = req->tcall->size;
-		m->wpos = 0;
-		dump_data(m->wbuf, m->wsize);
-		spin_unlock(&m->lock);
-	}
-
-	dprintk(DEBUG_MUX, "mux %p pos %d size %d\n", m, m->wpos, m->wsize);
-	clear_bit(Wpending, &m->wsched);
-	err = m->trans->write(m->trans, m->wbuf + m->wpos, m->wsize - m->wpos);
-	dprintk(DEBUG_MUX, "mux %p sent %d bytes\n", m, err);
-	if (err == -EAGAIN) {
-		clear_bit(Wworksched, &m->wsched);
-		return;
-	}
-
-	if (err <= 0)
-		goto error;
-
-	m->wpos += err;
-	if (m->wpos == m->wsize)
-		m->wpos = m->wsize = 0;
-
-	if (m->wsize == 0 && !list_empty(&m->unsent_req_list)) {
-		if (test_and_clear_bit(Wpending, &m->wsched))
-			n = POLLOUT;
-		else
-			n = m->trans->poll(m->trans, NULL);
-
-		if (n & POLLOUT) {
-			dprintk(DEBUG_MUX, "schedule write work mux %p\n", m);
-			queue_work(v9fs_mux_wq, &m->wq);
-		} else
-			clear_bit(Wworksched, &m->wsched);
-	} else
-		clear_bit(Wworksched, &m->wsched);
-
-	return;
-
-      error:
-	v9fs_mux_cancel(m, err);
-	clear_bit(Wworksched, &m->wsched);
-}
-
-static void process_request(struct v9fs_mux_data *m, struct v9fs_req *req)
-{
-	int ecode;
-	struct v9fs_str *ename;
-
-	if (!req->err && req->rcall->id == RERROR) {
-		ecode = req->rcall->params.rerror.errno;
-		ename = &req->rcall->params.rerror.error;
-
-		dprintk(DEBUG_MUX, "Rerror %.*s\n", ename->len, ename->str);
-
-		if (*m->extended)
-			req->err = -ecode;
-
-		if (!req->err) {
-			req->err = v9fs_errstr2errno(ename->str, ename->len);
-
-			if (!req->err) {	/* string match failed */
-				PRINT_FCALL_ERROR("unknown error", req->rcall);
-			}
-
-			if (!req->err)
-				req->err = -ESERVERFAULT;
-		}
-	} else if (req->tcall && req->rcall->id != req->tcall->id + 1) {
-		dprintk(DEBUG_ERROR, "fcall mismatch: expected %d, got %d\n",
-			req->tcall->id + 1, req->rcall->id);
-		if (!req->err)
-			req->err = -EIO;
-	}
-}
-
-/**
- * v9fs_read_work - called when there is some data to be read from a transport
- */
-static void v9fs_read_work(struct work_struct *work)
-{
-	int n, err;
-	struct v9fs_mux_data *m;
-	struct v9fs_req *req, *rptr, *rreq;
-	struct v9fs_fcall *rcall;
-	char *rbuf;
-
-	m = container_of(work, struct v9fs_mux_data, rq);
-
-	if (m->err < 0)
-		return;
-
-	rcall = NULL;
-	dprintk(DEBUG_MUX, "start mux %p pos %d\n", m, m->rpos);
-
-	if (!m->rcall) {
-		m->rcall =
-		    kmalloc(sizeof(struct v9fs_fcall) + m->msize, GFP_KERNEL);
-		if (!m->rcall) {
-			err = -ENOMEM;
-			goto error;
-		}
-
-		m->rbuf = (char *)m->rcall + sizeof(struct v9fs_fcall);
-		m->rpos = 0;
-	}
-
-	clear_bit(Rpending, &m->wsched);
-	err = m->trans->read(m->trans, m->rbuf + m->rpos, m->msize - m->rpos);
-	dprintk(DEBUG_MUX, "mux %p got %d bytes\n", m, err);
-	if (err == -EAGAIN) {
-		clear_bit(Rworksched, &m->wsched);
-		return;
-	}
-
-	if (err <= 0)
-		goto error;
-
-	m->rpos += err;
-	while (m->rpos > 4) {
-		n = le32_to_cpu(*(__le32 *) m->rbuf);
-		if (n >= m->msize) {
-			dprintk(DEBUG_ERROR,
-				"requested packet size too big: %d\n", n);
-			err = -EIO;
-			goto error;
-		}
-
-		if (m->rpos < n)
-			break;
-
-		dump_data(m->rbuf, n);
-		err =
-		    v9fs_deserialize_fcall(m->rbuf, n, m->rcall, *m->extended);
-		if (err < 0) {
-			goto error;
-		}
-
-		if ((v9fs_debug_level&DEBUG_FCALL) == DEBUG_FCALL) {
-			char buf[150];
-
-			v9fs_printfcall(buf, sizeof(buf), m->rcall,
-				*m->extended);
-			printk(KERN_NOTICE ">>> %p %s\n", m, buf);
-		}
-
-		rcall = m->rcall;
-		rbuf = m->rbuf;
-		if (m->rpos > n) {
-			m->rcall = kmalloc(sizeof(struct v9fs_fcall) + m->msize,
-					   GFP_KERNEL);
-			if (!m->rcall) {
-				err = -ENOMEM;
-				goto error;
-			}
-
-			m->rbuf = (char *)m->rcall + sizeof(struct v9fs_fcall);
-			memmove(m->rbuf, rbuf + n, m->rpos - n);
-			m->rpos -= n;
-		} else {
-			m->rcall = NULL;
-			m->rbuf = NULL;
-			m->rpos = 0;
-		}
-
-		dprintk(DEBUG_MUX, "mux %p fcall id %d tag %d\n", m, rcall->id,
-			rcall->tag);
-
-		req = NULL;
-		spin_lock(&m->lock);
-		list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) {
-			if (rreq->tag == rcall->tag) {
-				req = rreq;
-				if (req->flush != Flushing)
-					list_del(&req->req_list);
-				break;
-			}
-		}
-		spin_unlock(&m->lock);
-
-		if (req) {
-			req->rcall = rcall;
-			process_request(m, req);
-
-			if (req->flush != Flushing) {
-				if (req->cb)
-					(*req->cb) (req, req->cba);
-				else
-					kfree(req->rcall);
-
-				wake_up(&m->equeue);
-			}
-		} else {
-			if (err >= 0 && rcall->id != RFLUSH)
-				dprintk(DEBUG_ERROR,
-					"unexpected response mux %p id %d tag %d\n",
-					m, rcall->id, rcall->tag);
-			kfree(rcall);
-		}
-	}
-
-	if (!list_empty(&m->req_list)) {
-		if (test_and_clear_bit(Rpending, &m->wsched))
-			n = POLLIN;
-		else
-			n = m->trans->poll(m->trans, NULL);
-
-		if (n & POLLIN) {
-			dprintk(DEBUG_MUX, "schedule read work mux %p\n", m);
-			queue_work(v9fs_mux_wq, &m->rq);
-		} else
-			clear_bit(Rworksched, &m->wsched);
-	} else
-		clear_bit(Rworksched, &m->wsched);
-
-	return;
-
-      error:
-	v9fs_mux_cancel(m, err);
-	clear_bit(Rworksched, &m->wsched);
-}
-
-/**
- * v9fs_send_request - send 9P request
- * The function can sleep until the request is scheduled for sending.
- * The function can be interrupted. Return from the function is not
- * a guarantee that the request is sent successfully. Can return errors
- * that can be retrieved by PTR_ERR macros.
- *
- * @m: mux data
- * @tc: request to be sent
- * @cb: callback function to call when response is received
- * @cba: parameter to pass to the callback function
- */
-static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m,
-					  struct v9fs_fcall *tc,
-					  v9fs_mux_req_callback cb, void *cba)
-{
-	int n;
-	struct v9fs_req *req;
-
-	dprintk(DEBUG_MUX, "mux %p task %p tcall %p id %d\n", m, current,
-		tc, tc->id);
-	if (m->err < 0)
-		return ERR_PTR(m->err);
-
-	req = kmalloc(sizeof(struct v9fs_req), GFP_KERNEL);
-	if (!req)
-		return ERR_PTR(-ENOMEM);
-
-	if (tc->id == TVERSION)
-		n = V9FS_NOTAG;
-	else
-		n = v9fs_mux_get_tag(m);
-
-	if (n < 0)
-		return ERR_PTR(-ENOMEM);
-
-	v9fs_set_tag(tc, n);
-	if ((v9fs_debug_level&DEBUG_FCALL) == DEBUG_FCALL) {
-		char buf[150];
-
-		v9fs_printfcall(buf, sizeof(buf), tc, *m->extended);
-		printk(KERN_NOTICE "<<< %p %s\n", m, buf);
-	}
-
-	spin_lock_init(&req->lock);
-	req->tag = n;
-	req->tcall = tc;
-	req->rcall = NULL;
-	req->err = 0;
-	req->cb = cb;
-	req->cba = cba;
-	req->flush = None;
-
-	spin_lock(&m->lock);
-	list_add_tail(&req->req_list, &m->unsent_req_list);
-	spin_unlock(&m->lock);
-
-	if (test_and_clear_bit(Wpending, &m->wsched))
-		n = POLLOUT;
-	else
-		n = m->trans->poll(m->trans, NULL);
-
-	if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
-		queue_work(v9fs_mux_wq, &m->wq);
-
-	return req;
-}
-
-static void v9fs_mux_free_request(struct v9fs_mux_data *m, struct v9fs_req *req)
-{
-	v9fs_mux_put_tag(m, req->tag);
-	kfree(req);
-}
-
-static void v9fs_mux_flush_cb(struct v9fs_req *freq, void *a)
-{
-	v9fs_mux_req_callback cb;
-	int tag;
-	struct v9fs_mux_data *m;
-	struct v9fs_req *req, *rreq, *rptr;
-
-	m = a;
-	dprintk(DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m,
-		freq->tcall, freq->rcall, freq->err,
-		freq->tcall->params.tflush.oldtag);
-
-	spin_lock(&m->lock);
-	cb = NULL;
-	tag = freq->tcall->params.tflush.oldtag;
-	req = NULL;
-	list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) {
-		if (rreq->tag == tag) {
-			req = rreq;
-			list_del(&req->req_list);
-			break;
-		}
-	}
-	spin_unlock(&m->lock);
-
-	if (req) {
-		spin_lock(&req->lock);
-		req->flush = Flushed;
-		spin_unlock(&req->lock);
-
-		if (req->cb)
-			(*req->cb) (req, req->cba);
-		else
-			kfree(req->rcall);
-
-		wake_up(&m->equeue);
-	}
-
-	kfree(freq->tcall);
-	kfree(freq->rcall);
-	v9fs_mux_free_request(m, freq);
-}
-
-static int
-v9fs_mux_flush_request(struct v9fs_mux_data *m, struct v9fs_req *req)
-{
-	struct v9fs_fcall *fc;
-	struct v9fs_req *rreq, *rptr;
-
-	dprintk(DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag);
-
-	/* if a response was received for a request, do nothing */
-	spin_lock(&req->lock);
-	if (req->rcall || req->err) {
-		spin_unlock(&req->lock);
-		dprintk(DEBUG_MUX, "mux %p req %p response already received\n", m, req);
-		return 0;
-	}
-
-	req->flush = Flushing;
-	spin_unlock(&req->lock);
-
-	spin_lock(&m->lock);
-	/* if the request is not sent yet, just remove it from the list */
-	list_for_each_entry_safe(rreq, rptr, &m->unsent_req_list, req_list) {
-		if (rreq->tag == req->tag) {
-			dprintk(DEBUG_MUX, "mux %p req %p request is not sent yet\n", m, req);
-			list_del(&rreq->req_list);
-			req->flush = Flushed;
-			spin_unlock(&m->lock);
-			if (req->cb)
-				(*req->cb) (req, req->cba);
-			return 0;
-		}
-	}
-	spin_unlock(&m->lock);
-
-	clear_thread_flag(TIF_SIGPENDING);
-	fc = v9fs_create_tflush(req->tag);
-	v9fs_send_request(m, fc, v9fs_mux_flush_cb, m);
-	return 1;
-}
-
-static void
-v9fs_mux_rpc_cb(struct v9fs_req *req, void *a)
-{
-	struct v9fs_mux_rpc *r;
-
-	dprintk(DEBUG_MUX, "req %p r %p\n", req, a);
-	r = a;
-	r->rcall = req->rcall;
-	r->err = req->err;
-
-	if (req->flush!=None && !req->err)
-		r->err = -ERESTARTSYS;
-
-	wake_up(&r->wqueue);
-}
-
-/**
- * v9fs_mux_rpc - sends 9P request and waits until a response is available.
- *	The function can be interrupted.
- * @m: mux data
- * @tc: request to be sent
- * @rc: pointer where a pointer to the response is stored
- */
-int
-v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
-	     struct v9fs_fcall **rc)
-{
-	int err, sigpending;
-	unsigned long flags;
-	struct v9fs_req *req;
-	struct v9fs_mux_rpc r;
-
-	r.err = 0;
-	r.tcall = tc;
-	r.rcall = NULL;
-	r.m = m;
-	init_waitqueue_head(&r.wqueue);
-
-	if (rc)
-		*rc = NULL;
-
-	sigpending = 0;
-	if (signal_pending(current)) {
-		sigpending = 1;
-		clear_thread_flag(TIF_SIGPENDING);
-	}
-
-	req = v9fs_send_request(m, tc, v9fs_mux_rpc_cb, &r);
-	if (IS_ERR(req)) {
-		err = PTR_ERR(req);
-		dprintk(DEBUG_MUX, "error %d\n", err);
-		return err;
-	}
-
-	err = wait_event_interruptible(r.wqueue, r.rcall != NULL || r.err < 0);
-	if (r.err < 0)
-		err = r.err;
-
-	if (err == -ERESTARTSYS && m->trans->status == Connected && m->err == 0) {
-		if (v9fs_mux_flush_request(m, req)) {
-			/* wait until we get response of the flush message */
-			do {
-				clear_thread_flag(TIF_SIGPENDING);
-				err = wait_event_interruptible(r.wqueue,
-					r.rcall || r.err);
-			} while (!r.rcall && !r.err && err==-ERESTARTSYS &&
-				m->trans->status==Connected && !m->err);
-
-			err = -ERESTARTSYS;
-		}
-		sigpending = 1;
-	}
-
-	if (sigpending) {
-		spin_lock_irqsave(&current->sighand->siglock, flags);
-		recalc_sigpending();
-		spin_unlock_irqrestore(&current->sighand->siglock, flags);
-	}
-
-	if (rc)
-		*rc = r.rcall;
-	else
-		kfree(r.rcall);
-
-	v9fs_mux_free_request(m, req);
-	if (err > 0)
-		err = -EIO;
-
-	return err;
-}
-
-#if 0
-/**
- * v9fs_mux_rpcnb - sends 9P request without waiting for response.
- * @m: mux data
- * @tc: request to be sent
- * @cb: callback function to be called when response arrives
- * @cba: value to pass to the callback function
- */
-int v9fs_mux_rpcnb(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
-		   v9fs_mux_req_callback cb, void *a)
-{
-	int err;
-	struct v9fs_req *req;
-
-	req = v9fs_send_request(m, tc, cb, a);
-	if (IS_ERR(req)) {
-		err = PTR_ERR(req);
-		dprintk(DEBUG_MUX, "error %d\n", err);
-		return PTR_ERR(req);
-	}
-
-	dprintk(DEBUG_MUX, "mux %p tc %p tag %d\n", m, tc, req->tag);
-	return 0;
-}
-#endif  /*  0  */
-
-/**
- * v9fs_mux_cancel - cancel all pending requests with error
- * @m: mux data
- * @err: error code
- */
-void v9fs_mux_cancel(struct v9fs_mux_data *m, int err)
-{
-	struct v9fs_req *req, *rtmp;
-	LIST_HEAD(cancel_list);
-
-	dprintk(DEBUG_ERROR, "mux %p err %d\n", m, err);
-	m->err = err;
-	spin_lock(&m->lock);
-	list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
-		list_move(&req->req_list, &cancel_list);
-	}
-	list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) {
-		list_move(&req->req_list, &cancel_list);
-	}
-	spin_unlock(&m->lock);
-
-	list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
-		list_del(&req->req_list);
-		if (!req->err)
-			req->err = err;
-
-		if (req->cb)
-			(*req->cb) (req, req->cba);
-		else
-			kfree(req->rcall);
-	}
-
-	wake_up(&m->equeue);
-}
-
-static u16 v9fs_mux_get_tag(struct v9fs_mux_data *m)
-{
-	int tag;
-
-	tag = v9fs_get_idpool(&m->tagpool);
-	if (tag < 0)
-		return V9FS_NOTAG;
-	else
-		return (u16) tag;
-}
-
-static void v9fs_mux_put_tag(struct v9fs_mux_data *m, u16 tag)
-{
-	if (tag != V9FS_NOTAG && v9fs_check_idpool(tag, &m->tagpool))
-		v9fs_put_idpool(tag, &m->tagpool);
-}
diff --git a/fs/9p/mux.h b/fs/9p/mux.h
deleted file mode 100644
index fb10c50186a..00000000000
--- a/fs/9p/mux.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * linux/fs/9p/mux.h
- *
- * Multiplexer Definitions
- *
- *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
- *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-
-struct v9fs_mux_data;
-struct v9fs_req;
-
-/**
- * v9fs_mux_req_callback - callback function that is called when the
- * response of a request is received. The callback is called from
- * a workqueue and shouldn't block.
- *
- * @a - the pointer that was specified when the request was send to be
- *      passed to the callback
- * @tc - request call
- * @rc - response call
- * @err - error code (non-zero if error occured)
- */
-typedef void (*v9fs_mux_req_callback)(struct v9fs_req *req, void *a);
-
-int v9fs_mux_global_init(void);
-void v9fs_mux_global_exit(void);
-
-struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize,
-	unsigned char *extended);
-void v9fs_mux_destroy(struct v9fs_mux_data *);
-
-int v9fs_mux_send(struct v9fs_mux_data *m, struct v9fs_fcall *tc);
-struct v9fs_fcall *v9fs_mux_recv(struct v9fs_mux_data *m);
-int v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc, struct v9fs_fcall **rc);
-
-void v9fs_mux_flush(struct v9fs_mux_data *m, int sendflush);
-void v9fs_mux_cancel(struct v9fs_mux_data *m, int err);
-int v9fs_errstr2errno(char *errstr, int len);
diff --git a/fs/9p/trans_fd.c b/fs/9p/trans_fd.c
deleted file mode 100644
index 34d43355beb..00000000000
--- a/fs/9p/trans_fd.c
+++ /dev/null
@@ -1,308 +0,0 @@
-/*
- * linux/fs/9p/trans_fd.c
- *
- * Fd transport layer.  Includes deprecated socket layer.
- *
- *  Copyright (C) 2006 by Russ Cox <rsc@swtch.com>
- *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
- *  Copyright (C) 2004-2005 by Eric Van Hensbergen <ericvh@gmail.com>
- *  Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-
-#include <linux/in.h>
-#include <linux/module.h>
-#include <linux/net.h>
-#include <linux/ipv6.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/un.h>
-#include <asm/uaccess.h>
-#include <linux/inet.h>
-#include <linux/idr.h>
-#include <linux/file.h>
-
-#include "debug.h"
-#include "v9fs.h"
-#include "transport.h"
-
-#define V9FS_PORT 564
-
-struct v9fs_trans_fd {
-	struct file *rd;
-	struct file *wr;
-};
-
-/**
- * v9fs_fd_read- read from a fd
- * @v9ses: session information
- * @v: buffer to receive data into
- * @len: size of receive buffer
- *
- */
-static int v9fs_fd_read(struct v9fs_transport *trans, void *v, int len)
-{
-	int ret;
-	struct v9fs_trans_fd *ts;
-
-	if (!trans || trans->status == Disconnected || !(ts = trans->priv))
-		return -EREMOTEIO;
-
-	if (!(ts->rd->f_flags & O_NONBLOCK))
-		dprintk(DEBUG_ERROR, "blocking read ...\n");
-
-	ret = kernel_read(ts->rd, ts->rd->f_pos, v, len);
-	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
-		trans->status = Disconnected;
-	return ret;
-}
-
-/**
- * v9fs_fd_write - write to a socket
- * @v9ses: session information
- * @v: buffer to send data from
- * @len: size of send buffer
- *
- */
-static int v9fs_fd_write(struct v9fs_transport *trans, void *v, int len)
-{
-	int ret;
-	mm_segment_t oldfs;
-	struct v9fs_trans_fd *ts;
-
-	if (!trans || trans->status == Disconnected || !(ts = trans->priv))
-		return -EREMOTEIO;
-
-	if (!(ts->wr->f_flags & O_NONBLOCK))
-		dprintk(DEBUG_ERROR, "blocking write ...\n");
-
-	oldfs = get_fs();
-	set_fs(get_ds());
-	/* The cast to a user pointer is valid due to the set_fs() */
-	ret = vfs_write(ts->wr, (void __user *)v, len, &ts->wr->f_pos);
-	set_fs(oldfs);
-
-	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
-		trans->status = Disconnected;
-	return ret;
-}
-
-static unsigned int
-v9fs_fd_poll(struct v9fs_transport *trans, struct poll_table_struct *pt)
-{
-	int ret, n;
-	struct v9fs_trans_fd *ts;
-	mm_segment_t oldfs;
-
-	if (!trans || trans->status != Connected || !(ts = trans->priv))
-		return -EREMOTEIO;
-
-	if (!ts->rd->f_op || !ts->rd->f_op->poll)
-		return -EIO;
-
-	if (!ts->wr->f_op || !ts->wr->f_op->poll)
-		return -EIO;
-
-	oldfs = get_fs();
-	set_fs(get_ds());
-
-	ret = ts->rd->f_op->poll(ts->rd, pt);
-	if (ret < 0)
-		goto end;
-
-	if (ts->rd != ts->wr) {
-		n = ts->wr->f_op->poll(ts->wr, pt);
-		if (n < 0) {
-			ret = n;
-			goto end;
-		}
-		ret = (ret & ~POLLOUT) | (n & ~POLLIN);
-	}
-
-      end:
-	set_fs(oldfs);
-	return ret;
-}
-
-static int v9fs_fd_open(struct v9fs_session_info *v9ses, int rfd, int wfd)
-{
-	struct v9fs_transport *trans = v9ses->transport;
-	struct v9fs_trans_fd *ts = kmalloc(sizeof(struct v9fs_trans_fd),
-					   GFP_KERNEL);
-	if (!ts)
-		return -ENOMEM;
-
-	ts->rd = fget(rfd);
-	ts->wr = fget(wfd);
-	if (!ts->rd || !ts->wr) {
-		if (ts->rd)
-			fput(ts->rd);
-		if (ts->wr)
-			fput(ts->wr);
-		kfree(ts);
-		return -EIO;
-	}
-
-	trans->priv = ts;
-	trans->status = Connected;
-
-	return 0;
-}
-
-static int v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr,
-			char *data)
-{
-	if (v9ses->rfdno == ~0 || v9ses->wfdno == ~0) {
-		printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n");
-		return -ENOPROTOOPT;
-	}
-
-	return v9fs_fd_open(v9ses, v9ses->rfdno, v9ses->wfdno);
-}
-
-static int v9fs_socket_open(struct v9fs_session_info *v9ses,
-			    struct socket *csocket)
-{
-	int fd, ret;
-
-	csocket->sk->sk_allocation = GFP_NOIO;
-	if ((fd = sock_map_fd(csocket)) < 0) {
-		eprintk(KERN_ERR, "v9fs_socket_open: failed to map fd\n");
-		ret = fd;
-	      release_csocket:
-		sock_release(csocket);
-		return ret;
-	}
-
-	if ((ret = v9fs_fd_open(v9ses, fd, fd)) < 0) {
-		sockfd_put(csocket);
-		eprintk(KERN_ERR, "v9fs_socket_open: failed to open fd\n");
-		goto release_csocket;
-	}
-
-	((struct v9fs_trans_fd *)v9ses->transport->priv)->rd->f_flags |=
-	    O_NONBLOCK;
-	return 0;
-}
-
-static int v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr,
-			 char *data)
-{
-	int ret;
-	struct socket *csocket = NULL;
-	struct sockaddr_in sin_server;
-
-	sin_server.sin_family = AF_INET;
-	sin_server.sin_addr.s_addr = in_aton(addr);
-	sin_server.sin_port = htons(v9ses->port);
-	sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket);
-
-	if (!csocket) {
-		eprintk(KERN_ERR, "v9fs_trans_tcp: problem creating socket\n");
-		return -1;
-	}
-
-	ret = csocket->ops->connect(csocket,
-				    (struct sockaddr *)&sin_server,
-				    sizeof(struct sockaddr_in), 0);
-	if (ret < 0) {
-		eprintk(KERN_ERR,
-			"v9fs_trans_tcp: problem connecting socket to %s\n",
-			addr);
-		return ret;
-	}
-
-	return v9fs_socket_open(v9ses, csocket);
-}
-
-static int
-v9fs_unix_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
-{
-	int ret;
-	struct socket *csocket;
-	struct sockaddr_un sun_server;
-
-	if (strlen(addr) > UNIX_PATH_MAX) {
-		eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n",
-			addr);
-		return -ENAMETOOLONG;
-	}
-
-	sun_server.sun_family = PF_UNIX;
-	strcpy(sun_server.sun_path, addr);
-	sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket);
-	ret = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server,
-			sizeof(struct sockaddr_un) - 1, 0);
-	if (ret < 0) {
-		eprintk(KERN_ERR,
-			"v9fs_trans_unix: problem connecting socket: %s: %d\n",
-			addr, ret);
-		return ret;
-	}
-
-	return v9fs_socket_open(v9ses, csocket);
-}
-
-/**
- * v9fs_sock_close - shutdown socket
- * @trans: private socket structure
- *
- */
-static void v9fs_fd_close(struct v9fs_transport *trans)
-{
-	struct v9fs_trans_fd *ts;
-
-	if (!trans)
-		return;
-
-	ts = xchg(&trans->priv, NULL);
-
-	if (!ts)
-		return;
-
-	trans->status = Disconnected;
-	if (ts->rd)
-		fput(ts->rd);
-	if (ts->wr)
-		fput(ts->wr);
-	kfree(ts);
-}
-
-struct v9fs_transport v9fs_trans_fd = {
-	.init = v9fs_fd_init,
-	.write = v9fs_fd_write,
-	.read = v9fs_fd_read,
-	.close = v9fs_fd_close,
-	.poll = v9fs_fd_poll,
-};
-
-struct v9fs_transport v9fs_trans_tcp = {
-	.init = v9fs_tcp_init,
-	.write = v9fs_fd_write,
-	.read = v9fs_fd_read,
-	.close = v9fs_fd_close,
-	.poll = v9fs_fd_poll,
-};
-
-struct v9fs_transport v9fs_trans_unix = {
-	.init = v9fs_unix_init,
-	.write = v9fs_fd_write,
-	.read = v9fs_fd_read,
-	.close = v9fs_fd_close,
-	.poll = v9fs_fd_poll,
-};
diff --git a/fs/9p/transport.h b/fs/9p/transport.h
deleted file mode 100644
index b38a4b8a41c..00000000000
--- a/fs/9p/transport.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * linux/fs/9p/transport.h
- *
- * Transport Definition
- *
- *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
- *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-
-enum v9fs_transport_status {
-	Connected,
-	Disconnected,
-	Hung,
-};
-
-struct v9fs_transport {
-	enum v9fs_transport_status status;
-	void *priv;
-
-	int (*init) (struct v9fs_session_info *, const char *, char *);
-	int (*write) (struct v9fs_transport *, void *, int);
-	int (*read) (struct v9fs_transport *, void *, int);
-	void (*close) (struct v9fs_transport *);
-	unsigned int (*poll)(struct v9fs_transport *, struct poll_table_struct *);
-};
-
-extern struct v9fs_transport v9fs_trans_tcp;
-extern struct v9fs_transport v9fs_trans_unix;
-extern struct v9fs_transport v9fs_trans_fd;
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 6ad6f192b6e..45c35986d49 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -29,16 +29,12 @@
 #include <linux/sched.h>
 #include <linux/parser.h>
 #include <linux/idr.h>
-
-#include "debug.h"
+#include <net/9p/9p.h>
+#include <net/9p/transport.h>
+#include <net/9p/conn.h>
+#include <net/9p/client.h>
 #include "v9fs.h"
-#include "9p.h"
 #include "v9fs_vfs.h"
-#include "transport.h"
-#include "mux.h"
-
-/* TODO: sysfs or debugfs interface */
-int v9fs_debug_level = 0;	/* feature-rific global debug level  */
 
 /*
   * Option Parsing (code inspired by NFS code)
@@ -47,12 +43,12 @@ int v9fs_debug_level = 0;	/* feature-rific global debug level  */
 
 enum {
 	/* Options that take integer arguments */
-	Opt_port, Opt_msize, Opt_uid, Opt_gid, Opt_afid, Opt_debug,
+	Opt_debug, Opt_port, Opt_msize, Opt_uid, Opt_gid, Opt_afid,
 	Opt_rfdno, Opt_wfdno,
 	/* String options */
 	Opt_uname, Opt_remotename,
 	/* Options that take no arguments */
-	Opt_legacy, Opt_nodevmap, Opt_unix, Opt_tcp, Opt_fd,
+	Opt_legacy, Opt_nodevmap, Opt_unix, Opt_tcp, Opt_fd, Opt_pci,
 	/* Cache options */
 	Opt_cache_loose,
 	/* Error token */
@@ -60,6 +56,7 @@ enum {
 };
 
 static match_table_t tokens = {
+	{Opt_debug, "debug=%x"},
 	{Opt_port, "port=%u"},
 	{Opt_msize, "msize=%u"},
 	{Opt_uid, "uid=%u"},
@@ -67,12 +64,14 @@ static match_table_t tokens = {
 	{Opt_afid, "afid=%u"},
 	{Opt_rfdno, "rfdno=%u"},
 	{Opt_wfdno, "wfdno=%u"},
-	{Opt_debug, "debug=%x"},
 	{Opt_uname, "uname=%s"},
 	{Opt_remotename, "aname=%s"},
 	{Opt_unix, "proto=unix"},
 	{Opt_tcp, "proto=tcp"},
 	{Opt_fd, "proto=fd"},
+#ifdef CONFIG_PCI_9P
+	{Opt_pci, "proto=pci"},
+#endif
 	{Opt_tcp, "tcp"},
 	{Opt_unix, "unix"},
 	{Opt_fd, "fd"},
@@ -83,6 +82,8 @@ static match_table_t tokens = {
 	{Opt_err, NULL}
 };
 
+extern struct p9_transport *p9pci_trans_create(void);
+
 /*
  *  Parse option string.
  */
@@ -122,12 +123,16 @@ static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses)
 		token = match_token(p, tokens, args);
 		if (token < Opt_uname) {
 			if ((ret = match_int(&args[0], &option)) < 0) {
-				dprintk(DEBUG_ERROR,
+				P9_DPRINTK(P9_DEBUG_ERROR,
 					"integer field, but no integer?\n");
 				continue;
 			}
 		}
 		switch (token) {
+		case Opt_debug:
+			v9ses->debug = option;
+			p9_debug_level = option;
+			break;
 		case Opt_port:
 			v9ses->port = option;
 			break;
@@ -149,15 +154,15 @@ static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses)
 		case Opt_wfdno:
 			v9ses->wfdno = option;
 			break;
-		case Opt_debug:
-			v9ses->debug = option;
-			break;
 		case Opt_tcp:
 			v9ses->proto = PROTO_TCP;
 			break;
 		case Opt_unix:
 			v9ses->proto = PROTO_UNIX;
 			break;
+		case Opt_pci:
+			v9ses->proto = PROTO_PCI;
+			break;
 		case Opt_fd:
 			v9ses->proto = PROTO_FD;
 			break;
@@ -183,82 +188,6 @@ static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses)
 }
 
 /**
- * v9fs_inode2v9ses - safely extract v9fs session info from super block
- * @inode: inode to extract information from
- *
- * Paranoid function to extract v9ses information from superblock,
- * if anything is missing it will report an error.
- *
- */
-
-struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode)
-{
-	return (inode->i_sb->s_fs_info);
-}
-
-/**
- * v9fs_get_idpool - allocate numeric id from pool
- * @p - pool to allocate from
- *
- * XXX - This seems to be an awful generic function, should it be in idr.c with
- *            the lock included in struct idr?
- */
-
-int v9fs_get_idpool(struct v9fs_idpool *p)
-{
-	int i = 0;
-	int error;
-
-retry:
-	if (idr_pre_get(&p->pool, GFP_KERNEL) == 0)
-		return 0;
-
-	if (down_interruptible(&p->lock) == -EINTR) {
-		eprintk(KERN_WARNING, "Interrupted while locking\n");
-		return -1;
-	}
-
-	/* no need to store exactly p, we just need something non-null */
-	error = idr_get_new(&p->pool, p, &i);
-	up(&p->lock);
-
-	if (error == -EAGAIN)
-		goto retry;
-	else if (error)
-		return -1;
-
-	return i;
-}
-
-/**
- * v9fs_put_idpool - release numeric id from pool
- * @p - pool to allocate from
- *
- * XXX - This seems to be an awful generic function, should it be in idr.c with
- *            the lock included in struct idr?
- */
-
-void v9fs_put_idpool(int id, struct v9fs_idpool *p)
-{
-	if (down_interruptible(&p->lock) == -EINTR) {
-		eprintk(KERN_WARNING, "Interrupted while locking\n");
-		return;
-	}
-	idr_remove(&p->pool, id);
-	up(&p->lock);
-}
-
-/**
- * v9fs_check_idpool - check if the specified id is available
- * @id - id to check
- * @p - pool
- */
-int v9fs_check_idpool(int id, struct v9fs_idpool *p)
-{
-	return idr_find(&p->pool, id) != NULL;
-}
-
-/**
  * v9fs_session_init - initialize session
  * @v9ses: session information structure
  * @dev_name: device being mounted
@@ -266,25 +195,21 @@ int v9fs_check_idpool(int id, struct v9fs_idpool *p)
  *
  */
 
-int
-v9fs_session_init(struct v9fs_session_info *v9ses,
+struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
 		  const char *dev_name, char *data)
 {
-	struct v9fs_fcall *fcall = NULL;
-	struct v9fs_transport *trans_proto;
-	int n = 0;
-	int newfid = -1;
 	int retval = -EINVAL;
-	struct v9fs_str *version;
+	struct p9_transport *trans;
+	struct p9_fid *fid;
 
 	v9ses->name = __getname();
 	if (!v9ses->name)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	v9ses->remotename = __getname();
 	if (!v9ses->remotename) {
 		__putname(v9ses->name);
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 	}
 
 	strcpy(v9ses->name, V9FS_DEFUSER);
@@ -292,130 +217,60 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
 
 	v9fs_parse_options(data, v9ses);
 
-	/* set global debug level */
-	v9fs_debug_level = v9ses->debug;
-
-	/* id pools that are session-dependent: fids and tags */
-	idr_init(&v9ses->fidpool.pool);
-	init_MUTEX(&v9ses->fidpool.lock);
-
 	switch (v9ses->proto) {
 	case PROTO_TCP:
-		trans_proto = &v9fs_trans_tcp;
+		trans = p9_trans_create_tcp(dev_name, v9ses->port);
 		break;
 	case PROTO_UNIX:
-		trans_proto = &v9fs_trans_unix;
+		trans = p9_trans_create_unix(dev_name);
 		*v9ses->remotename = 0;
 		break;
 	case PROTO_FD:
-		trans_proto = &v9fs_trans_fd;
+		trans = p9_trans_create_fd(v9ses->rfdno, v9ses->wfdno);
 		*v9ses->remotename = 0;
 		break;
+#ifdef CONFIG_PCI_9P
+	case PROTO_PCI:
+		trans = p9pci_trans_create();
+		*v9ses->remotename = 0;
+		break;
+#endif
 	default:
 		printk(KERN_ERR "v9fs: Bad mount protocol %d\n", v9ses->proto);
 		retval = -ENOPROTOOPT;
-		goto SessCleanUp;
+		goto error;
 	};
 
-	v9ses->transport = kmalloc(sizeof(*v9ses->transport), GFP_KERNEL);
-	if (!v9ses->transport) {
-		retval = -ENOMEM;
-		goto SessCleanUp;
+	if (IS_ERR(trans)) {
+		retval = PTR_ERR(trans);
+		trans = NULL;
+		goto error;
 	}
 
-	memmove(v9ses->transport, trans_proto, sizeof(*v9ses->transport));
+	v9ses->clnt = p9_client_create(trans, v9ses->maxdata + P9_IOHDRSZ,
+		v9ses->extended);
 
-	if ((retval = v9ses->transport->init(v9ses, dev_name, data)) < 0) {
-		eprintk(KERN_ERR, "problem initializing transport\n");
-		goto SessCleanUp;
+	if (IS_ERR(v9ses->clnt)) {
+		retval = PTR_ERR(v9ses->clnt);
+		v9ses->clnt = NULL;
+		P9_DPRINTK(P9_DEBUG_ERROR, "problem initializing 9p client\n");
+		goto error;
 	}
 
-	v9ses->inprogress = 0;
-	v9ses->shutdown = 0;
-	v9ses->session_hung = 0;
-
-	v9ses->mux = v9fs_mux_init(v9ses->transport, v9ses->maxdata + V9FS_IOHDRSZ,
-		&v9ses->extended);
-
-	if (IS_ERR(v9ses->mux)) {
-		retval = PTR_ERR(v9ses->mux);
-		v9ses->mux = NULL;
-		dprintk(DEBUG_ERROR, "problem initializing mux\n");
-		goto SessCleanUp;
+	fid = p9_client_attach(v9ses->clnt, NULL, v9ses->name,
+							v9ses->remotename);
+	if (IS_ERR(fid)) {
+		retval = PTR_ERR(fid);
+		fid = NULL;
+		P9_DPRINTK(P9_DEBUG_ERROR, "cannot attach\n");
+		goto error;
 	}
 
-	if (v9ses->afid == ~0) {
-		if (v9ses->extended)
-			retval =
-			    v9fs_t_version(v9ses, v9ses->maxdata, "9P2000.u",
-					   &fcall);
-		else
-			retval = v9fs_t_version(v9ses, v9ses->maxdata, "9P2000",
-						&fcall);
-
-		if (retval < 0) {
-			dprintk(DEBUG_ERROR, "v9fs_t_version failed\n");
-			goto FreeFcall;
-		}
-
-		version = &fcall->params.rversion.version;
-		if (version->len==8 && !memcmp(version->str, "9P2000.u", 8)) {
-			dprintk(DEBUG_9P, "9P2000 UNIX extensions enabled\n");
-			v9ses->extended = 1;
-		} else if (version->len==6 && !memcmp(version->str, "9P2000", 6)) {
-			dprintk(DEBUG_9P, "9P2000 legacy mode enabled\n");
-			v9ses->extended = 0;
-		} else {
-			retval = -EREMOTEIO;
-			goto FreeFcall;
-		}
+	return fid;
 
-		n = fcall->params.rversion.msize;
-		kfree(fcall);
-
-		if (n < v9ses->maxdata)
-			v9ses->maxdata = n;
-	}
-
-	newfid = v9fs_get_idpool(&v9ses->fidpool);
-	if (newfid < 0) {
-		eprintk(KERN_WARNING, "couldn't allocate FID\n");
-		retval = -ENOMEM;
-		goto SessCleanUp;
-	}
-	/* it is a little bit ugly, but we have to prevent newfid */
-	/* being the same as afid, so if it is, get a new fid     */
-	if (v9ses->afid != ~0 && newfid == v9ses->afid) {
-		newfid = v9fs_get_idpool(&v9ses->fidpool);
-		if (newfid < 0) {
-			eprintk(KERN_WARNING, "couldn't allocate FID\n");
-			retval = -ENOMEM;
-			goto SessCleanUp;
-		}
-	}
-
-	if ((retval =
-	     v9fs_t_attach(v9ses, v9ses->name, v9ses->remotename, newfid,
-			   v9ses->afid, NULL))
-	    < 0) {
-		dprintk(DEBUG_ERROR, "cannot attach\n");
-		goto SessCleanUp;
-	}
-
-	if (v9ses->afid != ~0) {
-		dprintk(DEBUG_ERROR, "afid not equal to ~0\n");
-		if (v9fs_t_clunk(v9ses, v9ses->afid))
-			dprintk(DEBUG_ERROR, "clunk failed\n");
-	}
-
-	return newfid;
-
-      FreeFcall:
-	kfree(fcall);
-
-      SessCleanUp:
+error:
 	v9fs_session_close(v9ses);
-	return retval;
+	return ERR_PTR(retval);
 }
 
 /**
@@ -426,15 +281,9 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
 
 void v9fs_session_close(struct v9fs_session_info *v9ses)
 {
-	if (v9ses->mux) {
-		v9fs_mux_destroy(v9ses->mux);
-		v9ses->mux = NULL;
-	}
-
-	if (v9ses->transport) {
-		v9ses->transport->close(v9ses->transport);
-		kfree(v9ses->transport);
-		v9ses->transport = NULL;
+	if (v9ses->clnt) {
+		p9_client_destroy(v9ses->clnt);
+		v9ses->clnt = NULL;
 	}
 
 	__putname(v9ses->name);
@@ -446,9 +295,8 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
  * 	and cancel all pending requests.
  */
 void v9fs_session_cancel(struct v9fs_session_info *v9ses) {
-	dprintk(DEBUG_ERROR, "cancel session %p\n", v9ses);
-	v9ses->transport->status = Disconnected;
-	v9fs_mux_cancel(v9ses->mux, -EIO);
+	P9_DPRINTK(P9_DEBUG_ERROR, "cancel session %p\n", v9ses);
+	p9_client_disconnect(v9ses->clnt);
 }
 
 extern int v9fs_error_init(void);
@@ -460,24 +308,9 @@ extern int v9fs_error_init(void);
 
 static int __init init_v9fs(void)
 {
-	int ret;
-
-	v9fs_error_init();
-
 	printk(KERN_INFO "Installing v9fs 9p2000 file system support\n");
 
-	ret = v9fs_mux_global_init();
-	if (ret) {
-		printk(KERN_WARNING "v9fs: starting mux failed\n");
-		return ret;
-	}
-	ret = register_filesystem(&v9fs_fs_type);
-	if (ret) {
-		printk(KERN_WARNING "v9fs: registering file system failed\n");
-		v9fs_mux_global_exit();
-	}
-
-	return ret;
+	return register_filesystem(&v9fs_fs_type);
 }
 
 /**
@@ -487,13 +320,13 @@ static int __init init_v9fs(void)
 
 static void __exit exit_v9fs(void)
 {
-	v9fs_mux_global_exit();
 	unregister_filesystem(&v9fs_fs_type);
 }
 
 module_init(init_v9fs)
 module_exit(exit_v9fs)
 
+MODULE_AUTHOR("Latchesar Ionkov <lucho@ionkov.net>");
 MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
 MODULE_AUTHOR("Ron Minnich <rminnich@lanl.gov>");
 MODULE_LICENSE("GPL");
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 820bf5ca35d..abc4b1668ac 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -22,16 +22,6 @@
  */
 
 /*
-  * Idpool structure provides lock and id management
-  *
-  */
-
-struct v9fs_idpool {
-	struct semaphore lock;
-	struct idr pool;
-};
-
-/*
   * Session structure provides information for an opened session
   *
   */
@@ -54,15 +44,7 @@ struct v9fs_session_info {
 	unsigned int uid;	/* default uid/muid for legacy support */
 	unsigned int gid;	/* default gid for legacy support */
 
-	/* book keeping */
-	struct v9fs_idpool fidpool;	/* The FID pool for file descriptors */
-
-	struct v9fs_transport *transport;
-	struct v9fs_mux_data *mux;
-
-	int inprogress;		/* session in progress => true */
-	int shutdown;		/* session shutting down. no more attaches. */
-	unsigned char session_hung;
+	struct p9_client *clnt;	/* 9p client */
 	struct dentry *debugfs_dir;
 };
 
@@ -71,6 +53,7 @@ enum {
 	PROTO_TCP,
 	PROTO_UNIX,
 	PROTO_FD,
+	PROTO_PCI,
 };
 
 /* possible values of ->cache */
@@ -82,12 +65,9 @@ enum {
 
 extern struct dentry *v9fs_debugfs_root;
 
-int v9fs_session_init(struct v9fs_session_info *, const char *, char *);
-struct v9fs_session_info *v9fs_inode2v9ses(struct inode *);
+struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
+									char *);
 void v9fs_session_close(struct v9fs_session_info *v9ses);
-int v9fs_get_idpool(struct v9fs_idpool *p);
-void v9fs_put_idpool(int id, struct v9fs_idpool *p);
-int v9fs_check_idpool(int id, struct v9fs_idpool *p);
 void v9fs_session_cancel(struct v9fs_session_info *v9ses);
 
 #define V9FS_MAGIC 0x01021997
@@ -97,3 +77,7 @@ void v9fs_session_cancel(struct v9fs_session_info *v9ses);
 #define V9FS_DEFUSER	"nobody"
 #define V9FS_DEFANAME	""
 
+static inline struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode)
+{
+	return (inode->i_sb->s_fs_info);
+}
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 6a82d39dc49..fd01d90cada 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -45,10 +45,10 @@ extern struct dentry_operations v9fs_dentry_operations;
 extern struct dentry_operations v9fs_cached_dentry_operations;
 
 struct inode *v9fs_get_inode(struct super_block *sb, int mode);
-ino_t v9fs_qid2ino(struct v9fs_qid *qid);
-void v9fs_stat2inode(struct v9fs_stat *, struct inode *, struct super_block *);
+ino_t v9fs_qid2ino(struct p9_qid *qid);
+void v9fs_stat2inode(struct p9_stat *, struct inode *, struct super_block *);
 int v9fs_dir_release(struct inode *inode, struct file *filp);
 int v9fs_file_open(struct inode *inode, struct file *file);
-void v9fs_inode2stat(struct inode *inode, struct v9fs_stat *stat);
+void v9fs_inode2stat(struct inode *inode, struct p9_stat *stat);
 void v9fs_dentry_release(struct dentry *);
 int v9fs_uflags2omode(int uflags);
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 9ac4ffe9ac7..6248f0e727a 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -33,10 +33,10 @@
 #include <linux/pagemap.h>
 #include <linux/idr.h>
 #include <linux/sched.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
 
-#include "debug.h"
 #include "v9fs.h"
-#include "9p.h"
 #include "v9fs_vfs.h"
 #include "fid.h"
 
@@ -50,55 +50,26 @@
 
 static int v9fs_vfs_readpage(struct file *filp, struct page *page)
 {
-	char *buffer = NULL;
-	int retval = -EIO;
-	loff_t offset = page_offset(page);
-	int count = PAGE_CACHE_SIZE;
-	struct inode *inode = filp->f_path.dentry->d_inode;
-	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
-	int rsize = v9ses->maxdata - V9FS_IOHDRSZ;
-	struct v9fs_fid *v9f = filp->private_data;
-	struct v9fs_fcall *fcall = NULL;
-	int fid = v9f->fid;
-	int total = 0;
-	int result = 0;
-
-	dprintk(DEBUG_VFS, "\n");
+	int retval;
+	loff_t offset;
+	char *buffer;
+	struct p9_fid *fid;
 
+	P9_DPRINTK(P9_DEBUG_VFS, "\n");
+	fid = filp->private_data;
 	buffer = kmap(page);
-	do {
-		if (count < rsize)
-			rsize = count;
-
-		result = v9fs_t_read(v9ses, fid, offset, rsize, &fcall);
-
-		if (result < 0) {
-			printk(KERN_ERR "v9fs_t_read returned %d\n",
-			       result);
-
-			kfree(fcall);
-			goto UnmapAndUnlock;
-		} else
-			offset += result;
-
-		memcpy(buffer, fcall->params.rread.data, result);
-
-		count -= result;
-		buffer += result;
-		total += result;
-
-		kfree(fcall);
+	offset = page_offset(page);
 
-		if (result < rsize)
-			break;
-	} while (count);
+	retval = p9_client_readn(fid, buffer, offset, PAGE_CACHE_SIZE);
+	if (retval < 0)
+		goto done;
 
-	memset(buffer, 0, count);
+	memset(buffer + retval, 0, PAGE_CACHE_SIZE - retval);
 	flush_dcache_page(page);
 	SetPageUptodate(page);
 	retval = 0;
 
-UnmapAndUnlock:
+done:
 	kunmap(page);
 	unlock_page(page);
 	return retval;
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index d93960429c0..f9534f18df0 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -34,10 +34,10 @@
 #include <linux/namei.h>
 #include <linux/idr.h>
 #include <linux/sched.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
 
-#include "debug.h"
 #include "v9fs.h"
-#include "9p.h"
 #include "v9fs_vfs.h"
 #include "fid.h"
 
@@ -52,7 +52,7 @@
 
 static int v9fs_dentry_delete(struct dentry *dentry)
 {
-	dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
+	P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
 
 	return 1;
 }
@@ -69,7 +69,7 @@ static int v9fs_dentry_delete(struct dentry *dentry)
 static int v9fs_cached_dentry_delete(struct dentry *dentry)
 {
 	struct inode *inode = dentry->d_inode;
-	dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
+	P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
 
 	if(!inode)
 		return 1;
@@ -85,26 +85,19 @@ static int v9fs_cached_dentry_delete(struct dentry *dentry)
 
 void v9fs_dentry_release(struct dentry *dentry)
 {
-	int err;
-
-	dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
-
-	if (dentry->d_fsdata != NULL) {
-		struct list_head *fid_list = dentry->d_fsdata;
-		struct v9fs_fid *temp = NULL;
-		struct v9fs_fid *current_fid = NULL;
-
-		list_for_each_entry_safe(current_fid, temp, fid_list, list) {
-			err = v9fs_t_clunk(current_fid->v9ses, current_fid->fid);
-
-			if (err < 0)
-				dprintk(DEBUG_ERROR, "clunk failed: %d name %s\n",
-					err, dentry->d_iname);
-
-			v9fs_fid_destroy(current_fid);
+	struct v9fs_dentry *dent;
+	struct p9_fid *temp, *current_fid;
+
+	P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
+	dent = dentry->d_fsdata;
+	if (dent) {
+		list_for_each_entry_safe(current_fid, temp, &dent->fidlist,
+									dlist) {
+			p9_client_clunk(current_fid);
 		}
 
-		kfree(dentry->d_fsdata);	/* free the list_head */
+		kfree(dent);
+		dentry->d_fsdata = NULL;
 	}
 }
 
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 1dd86ee90bc..0924d4477da 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -32,11 +32,10 @@
 #include <linux/sched.h>
 #include <linux/inet.h>
 #include <linux/idr.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
 
-#include "debug.h"
 #include "v9fs.h"
-#include "9p.h"
-#include "conv.h"
 #include "v9fs_vfs.h"
 #include "fid.h"
 
@@ -46,14 +45,14 @@
  *
  */
 
-static inline int dt_type(struct v9fs_stat *mistat)
+static inline int dt_type(struct p9_stat *mistat)
 {
 	unsigned long perm = mistat->mode;
 	int rettype = DT_REG;
 
-	if (perm & V9FS_DMDIR)
+	if (perm & P9_DMDIR)
 		rettype = DT_DIR;
-	if (perm & V9FS_DMSYMLINK)
+	if (perm & P9_DMSYMLINK)
 		rettype = DT_LNK;
 
 	return rettype;
@@ -69,106 +68,36 @@ static inline int dt_type(struct v9fs_stat *mistat)
 
 static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct v9fs_fcall *fcall = NULL;
-	struct inode *inode = filp->f_path.dentry->d_inode;
-	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
-	struct v9fs_fid *file = filp->private_data;
-	unsigned int i, n, s;
-	int fid = -1;
-	int ret = 0;
-	struct v9fs_stat stat;
-	int over = 0;
-
-	dprintk(DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
-
-	fid = file->fid;
-
-	if (file->rdir_fcall && (filp->f_pos != file->rdir_pos)) {
-		kfree(file->rdir_fcall);
-		file->rdir_fcall = NULL;
-	}
-
-	if (file->rdir_fcall) {
-		n = file->rdir_fcall->params.rread.count;
-		i = file->rdir_fpos;
-		while (i < n) {
-			s = v9fs_deserialize_stat(
-				file->rdir_fcall->params.rread.data + i,
-				n - i, &stat, v9ses->extended);
-
-			if (s == 0) {
-				dprintk(DEBUG_ERROR,
-					"error while deserializing stat\n");
-				ret = -EIO;
-				goto FreeStructs;
-			}
-
-			over = filldir(dirent, stat.name.str, stat.name.len,
-				    filp->f_pos, v9fs_qid2ino(&stat.qid),
-				    dt_type(&stat));
-
-			if (over) {
-				file->rdir_fpos = i;
-				file->rdir_pos = filp->f_pos;
-				break;
-			}
-
-			i += s;
-			filp->f_pos += s;
-		}
-
-		if (!over) {
-			kfree(file->rdir_fcall);
-			file->rdir_fcall = NULL;
-		}
-	}
-
-	while (!over) {
-		ret = v9fs_t_read(v9ses, fid, filp->f_pos,
-			v9ses->maxdata-V9FS_IOHDRSZ, &fcall);
-		if (ret < 0) {
-			dprintk(DEBUG_ERROR, "error while reading: %d: %p\n",
-				ret, fcall);
-			goto FreeStructs;
-		} else if (ret == 0)
+	int over;
+	struct p9_fid *fid;
+	struct v9fs_session_info *v9ses;
+	struct inode *inode;
+	struct p9_stat *st;
+
+	P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
+	inode = filp->f_path.dentry->d_inode;
+	v9ses = v9fs_inode2v9ses(inode);
+	fid = filp->private_data;
+	while ((st = p9_client_dirread(fid, filp->f_pos)) != NULL) {
+		if (IS_ERR(st))
+			return PTR_ERR(st);
+
+		over = filldir(dirent, st->name.str, st->name.len, filp->f_pos,
+			v9fs_qid2ino(&st->qid), dt_type(st));
+
+		if (over)
 			break;
 
-		n = ret;
-		i = 0;
-		while (i < n) {
-			s = v9fs_deserialize_stat(fcall->params.rread.data + i,
-				n - i, &stat, v9ses->extended);
-
-			if (s == 0) {
-				dprintk(DEBUG_ERROR,
-					"error while deserializing stat\n");
-				return -EIO;
-			}
-
-			over = filldir(dirent, stat.name.str, stat.name.len,
-				    filp->f_pos, v9fs_qid2ino(&stat.qid),
-				    dt_type(&stat));
-
-			if (over) {
-				file->rdir_fcall = fcall;
-				file->rdir_fpos = i;
-				file->rdir_pos = filp->f_pos;
-				fcall = NULL;
-				break;
-			}
-
-			i += s;
-			filp->f_pos += s;
-		}
-
-		kfree(fcall);
+		filp->f_pos += st->size;
+		kfree(st);
+		st = NULL;
 	}
 
-      FreeStructs:
-	kfree(fcall);
-	return ret;
+	kfree(st);
+	return 0;
 }
 
+
 /**
  * v9fs_dir_release - close a directory
  * @inode: inode of the directory
@@ -178,29 +107,13 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
 
 int v9fs_dir_release(struct inode *inode, struct file *filp)
 {
-	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
-	struct v9fs_fid *fid = filp->private_data;
-	int fidnum = -1;
-
-	dprintk(DEBUG_VFS, "inode: %p filp: %p fid: %d\n", inode, filp,
-		fid->fid);
-	fidnum = fid->fid;
+	struct p9_fid *fid;
 
+	fid = filp->private_data;
+	P9_DPRINTK(P9_DEBUG_VFS,
+			"inode: %p filp: %p fid: %d\n", inode, filp, fid->fid);
 	filemap_write_and_wait(inode->i_mapping);
-
-	if (fidnum >= 0) {
-		dprintk(DEBUG_VFS, "fidopen: %d v9f->fid: %d\n", fid->fidopen,
-			fid->fid);
-
-		if (v9fs_t_clunk(v9ses, fidnum))
-			dprintk(DEBUG_ERROR, "clunk failed\n");
-
-		kfree(fid->rdir_fcall);
-		kfree(fid);
-
-		filp->private_data = NULL;
-	}
-
+	p9_client_clunk(fid);
 	return 0;
 }
 
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 6e7678e4852..2a40c2946d0 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -34,10 +34,10 @@
 #include <linux/list.h>
 #include <asm/uaccess.h>
 #include <linux/idr.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
 
-#include "debug.h"
 #include "v9fs.h"
-#include "9p.h"
 #include "v9fs_vfs.h"
 #include "fid.h"
 
@@ -52,48 +52,40 @@ static const struct file_operations v9fs_cached_file_operations;
 
 int v9fs_file_open(struct inode *inode, struct file *file)
 {
-	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
-	struct v9fs_fid *vfid;
-	struct v9fs_fcall *fcall = NULL;
-	int omode;
 	int err;
+	struct v9fs_session_info *v9ses;
+	struct p9_fid *fid;
+	int omode;
 
-	dprintk(DEBUG_VFS, "inode: %p file: %p \n", inode, file);
-
-	vfid = v9fs_fid_clone(file->f_path.dentry);
-	if (IS_ERR(vfid))
-		return PTR_ERR(vfid);
-
+	P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p \n", inode, file);
+	v9ses = v9fs_inode2v9ses(inode);
 	omode = v9fs_uflags2omode(file->f_flags);
-	err = v9fs_t_open(v9ses, vfid->fid, omode, &fcall);
-	if (err < 0) {
-		PRINT_FCALL_ERROR("open failed", fcall);
-		goto Clunk_Fid;
+	fid = file->private_data;
+	if (!fid) {
+		fid = v9fs_fid_clone(file->f_path.dentry);
+		if (IS_ERR(fid))
+			return PTR_ERR(fid);
+
+		err = p9_client_open(fid, omode);
+		if (err < 0) {
+			p9_client_clunk(fid);
+			return err;
+		}
+		if (omode & P9_OTRUNC) {
+			inode->i_size = 0;
+			inode->i_blocks = 0;
+		}
 	}
 
-	file->private_data = vfid;
-	vfid->fidopen = 1;
-	vfid->fidclunked = 0;
-	vfid->iounit = fcall->params.ropen.iounit;
-	vfid->rdir_pos = 0;
-	vfid->rdir_fcall = NULL;
-	vfid->filp = file;
-	kfree(fcall);
-
-	if((vfid->qid.version) && (v9ses->cache)) {
-		dprintk(DEBUG_VFS, "cached");
+	file->private_data = fid;
+	if ((fid->qid.version) && (v9ses->cache)) {
+		P9_DPRINTK(P9_DEBUG_VFS, "cached");
 		/* enable cached file options */
 		if(file->f_op == &v9fs_file_operations)
 			file->f_op = &v9fs_cached_file_operations;
 	}
 
 	return 0;
-
-Clunk_Fid:
-	v9fs_fid_clunk(v9ses, vfid);
-	kfree(fcall);
-
-	return err;
 }
 
 /**
@@ -110,7 +102,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
 	int res = 0;
 	struct inode *inode = filp->f_path.dentry->d_inode;
 
-	dprintk(DEBUG_VFS, "filp: %p lock: %p\n", filp, fl);
+	P9_DPRINTK(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl);
 
 	/* No mandatory locks */
 	if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
@@ -136,55 +128,16 @@ static ssize_t
 v9fs_file_read(struct file *filp, char __user * data, size_t count,
 	       loff_t * offset)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
-	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
-	struct v9fs_fid *v9f = filp->private_data;
-	struct v9fs_fcall *fcall = NULL;
-	int fid = v9f->fid;
-	int rsize = 0;
-	int result = 0;
-	int total = 0;
-	int n;
-
-	dprintk(DEBUG_VFS, "\n");
-
-	rsize = v9ses->maxdata - V9FS_IOHDRSZ;
-	if (v9f->iounit != 0 && rsize > v9f->iounit)
-		rsize = v9f->iounit;
-
-	do {
-		if (count < rsize)
-			rsize = count;
+	int ret;
+	struct p9_fid *fid;
 
-		result = v9fs_t_read(v9ses, fid, *offset, rsize, &fcall);
+	P9_DPRINTK(P9_DEBUG_VFS, "\n");
+	fid = filp->private_data;
+	ret = p9_client_uread(fid, data, *offset, count);
+	if (ret > 0)
+		*offset += ret;
 
-		if (result < 0) {
-			printk(KERN_ERR "9P2000: v9fs_t_read returned %d\n",
-			       result);
-
-			kfree(fcall);
-			return total;
-		} else
-			*offset += result;
-
-		n = copy_to_user(data, fcall->params.rread.data, result);
-		if (n) {
-			dprintk(DEBUG_ERROR, "Problem copying to user %d\n", n);
-			kfree(fcall);
-			return -EFAULT;
-		}
-
-		count -= result;
-		data += result;
-		total += result;
-
-		kfree(fcall);
-
-		if (result < rsize)
-			break;
-	} while (count);
-
-	return total;
+	return ret;
 }
 
 /**
@@ -200,50 +153,25 @@ static ssize_t
 v9fs_file_write(struct file *filp, const char __user * data,
 		size_t count, loff_t * offset)
 {
+	int ret;
+	struct p9_fid *fid;
 	struct inode *inode = filp->f_path.dentry->d_inode;
-	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
-	struct v9fs_fid *v9fid = filp->private_data;
-	struct v9fs_fcall *fcall;
-	int fid = v9fid->fid;
-	int result = -EIO;
-	int rsize = 0;
-	int total = 0;
-
-	dprintk(DEBUG_VFS, "data %p count %d offset %x\n", data, (int)count,
-		(int)*offset);
-	rsize = v9ses->maxdata - V9FS_IOHDRSZ;
-	if (v9fid->iounit != 0 && rsize > v9fid->iounit)
-		rsize = v9fid->iounit;
-
-	do {
-		if (count < rsize)
-			rsize = count;
 
-		result = v9fs_t_write(v9ses, fid, *offset, rsize, data, &fcall);
-		if (result < 0) {
-			PRINT_FCALL_ERROR("error while writing", fcall);
-			kfree(fcall);
-			return result;
-		} else
-			*offset += result;
+	P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data,
+		(int)count, (int)*offset);
 
-		kfree(fcall);
-		fcall = NULL;
+	fid = filp->private_data;
+	ret = p9_client_uwrite(fid, data, *offset, count);
+	if (ret > 0)
+		*offset += ret;
 
-		if (result != rsize) {
-			eprintk(KERN_ERR,
-				"short write: v9fs_t_write returned %d\n",
-				result);
-			break;
-		}
-
-		count -= result;
-		data += result;
-		total += result;
-	} while (count);
+	if (*offset > inode->i_size) {
+		inode->i_size = *offset;
+		inode->i_blocks = (inode->i_size + 512 - 1) >> 9;
+	}
 
 	invalidate_inode_pages2(inode->i_mapping);
-	return total;
+	return ret;
 }
 
 static const struct file_operations v9fs_cached_file_operations = {
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index c76cd8fa3f6..e5c45eed58a 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -34,10 +34,10 @@
 #include <linux/namei.h>
 #include <linux/idr.h>
 #include <linux/sched.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
 
-#include "debug.h"
 #include "v9fs.h"
-#include "9p.h"
 #include "v9fs_vfs.h"
 #include "fid.h"
 
@@ -58,27 +58,27 @@ static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
 	int res;
 	res = mode & 0777;
 	if (S_ISDIR(mode))
-		res |= V9FS_DMDIR;
+		res |= P9_DMDIR;
 	if (v9ses->extended) {
 		if (S_ISLNK(mode))
-			res |= V9FS_DMSYMLINK;
+			res |= P9_DMSYMLINK;
 		if (v9ses->nodev == 0) {
 			if (S_ISSOCK(mode))
-				res |= V9FS_DMSOCKET;
+				res |= P9_DMSOCKET;
 			if (S_ISFIFO(mode))
-				res |= V9FS_DMNAMEDPIPE;
+				res |= P9_DMNAMEDPIPE;
 			if (S_ISBLK(mode))
-				res |= V9FS_DMDEVICE;
+				res |= P9_DMDEVICE;
 			if (S_ISCHR(mode))
-				res |= V9FS_DMDEVICE;
+				res |= P9_DMDEVICE;
 		}
 
 		if ((mode & S_ISUID) == S_ISUID)
-			res |= V9FS_DMSETUID;
+			res |= P9_DMSETUID;
 		if ((mode & S_ISGID) == S_ISGID)
-			res |= V9FS_DMSETGID;
-		if ((mode & V9FS_DMLINK))
-			res |= V9FS_DMLINK;
+			res |= P9_DMSETGID;
+		if ((mode & P9_DMLINK))
+			res |= P9_DMLINK;
 	}
 
 	return res;
@@ -97,27 +97,27 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
 
 	res = mode & 0777;
 
-	if ((mode & V9FS_DMDIR) == V9FS_DMDIR)
+	if ((mode & P9_DMDIR) == P9_DMDIR)
 		res |= S_IFDIR;
-	else if ((mode & V9FS_DMSYMLINK) && (v9ses->extended))
+	else if ((mode & P9_DMSYMLINK) && (v9ses->extended))
 		res |= S_IFLNK;
-	else if ((mode & V9FS_DMSOCKET) && (v9ses->extended)
+	else if ((mode & P9_DMSOCKET) && (v9ses->extended)
 		 && (v9ses->nodev == 0))
 		res |= S_IFSOCK;
-	else if ((mode & V9FS_DMNAMEDPIPE) && (v9ses->extended)
+	else if ((mode & P9_DMNAMEDPIPE) && (v9ses->extended)
 		 && (v9ses->nodev == 0))
 		res |= S_IFIFO;
-	else if ((mode & V9FS_DMDEVICE) && (v9ses->extended)
+	else if ((mode & P9_DMDEVICE) && (v9ses->extended)
 		 && (v9ses->nodev == 0))
 		res |= S_IFBLK;
 	else
 		res |= S_IFREG;
 
 	if (v9ses->extended) {
-		if ((mode & V9FS_DMSETUID) == V9FS_DMSETUID)
+		if ((mode & P9_DMSETUID) == P9_DMSETUID)
 			res |= S_ISUID;
 
-		if ((mode & V9FS_DMSETGID) == V9FS_DMSETGID)
+		if ((mode & P9_DMSETGID) == P9_DMSETGID)
 			res |= S_ISGID;
 	}
 
@@ -132,26 +132,26 @@ int v9fs_uflags2omode(int uflags)
 	switch (uflags&3) {
 	default:
 	case O_RDONLY:
-		ret = V9FS_OREAD;
+		ret = P9_OREAD;
 		break;
 
 	case O_WRONLY:
-		ret = V9FS_OWRITE;
+		ret = P9_OWRITE;
 		break;
 
 	case O_RDWR:
-		ret = V9FS_ORDWR;
+		ret = P9_ORDWR;
 		break;
 	}
 
 	if (uflags & O_EXCL)
-		ret |= V9FS_OEXCL;
+		ret |= P9_OEXCL;
 
 	if (uflags & O_TRUNC)
-		ret |= V9FS_OTRUNC;
+		ret |= P9_OTRUNC;
 
 	if (uflags & O_APPEND)
-		ret |= V9FS_OAPPEND;
+		ret |= P9_OAPPEND;
 
 	return ret;
 }
@@ -164,7 +164,7 @@ int v9fs_uflags2omode(int uflags)
  */
 
 static void
-v9fs_blank_wstat(struct v9fs_wstat *wstat)
+v9fs_blank_wstat(struct p9_wstat *wstat)
 {
 	wstat->type = ~0;
 	wstat->dev = ~0;
@@ -197,7 +197,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
 	struct inode *inode;
 	struct v9fs_session_info *v9ses = sb->s_fs_info;
 
-	dprintk(DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
+	P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
 
 	inode = new_inode(sb);
 	if (inode) {
@@ -215,7 +215,8 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
 		case S_IFCHR:
 		case S_IFSOCK:
 			if(!v9ses->extended) {
-				dprintk(DEBUG_ERROR, "special files without extended mode\n");
+				P9_DPRINTK(P9_DEBUG_ERROR,
+				      "special files without extended mode\n");
 				return ERR_PTR(-EINVAL);
 			}
 			init_special_inode(inode, inode->i_mode,
@@ -227,7 +228,8 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
 			break;
 		case S_IFLNK:
 			if(!v9ses->extended) {
-				dprintk(DEBUG_ERROR, "extended modes used w/o 9P2000.u\n");
+				P9_DPRINTK(P9_DEBUG_ERROR,
+					"extended modes used w/o 9P2000.u\n");
 				return ERR_PTR(-EINVAL);
 			}
 			inode->i_op = &v9fs_symlink_inode_operations;
@@ -241,71 +243,19 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
 			inode->i_fop = &v9fs_dir_operations;
 			break;
 		default:
-			dprintk(DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n",
+			P9_DPRINTK(P9_DEBUG_ERROR,
+				"BAD mode 0x%x S_IFMT 0x%x\n",
 				mode, mode & S_IFMT);
 			return ERR_PTR(-EINVAL);
 		}
 	} else {
-		eprintk(KERN_WARNING, "Problem allocating inode\n");
+		P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
 		return ERR_PTR(-ENOMEM);
 	}
 	return inode;
 }
 
-static int
-v9fs_create(struct v9fs_session_info *v9ses, u32 pfid, char *name, u32 perm,
-	u8 mode, char *extension, u32 *fidp, struct v9fs_qid *qid, u32 *iounit)
-{
-	int fid;
-	int err;
-	struct v9fs_fcall *fcall;
-
-	fid = v9fs_get_idpool(&v9ses->fidpool);
-	if (fid < 0) {
-		eprintk(KERN_WARNING, "no free fids available\n");
-		return -ENOSPC;
-	}
-
-	err = v9fs_t_walk(v9ses, pfid, fid, NULL, &fcall);
-	if (err < 0) {
-		PRINT_FCALL_ERROR("clone error", fcall);
-		if (fcall && fcall->id == RWALK)
-			goto clunk_fid;
-		else
-			goto put_fid;
-	}
-	kfree(fcall);
-
-	err = v9fs_t_create(v9ses, fid, name, perm, mode, extension, &fcall);
-	if (err < 0) {
-		PRINT_FCALL_ERROR("create fails", fcall);
-		goto clunk_fid;
-	}
-
-	if (iounit)
-		*iounit = fcall->params.rcreate.iounit;
-
-	if (qid)
-		*qid = fcall->params.rcreate.qid;
-
-	if (fidp)
-		*fidp = fid;
-
-	kfree(fcall);
-	return 0;
-
-clunk_fid:
-	v9fs_t_clunk(v9ses, fid);
-	fid = V9FS_NOFID;
-
-put_fid:
-	if (fid != V9FS_NOFID)
-		v9fs_put_idpool(fid, &v9ses->fidpool);
-
-	kfree(fcall);
-	return err;
-}
-
+/*
 static struct v9fs_fid*
 v9fs_clone_walk(struct v9fs_session_info *v9ses, u32 fid, struct dentry *dentry)
 {
@@ -355,23 +305,25 @@ error:
 	kfree(fcall);
 	return ERR_PTR(err);
 }
+*/
 
 static struct inode *
-v9fs_inode_from_fid(struct v9fs_session_info *v9ses, u32 fid,
+v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
 	struct super_block *sb)
 {
 	int err, umode;
 	struct inode *ret;
-	struct v9fs_fcall *fcall;
+	struct p9_stat *st;
 
 	ret = NULL;
-	err = v9fs_t_stat(v9ses, fid, &fcall);
-	if (err) {
-		PRINT_FCALL_ERROR("stat error", fcall);
+	st = p9_client_stat(fid);
+	if (IS_ERR(st)) {
+		err = PTR_ERR(st);
+		st = NULL;
 		goto error;
 	}
 
-	umode = p9mode2unixmode(v9ses, fcall->params.rstat.stat.mode);
+	umode = p9mode2unixmode(v9ses, st->mode);
 	ret = v9fs_get_inode(sb, umode);
 	if (IS_ERR(ret)) {
 		err = PTR_ERR(ret);
@@ -379,12 +331,13 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, u32 fid,
 		goto error;
 	}
 
-	v9fs_stat2inode(&fcall->params.rstat.stat, ret, sb);
-	kfree(fcall);
+	v9fs_stat2inode(st, ret, sb);
+	ret->i_ino = v9fs_qid2ino(&st->qid);
+	kfree(st);
 	return ret;
 
 error:
-	kfree(fcall);
+	kfree(st);
 	if (ret)
 		iput(ret);
 
@@ -401,43 +354,20 @@ error:
 
 static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
 {
-	struct v9fs_fcall *fcall = NULL;
-	struct super_block *sb = NULL;
-	struct v9fs_session_info *v9ses = NULL;
-	struct v9fs_fid *v9fid = NULL;
-	struct inode *file_inode = NULL;
-	int fid = -1;
-	int result = 0;
+	struct inode *file_inode;
+	struct v9fs_session_info *v9ses;
+	struct p9_fid *v9fid;
 
-	dprintk(DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file,
+	P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file,
 		rmdir);
 
 	file_inode = file->d_inode;
-	sb = file_inode->i_sb;
 	v9ses = v9fs_inode2v9ses(file_inode);
 	v9fid = v9fs_fid_clone(file);
 	if(IS_ERR(v9fid))
 		return PTR_ERR(v9fid);
 
-	fid = v9fid->fid;
-	if (fid < 0) {
-		dprintk(DEBUG_ERROR, "inode #%lu, no fid!\n",
-			file_inode->i_ino);
-		return -EBADF;
-	}
-
-	result = v9fs_t_remove(v9ses, fid, &fcall);
-	if (result < 0) {
-		PRINT_FCALL_ERROR("remove fails", fcall);
-		goto Error;
-	}
-
-	v9fs_put_idpool(fid, &v9ses->fidpool);
-	v9fs_fid_destroy(v9fid);
-
-Error:
-	kfree(fcall);
-	return result;
+	return p9_client_remove(v9fid);
 }
 
 static int
@@ -446,61 +376,59 @@ v9fs_open_created(struct inode *inode, struct file *file)
 	return 0;
 }
 
+
 /**
- * v9fs_vfs_create - VFS hook to create files
- * @inode: directory inode that is being deleted
- * @dentry:  dentry that is being deleted
- * @mode: create permissions
- * @nd: path information
+ * v9fs_create - Create a file
+ * @dentry:  dentry that is being created
+ * @perm: create permissions
+ * @mode: open mode
  *
  */
-
-static int
-v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
-		struct nameidata *nd)
+static struct p9_fid *
+v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
+		struct dentry *dentry, char *extension, u32 perm, u8 mode)
 {
 	int err;
-	u32 fid, perm, iounit;
-	int flags;
-	struct v9fs_session_info *v9ses;
-	struct v9fs_fid *dfid, *vfid, *ffid;
+	char *name;
+	struct p9_fid *dfid, *ofid, *fid;
 	struct inode *inode;
-	struct v9fs_qid qid;
-	struct file *filp;
 
-	inode = NULL;
-	vfid = NULL;
-	v9ses = v9fs_inode2v9ses(dir);
+	err = 0;
+	ofid = NULL;
+	fid = NULL;
+	name = (char *) dentry->d_name.name;
 	dfid = v9fs_fid_clone(dentry->d_parent);
 	if(IS_ERR(dfid)) {
 		err = PTR_ERR(dfid);
+		dfid = NULL;
 		goto error;
 	}
 
-	perm = unixmode2p9mode(v9ses, mode);
-	if (nd && nd->flags & LOOKUP_OPEN)
-		flags = nd->intent.open.flags - 1;
-	else
-		flags = O_RDWR;
-
-	err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name,
-		perm, v9fs_uflags2omode(flags), NULL, &fid, &qid, &iounit);
+	/* clone a fid to use for creation */
+	ofid = p9_client_walk(dfid, 0, NULL, 1);
+	if (IS_ERR(ofid)) {
+		err = PTR_ERR(ofid);
+		ofid = NULL;
+		goto error;
+	}
 
-	if (err)
-		goto clunk_dfid;
+	err = p9_client_fcreate(ofid, name, perm, mode, extension);
+	if (err < 0)
+		goto error;
 
-	vfid = v9fs_clone_walk(v9ses, dfid->fid, dentry);
-	v9fs_fid_clunk(v9ses, dfid);
-	if (IS_ERR(vfid)) {
-		err = PTR_ERR(vfid);
-		vfid = NULL;
+	/* now walk from the parent so we can get unopened fid */
+	fid = p9_client_walk(dfid, 1, &name, 0);
+	if (IS_ERR(fid)) {
+		err = PTR_ERR(fid);
+		fid = NULL;
 		goto error;
-	}
+	} else
+		dfid = NULL;
 
-	inode = v9fs_inode_from_fid(v9ses, vfid->fid, dir->i_sb);
+	/* instantiate inode and assign the unopened fid to the dentry */
+	inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
 	if (IS_ERR(inode)) {
 		err = PTR_ERR(inode);
-		inode = NULL;
 		goto error;
 	}
 
@@ -508,35 +436,78 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
 		dentry->d_op = &v9fs_cached_dentry_operations;
 	else
 		dentry->d_op = &v9fs_dentry_operations;
+
 	d_instantiate(dentry, inode);
+	v9fs_fid_add(dentry, fid);
+	return ofid;
 
-	if (nd && nd->flags & LOOKUP_OPEN) {
-		ffid = v9fs_fid_create(v9ses, fid);
-		if (!ffid)
-			return -ENOMEM;
+error:
+	if (dfid)
+		p9_client_clunk(dfid);
+
+	if (ofid)
+		p9_client_clunk(ofid);
+
+	if (fid)
+		p9_client_clunk(fid);
+
+	return ERR_PTR(err);
+}
+
+/**
+ * v9fs_vfs_create - VFS hook to create files
+ * @inode: directory inode that is being created
+ * @dentry:  dentry that is being deleted
+ * @mode: create permissions
+ * @nd: path information
+ *
+ */
 
+static int
+v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
+		struct nameidata *nd)
+{
+	int err;
+	u32 perm;
+	int flags;
+	struct v9fs_session_info *v9ses;
+	struct p9_fid *fid;
+	struct file *filp;
+
+	err = 0;
+	fid = NULL;
+	v9ses = v9fs_inode2v9ses(dir);
+	perm = unixmode2p9mode(v9ses, mode);
+	if (nd && nd->flags & LOOKUP_OPEN)
+		flags = nd->intent.open.flags - 1;
+	else
+		flags = O_RDWR;
+
+	fid = v9fs_create(v9ses, dir, dentry, NULL, perm,
+						v9fs_uflags2omode(flags));
+	if (IS_ERR(fid)) {
+		err = PTR_ERR(fid);
+		fid = NULL;
+		goto error;
+	}
+
+	/* if we are opening a file, assign the open fid to the file */
+	if (nd && nd->flags & LOOKUP_OPEN) {
 		filp = lookup_instantiate_filp(nd, dentry, v9fs_open_created);
 		if (IS_ERR(filp)) {
-			v9fs_fid_destroy(ffid);
-			return PTR_ERR(filp);
+			err = PTR_ERR(filp);
+			goto error;
 		}
 
-		ffid->rdir_pos = 0;
-		ffid->rdir_fcall = NULL;
-		ffid->fidopen = 1;
-		ffid->iounit = iounit;
-		ffid->filp = filp;
-		filp->private_data = ffid;
-	}
+		filp->private_data = fid;
+	} else
+		p9_client_clunk(fid);
 
 	return 0;
 
-clunk_dfid:
-	v9fs_fid_clunk(v9ses, dfid);
-
 error:
-	if (vfid)
-		v9fs_fid_destroy(vfid);
+	if (fid)
+		p9_client_clunk(fid);
 
 	return err;
 }
@@ -552,57 +523,23 @@ error:
 static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
 	int err;
-	u32 fid, perm;
+	u32 perm;
 	struct v9fs_session_info *v9ses;
-	struct v9fs_fid *dfid, *vfid;
-	struct inode *inode;
+	struct p9_fid *fid;
 
-	inode = NULL;
-	vfid = NULL;
+	P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
+	err = 0;
 	v9ses = v9fs_inode2v9ses(dir);
-	dfid = v9fs_fid_clone(dentry->d_parent);
-	if(IS_ERR(dfid)) {
-		err = PTR_ERR(dfid);
-		goto error;
-	}
-
 	perm = unixmode2p9mode(v9ses, mode | S_IFDIR);
-
-	err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name,
-		perm, V9FS_OREAD, NULL, &fid, NULL, NULL);
-
-	if (err) {
-		dprintk(DEBUG_ERROR, "create error %d\n", err);
-		goto clean_up_dfid;
+	fid = v9fs_create(v9ses, dir, dentry, NULL, perm, P9_OREAD);
+	if (IS_ERR(fid)) {
+		err = PTR_ERR(fid);
+		fid = NULL;
 	}
 
-	vfid = v9fs_clone_walk(v9ses, dfid->fid, dentry);
-	if (IS_ERR(vfid)) {
-		err = PTR_ERR(vfid);
-		vfid = NULL;
-		goto clean_up_dfid;
-	}
+	if (fid)
+		p9_client_clunk(fid);
 
-	v9fs_fid_clunk(v9ses, dfid);
-	inode = v9fs_inode_from_fid(v9ses, vfid->fid, dir->i_sb);
-	if (IS_ERR(inode)) {
-		err = PTR_ERR(inode);
-		inode = NULL;
-		v9fs_fid_destroy(vfid);
-		goto error;
-	}
-
-	if(v9ses->cache)
-		dentry->d_op = &v9fs_cached_dentry_operations;
-	else
-		dentry->d_op = &v9fs_dentry_operations;
-	d_instantiate(dentry, inode);
-	return 0;
-
-clean_up_dfid:
-	v9fs_fid_clunk(v9ses, dfid);
-
-error:
 	return err;
 }
 
@@ -619,104 +556,54 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 {
 	struct super_block *sb;
 	struct v9fs_session_info *v9ses;
-	struct v9fs_fid *dirfid;
-	struct v9fs_fid *fid;
+	struct p9_fid *dfid, *fid;
 	struct inode *inode;
-	struct v9fs_fcall *fcall = NULL;
-	int dirfidnum = -1;
-	int newfid = -1;
+	char *name;
 	int result = 0;
 
-	dprintk(DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n",
+	P9_DPRINTK(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n",
 		dir, dentry->d_name.name, dentry, nameidata);
 
 	sb = dir->i_sb;
 	v9ses = v9fs_inode2v9ses(dir);
-	dirfid = v9fs_fid_lookup(dentry->d_parent);
-
-	if(IS_ERR(dirfid))
-		return ERR_PTR(PTR_ERR(dirfid));
-
-	dirfidnum = dirfid->fid;
-
-	newfid = v9fs_get_idpool(&v9ses->fidpool);
-	if (newfid < 0) {
-		eprintk(KERN_WARNING, "newfid fails!\n");
-		result = -ENOSPC;
-		goto Release_Dirfid;
-	}
-
-	result = v9fs_t_walk(v9ses, dirfidnum, newfid,
-		(char *)dentry->d_name.name, &fcall);
-
-	up(&dirfid->lock);
-
-	if (result < 0) {
-		if (fcall && fcall->id == RWALK)
-			v9fs_t_clunk(v9ses, newfid);
-		else
-			v9fs_put_idpool(newfid, &v9ses->fidpool);
-
+	dfid = v9fs_fid_lookup(dentry->d_parent);
+	if (IS_ERR(dfid))
+		return ERR_PTR(PTR_ERR(dfid));
+
+	name = (char *) dentry->d_name.name;
+	fid = p9_client_walk(dfid, 1, &name, 1);
+	if (IS_ERR(fid)) {
+		result = PTR_ERR(fid);
 		if (result == -ENOENT) {
 			d_add(dentry, NULL);
-			dprintk(DEBUG_VFS,
-				"Return negative dentry %p count %d\n",
-				dentry, atomic_read(&dentry->d_count));
-			kfree(fcall);
 			return NULL;
 		}
-		dprintk(DEBUG_ERROR, "walk error:%d\n", result);
-		goto FreeFcall;
-	}
-	kfree(fcall);
-
-	result = v9fs_t_stat(v9ses, newfid, &fcall);
-	if (result < 0) {
-		dprintk(DEBUG_ERROR, "stat error\n");
-		goto FreeFcall;
-	}
-
-	inode = v9fs_get_inode(sb, p9mode2unixmode(v9ses,
-		fcall->params.rstat.stat.mode));
 
-	if (IS_ERR(inode) && (PTR_ERR(inode) == -ENOSPC)) {
-		eprintk(KERN_WARNING, "inode alloc failes, returns %ld\n",
-			PTR_ERR(inode));
-
-		result = -ENOSPC;
-		goto FreeFcall;
+		return ERR_PTR(result);
 	}
 
-	inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat.qid);
-
-	fid = v9fs_fid_create(v9ses, newfid);
-	if (fid == NULL) {
-		dprintk(DEBUG_ERROR, "couldn't insert\n");
-		result = -ENOMEM;
-		goto FreeFcall;
+	inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+	if (IS_ERR(inode)) {
+		result = PTR_ERR(inode);
+		inode = NULL;
+		goto error;
 	}
 
-	result = v9fs_fid_insert(fid, dentry);
+	result = v9fs_fid_add(dentry, fid);
 	if (result < 0)
-		goto FreeFcall;
+		goto error;
 
-	fid->qid = fcall->params.rstat.stat.qid;
-	v9fs_stat2inode(&fcall->params.rstat.stat, inode, inode->i_sb);
 	if((fid->qid.version)&&(v9ses->cache))
 		dentry->d_op = &v9fs_cached_dentry_operations;
 	else
 		dentry->d_op = &v9fs_dentry_operations;
 
 	d_add(dentry, inode);
-	kfree(fcall);
-
 	return NULL;
 
-Release_Dirfid:
-	up(&dirfid->lock);
-
-FreeFcall:
-	kfree(fcall);
+error:
+	if (fid)
+		p9_client_clunk(fid);
 
 	return ERR_PTR(result);
 }
@@ -758,73 +645,54 @@ static int
 v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		struct inode *new_dir, struct dentry *new_dentry)
 {
-	struct inode *old_inode = old_dentry->d_inode;
-	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(old_inode);
-	struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry);
-	struct v9fs_fid *olddirfid;
-	struct v9fs_fid *newdirfid;
-	struct v9fs_wstat wstat;
-	struct v9fs_fcall *fcall = NULL;
-	int fid = -1;
-	int olddirfidnum = -1;
-	int newdirfidnum = -1;
-	int retval = 0;
-
-	dprintk(DEBUG_VFS, "\n");
+	struct inode *old_inode;
+	struct v9fs_session_info *v9ses;
+	struct p9_fid *oldfid;
+	struct p9_fid *olddirfid;
+	struct p9_fid *newdirfid;
+	struct p9_wstat wstat;
+	int retval;
 
+	P9_DPRINTK(P9_DEBUG_VFS, "\n");
+	retval = 0;
+	old_inode = old_dentry->d_inode;
+	v9ses = v9fs_inode2v9ses(old_inode);
+	oldfid = v9fs_fid_lookup(old_dentry);
 	if(IS_ERR(oldfid))
 		return PTR_ERR(oldfid);
 
 	olddirfid = v9fs_fid_clone(old_dentry->d_parent);
 	if(IS_ERR(olddirfid)) {
 		retval = PTR_ERR(olddirfid);
-		goto Release_lock;
+		goto done;
 	}
 
 	newdirfid = v9fs_fid_clone(new_dentry->d_parent);
 	if(IS_ERR(newdirfid)) {
 		retval = PTR_ERR(newdirfid);
-		goto Clunk_olddir;
+		goto clunk_olddir;
 	}
 
 	/* 9P can only handle file rename in the same directory */
 	if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) {
-		dprintk(DEBUG_ERROR, "old dir and new dir are different\n");
+		P9_DPRINTK(P9_DEBUG_ERROR,
+				"old dir and new dir are different\n");
 		retval = -EXDEV;
-		goto Clunk_newdir;
-	}
-
-	fid = oldfid->fid;
-	olddirfidnum = olddirfid->fid;
-	newdirfidnum = newdirfid->fid;
-
-	if (fid < 0) {
-		dprintk(DEBUG_ERROR, "no fid for old file #%lu\n",
-			old_inode->i_ino);
-		retval = -EBADF;
-		goto Clunk_newdir;
+		goto clunk_newdir;
 	}
 
 	v9fs_blank_wstat(&wstat);
 	wstat.muid = v9ses->name;
 	wstat.name = (char *) new_dentry->d_name.name;
+	retval = p9_client_wstat(oldfid, &wstat);
 
-	retval = v9fs_t_wstat(v9ses, fid, &wstat, &fcall);
+clunk_newdir:
+	p9_client_clunk(olddirfid);
 
-	if (retval < 0)
-		PRINT_FCALL_ERROR("wstat error", fcall);
-
-	kfree(fcall);
-
-Clunk_newdir:
-	v9fs_fid_clunk(v9ses, newdirfid);
-
-Clunk_olddir:
-	v9fs_fid_clunk(v9ses, olddirfid);
-
-Release_lock:
-	up(&oldfid->lock);
+clunk_olddir:
+	p9_client_clunk(newdirfid);
 
+done:
 	return retval;
 }
 
@@ -840,28 +708,30 @@ static int
 v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 		 struct kstat *stat)
 {
-	struct v9fs_fcall *fcall = NULL;
-	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
-	struct v9fs_fid *fid = v9fs_fid_clone(dentry);
-	int err = -EPERM;
+	int err;
+	struct v9fs_session_info *v9ses;
+	struct p9_fid *fid;
+	struct p9_stat *st;
 
-	dprintk(DEBUG_VFS, "dentry: %p\n", dentry);
-	if(IS_ERR(fid))
+	P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
+	err = -EPERM;
+	v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	if (v9ses->cache == CACHE_LOOSE)
+		return simple_getattr(mnt, dentry, stat);
+
+	fid = v9fs_fid_lookup(dentry);
+	if (IS_ERR(fid))
 		return PTR_ERR(fid);
 
-	err = v9fs_t_stat(v9ses, fid->fid, &fcall);
+	st = p9_client_stat(fid);
+	if (IS_ERR(st))
+		return PTR_ERR(st);
 
-	if (err < 0)
-		dprintk(DEBUG_ERROR, "stat error\n");
-	else {
-		v9fs_stat2inode(&fcall->params.rstat.stat, dentry->d_inode,
-				  dentry->d_inode->i_sb);
+	v9fs_stat2inode(st, dentry->d_inode, dentry->d_inode->i_sb);
 		generic_fillattr(dentry->d_inode, stat);
-	}
 
-	kfree(fcall);
-	v9fs_fid_clunk(v9ses, fid);
-	return err;
+	kfree(st);
+	return 0;
 }
 
 /**
@@ -873,13 +743,15 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 
 static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
 {
-	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
-	struct v9fs_fid *fid = v9fs_fid_clone(dentry);
-	struct v9fs_fcall *fcall = NULL;
-	struct v9fs_wstat wstat;
-	int res = -EPERM;
+	int retval;
+	struct v9fs_session_info *v9ses;
+	struct p9_fid *fid;
+	struct p9_wstat wstat;
 
-	dprintk(DEBUG_VFS, "\n");
+	P9_DPRINTK(P9_DEBUG_VFS, "\n");
+	retval = -EPERM;
+	v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	fid = v9fs_fid_lookup(dentry);
 	if(IS_ERR(fid))
 		return PTR_ERR(fid);
 
@@ -904,17 +776,11 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
 			wstat.n_gid = iattr->ia_gid;
 	}
 
-	res = v9fs_t_wstat(v9ses, fid->fid, &wstat, &fcall);
+	retval = p9_client_wstat(fid, &wstat);
+	if (retval >= 0)
+		retval = inode_setattr(dentry->d_inode, iattr);
 
-	if (res < 0)
-		PRINT_FCALL_ERROR("wstat error", fcall);
-
-	kfree(fcall);
-	if (res >= 0)
-		res = inode_setattr(dentry->d_inode, iattr);
-
-	v9fs_fid_clunk(v9ses, fid);
-	return res;
+	return retval;
 }
 
 /**
@@ -926,7 +792,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
  */
 
 void
-v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode,
+v9fs_stat2inode(struct p9_stat *stat, struct inode *inode,
 	struct super_block *sb)
 {
 	int n;
@@ -967,8 +833,9 @@ v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode,
 		case 'b':
 			break;
 		default:
-			dprintk(DEBUG_ERROR, "Unknown special type %c (%.*s)\n",
-				type, stat->extension.len, stat->extension.str);
+			P9_DPRINTK(P9_DEBUG_ERROR,
+				"Unknown special type %c (%.*s)\n", type,
+				stat->extension.len, stat->extension.str);
 		};
 		inode->i_rdev = MKDEV(major, minor);
 	} else
@@ -976,8 +843,8 @@ v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode,
 
 	inode->i_size = stat->length;
 
-	inode->i_blocks =
-	    (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
+	/* not real number of blocks, but 512 byte ones ... */
+	inode->i_blocks = (inode->i_size + 512 - 1) >> 9;
 }
 
 /**
@@ -987,7 +854,7 @@ v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode,
  * BUG: potential for inode number collisions?
  */
 
-ino_t v9fs_qid2ino(struct v9fs_qid *qid)
+ino_t v9fs_qid2ino(struct p9_qid *qid)
 {
 	u64 path = qid->path + 2;
 	ino_t i = 0;
@@ -1010,56 +877,46 @@ ino_t v9fs_qid2ino(struct v9fs_qid *qid)
 
 static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
 {
-	int retval = -EPERM;
+	int retval;
 
-	struct v9fs_fcall *fcall = NULL;
-	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
-	struct v9fs_fid *fid = v9fs_fid_clone(dentry);
+	struct v9fs_session_info *v9ses;
+	struct p9_fid *fid;
+	struct p9_stat *st;
 
+	P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name);
+	retval = -EPERM;
+	v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	fid = v9fs_fid_lookup(dentry);
 	if(IS_ERR(fid))
 		return PTR_ERR(fid);
 
-	if (!v9ses->extended) {
-		retval = -EBADF;
-		dprintk(DEBUG_ERROR, "not extended\n");
-		goto ClunkFid;
-	}
-
-	dprintk(DEBUG_VFS, " %s\n", dentry->d_name.name);
-	retval = v9fs_t_stat(v9ses, fid->fid, &fcall);
-
-	if (retval < 0) {
-		dprintk(DEBUG_ERROR, "stat error\n");
-		goto FreeFcall;
-	}
+	if (!v9ses->extended)
+		return -EBADF;
 
-	if (!fcall) {
-		retval = -EIO;
-		goto ClunkFid;
-	}
+	st = p9_client_stat(fid);
+	if (IS_ERR(st))
+		return PTR_ERR(st);
 
-	if (!(fcall->params.rstat.stat.mode & V9FS_DMSYMLINK)) {
+	if (!(st->mode & P9_DMSYMLINK)) {
 		retval = -EINVAL;
-		goto FreeFcall;
+		goto done;
 	}
 
 	/* copy extension buffer into buffer */
-	if (fcall->params.rstat.stat.extension.len < buflen)
-		buflen = fcall->params.rstat.stat.extension.len + 1;
+	if (st->extension.len < buflen)
+		buflen = st->extension.len + 1;
 
-	memmove(buffer, fcall->params.rstat.stat.extension.str, buflen - 1);
+	memmove(buffer, st->extension.str, buflen - 1);
 	buffer[buflen-1] = 0;
 
-	dprintk(DEBUG_ERROR, "%s -> %.*s (%s)\n", dentry->d_name.name, fcall->params.rstat.stat.extension.len,
-		fcall->params.rstat.stat.extension.str, buffer);
-	retval = buflen;
+	P9_DPRINTK(P9_DEBUG_VFS,
+		"%s -> %.*s (%s)\n", dentry->d_name.name, st->extension.len,
+		st->extension.str, buffer);
 
-FreeFcall:
-	kfree(fcall);
-
-ClunkFid:
-	v9fs_fid_clunk(v9ses, fid);
+	retval = buflen;
 
+done:
+	kfree(st);
 	return retval;
 }
 
@@ -1084,14 +941,14 @@ static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer,
 	if (buflen > PATH_MAX)
 		buflen = PATH_MAX;
 
-	dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
+	P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
 
 	retval = v9fs_readlink(dentry, link, buflen);
 
 	if (retval > 0) {
 		if ((ret = copy_to_user(buffer, link, retval)) != 0) {
-			dprintk(DEBUG_ERROR, "problem copying to user: %d\n",
-				ret);
+			P9_DPRINTK(P9_DEBUG_ERROR,
+					"problem copying to user: %d\n", ret);
 			retval = ret;
 		}
 	}
@@ -1112,7 +969,7 @@ static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 	int len = 0;
 	char *link = __getname();
 
-	dprintk(DEBUG_VFS, "%s n", dentry->d_name.name);
+	P9_DPRINTK(P9_DEBUG_VFS, "%s n", dentry->d_name.name);
 
 	if (!link)
 		link = ERR_PTR(-ENOMEM);
@@ -1141,7 +998,7 @@ static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void
 {
 	char *s = nd_get_link(nd);
 
-	dprintk(DEBUG_VFS, " %s %s\n", dentry->d_name.name, s);
+	P9_DPRINTK(P9_DEBUG_VFS, " %s %s\n", dentry->d_name.name, s);
 	if (!IS_ERR(s))
 		__putname(s);
 }
@@ -1149,66 +1006,24 @@ static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void
 static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
 	int mode, const char *extension)
 {
-	int err;
-	u32 fid, perm;
+	u32 perm;
 	struct v9fs_session_info *v9ses;
-	struct v9fs_fid *dfid, *vfid = NULL;
-	struct inode *inode = NULL;
+	struct p9_fid *fid;
 
 	v9ses = v9fs_inode2v9ses(dir);
 	if (!v9ses->extended) {
-		dprintk(DEBUG_ERROR, "not extended\n");
+		P9_DPRINTK(P9_DEBUG_ERROR, "not extended\n");
 		return -EPERM;
 	}
 
-	dfid = v9fs_fid_clone(dentry->d_parent);
-	if(IS_ERR(dfid)) {
-		err = PTR_ERR(dfid);
-		goto error;
-	}
-
 	perm = unixmode2p9mode(v9ses, mode);
+	fid = v9fs_create(v9ses, dir, dentry, (char *) extension, perm,
+								P9_OREAD);
+	if (IS_ERR(fid))
+		return PTR_ERR(fid);
 
-	err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name,
-		perm, V9FS_OREAD, (char *) extension, &fid, NULL, NULL);
-
-	if (err)
-		goto clunk_dfid;
-
-	err = v9fs_t_clunk(v9ses, fid);
-	if (err)
-		goto clunk_dfid;
-
-	vfid = v9fs_clone_walk(v9ses, dfid->fid, dentry);
-	if (IS_ERR(vfid)) {
-		err = PTR_ERR(vfid);
-		vfid = NULL;
-		goto clunk_dfid;
-	}
-
-	inode = v9fs_inode_from_fid(v9ses, vfid->fid, dir->i_sb);
-	if (IS_ERR(inode)) {
-		err = PTR_ERR(inode);
-		inode = NULL;
-		goto free_vfid;
-	}
-
-	if(v9ses->cache)
-		dentry->d_op = &v9fs_cached_dentry_operations;
-	else
-		dentry->d_op = &v9fs_dentry_operations;
-	d_instantiate(dentry, inode);
+	p9_client_clunk(fid);
 	return 0;
-
-free_vfid:
-	v9fs_fid_destroy(vfid);
-
-clunk_dfid:
-	v9fs_fid_clunk(v9ses, dfid);
-
-error:
-	return err;
-
 }
 
 /**
@@ -1224,8 +1039,8 @@ error:
 static int
 v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
 {
-	dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
-		symname);
+	P9_DPRINTK(P9_DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino,
+					dentry->d_name.name, symname);
 
 	return v9fs_vfs_mkspecial(dir, dentry, S_IFLNK, symname);
 }
@@ -1247,11 +1062,11 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
 	      struct dentry *dentry)
 {
 	int retval;
-	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
-	struct v9fs_fid *oldfid;
+	struct p9_fid *oldfid;
 	char *name;
 
-	dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
+	P9_DPRINTK(P9_DEBUG_VFS,
+		" %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
 		old_dentry->d_name.name);
 
 	oldfid = v9fs_fid_clone(old_dentry);
@@ -1265,11 +1080,11 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
 	}
 
 	sprintf(name, "%d\n", oldfid->fid);
-	retval = v9fs_vfs_mkspecial(dir, dentry, V9FS_DMLINK, name);
+	retval = v9fs_vfs_mkspecial(dir, dentry, P9_DMLINK, name);
 	__putname(name);
 
 clunk_fid:
-	v9fs_fid_clunk(v9ses, oldfid);
+	p9_client_clunk(oldfid);
 	return retval;
 }
 
@@ -1288,7 +1103,8 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 	int retval;
 	char *name;
 
-	dprintk(DEBUG_VFS, " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
+	P9_DPRINTK(P9_DEBUG_VFS,
+		" %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
 		dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev));
 
 	if (!new_valid_dev(rdev))
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 7bdf8b32684..ba904371218 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -37,10 +37,10 @@
 #include <linux/mount.h>
 #include <linux/idr.h>
 #include <linux/sched.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
 
-#include "debug.h"
 #include "v9fs.h"
-#include "9p.h"
 #include "v9fs_vfs.h"
 #include "fid.h"
 
@@ -107,41 +107,48 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 		       struct vfsmount *mnt)
 {
 	struct super_block *sb = NULL;
-	struct v9fs_fcall *fcall = NULL;
 	struct inode *inode = NULL;
 	struct dentry *root = NULL;
 	struct v9fs_session_info *v9ses = NULL;
-	struct v9fs_fid *root_fid = NULL;
+	struct p9_stat *st = NULL;
 	int mode = S_IRWXUGO | S_ISVTX;
 	uid_t uid = current->fsuid;
 	gid_t gid = current->fsgid;
-	int stat_result = 0;
-	int newfid = 0;
+	struct p9_fid *fid;
 	int retval = 0;
 
-	dprintk(DEBUG_VFS, " \n");
+	P9_DPRINTK(P9_DEBUG_VFS, " \n");
 
 	v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL);
 	if (!v9ses)
 		return -ENOMEM;
 
-	if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) {
-		dprintk(DEBUG_ERROR, "problem initiating session\n");
-		retval = newfid;
-		goto out_free_session;
+	fid = v9fs_session_init(v9ses, dev_name, data);
+	if (IS_ERR(fid)) {
+		retval = PTR_ERR(fid);
+		fid = NULL;
+		kfree(v9ses);
+		v9ses = NULL;
+		goto error;
+	}
+
+	st = p9_client_stat(fid);
+	if (IS_ERR(st)) {
+		retval = PTR_ERR(st);
+		goto error;
 	}
 
 	sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
 	if (IS_ERR(sb)) {
 		retval = PTR_ERR(sb);
-		goto out_close_session;
+		goto error;
 	}
 	v9fs_fill_super(sb, v9ses, flags);
 
 	inode = v9fs_get_inode(sb, S_IFDIR | mode);
 	if (IS_ERR(inode)) {
 		retval = PTR_ERR(inode);
-		goto put_back_sb;
+		goto error;
 	}
 
 	inode->i_uid = uid;
@@ -150,54 +157,30 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 	root = d_alloc_root(inode);
 	if (!root) {
 		retval = -ENOMEM;
-		goto put_back_sb;
+		goto error;
 	}
 
 	sb->s_root = root;
+	root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
+	v9fs_stat2inode(st, root->d_inode, sb);
+	v9fs_fid_add(root, fid);
 
-	stat_result = v9fs_t_stat(v9ses, newfid, &fcall);
-	if (stat_result < 0) {
-		dprintk(DEBUG_ERROR, "stat error\n");
-		v9fs_t_clunk(v9ses, newfid);
-	} else {
-		/* Setup the Root Inode */
-		root_fid = v9fs_fid_create(v9ses, newfid);
-		if (root_fid == NULL) {
-			retval = -ENOMEM;
-			goto put_back_sb;
-		}
-
-		retval = v9fs_fid_insert(root_fid, root);
-		if (retval < 0) {
-			kfree(fcall);
-			goto put_back_sb;
-		}
-
-		root_fid->qid = fcall->params.rstat.stat.qid;
-		root->d_inode->i_ino =
-		    v9fs_qid2ino(&fcall->params.rstat.stat.qid);
-		v9fs_stat2inode(&fcall->params.rstat.stat, root->d_inode, sb);
-	}
+	return simple_set_mnt(mnt, sb);
 
-	kfree(fcall);
+error:
+	if (fid)
+		p9_client_clunk(fid);
 
-	if (stat_result < 0) {
-		retval = stat_result;
-		goto put_back_sb;
+	if (v9ses) {
+		v9fs_session_close(v9ses);
+		kfree(v9ses);
 	}
 
-	return simple_set_mnt(mnt, sb);
-
-out_close_session:
-	v9fs_session_close(v9ses);
-out_free_session:
-	kfree(v9ses);
-	return retval;
+	if (sb) {
+		up_write(&sb->s_umount);
+		deactivate_super(sb);
+	}
 
-put_back_sb:
-	/* deactivate_super calls v9fs_kill_super which will frees the rest */
-	up_write(&sb->s_umount);
-	deactivate_super(sb);
 	return retval;
 }
 
@@ -211,7 +194,7 @@ static void v9fs_kill_super(struct super_block *s)
 {
 	struct v9fs_session_info *v9ses = s->s_fs_info;
 
-	dprintk(DEBUG_VFS, " %p\n", s);
+	P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s);
 
 	v9fs_dentry_release(s->s_root);	/* clunk root */
 
@@ -219,7 +202,7 @@ static void v9fs_kill_super(struct super_block *s)
 
 	v9fs_session_close(v9ses);
 	kfree(v9ses);
-	dprintk(DEBUG_VFS, "exiting kill_super\n");
+	P9_DPRINTK(P9_DEBUG_VFS, "exiting kill_super\n");
 }
 
 /**
@@ -234,7 +217,7 @@ static int v9fs_show_options(struct seq_file *m, struct vfsmount *mnt)
 	struct v9fs_session_info *v9ses = mnt->mnt_sb->s_fs_info;
 
 	if (v9ses->debug != 0)
-		seq_printf(m, ",debug=%u", v9ses->debug);
+		seq_printf(m, ",debug=%x", v9ses->debug);
 	if (v9ses->port != V9FS_PORT)
 		seq_printf(m, ",port=%u", v9ses->port);
 	if (v9ses->maxdata != 9000)
diff --git a/fs/Kconfig b/fs/Kconfig
index 7f211613ae1..ee11f8d9408 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -2048,7 +2048,7 @@ config AFS_DEBUG
 
 config 9P_FS
 	tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)"
-	depends on INET && EXPERIMENTAL
+	depends on INET && NET_9P && EXPERIMENTAL
 	help
 	  If you say Y here, you will get experimental support for
 	  Plan 9 resource sharing via the 9P2000 protocol.
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index f544a285592..36e381c6a99 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -33,7 +33,7 @@ const struct file_operations adfs_file_operations = {
 	.fsync		= file_fsync,
 	.write		= do_sync_write,
 	.aio_write	= generic_file_aio_write,
-	.sendfile	= generic_file_sendfile,
+	.splice_read	= generic_file_splice_read,
 };
 
 const struct inode_operations adfs_file_inode_operations = {
diff --git a/fs/affs/file.c b/fs/affs/file.c
index c8796906f58..c314a35f091 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -35,7 +35,7 @@ const struct file_operations affs_file_operations = {
 	.open		= affs_file_open,
 	.release	= affs_file_release,
 	.fsync		= file_fsync,
-	.sendfile	= generic_file_sendfile,
+	.splice_read	= generic_file_splice_read,
 };
 
 const struct inode_operations affs_file_inode_operations = {
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index 73ce561f3ea..a66671082cf 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -8,6 +8,7 @@ kafs-objs := \
 	cmservice.o \
 	dir.o \
 	file.o \
+	flock.o \
 	fsclient.o \
 	inode.o \
 	main.o \
diff --git a/fs/afs/afs.h b/fs/afs/afs.h
index 24525794814..c548aa346f0 100644
--- a/fs/afs/afs.h
+++ b/fs/afs/afs.h
@@ -37,6 +37,13 @@ typedef enum {
 	AFS_FTYPE_SYMLINK	= 3,
 } afs_file_type_t;
 
+typedef enum {
+	AFS_LOCK_READ		= 0,	/* read lock request */
+	AFS_LOCK_WRITE		= 1,	/* write lock request */
+} afs_lock_type_t;
+
+#define AFS_LOCKWAIT		(5 * 60) /* time until a lock times out (seconds) */
+
 /*
  * AFS file identifier
  */
@@ -120,6 +127,7 @@ struct afs_file_status {
 	struct afs_fid		parent;		/* parent dir ID for non-dirs only */
 	time_t			mtime_client;	/* last time client changed data */
 	time_t			mtime_server;	/* last time server changed data */
+	s32			lock_count;	/* file lock count (0=UNLK -1=WRLCK +ve=#RDLCK */
 };
 
 /*
diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h
index a18c374ebe0..eb647323d8f 100644
--- a/fs/afs/afs_fs.h
+++ b/fs/afs/afs_fs.h
@@ -31,6 +31,9 @@ enum AFS_FS_Operations {
 	FSGETVOLUMEINFO		= 148,	/* AFS Get information about a volume */
 	FSGETVOLUMESTATUS	= 149,	/* AFS Get volume status information */
 	FSGETROOTVOLUME		= 151,	/* AFS Get root volume name */
+	FSSETLOCK		= 156,	/* AFS Request a file lock */
+	FSEXTENDLOCK		= 157,	/* AFS Extend a file lock */
+	FSRELEASELOCK		= 158,	/* AFS Release a file lock */
 	FSLOOKUP		= 161,	/* AFS lookup file in directory */
 	FSFETCHDATA64		= 65537, /* AFS Fetch file data */
 	FSSTOREDATA64		= 65538, /* AFS Store file data */
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index bacf518c6fa..b8243945818 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -125,6 +125,9 @@ static void afs_break_callback(struct afs_server *server,
 		spin_unlock(&server->cb_lock);
 
 		queue_work(afs_callback_update_worker, &vnode->cb_broken_work);
+		if (list_empty(&vnode->granted_locks) &&
+		    !list_empty(&vnode->pending_locks))
+			afs_lock_may_be_available(vnode);
 		spin_unlock(&vnode->lock);
 	}
 }
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 546c59522eb..33fe39ad4e0 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -44,6 +44,7 @@ const struct file_operations afs_dir_file_operations = {
 	.open		= afs_dir_open,
 	.release	= afs_release,
 	.readdir	= afs_readdir,
+	.lock		= afs_lock,
 };
 
 const struct inode_operations afs_dir_inode_operations = {
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 9c0e721d9fc..525f7c56e06 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -32,8 +32,10 @@ const struct file_operations afs_file_operations = {
 	.aio_read	= generic_file_aio_read,
 	.aio_write	= afs_file_write,
 	.mmap		= generic_file_readonly_mmap,
-	.sendfile	= generic_file_sendfile,
+	.splice_read	= generic_file_splice_read,
 	.fsync		= afs_fsync,
+	.lock		= afs_lock,
+	.flock		= afs_flock,
 };
 
 const struct inode_operations afs_file_inode_operations = {
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
new file mode 100644
index 00000000000..8f07f8d1bfa
--- /dev/null
+++ b/fs/afs/flock.c
@@ -0,0 +1,558 @@
+/* AFS file locking support
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/smp_lock.h>
+#include "internal.h"
+
+#define AFS_LOCK_GRANTED	0
+#define AFS_LOCK_PENDING	1
+
+static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl);
+static void afs_fl_release_private(struct file_lock *fl);
+
+static struct workqueue_struct *afs_lock_manager;
+
+static struct file_lock_operations afs_lock_ops = {
+	.fl_copy_lock		= afs_fl_copy_lock,
+	.fl_release_private	= afs_fl_release_private,
+};
+
+/*
+ * initialise the lock manager thread if it isn't already running
+ */
+static int afs_init_lock_manager(void)
+{
+	if (!afs_lock_manager) {
+		afs_lock_manager = create_singlethread_workqueue("kafs_lockd");
+		if (!afs_lock_manager)
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+/*
+ * destroy the lock manager thread if it's running
+ */
+void __exit afs_kill_lock_manager(void)
+{
+	if (afs_lock_manager)
+		destroy_workqueue(afs_lock_manager);
+}
+
+/*
+ * if the callback is broken on this vnode, then the lock may now be available
+ */
+void afs_lock_may_be_available(struct afs_vnode *vnode)
+{
+	_enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+
+	queue_delayed_work(afs_lock_manager, &vnode->lock_work, 0);
+}
+
+/*
+ * the lock will time out in 5 minutes unless we extend it, so schedule
+ * extension in a bit less than that time
+ */
+static void afs_schedule_lock_extension(struct afs_vnode *vnode)
+{
+	queue_delayed_work(afs_lock_manager, &vnode->lock_work,
+			   AFS_LOCKWAIT * HZ / 2);
+}
+
+/*
+ * do work for a lock, including:
+ * - probing for a lock we're waiting on but didn't get immediately
+ * - extending a lock that's close to timing out
+ */
+void afs_lock_work(struct work_struct *work)
+{
+	struct afs_vnode *vnode =
+		container_of(work, struct afs_vnode, lock_work.work);
+	struct file_lock *fl;
+	afs_lock_type_t type;
+	struct key *key;
+	int ret;
+
+	_enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+
+	spin_lock(&vnode->lock);
+
+	if (test_bit(AFS_VNODE_UNLOCKING, &vnode->flags)) {
+		_debug("unlock");
+		spin_unlock(&vnode->lock);
+
+		/* attempt to release the server lock; if it fails, we just
+		 * wait 5 minutes and it'll time out anyway */
+		ret = afs_vnode_release_lock(vnode, vnode->unlock_key);
+		if (ret < 0)
+			printk(KERN_WARNING "AFS:"
+			       " Failed to release lock on {%x:%x} error %d\n",
+			       vnode->fid.vid, vnode->fid.vnode, ret);
+
+		spin_lock(&vnode->lock);
+		key_put(vnode->unlock_key);
+		vnode->unlock_key = NULL;
+		clear_bit(AFS_VNODE_UNLOCKING, &vnode->flags);
+	}
+
+	/* if we've got a lock, then it must be time to extend that lock as AFS
+	 * locks time out after 5 minutes */
+	if (!list_empty(&vnode->granted_locks)) {
+		_debug("extend");
+
+		if (test_and_set_bit(AFS_VNODE_LOCKING, &vnode->flags))
+			BUG();
+		fl = list_entry(vnode->granted_locks.next,
+				struct file_lock, fl_u.afs.link);
+		key = key_get(fl->fl_file->private_data);
+		spin_unlock(&vnode->lock);
+
+		ret = afs_vnode_extend_lock(vnode, key);
+		clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
+		key_put(key);
+		switch (ret) {
+		case 0:
+			afs_schedule_lock_extension(vnode);
+			break;
+		default:
+			/* ummm... we failed to extend the lock - retry
+			 * extension shortly */
+			printk(KERN_WARNING "AFS:"
+			       " Failed to extend lock on {%x:%x} error %d\n",
+			       vnode->fid.vid, vnode->fid.vnode, ret);
+			queue_delayed_work(afs_lock_manager, &vnode->lock_work,
+					   HZ * 10);
+			break;
+		}
+		_leave(" [extend]");
+		return;
+	}
+
+	/* if we don't have a granted lock, then we must've been called back by
+	 * the server, and so if might be possible to get a lock we're
+	 * currently waiting for */
+	if (!list_empty(&vnode->pending_locks)) {
+		_debug("get");
+
+		if (test_and_set_bit(AFS_VNODE_LOCKING, &vnode->flags))
+			BUG();
+		fl = list_entry(vnode->pending_locks.next,
+				struct file_lock, fl_u.afs.link);
+		key = key_get(fl->fl_file->private_data);
+		type = (fl->fl_type == F_RDLCK) ?
+			AFS_LOCK_READ : AFS_LOCK_WRITE;
+		spin_unlock(&vnode->lock);
+
+		ret = afs_vnode_set_lock(vnode, key, type);
+		clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
+		switch (ret) {
+		case -EWOULDBLOCK:
+			_debug("blocked");
+			break;
+		case 0:
+			_debug("acquired");
+			if (type == AFS_LOCK_READ)
+				set_bit(AFS_VNODE_READLOCKED, &vnode->flags);
+			else
+				set_bit(AFS_VNODE_WRITELOCKED, &vnode->flags);
+			ret = AFS_LOCK_GRANTED;
+		default:
+			spin_lock(&vnode->lock);
+			/* the pending lock may have been withdrawn due to a
+			 * signal */
+			if (list_entry(vnode->pending_locks.next,
+				       struct file_lock, fl_u.afs.link) == fl) {
+				fl->fl_u.afs.state = ret;
+				if (ret == AFS_LOCK_GRANTED)
+					list_move_tail(&fl->fl_u.afs.link,
+						       &vnode->granted_locks);
+				else
+					list_del_init(&fl->fl_u.afs.link);
+				wake_up(&fl->fl_wait);
+				spin_unlock(&vnode->lock);
+			} else {
+				_debug("withdrawn");
+				clear_bit(AFS_VNODE_READLOCKED, &vnode->flags);
+				clear_bit(AFS_VNODE_WRITELOCKED, &vnode->flags);
+				spin_unlock(&vnode->lock);
+				afs_vnode_release_lock(vnode, key);
+				if (!list_empty(&vnode->pending_locks))
+					afs_lock_may_be_available(vnode);
+			}
+			break;
+		}
+		key_put(key);
+		_leave(" [pend]");
+		return;
+	}
+
+	/* looks like the lock request was withdrawn on a signal */
+	spin_unlock(&vnode->lock);
+	_leave(" [no locks]");
+}
+
+/*
+ * pass responsibility for the unlocking of a vnode on the server to the
+ * manager thread, lest a pending signal in the calling thread interrupt
+ * AF_RXRPC
+ * - the caller must hold the vnode lock
+ */
+static void afs_defer_unlock(struct afs_vnode *vnode, struct key *key)
+{
+	cancel_delayed_work(&vnode->lock_work);
+	if (!test_and_clear_bit(AFS_VNODE_READLOCKED, &vnode->flags) &&
+	    !test_and_clear_bit(AFS_VNODE_WRITELOCKED, &vnode->flags))
+		BUG();
+	if (test_and_set_bit(AFS_VNODE_UNLOCKING, &vnode->flags))
+		BUG();
+	vnode->unlock_key = key_get(key);
+	afs_lock_may_be_available(vnode);
+}
+
+/*
+ * request a lock on a file on the server
+ */
+static int afs_do_setlk(struct file *file, struct file_lock *fl)
+{
+	struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host);
+	afs_lock_type_t type;
+	struct key *key = file->private_data;
+	int ret;
+
+	_enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
+
+	/* only whole-file locks are supported */
+	if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX)
+		return -EINVAL;
+
+	ret = afs_init_lock_manager();
+	if (ret < 0)
+		return ret;
+
+	fl->fl_ops = &afs_lock_ops;
+	INIT_LIST_HEAD(&fl->fl_u.afs.link);
+	fl->fl_u.afs.state = AFS_LOCK_PENDING;
+
+	type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
+
+	lock_kernel();
+
+	/* make sure we've got a callback on this file and that our view of the
+	 * data version is up to date */
+	ret = afs_vnode_fetch_status(vnode, NULL, key);
+	if (ret < 0)
+		goto error;
+
+	if (vnode->status.lock_count != 0 && !(fl->fl_flags & FL_SLEEP)) {
+		ret = -EAGAIN;
+		goto error;
+	}
+
+	spin_lock(&vnode->lock);
+
+	if (list_empty(&vnode->pending_locks)) {
+		/* if there's no-one else with a lock on this vnode, then we
+		 * need to ask the server for a lock */
+		if (list_empty(&vnode->granted_locks)) {
+			_debug("not locked");
+			ASSERTCMP(vnode->flags &
+				  ((1 << AFS_VNODE_LOCKING) |
+				   (1 << AFS_VNODE_READLOCKED) |
+				   (1 << AFS_VNODE_WRITELOCKED)), ==, 0);
+			list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
+			set_bit(AFS_VNODE_LOCKING, &vnode->flags);
+			spin_unlock(&vnode->lock);
+
+			ret = afs_vnode_set_lock(vnode, key, type);
+			clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
+			switch (ret) {
+			case 0:
+				goto acquired_server_lock;
+			case -EWOULDBLOCK:
+				spin_lock(&vnode->lock);
+				ASSERT(list_empty(&vnode->granted_locks));
+				ASSERTCMP(vnode->pending_locks.next, ==,
+					  &fl->fl_u.afs.link);
+				goto wait;
+			default:
+				spin_lock(&vnode->lock);
+				list_del_init(&fl->fl_u.afs.link);
+				spin_unlock(&vnode->lock);
+				goto error;
+			}
+		}
+
+		/* if we've already got a readlock on the server and no waiting
+		 * writelocks, then we might be able to instantly grant another
+		 * readlock */
+		if (type == AFS_LOCK_READ &&
+		    vnode->flags & (1 << AFS_VNODE_READLOCKED)) {
+			_debug("instant readlock");
+			ASSERTCMP(vnode->flags &
+				  ((1 << AFS_VNODE_LOCKING) |
+				   (1 << AFS_VNODE_WRITELOCKED)), ==, 0);
+			ASSERT(!list_empty(&vnode->granted_locks));
+			goto sharing_existing_lock;
+		}
+	}
+
+	/* otherwise, we need to wait for a local lock to become available */
+	_debug("wait local");
+	list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
+wait:
+	if (!(fl->fl_flags & FL_SLEEP)) {
+		_debug("noblock");
+		ret = -EAGAIN;
+		goto abort_attempt;
+	}
+	spin_unlock(&vnode->lock);
+
+	/* now we need to sleep and wait for the lock manager thread to get the
+	 * lock from the server */
+	_debug("sleep");
+	ret = wait_event_interruptible(fl->fl_wait,
+				       fl->fl_u.afs.state <= AFS_LOCK_GRANTED);
+	if (fl->fl_u.afs.state <= AFS_LOCK_GRANTED) {
+		ret = fl->fl_u.afs.state;
+		if (ret < 0)
+			goto error;
+		spin_lock(&vnode->lock);
+		goto given_lock;
+	}
+
+	/* we were interrupted, but someone may still be in the throes of
+	 * giving us the lock */
+	_debug("intr");
+	ASSERTCMP(ret, ==, -ERESTARTSYS);
+
+	spin_lock(&vnode->lock);
+	if (fl->fl_u.afs.state <= AFS_LOCK_GRANTED) {
+		ret = fl->fl_u.afs.state;
+		if (ret < 0) {
+			spin_unlock(&vnode->lock);
+			goto error;
+		}
+		goto given_lock;
+	}
+
+abort_attempt:
+	/* we aren't going to get the lock, either because we're unwilling to
+	 * wait, or because some signal happened */
+	_debug("abort");
+	if (list_empty(&vnode->granted_locks) &&
+	    vnode->pending_locks.next == &fl->fl_u.afs.link) {
+		if (vnode->pending_locks.prev != &fl->fl_u.afs.link) {
+			/* kick the next pending lock into having a go */
+			list_del_init(&fl->fl_u.afs.link);
+			afs_lock_may_be_available(vnode);
+		}
+	} else {
+		list_del_init(&fl->fl_u.afs.link);
+	}
+	spin_unlock(&vnode->lock);
+	goto error;
+
+acquired_server_lock:
+	/* we've acquired a server lock, but it needs to be renewed after 5
+	 * mins */
+	spin_lock(&vnode->lock);
+	afs_schedule_lock_extension(vnode);
+	if (type == AFS_LOCK_READ)
+		set_bit(AFS_VNODE_READLOCKED, &vnode->flags);
+	else
+		set_bit(AFS_VNODE_WRITELOCKED, &vnode->flags);
+sharing_existing_lock:
+	/* the lock has been granted as far as we're concerned... */
+	fl->fl_u.afs.state = AFS_LOCK_GRANTED;
+	list_move_tail(&fl->fl_u.afs.link, &vnode->granted_locks);
+given_lock:
+	/* ... but we do still need to get the VFS's blessing */
+	ASSERT(!(vnode->flags & (1 << AFS_VNODE_LOCKING)));
+	ASSERT((vnode->flags & ((1 << AFS_VNODE_READLOCKED) |
+				(1 << AFS_VNODE_WRITELOCKED))) != 0);
+	ret = posix_lock_file(file, fl, NULL);
+	if (ret < 0)
+		goto vfs_rejected_lock;
+	spin_unlock(&vnode->lock);
+
+	/* again, make sure we've got a callback on this file and, again, make
+	 * sure that our view of the data version is up to date (we ignore
+	 * errors incurred here and deal with the consequences elsewhere) */
+	afs_vnode_fetch_status(vnode, NULL, key);
+
+error:
+	unlock_kernel();
+	_leave(" = %d", ret);
+	return ret;
+
+vfs_rejected_lock:
+	/* the VFS rejected the lock we just obtained, so we have to discard
+	 * what we just got */
+	_debug("vfs refused %d", ret);
+	list_del_init(&fl->fl_u.afs.link);
+	if (list_empty(&vnode->granted_locks))
+		afs_defer_unlock(vnode, key);
+	spin_unlock(&vnode->lock);
+	goto abort_attempt;
+}
+
+/*
+ * unlock on a file on the server
+ */
+static int afs_do_unlk(struct file *file, struct file_lock *fl)
+{
+	struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host);
+	struct key *key = file->private_data;
+	int ret;
+
+	_enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
+
+	/* only whole-file unlocks are supported */
+	if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX)
+		return -EINVAL;
+
+	fl->fl_ops = &afs_lock_ops;
+	INIT_LIST_HEAD(&fl->fl_u.afs.link);
+	fl->fl_u.afs.state = AFS_LOCK_PENDING;
+
+	spin_lock(&vnode->lock);
+	ret = posix_lock_file(file, fl, NULL);
+	if (ret < 0) {
+		spin_unlock(&vnode->lock);
+		_leave(" = %d [vfs]", ret);
+		return ret;
+	}
+
+	/* discard the server lock only if all granted locks are gone */
+	if (list_empty(&vnode->granted_locks))
+		afs_defer_unlock(vnode, key);
+	spin_unlock(&vnode->lock);
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * return information about a lock we currently hold, if indeed we hold one
+ */
+static int afs_do_getlk(struct file *file, struct file_lock *fl)
+{
+	struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host);
+	struct key *key = file->private_data;
+	int ret, lock_count;
+
+	_enter("");
+
+	fl->fl_type = F_UNLCK;
+
+	mutex_lock(&vnode->vfs_inode.i_mutex);
+
+	/* check local lock records first */
+	ret = 0;
+	if (posix_test_lock(file, fl) == 0) {
+		/* no local locks; consult the server */
+		ret = afs_vnode_fetch_status(vnode, NULL, key);
+		if (ret < 0)
+			goto error;
+		lock_count = vnode->status.lock_count;
+		if (lock_count) {
+			if (lock_count > 0)
+				fl->fl_type = F_RDLCK;
+			else
+				fl->fl_type = F_WRLCK;
+			fl->fl_start = 0;
+			fl->fl_end = OFFSET_MAX;
+		}
+	}
+
+error:
+	mutex_unlock(&vnode->vfs_inode.i_mutex);
+	_leave(" = %d [%hd]", ret, fl->fl_type);
+	return ret;
+}
+
+/*
+ * manage POSIX locks on a file
+ */
+int afs_lock(struct file *file, int cmd, struct file_lock *fl)
+{
+	struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode);
+
+	_enter("{%x:%u},%d,{t=%x,fl=%x,r=%Ld:%Ld}",
+	       vnode->fid.vid, vnode->fid.vnode, cmd,
+	       fl->fl_type, fl->fl_flags,
+	       (long long) fl->fl_start, (long long) fl->fl_end);
+
+	/* AFS doesn't support mandatory locks */
+	if ((vnode->vfs_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID &&
+	    fl->fl_type != F_UNLCK)
+		return -ENOLCK;
+
+	if (IS_GETLK(cmd))
+		return afs_do_getlk(file, fl);
+	if (fl->fl_type == F_UNLCK)
+		return afs_do_unlk(file, fl);
+	return afs_do_setlk(file, fl);
+}
+
+/*
+ * manage FLOCK locks on a file
+ */
+int afs_flock(struct file *file, int cmd, struct file_lock *fl)
+{
+	struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode);
+
+	_enter("{%x:%u},%d,{t=%x,fl=%x}",
+	       vnode->fid.vid, vnode->fid.vnode, cmd,
+	       fl->fl_type, fl->fl_flags);
+
+	/*
+	 * No BSD flocks over NFS allowed.
+	 * Note: we could try to fake a POSIX lock request here by
+	 * using ((u32) filp | 0x80000000) or some such as the pid.
+	 * Not sure whether that would be unique, though, or whether
+	 * that would break in other places.
+	 */
+	if (!(fl->fl_flags & FL_FLOCK))
+		return -ENOLCK;
+
+	/* we're simulating flock() locks using posix locks on the server */
+	fl->fl_owner = (fl_owner_t) file;
+	fl->fl_start = 0;
+	fl->fl_end = OFFSET_MAX;
+
+	if (fl->fl_type == F_UNLCK)
+		return afs_do_unlk(file, fl);
+	return afs_do_setlk(file, fl);
+}
+
+/*
+ * the POSIX lock management core VFS code copies the lock record and adds the
+ * copy into its own list, so we need to add that copy to the vnode's lock
+ * queue in the same place as the original (which will be deleted shortly
+ * after)
+ */
+static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl)
+{
+	_enter("");
+
+	list_add(&new->fl_u.afs.link, &fl->fl_u.afs.link);
+}
+
+/*
+ * need to remove this lock from the vnode queue when it's removed from the
+ * VFS's list
+ */
+static void afs_fl_release_private(struct file_lock *fl)
+{
+	_enter("");
+
+	list_del_init(&fl->fl_u.afs.link);
+}
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 5dff1308b6f..023b95b0d9d 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -67,7 +67,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
 	EXTRACT(status->group);
 	bp++; /* sync counter */
 	data_version |= (u64) ntohl(*bp++) << 32;
-	bp++; /* lock count */
+	EXTRACT(status->lock_count);
 	size |= (u64) ntohl(*bp++) << 32;
 	bp++; /* spare 4 */
 	*_bp = bp;
@@ -1748,3 +1748,156 @@ int afs_fs_get_volume_status(struct afs_server *server,
 
 	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
 }
+
+/*
+ * deliver reply data to an FS.SetLock, FS.ExtendLock or FS.ReleaseLock
+ */
+static int afs_deliver_fs_xxxx_lock(struct afs_call *call,
+				    struct sk_buff *skb, bool last)
+{
+	const __be32 *bp;
+
+	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+
+	afs_transfer_reply(call, skb);
+	if (!last)
+		return 0;
+
+	if (call->reply_size != call->reply_max)
+		return -EBADMSG;
+
+	/* unmarshall the reply once we've received all of it */
+	bp = call->buffer;
+	/* xdr_decode_AFSVolSync(&bp, call->replyX); */
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * FS.SetLock operation type
+ */
+static const struct afs_call_type afs_RXFSSetLock = {
+	.name		= "FS.SetLock",
+	.deliver	= afs_deliver_fs_xxxx_lock,
+	.abort_to_error	= afs_abort_to_error,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * FS.ExtendLock operation type
+ */
+static const struct afs_call_type afs_RXFSExtendLock = {
+	.name		= "FS.ExtendLock",
+	.deliver	= afs_deliver_fs_xxxx_lock,
+	.abort_to_error	= afs_abort_to_error,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * FS.ReleaseLock operation type
+ */
+static const struct afs_call_type afs_RXFSReleaseLock = {
+	.name		= "FS.ReleaseLock",
+	.deliver	= afs_deliver_fs_xxxx_lock,
+	.abort_to_error	= afs_abort_to_error,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * get a lock on a file
+ */
+int afs_fs_set_lock(struct afs_server *server,
+		    struct key *key,
+		    struct afs_vnode *vnode,
+		    afs_lock_type_t type,
+		    const struct afs_wait_mode *wait_mode)
+{
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(&afs_RXFSSetLock, 5 * 4, 6 * 4);
+	if (!call)
+		return -ENOMEM;
+
+	call->key = key;
+	call->reply = vnode;
+	call->service_id = FS_SERVICE;
+	call->port = htons(AFS_FS_PORT);
+
+	/* marshall the parameters */
+	bp = call->request;
+	*bp++ = htonl(FSSETLOCK);
+	*bp++ = htonl(vnode->fid.vid);
+	*bp++ = htonl(vnode->fid.vnode);
+	*bp++ = htonl(vnode->fid.unique);
+	*bp++ = htonl(type);
+
+	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+}
+
+/*
+ * extend a lock on a file
+ */
+int afs_fs_extend_lock(struct afs_server *server,
+		       struct key *key,
+		       struct afs_vnode *vnode,
+		       const struct afs_wait_mode *wait_mode)
+{
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(&afs_RXFSExtendLock, 4 * 4, 6 * 4);
+	if (!call)
+		return -ENOMEM;
+
+	call->key = key;
+	call->reply = vnode;
+	call->service_id = FS_SERVICE;
+	call->port = htons(AFS_FS_PORT);
+
+	/* marshall the parameters */
+	bp = call->request;
+	*bp++ = htonl(FSEXTENDLOCK);
+	*bp++ = htonl(vnode->fid.vid);
+	*bp++ = htonl(vnode->fid.vnode);
+	*bp++ = htonl(vnode->fid.unique);
+
+	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+}
+
+/*
+ * release a lock on a file
+ */
+int afs_fs_release_lock(struct afs_server *server,
+			struct key *key,
+			struct afs_vnode *vnode,
+			const struct afs_wait_mode *wait_mode)
+{
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(&afs_RXFSReleaseLock, 4 * 4, 6 * 4);
+	if (!call)
+		return -ENOMEM;
+
+	call->key = key;
+	call->reply = vnode;
+	call->service_id = FS_SERVICE;
+	call->port = htons(AFS_FS_PORT);
+
+	/* marshall the parameters */
+	bp = call->request;
+	*bp++ = htonl(FSRELEASELOCK);
+	*bp++ = htonl(vnode->fid.vid);
+	*bp++ = htonl(vnode->fid.vnode);
+	*bp++ = htonl(vnode->fid.unique);
+
+	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 2c55dd94a1d..6306438f331 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -351,10 +351,18 @@ struct afs_vnode {
 #define AFS_VNODE_ZAP_DATA	3		/* set if vnode's data should be invalidated */
 #define AFS_VNODE_DELETED	4		/* set if vnode deleted on server */
 #define AFS_VNODE_MOUNTPOINT	5		/* set if vnode is a mountpoint symlink */
+#define AFS_VNODE_LOCKING	6		/* set if waiting for lock on vnode */
+#define AFS_VNODE_READLOCKED	7		/* set if vnode is read-locked on the server */
+#define AFS_VNODE_WRITELOCKED	8		/* set if vnode is write-locked on the server */
+#define AFS_VNODE_UNLOCKING	9		/* set if vnode is being unlocked on the server */
 
 	long			acl_order;	/* ACL check count (callback break count) */
 
 	struct list_head	writebacks;	/* alterations in pagecache that need writing */
+	struct list_head	pending_locks;	/* locks waiting to be granted */
+	struct list_head	granted_locks;	/* locks granted on this file */
+	struct delayed_work	lock_work;	/* work to be done in locking */
+	struct key		*unlock_key;	/* key to be used in unlocking */
 
 	/* outstanding callback notification on this file */
 	struct rb_node		server_rb;	/* link in server->fs_vnodes */
@@ -474,6 +482,15 @@ extern int afs_open(struct inode *, struct file *);
 extern int afs_release(struct inode *, struct file *);
 
 /*
+ * flock.c
+ */
+extern void __exit afs_kill_lock_manager(void);
+extern void afs_lock_work(struct work_struct *);
+extern void afs_lock_may_be_available(struct afs_vnode *);
+extern int afs_lock(struct file *, int, struct file_lock *);
+extern int afs_flock(struct file *, int, struct file_lock *);
+
+/*
  * fsclient.c
  */
 extern int afs_fs_fetch_file_status(struct afs_server *, struct key *,
@@ -513,6 +530,15 @@ extern int afs_fs_get_volume_status(struct afs_server *, struct key *,
 				    struct afs_vnode *,
 				    struct afs_volume_status *,
 				    const struct afs_wait_mode *);
+extern int afs_fs_set_lock(struct afs_server *, struct key *,
+			   struct afs_vnode *, afs_lock_type_t,
+			   const struct afs_wait_mode *);
+extern int afs_fs_extend_lock(struct afs_server *, struct key *,
+			      struct afs_vnode *,
+			      const struct afs_wait_mode *);
+extern int afs_fs_release_lock(struct afs_server *, struct key *,
+			       struct afs_vnode *,
+			       const struct afs_wait_mode *);
 
 /*
  * inode.c
@@ -681,6 +707,10 @@ extern int afs_vnode_store_data(struct afs_writeback *, pgoff_t, pgoff_t,
 extern int afs_vnode_setattr(struct afs_vnode *, struct key *, struct iattr *);
 extern int afs_vnode_get_volume_status(struct afs_vnode *, struct key *,
 				       struct afs_volume_status *);
+extern int afs_vnode_set_lock(struct afs_vnode *, struct key *,
+			      afs_lock_type_t);
+extern int afs_vnode_extend_lock(struct afs_vnode *, struct key *);
+extern int afs_vnode_release_lock(struct afs_vnode *, struct key *);
 
 /*
  * volume.c
diff --git a/fs/afs/main.c b/fs/afs/main.c
index cd21195bbb2..0f60f6b3576 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -168,6 +168,7 @@ static void __exit afs_exit(void)
 	printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 unregistering.\n");
 
 	afs_fs_exit();
+	afs_kill_lock_manager();
 	afs_close_socket();
 	afs_purge_servers();
 	afs_callback_update_kill();
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index d1a889c4074..2d33a5f7d21 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -35,6 +35,7 @@ int afs_abort_to_error(u32 abort_code)
 	case VOVERQUOTA:	return -EDQUOT;
 	case VBUSY:		return -EBUSY;
 	case VMOVED:		return -ENXIO;
+	case 0x2f6df0a:		return -EWOULDBLOCK;
 	case 0x2f6df0c:		return -EACCES;
 	case 0x2f6df0f:		return -EBUSY;
 	case 0x2f6df10:		return -EEXIST;
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 13df512aea9..6edb56683b9 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -201,23 +201,9 @@ static int afs_proc_cells_open(struct inode *inode, struct file *file)
  */
 static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos)
 {
-	struct list_head *_p;
-	loff_t pos = *_pos;
-
 	/* lock the list against modification */
 	down_read(&afs_proc_cells_sem);
-
-	/* allow for the header line */
-	if (!pos)
-		return (void *) 1;
-	pos--;
-
-	/* find the n'th element in the list */
-	list_for_each(_p, &afs_proc_cells)
-		if (!pos--)
-			break;
-
-	return _p != &afs_proc_cells ? _p : NULL;
+	return seq_list_start_head(&afs_proc_cells, *_pos);
 }
 
 /*
@@ -225,14 +211,7 @@ static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos)
  */
 static void *afs_proc_cells_next(struct seq_file *p, void *v, loff_t *pos)
 {
-	struct list_head *_p;
-
-	(*pos)++;
-
-	_p = v;
-	_p = v == (void *) 1 ? afs_proc_cells.next : _p->next;
-
-	return _p != &afs_proc_cells ? _p : NULL;
+	return seq_list_next(v, &afs_proc_cells, pos);
 }
 
 /*
@@ -250,7 +229,7 @@ static int afs_proc_cells_show(struct seq_file *m, void *v)
 {
 	struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link);
 
-	if (v == (void *) 1) {
+	if (v == &afs_proc_cells) {
 		/* display header on line 1 */
 		seq_puts(m, "USE NAME\n");
 		return 0;
@@ -503,26 +482,13 @@ static int afs_proc_cell_volumes_release(struct inode *inode, struct file *file)
  */
 static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
 {
-	struct list_head *_p;
 	struct afs_cell *cell = m->private;
-	loff_t pos = *_pos;
 
 	_enter("cell=%p pos=%Ld", cell, *_pos);
 
 	/* lock the list against modification */
 	down_read(&cell->vl_sem);
-
-	/* allow for the header line */
-	if (!pos)
-		return (void *) 1;
-	pos--;
-
-	/* find the n'th element in the list */
-	list_for_each(_p, &cell->vl_list)
-		if (!pos--)
-			break;
-
-	return _p != &cell->vl_list ? _p : NULL;
+	return seq_list_start_head(&cell->vl_list, *_pos);
 }
 
 /*
@@ -531,17 +497,10 @@ static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
 static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v,
 					loff_t *_pos)
 {
-	struct list_head *_p;
 	struct afs_cell *cell = p->private;
 
 	_enter("cell=%p pos=%Ld", cell, *_pos);
-
-	(*_pos)++;
-
-	_p = v;
-	_p = (v == (void *) 1) ? cell->vl_list.next : _p->next;
-
-	return (_p != &cell->vl_list) ? _p : NULL;
+	return seq_list_next(v, &cell->vl_list, _pos);
 }
 
 /*
@@ -569,11 +528,12 @@ const char afs_vlocation_states[][4] = {
  */
 static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
 {
+	struct afs_cell *cell = m->private;
 	struct afs_vlocation *vlocation =
 		list_entry(v, struct afs_vlocation, link);
 
 	/* display header on line 1 */
-	if (v == (void *) 1) {
+	if (v == &cell->vl_list) {
 		seq_puts(m, "USE STT VLID[0]  VLID[1]  VLID[2]  NAME\n");
 		return 0;
 	}
@@ -734,26 +694,13 @@ static int afs_proc_cell_servers_release(struct inode *inode,
 static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos)
 	__acquires(m->private->servers_lock)
 {
-	struct list_head *_p;
 	struct afs_cell *cell = m->private;
-	loff_t pos = *_pos;
 
 	_enter("cell=%p pos=%Ld", cell, *_pos);
 
 	/* lock the list against modification */
 	read_lock(&cell->servers_lock);
-
-	/* allow for the header line */
-	if (!pos)
-		return (void *) 1;
-	pos--;
-
-	/* find the n'th element in the list */
-	list_for_each(_p, &cell->servers)
-		if (!pos--)
-			break;
-
-	return _p != &cell->servers ? _p : NULL;
+	return seq_list_start_head(&cell->servers, *_pos);
 }
 
 /*
@@ -762,17 +709,10 @@ static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos)
 static void *afs_proc_cell_servers_next(struct seq_file *p, void *v,
 					loff_t *_pos)
 {
-	struct list_head *_p;
 	struct afs_cell *cell = p->private;
 
 	_enter("cell=%p pos=%Ld", cell, *_pos);
-
-	(*_pos)++;
-
-	_p = v;
-	_p = v == (void *) 1 ? cell->servers.next : _p->next;
-
-	return _p != &cell->servers ? _p : NULL;
+	return seq_list_next(v, &cell->servers, _pos);
 }
 
 /*
@@ -791,11 +731,12 @@ static void afs_proc_cell_servers_stop(struct seq_file *p, void *v)
  */
 static int afs_proc_cell_servers_show(struct seq_file *m, void *v)
 {
+	struct afs_cell *cell = m->private;
 	struct afs_server *server = list_entry(v, struct afs_server, link);
 	char ipaddr[20];
 
 	/* display header on line 1 */
-	if (v == (void *) 1) {
+	if (v == &cell->servers) {
 		seq_puts(m, "USE ADDR            STATE\n");
 		return 0;
 	}
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 2e8496ba120..993cdf1cce3 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -460,6 +460,9 @@ static void afs_i_init_once(void *_vnode, struct kmem_cache *cachep,
 	spin_lock_init(&vnode->writeback_lock);
 	spin_lock_init(&vnode->lock);
 	INIT_LIST_HEAD(&vnode->writebacks);
+	INIT_LIST_HEAD(&vnode->pending_locks);
+	INIT_LIST_HEAD(&vnode->granted_locks);
+	INIT_DELAYED_WORK(&vnode->lock_work, afs_lock_work);
 	INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work);
 }
 
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
index 232c55dc245..2f05c4fc2a7 100644
--- a/fs/afs/vnode.c
+++ b/fs/afs/vnode.c
@@ -561,7 +561,7 @@ no_server:
 /*
  * create a hard link
  */
-extern int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode,
+int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode,
 			  struct key *key, const char *name)
 {
 	struct afs_server *server;
@@ -887,11 +887,6 @@ int afs_vnode_get_volume_status(struct afs_vnode *vnode, struct key *key,
 	       vnode->fid.unique,
 	       key_serial(key));
 
-	/* this op will fetch the status */
-	spin_lock(&vnode->lock);
-	vnode->update_cnt++;
-	spin_unlock(&vnode->lock);
-
 	do {
 		/* pick a server to query */
 		server = afs_volume_pick_fileserver(vnode);
@@ -905,20 +900,127 @@ int afs_vnode_get_volume_status(struct afs_vnode *vnode, struct key *key,
 	} while (!afs_volume_release_fileserver(vnode, server, ret));
 
 	/* adjust the flags */
-	if (ret == 0) {
-		afs_vnode_finalise_status_update(vnode, server);
+	if (ret == 0)
+		afs_put_server(server);
+
+	_leave(" = %d", ret);
+	return ret;
+
+no_server:
+	return PTR_ERR(server);
+}
+
+/*
+ * get a lock on a file
+ */
+int afs_vnode_set_lock(struct afs_vnode *vnode, struct key *key,
+		       afs_lock_type_t type)
+{
+	struct afs_server *server;
+	int ret;
+
+	_enter("%s{%x:%u.%u},%x,%u",
+	       vnode->volume->vlocation->vldb.name,
+	       vnode->fid.vid,
+	       vnode->fid.vnode,
+	       vnode->fid.unique,
+	       key_serial(key), type);
+
+	do {
+		/* pick a server to query */
+		server = afs_volume_pick_fileserver(vnode);
+		if (IS_ERR(server))
+			goto no_server;
+
+		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
+
+		ret = afs_fs_set_lock(server, key, vnode, type, &afs_sync_call);
+
+	} while (!afs_volume_release_fileserver(vnode, server, ret));
+
+	/* adjust the flags */
+	if (ret == 0)
+		afs_put_server(server);
+
+	_leave(" = %d", ret);
+	return ret;
+
+no_server:
+	return PTR_ERR(server);
+}
+
+/*
+ * extend a lock on a file
+ */
+int afs_vnode_extend_lock(struct afs_vnode *vnode, struct key *key)
+{
+	struct afs_server *server;
+	int ret;
+
+	_enter("%s{%x:%u.%u},%x",
+	       vnode->volume->vlocation->vldb.name,
+	       vnode->fid.vid,
+	       vnode->fid.vnode,
+	       vnode->fid.unique,
+	       key_serial(key));
+
+	do {
+		/* pick a server to query */
+		server = afs_volume_pick_fileserver(vnode);
+		if (IS_ERR(server))
+			goto no_server;
+
+		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
+
+		ret = afs_fs_extend_lock(server, key, vnode, &afs_sync_call);
+
+	} while (!afs_volume_release_fileserver(vnode, server, ret));
+
+	/* adjust the flags */
+	if (ret == 0)
+		afs_put_server(server);
+
+	_leave(" = %d", ret);
+	return ret;
+
+no_server:
+	return PTR_ERR(server);
+}
+
+/*
+ * release a lock on a file
+ */
+int afs_vnode_release_lock(struct afs_vnode *vnode, struct key *key)
+{
+	struct afs_server *server;
+	int ret;
+
+	_enter("%s{%x:%u.%u},%x",
+	       vnode->volume->vlocation->vldb.name,
+	       vnode->fid.vid,
+	       vnode->fid.vnode,
+	       vnode->fid.unique,
+	       key_serial(key));
+
+	do {
+		/* pick a server to query */
+		server = afs_volume_pick_fileserver(vnode);
+		if (IS_ERR(server))
+			goto no_server;
+
+		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
+
+		ret = afs_fs_release_lock(server, key, vnode, &afs_sync_call);
+
+	} while (!afs_volume_release_fileserver(vnode, server, ret));
+
+	/* adjust the flags */
+	if (ret == 0)
 		afs_put_server(server);
-	} else {
-		afs_vnode_status_update_failed(vnode, ret);
-	}
 
 	_leave(" = %d", ret);
 	return ret;
 
 no_server:
-	spin_lock(&vnode->lock);
-	vnode->update_cnt--;
-	ASSERTCMP(vnode->update_cnt, >=, 0);
-	spin_unlock(&vnode->lock);
 	return PTR_ERR(server);
 }
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 40fe3a3222e..a260198306c 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -53,7 +53,7 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
 };
 
 /**
- * anon_inode_getfd - creates a new file instance by hooking it up to and
+ * anon_inode_getfd - creates a new file instance by hooking it up to an
  *                    anonymous inode, and a dentry that describe the "class"
  *                    of the file
  *
@@ -66,7 +66,7 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
  *
  * Creates a new file by hooking it on a single inode. This is useful for files
  * that do not need to have a full-fledged inode in order to operate correctly.
- * All the files created with anon_inode_getfd() will share a single inode, by
+ * All the files created with anon_inode_getfd() will share a single inode,
  * hence saving memory and avoiding code duplication for the file/inode/dentry
  * setup.
  */
@@ -141,9 +141,9 @@ err_put_filp:
 }
 
 /*
- * A single inode exist for all anon_inode files. Contrary to pipes,
- * anon_inode inodes has no per-instance data associated, so we can avoid
- * the allocation of multiple of them.
+ * A single inode exists for all anon_inode files. Contrary to pipes,
+ * anon_inode inodes have no associated per-instance data, so we need
+ * only allocate one of them.
  */
 static struct inode *anon_inode_mkinode(void)
 {
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 329ee473eed..521ff7caadb 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -114,12 +114,6 @@ static int bad_file_lock(struct file *file, int cmd, struct file_lock *fl)
 	return -EIO;
 }
 
-static ssize_t bad_file_sendfile(struct file *in_file, loff_t *ppos,
-			size_t count, read_actor_t actor, void *target)
-{
-	return -EIO;
-}
-
 static ssize_t bad_file_sendpage(struct file *file, struct page *page,
 			int off, size_t len, loff_t *pos, int more)
 {
@@ -182,7 +176,6 @@ static const struct file_operations bad_file_ops =
 	.aio_fsync	= bad_file_aio_fsync,
 	.fasync		= bad_file_fasync,
 	.lock		= bad_file_lock,
-	.sendfile	= bad_file_sendfile,
 	.sendpage	= bad_file_sendpage,
 	.get_unmapped_area = bad_file_get_unmapped_area,
 	.check_flags	= bad_file_check_flags,
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index ef4d1fa04e6..24310e9ee05 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -24,7 +24,7 @@ const struct file_operations bfs_file_operations = {
 	.write		= do_sync_write,
 	.aio_write	= generic_file_aio_write,
 	.mmap		= generic_file_mmap,
-	.sendfile	= generic_file_sendfile,
+	.splice_read	= generic_file_splice_read,
 };
 
 static int bfs_move_block(unsigned long from, unsigned long to, struct super_block *sb)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index fa8ea33ab0b..a27e42bf340 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -45,7 +45,7 @@
 
 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
 static int load_elf_library(struct file *);
-static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int);
+static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int, unsigned long);
 
 /*
  * If we don't support core dumping, then supply a NULL so we
@@ -80,7 +80,7 @@ static struct linux_binfmt elf_format = {
 		.hasvdso	= 1
 };
 
-#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
+#define BAD_ADDR(x) IS_ERR_VALUE(x)
 
 static int set_brk(unsigned long start, unsigned long end)
 {
@@ -285,33 +285,70 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 #ifndef elf_map
 
 static unsigned long elf_map(struct file *filep, unsigned long addr,
-		struct elf_phdr *eppnt, int prot, int type)
+		struct elf_phdr *eppnt, int prot, int type,
+		unsigned long total_size)
 {
 	unsigned long map_addr;
-	unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);
+	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
+	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
+	addr = ELF_PAGESTART(addr);
+	size = ELF_PAGEALIGN(size);
 
-	down_write(&current->mm->mmap_sem);
 	/* mmap() will return -EINVAL if given a zero size, but a
 	 * segment with zero filesize is perfectly valid */
-	if (eppnt->p_filesz + pageoffset)
-		map_addr = do_mmap(filep, ELF_PAGESTART(addr),
-				   eppnt->p_filesz + pageoffset, prot, type,
-				   eppnt->p_offset - pageoffset);
-	else
-		map_addr = ELF_PAGESTART(addr);
+	if (!size)
+		return addr;
+
+	down_write(&current->mm->mmap_sem);
+	/*
+	* total_size is the size of the ELF (interpreter) image.
+	* The _first_ mmap needs to know the full size, otherwise
+	* randomization might put this image into an overlapping
+	* position with the ELF binary image. (since size < total_size)
+	* So we first map the 'big' image - and unmap the remainder at
+	* the end. (which unmap is needed for ELF images with holes.)
+	*/
+	if (total_size) {
+		total_size = ELF_PAGEALIGN(total_size);
+		map_addr = do_mmap(filep, addr, total_size, prot, type, off);
+		if (!BAD_ADDR(map_addr))
+			do_munmap(current->mm, map_addr+size, total_size-size);
+	} else
+		map_addr = do_mmap(filep, addr, size, prot, type, off);
+
 	up_write(&current->mm->mmap_sem);
 	return(map_addr);
 }
 
 #endif /* !elf_map */
 
+static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
+{
+	int i, first_idx = -1, last_idx = -1;
+
+	for (i = 0; i < nr; i++) {
+		if (cmds[i].p_type == PT_LOAD) {
+			last_idx = i;
+			if (first_idx == -1)
+				first_idx = i;
+		}
+	}
+	if (first_idx == -1)
+		return 0;
+
+	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
+				ELF_PAGESTART(cmds[first_idx].p_vaddr);
+}
+
+
 /* This is much more generalized than the library routine read function,
    so we keep this separate.  Technically the library read function
    is only provided so that we can read a.out libraries that have
    an ELF header */
 
 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
-		struct file *interpreter, unsigned long *interp_load_addr)
+		struct file *interpreter, unsigned long *interp_map_addr,
+		unsigned long no_base)
 {
 	struct elf_phdr *elf_phdata;
 	struct elf_phdr *eppnt;
@@ -319,6 +356,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 	int load_addr_set = 0;
 	unsigned long last_bss = 0, elf_bss = 0;
 	unsigned long error = ~0UL;
+	unsigned long total_size;
 	int retval, i, size;
 
 	/* First of all, some simple consistency checks */
@@ -357,6 +395,12 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 		goto out_close;
 	}
 
+	total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
+	if (!total_size) {
+		error = -EINVAL;
+		goto out_close;
+	}
+
 	eppnt = elf_phdata;
 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
 		if (eppnt->p_type == PT_LOAD) {
@@ -374,9 +418,14 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 			vaddr = eppnt->p_vaddr;
 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
 				elf_type |= MAP_FIXED;
+			else if (no_base && interp_elf_ex->e_type == ET_DYN)
+				load_addr = -vaddr;
 
 			map_addr = elf_map(interpreter, load_addr + vaddr,
-					   eppnt, elf_prot, elf_type);
+					   eppnt, elf_prot, elf_type, total_size);
+			total_size = 0;
+			if (!*interp_map_addr)
+				*interp_map_addr = map_addr;
 			error = map_addr;
 			if (BAD_ADDR(map_addr))
 				goto out_close;
@@ -442,8 +491,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 			goto out_close;
 	}
 
-	*interp_load_addr = load_addr;
-	error = ((unsigned long)interp_elf_ex->e_entry) + load_addr;
+	error = load_addr;
 
 out_close:
 	kfree(elf_phdata);
@@ -540,7 +588,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	int elf_exec_fileno;
 	int retval, i;
 	unsigned int size;
-	unsigned long elf_entry, interp_load_addr = 0;
+	unsigned long elf_entry;
+	unsigned long interp_load_addr = 0;
 	unsigned long start_code, end_code, start_data, end_data;
 	unsigned long reloc_func_desc = 0;
 	char passed_fileno[6];
@@ -808,9 +857,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	current->mm->start_stack = bprm->p;
 
 	/* Now we do a little grungy work by mmaping the ELF image into
-	   the correct location in memory.  At this point, we assume that
-	   the image should be loaded at fixed address, not at a variable
-	   address. */
+	   the correct location in memory. */
 	for(i = 0, elf_ppnt = elf_phdata;
 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
 		int elf_prot = 0, elf_flags;
@@ -864,11 +911,15 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 			 * default mmap base, as well as whatever program they
 			 * might try to exec.  This is because the brk will
 			 * follow the loader, and is not movable.  */
+#ifdef CONFIG_X86
+			load_bias = 0;
+#else
 			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
+#endif
 		}
 
 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
-				elf_prot, elf_flags);
+				elf_prot, elf_flags,0);
 		if (BAD_ADDR(error)) {
 			send_sig(SIGKILL, current, 0);
 			retval = IS_ERR((void *)error) ?
@@ -944,13 +995,25 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	}
 
 	if (elf_interpreter) {
-		if (interpreter_type == INTERPRETER_AOUT)
+		if (interpreter_type == INTERPRETER_AOUT) {
 			elf_entry = load_aout_interp(&loc->interp_ex,
 						     interpreter);
-		else
+		} else {
+			unsigned long uninitialized_var(interp_map_addr);
+
 			elf_entry = load_elf_interp(&loc->interp_elf_ex,
 						    interpreter,
-						    &interp_load_addr);
+						    &interp_map_addr,
+						    load_bias);
+			if (!BAD_ADDR(elf_entry)) {
+				/*
+				 * load_elf_interp() returns relocation
+				 * adjustment
+				 */
+				interp_load_addr = elf_entry;
+				elf_entry += loc->interp_elf_ex.e_entry;
+			}
+		}
 		if (BAD_ADDR(elf_entry)) {
 			force_sig(SIGSEGV, current);
 			retval = IS_ERR((void *)elf_entry) ?
@@ -1499,6 +1562,9 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
 #endif
 	int thread_status_size = 0;
 	elf_addr_t *auxv;
+#ifdef ELF_CORE_WRITE_EXTRA_NOTES
+	int extra_notes_size;
+#endif
 
 	/*
 	 * We no longer stop all VM operations.
@@ -1628,7 +1694,8 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
 		sz += thread_status_size;
 
 #ifdef ELF_CORE_WRITE_EXTRA_NOTES
-		sz += ELF_CORE_EXTRA_NOTES_SIZE;
+		extra_notes_size = ELF_CORE_EXTRA_NOTES_SIZE;
+		sz += extra_notes_size;
 #endif
 
 		fill_elf_note_phdr(&phdr, sz, offset);
@@ -1674,6 +1741,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
 
 #ifdef ELF_CORE_WRITE_EXTRA_NOTES
 	ELF_CORE_WRITE_EXTRA_NOTES;
+	foffset += extra_notes_size;
 #endif
 
 	/* write out the thread status notes section */
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 7b0265d7f3a..861141b4f6d 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -558,7 +558,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 			if (!realdatastart)
 				realdatastart = (unsigned long) -ENOMEM;
 			printk("Unable to allocate RAM for process data, errno %d\n",
-					(int)-datapos);
+					(int)-realdatastart);
 			do_munmap(current->mm, textpos, text_len);
 			ret = realdatastart;
 			goto err;
diff --git a/fs/bio.c b/fs/bio.c
index 093345f0012..33e46340a76 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1223,8 +1223,6 @@ EXPORT_SYMBOL(bio_hw_segments);
 EXPORT_SYMBOL(bio_add_page);
 EXPORT_SYMBOL(bio_add_pc_page);
 EXPORT_SYMBOL(bio_get_nr_vecs);
-EXPORT_SYMBOL(bio_map_user);
-EXPORT_SYMBOL(bio_unmap_user);
 EXPORT_SYMBOL(bio_map_kern);
 EXPORT_SYMBOL(bio_pair_release);
 EXPORT_SYMBOL(bio_split);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index ea1480a16f5..3635315e3b9 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -588,12 +588,10 @@ EXPORT_SYMBOL(bdget);
 
 long nr_blockdev_pages(void)
 {
-	struct list_head *p;
+	struct block_device *bdev;
 	long ret = 0;
 	spin_lock(&bdev_lock);
-	list_for_each(p, &all_bdevs) {
-		struct block_device *bdev;
-		bdev = list_entry(p, struct block_device, bd_list);
+	list_for_each_entry(bdev, &all_bdevs, bd_list) {
 		ret += bdev->bd_inode->i_mapping->nrpages;
 	}
 	spin_unlock(&bdev_lock);
@@ -874,7 +872,7 @@ static struct bd_holder *find_bd_holder(struct block_device *bdev,
  */
 static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo)
 {
-	int ret;
+	int err;
 
 	if (!bo)
 		return -EINVAL;
@@ -882,15 +880,18 @@ static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo)
 	if (!bd_holder_grab_dirs(bdev, bo))
 		return -EBUSY;
 
-	ret = add_symlink(bo->sdir, bo->sdev);
-	if (ret == 0) {
-		ret = add_symlink(bo->hdir, bo->hdev);
-		if (ret)
-			del_symlink(bo->sdir, bo->sdev);
+	err = add_symlink(bo->sdir, bo->sdev);
+	if (err)
+		return err;
+
+	err = add_symlink(bo->hdir, bo->hdev);
+	if (err) {
+		del_symlink(bo->sdir, bo->sdev);
+		return err;
 	}
-	if (ret == 0)
-		list_add_tail(&bo->list, &bdev->bd_holder_list);
-	return ret;
+
+	list_add_tail(&bo->list, &bdev->bd_holder_list);
+	return 0;
 }
 
 /**
@@ -948,7 +949,7 @@ static struct bd_holder *del_bd_holder(struct block_device *bdev,
 static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
 				struct kobject *kobj)
 {
-	int res;
+	int err;
 	struct bd_holder *bo, *found;
 
 	if (!kobj)
@@ -959,21 +960,24 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
 		return -ENOMEM;
 
 	mutex_lock(&bdev->bd_mutex);
-	res = bd_claim(bdev, holder);
-	if (res == 0) {
-		found = find_bd_holder(bdev, bo);
-		if (found == NULL) {
-			res = add_bd_holder(bdev, bo);
-			if (res)
-				bd_release(bdev);
-		}
-	}
 
-	if (res || found)
-		free_bd_holder(bo);
-	mutex_unlock(&bdev->bd_mutex);
+	err = bd_claim(bdev, holder);
+	if (err)
+		goto fail;
 
-	return res;
+	found = find_bd_holder(bdev, bo);
+	if (found)
+		goto fail;
+
+	err = add_bd_holder(bdev, bo);
+	if (err)
+		bd_release(bdev);
+	else
+		bo = NULL;
+fail:
+	mutex_unlock(&bdev->bd_mutex);
+	free_bd_holder(bo);
+	return err;
 }
 
 /**
@@ -987,15 +991,12 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
 static void bd_release_from_kobject(struct block_device *bdev,
 					struct kobject *kobj)
 {
-	struct bd_holder *bo;
-
 	if (!kobj)
 		return;
 
 	mutex_lock(&bdev->bd_mutex);
 	bd_release(bdev);
-	if ((bo = del_bd_holder(bdev, kobj)))
-		free_bd_holder(bo);
+	free_bd_holder(del_bd_holder(bdev, kobj));
 	mutex_unlock(&bdev->bd_mutex);
 }
 
@@ -1346,7 +1347,6 @@ const struct file_operations def_blk_fops = {
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= compat_blkdev_ioctl,
 #endif
-	.sendfile	= generic_file_sendfile,
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= generic_file_splice_write,
 };
diff --git a/fs/buffer.c b/fs/buffer.c
index aa68206bd51..424165b569f 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1026,11 +1026,6 @@ failed:
 /*
  * Create buffers for the specified block device block's page.  If
  * that page was dirty, the buffers are set dirty also.
- *
- * Except that's a bug.  Attaching dirty buffers to a dirty
- * blockdev's page can result in filesystem corruption, because
- * some of those buffers may be aliases of filesystem data.
- * grow_dev_page() will go BUG() if this happens.
  */
 static int
 grow_buffers(struct block_device *bdev, sector_t block, int size)
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 6017c465440..07838b2ac1c 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -7,16 +7,16 @@
  *
  *   This program is free software;  you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation; either version 2 of the License, or 
+ *   the Free Software Foundation; either version 2 of the License, or
  *   (at your option) any later version.
- * 
+ *
  *   This program is distributed in the hope that it will be useful,
  *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  *   the GNU General Public License for more details.
  *
  *   You should have received a copy of the GNU General Public License
- *   along with this program;  if not, write to the Free Software 
+ *   along with this program;  if not, write to the Free Software
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 #include <linux/fs.h>
@@ -39,7 +39,7 @@ cifs_dump_mem(char *label, void *data, int length)
 	char *charptr = data;
 	char buf[10], line[80];
 
-	printk(KERN_DEBUG "%s: dump of %d bytes of data at 0x%p\n", 
+	printk(KERN_DEBUG "%s: dump of %d bytes of data at 0x%p\n",
 		label, length, data);
 	for (i = 0; i < length; i += 16) {
 		line[0] = 0;
@@ -60,10 +60,10 @@ cifs_dump_mem(char *label, void *data, int length)
 #ifdef CONFIG_CIFS_DEBUG2
 void cifs_dump_detail(struct smb_hdr * smb)
 {
-	cERROR(1,("Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d",
+	cERROR(1, ("Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d",
 		  smb->Command, smb->Status.CifsError,
 		  smb->Flags, smb->Flags2, smb->Mid, smb->Pid));
-	cERROR(1,("smb buf %p len %d", smb, smbCalcSize_LE(smb)));
+	cERROR(1, ("smb buf %p len %d", smb, smbCalcSize_LE(smb)));
 }
 
 
@@ -72,36 +72,35 @@ void cifs_dump_mids(struct TCP_Server_Info * server)
 	struct list_head *tmp;
 	struct mid_q_entry * mid_entry;
 
-	if(server == NULL)
+	if (server == NULL)
 		return;
 
-	cERROR(1,("Dump pending requests:"));
+	cERROR(1, ("Dump pending requests:"));
 	spin_lock(&GlobalMid_Lock);
 	list_for_each(tmp, &server->pending_mid_q) {
 		mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
-		if(mid_entry) {
-			cERROR(1,("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d",
+		if (mid_entry) {
+			cERROR(1, ("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d",
 				mid_entry->midState,
 				(int)mid_entry->command,
 				mid_entry->pid,
 				mid_entry->tsk,
 				mid_entry->mid));
 #ifdef CONFIG_CIFS_STATS2
-			cERROR(1,("IsLarge: %d buf: %p time rcv: %ld now: %ld",
+			cERROR(1, ("IsLarge: %d buf: %p time rcv: %ld now: %ld",
 				mid_entry->largeBuf,
 				mid_entry->resp_buf,
 				mid_entry->when_received,
 				jiffies));
 #endif /* STATS2 */
-			cERROR(1,("IsMult: %d IsEnd: %d", mid_entry->multiRsp,
+			cERROR(1, ("IsMult: %d IsEnd: %d", mid_entry->multiRsp,
 				  mid_entry->multiEnd));
-			if(mid_entry->resp_buf) {
+			if (mid_entry->resp_buf) {
 				cifs_dump_detail(mid_entry->resp_buf);
 				cifs_dump_mem("existing buf: ",
 					mid_entry->resp_buf,
 					62 /* fixme */);
 			}
-			
 		}
 	}
 	spin_unlock(&GlobalMid_Lock);
@@ -129,9 +128,10 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
 		    "Display Internal CIFS Data Structures for Debugging\n"
 		    "---------------------------------------------------\n");
 	buf += length;
-	length = sprintf(buf,"CIFS Version %s\n",CIFS_VERSION);
+	length = sprintf(buf, "CIFS Version %s\n", CIFS_VERSION);
 	buf += length;
-	length = sprintf(buf,"Active VFS Requests: %d\n", GlobalTotalActiveXid);
+	length = sprintf(buf,
+		"Active VFS Requests: %d\n", GlobalTotalActiveXid);
 	buf += length;
 	length = sprintf(buf, "Servers:");
 	buf += length;
@@ -141,7 +141,7 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
 	list_for_each(tmp, &GlobalSMBSessionList) {
 		i++;
 		ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList);
-		if((ses->serverDomain == NULL) || (ses->serverOS == NULL) ||
+		if ((ses->serverDomain == NULL) || (ses->serverOS == NULL) ||
 		   (ses->serverNOS == NULL)) {
 			buf += sprintf(buf, "\nentry for %s not fully "
 					"displayed\n\t", ses->serverName);
@@ -149,15 +149,18 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
 		} else {
 			length =
 			    sprintf(buf,
-				    "\n%d) Name: %s  Domain: %s Mounts: %d OS: %s  \n\tNOS: %s\tCapability: 0x%x\n\tSMB session status: %d\t",
+				    "\n%d) Name: %s  Domain: %s Mounts: %d OS:"
+				    " %s  \n\tNOS: %s\tCapability: 0x%x\n\tSMB"
+				    " session status: %d\t",
 				i, ses->serverName, ses->serverDomain,
 				atomic_read(&ses->inUse),
 				ses->serverOS, ses->serverNOS,
-				ses->capabilities,ses->status);
+				ses->capabilities, ses->status);
 			buf += length;
 		}
-		if(ses->server) {
-			buf += sprintf(buf, "TCP status: %d\n\tLocal Users To Server: %d SecMode: 0x%x Req On Wire: %d",
+		if (ses->server) {
+			buf += sprintf(buf, "TCP status: %d\n\tLocal Users To "
+				    "Server: %d SecMode: 0x%x Req On Wire: %d",
 				ses->server->tcpStatus,
 				atomic_read(&ses->server->socketUseCount),
 				ses->server->secMode,
@@ -165,7 +168,7 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
 
 #ifdef CONFIG_CIFS_STATS2
 			buf += sprintf(buf, " In Send: %d In MaxReq Wait: %d",
-				atomic_read(&ses->server->inSend), 
+				atomic_read(&ses->server->inSend),
 				atomic_read(&ses->server->num_waiters));
 #endif
 
@@ -177,17 +180,19 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
 				mid_entry = list_entry(tmp1, struct
 					mid_q_entry,
 					qhead);
-				if(mid_entry) {
-					length = sprintf(buf,"State: %d com: %d pid: %d tsk: %p mid %d\n",
-						mid_entry->midState,
-						(int)mid_entry->command,
-						mid_entry->pid,
-						mid_entry->tsk,
-						mid_entry->mid);
+				if (mid_entry) {
+					length = sprintf(buf,
+							"State: %d com: %d pid:"
+							" %d tsk: %p mid %d\n",
+							mid_entry->midState,
+							(int)mid_entry->command,
+							mid_entry->pid,
+							mid_entry->tsk,
+							mid_entry->mid);
 					buf += length;
 				}
 			}
-			spin_unlock(&GlobalMid_Lock); 
+			spin_unlock(&GlobalMid_Lock);
 		}
 
 	}
@@ -207,7 +212,8 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
 		dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType);
 		length =
 		    sprintf(buf,
-			    "\n%d) %s Uses: %d Type: %s DevInfo: 0x%x Attributes: 0x%x\nPathComponentMax: %d Status: %d",
+			    "\n%d) %s Uses: %d Type: %s DevInfo: 0x%x "
+			    "Attributes: 0x%x\nPathComponentMax: %d Status: %d",
 			    i, tcon->treeName,
 			    atomic_read(&tcon->useCount),
 			    tcon->nativeFileSystem,
@@ -215,7 +221,7 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
 			    le32_to_cpu(tcon->fsAttrInfo.Attributes),
 			    le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength),
 			    tcon->tidStatus);
-		buf += length;        
+		buf += length;
 		if (dev_type == FILE_DEVICE_DISK)
 			length = sprintf(buf, " type: DISK ");
 		else if (dev_type == FILE_DEVICE_CD_ROM)
@@ -224,7 +230,7 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
 			length =
 			    sprintf(buf, " type: %d ", dev_type);
 		buf += length;
-		if(tcon->tidStatus == CifsNeedReconnect) {
+		if (tcon->tidStatus == CifsNeedReconnect) {
 			buf += sprintf(buf, "\tDISCONNECTED ");
 			length += 14;
 		}
@@ -238,9 +244,9 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
 	/* Now calculate total size of returned data */
 	length = buf - original_buf;
 
-	if(offset + count >= length)
+	if (offset + count >= length)
 		*eof = 1;
-	if(length < offset) {
+	if (length < offset) {
 		*eof = 1;
 		return 0;
 	} else {
@@ -256,18 +262,18 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
 
 static int
 cifs_stats_write(struct file *file, const char __user *buffer,
-               unsigned long count, void *data)
+		 unsigned long count, void *data)
 {
-        char c;
-        int rc;
+	char c;
+	int rc;
 	struct list_head *tmp;
 	struct cifsTconInfo *tcon;
 
-        rc = get_user(c, buffer);
-        if (rc)
-                return rc;
+	rc = get_user(c, buffer);
+	if (rc)
+		return rc;
 
-        if (c == '1' || c == 'y' || c == 'Y' || c == '0') {
+	if (c == '1' || c == 'y' || c == 'Y' || c == '0') {
 		read_lock(&GlobalSMBSeslock);
 #ifdef CONFIG_CIFS_STATS2
 		atomic_set(&totBufAllocCount, 0);
@@ -297,14 +303,14 @@ cifs_stats_write(struct file *file, const char __user *buffer,
 		read_unlock(&GlobalSMBSeslock);
 	}
 
-        return count;
+	return count;
 }
 
 static int
 cifs_stats_read(char *buf, char **beginBuffer, off_t offset,
 		  int count, int *eof, void *data)
 {
-	int item_length,i,length;
+	int item_length, i, length;
 	struct list_head *tmp;
 	struct cifsTconInfo *tcon;
 
@@ -314,44 +320,44 @@ cifs_stats_read(char *buf, char **beginBuffer, off_t offset,
 			"Resources in use\nCIFS Session: %d\n",
 			sesInfoAllocCount.counter);
 	buf += length;
-	item_length = 
-		sprintf(buf,"Share (unique mount targets): %d\n",
+	item_length =
+		sprintf(buf, "Share (unique mount targets): %d\n",
 			tconInfoAllocCount.counter);
 	length += item_length;
-	buf += item_length;      
-	item_length = 
-		sprintf(buf,"SMB Request/Response Buffer: %d Pool size: %d\n",
+	buf += item_length;
+	item_length =
+		sprintf(buf, "SMB Request/Response Buffer: %d Pool size: %d\n",
 			bufAllocCount.counter,
 			cifs_min_rcv + tcpSesAllocCount.counter);
 	length += item_length;
 	buf += item_length;
-	item_length = 
-		sprintf(buf,"SMB Small Req/Resp Buffer: %d Pool size: %d\n",
-			smBufAllocCount.counter,cifs_min_small);
+	item_length =
+		sprintf(buf, "SMB Small Req/Resp Buffer: %d Pool size: %d\n",
+			smBufAllocCount.counter, cifs_min_small);
 	length += item_length;
 	buf += item_length;
 #ifdef CONFIG_CIFS_STATS2
-        item_length = sprintf(buf, "Total Large %d Small %d Allocations\n",
+	item_length = sprintf(buf, "Total Large %d Small %d Allocations\n",
 				atomic_read(&totBufAllocCount),
-		                atomic_read(&totSmBufAllocCount));
+				atomic_read(&totSmBufAllocCount));
 	length += item_length;
 	buf += item_length;
 #endif /* CONFIG_CIFS_STATS2 */
 
-	item_length = 
-		sprintf(buf,"Operations (MIDs): %d\n",
+	item_length =
+		sprintf(buf, "Operations (MIDs): %d\n",
 			midCount.counter);
 	length += item_length;
 	buf += item_length;
 	item_length = sprintf(buf,
 		"\n%d session %d share reconnects\n",
-		tcpSesReconnectCount.counter,tconInfoReconnectCount.counter);
+		tcpSesReconnectCount.counter, tconInfoReconnectCount.counter);
 	length += item_length;
 	buf += item_length;
 
 	item_length = sprintf(buf,
 		"Total vfs operations: %d maximum at one time: %d\n",
-		GlobalCurrentXid,GlobalMaxActiveXid);
+		GlobalCurrentXid, GlobalMaxActiveXid);
 	length += item_length;
 	buf += item_length;
 
@@ -360,10 +366,10 @@ cifs_stats_read(char *buf, char **beginBuffer, off_t offset,
 	list_for_each(tmp, &GlobalTreeConnectionList) {
 		i++;
 		tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
-		item_length = sprintf(buf,"\n%d) %s",i, tcon->treeName);
+		item_length = sprintf(buf, "\n%d) %s", i, tcon->treeName);
 		buf += item_length;
 		length += item_length;
-		if(tcon->tidStatus == CifsNeedReconnect) {
+		if (tcon->tidStatus == CifsNeedReconnect) {
 			buf += sprintf(buf, "\tDISCONNECTED ");
 			length += 14;
 		}
@@ -380,15 +386,15 @@ cifs_stats_read(char *buf, char **beginBuffer, off_t offset,
 		item_length = sprintf(buf, "\nWrites: %d Bytes: %lld",
 			atomic_read(&tcon->num_writes),
 			(long long)(tcon->bytes_written));
-                buf += item_length;
-                length += item_length;
-                item_length = sprintf(buf, 
+		buf += item_length;
+		length += item_length;
+		item_length = sprintf(buf,
 			"\nLocks: %d HardLinks: %d Symlinks: %d",
-                        atomic_read(&tcon->num_locks),
+			atomic_read(&tcon->num_locks),
 			atomic_read(&tcon->num_hardlinks),
 			atomic_read(&tcon->num_symlinks));
-                buf += item_length;
-                length += item_length;
+		buf += item_length;
+		length += item_length;
 
 		item_length = sprintf(buf, "\nOpens: %d Closes: %d Deletes: %d",
 			atomic_read(&tcon->num_opens),
@@ -415,12 +421,12 @@ cifs_stats_read(char *buf, char **beginBuffer, off_t offset,
 	}
 	read_unlock(&GlobalSMBSeslock);
 
-	buf += sprintf(buf,"\n");
+	buf += sprintf(buf, "\n");
 	length++;
 
-	if(offset + count >= length)
+	if (offset + count >= length)
 		*eof = 1;
-	if(length < offset) {
+	if (length < offset) {
 		*eof = 1;
 		return 0;
 	} else {
@@ -428,7 +434,7 @@ cifs_stats_read(char *buf, char **beginBuffer, off_t offset,
 	}
 	if (length > count)
 		length = count;
-		
+
 	return length;
 }
 #endif
@@ -547,11 +553,11 @@ cifs_proc_clean(void)
 	remove_proc_entry("MultiuserMount", proc_fs_cifs);
 	remove_proc_entry("OplockEnabled", proc_fs_cifs);
 /*	remove_proc_entry("NTLMV2Enabled",proc_fs_cifs); */
-	remove_proc_entry("SecurityFlags",proc_fs_cifs);
-/*	remove_proc_entry("PacketSigningEnabled",proc_fs_cifs); */
-	remove_proc_entry("LinuxExtensionsEnabled",proc_fs_cifs);
-	remove_proc_entry("Experimental",proc_fs_cifs);
-	remove_proc_entry("LookupCacheEnabled",proc_fs_cifs);
+	remove_proc_entry("SecurityFlags", proc_fs_cifs);
+/*	remove_proc_entry("PacketSigningEnabled", proc_fs_cifs); */
+	remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs);
+	remove_proc_entry("Experimental", proc_fs_cifs);
+	remove_proc_entry("LookupCacheEnabled", proc_fs_cifs);
 	remove_proc_entry("cifs", proc_root_fs);
 }
 
@@ -590,7 +596,7 @@ cifsFYI_write(struct file *file, const char __user *buffer,
 		cifsFYI = 0;
 	else if (c == '1' || c == 'y' || c == 'Y')
 		cifsFYI = 1;
-	else if((c > '1') && (c <= '9'))
+	else if ((c > '1') && (c <= '9'))
 		cifsFYI = (int) (c - '0'); /* see cifs_debug.h for meanings */
 
 	return count;
@@ -637,28 +643,28 @@ oplockEnabled_write(struct file *file, const char __user *buffer,
 
 static int
 experimEnabled_read(char *page, char **start, off_t off,
-                   int count, int *eof, void *data)
+		    int count, int *eof, void *data)
 {
-        int len;
+	int len;
 
-        len = sprintf(page, "%d\n", experimEnabled);
+	len = sprintf(page, "%d\n", experimEnabled);
 
-        len -= off;
-        *start = page + off;
+	len -= off;
+	*start = page + off;
 
-        if (len > count)
-                len = count;
-        else
-                *eof = 1;
+	if (len > count)
+		len = count;
+	else
+		*eof = 1;
 
-        if (len < 0)
-                len = 0;
+	if (len < 0)
+		len = 0;
 
-        return len;
+	return len;
 }
 static int
 experimEnabled_write(struct file *file, const char __user *buffer,
-                    unsigned long count, void *data)
+		     unsigned long count, void *data)
 {
 	char c;
 	int rc;
@@ -678,46 +684,46 @@ experimEnabled_write(struct file *file, const char __user *buffer,
 
 static int
 linuxExtensionsEnabled_read(char *page, char **start, off_t off,
-                   int count, int *eof, void *data)
+			    int count, int *eof, void *data)
 {
-        int len;
+	int len;
 
-        len = sprintf(page, "%d\n", linuxExtEnabled);
-        len -= off;
-        *start = page + off;
+	len = sprintf(page, "%d\n", linuxExtEnabled);
+	len -= off;
+	*start = page + off;
 
-        if (len > count)
-                len = count;
-        else
-                *eof = 1;
+	if (len > count)
+		len = count;
+	else
+		*eof = 1;
 
-        if (len < 0)
-                len = 0;
+	if (len < 0)
+		len = 0;
 
-        return len;
+	return len;
 }
 static int
 linuxExtensionsEnabled_write(struct file *file, const char __user *buffer,
-                    unsigned long count, void *data)
+			     unsigned long count, void *data)
 {
-        char c;
-        int rc;
-
-        rc = get_user(c, buffer);
-        if (rc)
-                return rc;
-        if (c == '0' || c == 'n' || c == 'N')
-                linuxExtEnabled = 0;
-        else if (c == '1' || c == 'y' || c == 'Y')
-                linuxExtEnabled = 1;
-
-        return count;
+	char c;
+	int rc;
+
+	rc = get_user(c, buffer);
+	if (rc)
+		return rc;
+	if (c == '0' || c == 'n' || c == 'N')
+		linuxExtEnabled = 0;
+	else if (c == '1' || c == 'y' || c == 'Y')
+		linuxExtEnabled = 1;
+
+	return count;
 }
 
 
 static int
 lookupFlag_read(char *page, char **start, off_t off,
-		   int count, int *eof, void *data)
+		int count, int *eof, void *data)
 {
 	int len;
 
@@ -860,15 +866,15 @@ security_flags_write(struct file *file, const char __user *buffer,
 	char flags_string[12];
 	char c;
 
-	if((count < 1) || (count > 11))
+	if ((count < 1) || (count > 11))
 		return -EINVAL;
 
 	memset(flags_string, 0, 12);
 
-	if(copy_from_user(flags_string, buffer, count))
+	if (copy_from_user(flags_string, buffer, count))
 		return -EFAULT;
 
-	if(count < 3) {
+	if (count < 3) {
 		/* single char or single char followed by null */
 		c = flags_string[0];
 		if (c == '0' || c == 'n' || c == 'N')
@@ -881,15 +887,15 @@ security_flags_write(struct file *file, const char __user *buffer,
 
 	flags = simple_strtoul(flags_string, NULL, 0);
 
-	cFYI(1,("sec flags 0x%x", flags));
+	cFYI(1, ("sec flags 0x%x", flags));
 
-	if(flags <= 0)  {
-		cERROR(1,("invalid security flags %s",flags_string));
+	if (flags <= 0)  {
+		cERROR(1, ("invalid security flags %s", flags_string));
 		return -EINVAL;
 	}
 
-	if(flags & ~CIFSSEC_MASK) {
-		cERROR(1,("attempt to set unsupported security flags 0x%x",
+	if (flags & ~CIFSSEC_MASK) {
+		cERROR(1, ("attempt to set unsupported security flags 0x%x",
 			flags & ~CIFSSEC_MASK));
 		return -EINVAL;
 	}
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 793c4b95c16..701e9a9185f 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -6,16 +6,16 @@
  *
  *   This program is free software;  you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation; either version 2 of the License, or 
+ *   the Free Software Foundation; either version 2 of the License, or
  *   (at your option) any later version.
- * 
+ *
  *   This program is distributed in the hope that it will be useful,
  *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  *   the GNU General Public License for more details.
  *
  *   You should have received a copy of the GNU General Public License
- *   along with this program;  if not, write to the Free Software 
+ *   along with this program;  if not, write to the Free Software
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 #include <linux/fs.h>
@@ -32,7 +32,7 @@
  *
  */
 int
-cifs_strfromUCS_le(char *to, const __le16 * from,	
+cifs_strfromUCS_le(char *to, const __le16 * from,
 		   int len, const struct nls_table *codepage)
 {
 	int i;
@@ -66,7 +66,7 @@ cifs_strtoUCS(__le16 * to, const char *from, int len,
 {
 	int charlen;
 	int i;
-	wchar_t * wchar_to = (wchar_t *)to; /* needed to quiet sparse */  
+	wchar_t * wchar_to = (wchar_t *)to; /* needed to quiet sparse */
 
 	for (i = 0; len && *from; i++, from += charlen, len -= charlen) {
 
@@ -79,7 +79,7 @@ cifs_strtoUCS(__le16 * to, const char *from, int len,
 			/* A question mark */
 			to[i] = cpu_to_le16(0x003f);
 			charlen = 1;
-		} else 
+		} else
 			to[i] = cpu_to_le16(wchar_to[i]);
 
 	}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index d38c69b591c..8b0cbf4a4ad 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -616,7 +616,7 @@ const struct file_operations cifs_file_ops = {
 	.fsync = cifs_fsync,
 	.flush = cifs_flush,
 	.mmap  = cifs_file_mmap,
-	.sendfile = generic_file_sendfile,
+	.splice_read = generic_file_splice_read,
 	.llseek = cifs_llseek,
 #ifdef CONFIG_CIFS_POSIX
 	.ioctl	= cifs_ioctl,
@@ -637,7 +637,7 @@ const struct file_operations cifs_file_direct_ops = {
 	.lock = cifs_lock,
 	.fsync = cifs_fsync,
 	.flush = cifs_flush,
-	.sendfile = generic_file_sendfile, /* BB removeme BB */
+	.splice_read = generic_file_splice_read,
 #ifdef CONFIG_CIFS_POSIX
 	.ioctl  = cifs_ioctl,
 #endif /* CONFIG_CIFS_POSIX */
@@ -656,7 +656,7 @@ const struct file_operations cifs_file_nobrl_ops = {
 	.fsync = cifs_fsync,
 	.flush = cifs_flush,
 	.mmap  = cifs_file_mmap,
-	.sendfile = generic_file_sendfile,
+	.splice_read = generic_file_splice_read,
 	.llseek = cifs_llseek,
 #ifdef CONFIG_CIFS_POSIX
 	.ioctl	= cifs_ioctl,
@@ -676,7 +676,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
 	.release = cifs_close,
 	.fsync = cifs_fsync,
 	.flush = cifs_flush,
-	.sendfile = generic_file_sendfile, /* BB removeme BB */
+	.splice_read = generic_file_splice_read,
 #ifdef CONFIG_CIFS_POSIX
 	.ioctl  = cifs_ioctl,
 #endif /* CONFIG_CIFS_POSIX */
@@ -825,8 +825,8 @@ cifs_init_mids(void)
 				sizeof (struct oplock_q_entry), 0,
 				SLAB_HWCACHE_ALIGN, NULL, NULL);
 	if (cifs_oplock_cachep == NULL) {
-		kmem_cache_destroy(cifs_mid_cachep);
 		mempool_destroy(cifs_mid_poolp);
+		kmem_cache_destroy(cifs_mid_cachep);
 		return -ENOMEM;
 	}
 
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 14de58fa143..57419a17668 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -433,8 +433,8 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 	cFYI(1,("secFlags 0x%x",secFlags));
 
 	pSMB->hdr.Mid = GetNextMid(server);
-	pSMB->hdr.Flags2 |= SMBFLG2_UNICODE;
-	if((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5)
+	pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS);
+	if ((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5)
 		pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
 	
 	count = 0;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 216fb625843..f4e92661b22 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2069,8 +2069,15 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 			srvTcp->tcpStatus = CifsExiting;
 			spin_unlock(&GlobalMid_Lock);
 			if (srvTcp->tsk) {
+				struct task_struct *tsk;
+				/* If we could verify that kthread_stop would
+				   always wake up processes blocked in
+				   tcp in recv_mesg then we could remove the
+				   send_sig call */
 				send_sig(SIGKILL,srvTcp->tsk,1);
-				kthread_stop(srvTcp->tsk);
+				tsk = srvTcp->tsk;
+				if(tsk)
+					kthread_stop(tsk);
 			}
 		}
 		 /* If find_unc succeeded then rc == 0 so we can not end */
@@ -2085,8 +2092,11 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 					/* if the socketUseCount is now zero */
 					if ((temp_rc == -ESHUTDOWN) &&
 					   (pSesInfo->server) && (pSesInfo->server->tsk)) {
+						struct task_struct *tsk;
 						send_sig(SIGKILL,pSesInfo->server->tsk,1);
-						kthread_stop(pSesInfo->server->tsk);
+						tsk = pSesInfo->server->tsk;
+						if (tsk)
+							kthread_stop(tsk);
 					}
 				} else
 					cFYI(1, ("No session or bad tcon"));
@@ -3334,7 +3344,7 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
 				return 0;
 			} else if (rc == -ESHUTDOWN) {
 				cFYI(1,("Waking up socket by sending it signal"));
-				if(cifsd_task) {
+				if (cifsd_task) {
 					send_sig(SIGKILL,cifsd_task,1);
 					kthread_stop(cifsd_task);
 				}
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index e5210519ac4..8e86aaceb68 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -2,7 +2,7 @@
  *   fs/cifs/dir.c
  *
  *   vfs operations that deal with dentries
- * 
+ *
  *   Copyright (C) International Business Machines  Corp., 2002,2005
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
@@ -34,11 +34,12 @@
 static void
 renew_parental_timestamps(struct dentry *direntry)
 {
-	/* BB check if there is a way to get the kernel to do this or if we really need this */
+	/* BB check if there is a way to get the kernel to do this or if we
+	   really need this */
 	do {
 		direntry->d_time = jiffies;
 		direntry = direntry->d_parent;
-	} while (!IS_ROOT(direntry));	
+	} while (!IS_ROOT(direntry));
 }
 
 /* Note: caller must free return buffer */
@@ -51,7 +52,7 @@ build_path_from_dentry(struct dentry *direntry)
 	char *full_path;
 	char dirsep;
 
-	if(direntry == NULL)
+	if (direntry == NULL)
 		return NULL;  /* not much we can do if dentry is freed and
 		we need to reopen the file after it was closed implicitly
 		when the server crashed */
@@ -59,18 +60,18 @@ build_path_from_dentry(struct dentry *direntry)
 	dirsep = CIFS_DIR_SEP(CIFS_SB(direntry->d_sb));
 	pplen = CIFS_SB(direntry->d_sb)->prepathlen;
 cifs_bp_rename_retry:
-	namelen = pplen; 
+	namelen = pplen;
 	for (temp = direntry; !IS_ROOT(temp);) {
 		namelen += (1 + temp->d_name.len);
 		temp = temp->d_parent;
-		if(temp == NULL) {
-			cERROR(1,("corrupt dentry"));
+		if (temp == NULL) {
+			cERROR(1, ("corrupt dentry"));
 			return NULL;
 		}
 	}
 
 	full_path = kmalloc(namelen+1, GFP_KERNEL);
-	if(full_path == NULL)
+	if (full_path == NULL)
 		return full_path;
 	full_path[namelen] = 0;	/* trailing null */
 	for (temp = direntry; !IS_ROOT(temp);) {
@@ -84,8 +85,8 @@ cifs_bp_rename_retry:
 			cFYI(0, ("name: %s", full_path + namelen));
 		}
 		temp = temp->d_parent;
-		if(temp == NULL) {
-			cERROR(1,("corrupt dentry"));
+		if (temp == NULL) {
+			cERROR(1, ("corrupt dentry"));
 			kfree(full_path);
 			return NULL;
 		}
@@ -94,7 +95,7 @@ cifs_bp_rename_retry:
 		cERROR(1,
 		       ("did not end path lookup where expected namelen is %d",
 			namelen));
-		/* presumably this is only possible if racing with a rename 
+		/* presumably this is only possible if racing with a rename
 		of one of the parent directories  (we can not lock the dentries
 		above us to prevent this, but retrying should be harmless) */
 		kfree(full_path);
@@ -106,7 +107,7 @@ cifs_bp_rename_retry:
 	   since the '\' is a valid posix character so we can not switch
 	   those safely to '/' if any are found in the middle of the prepath */
 	/* BB test paths to Windows with '/' in the midst of prepath */
-	strncpy(full_path,CIFS_SB(direntry->d_sb)->prepath,pplen);
+	strncpy(full_path, CIFS_SB(direntry->d_sb)->prepath, pplen);
 	return full_path;
 }
 
@@ -147,12 +148,12 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 	pTcon = cifs_sb->tcon;
 
 	full_path = build_path_from_dentry(direntry);
-	if(full_path == NULL) {
+	if (full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
 	}
 
-	if(nd && (nd->flags & LOOKUP_OPEN)) {
+	if (nd && (nd->flags & LOOKUP_OPEN)) {
 		int oflags = nd->intent.open.flags;
 
 		desiredAccess = 0;
@@ -164,28 +165,29 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 				write_only = TRUE;
 		}
 
-		if((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
+		if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
 			disposition = FILE_CREATE;
-		else if((oflags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
+		else if ((oflags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
 			disposition = FILE_OVERWRITE_IF;
-		else if((oflags & O_CREAT) == O_CREAT)
+		else if ((oflags & O_CREAT) == O_CREAT)
 			disposition = FILE_OPEN_IF;
 		else {
-			cFYI(1,("Create flag not set in create function"));
+			cFYI(1, ("Create flag not set in create function"));
 		}
 	}
 
-	/* BB add processing to set equivalent of mode - e.g. via CreateX with ACLs */
+	/* BB add processing to set equivalent of mode - e.g. via CreateX with
+	   ACLs */
 	if (oplockEnabled)
 		oplock = REQ_OPLOCK;
 
-	buf = kmalloc(sizeof(FILE_ALL_INFO),GFP_KERNEL);
-	if(buf == NULL) {
+	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
+	if (buf == NULL) {
 		kfree(full_path);
 		FreeXid(xid);
 		return -ENOMEM;
 	}
-	if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS) 
+	if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS)
 		rc = CIFSSMBOpen(xid, pTcon, full_path, disposition,
 			 desiredAccess, CREATE_NOT_DIR,
 			 &fileHandle, &oplock, buf, cifs_sb->local_nls,
@@ -193,27 +195,28 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 	else
 		rc = -EIO; /* no NT SMB support fall into legacy open below */
 
-	if(rc == -EIO) {
+	if (rc == -EIO) {
 		/* old server, retry the open legacy style */
 		rc = SMBLegacyOpen(xid, pTcon, full_path, disposition,
 			desiredAccess, CREATE_NOT_DIR,
 			&fileHandle, &oplock, buf, cifs_sb->local_nls,
 			cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
-	} 
+	}
 	if (rc) {
 		cFYI(1, ("cifs_create returned 0x%x", rc));
 	} else {
 		/* If Open reported that we actually created a file
 		then we now have to set the mode if possible */
 		if ((cifs_sb->tcon->ses->capabilities & CAP_UNIX) &&
-			(oplock & CIFS_CREATE_ACTION))
-			if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
+			(oplock & CIFS_CREATE_ACTION)) {
+			mode &= ~current->fs->umask;
+			if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
 				CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode,
 					(__u64)current->fsuid,
 					(__u64)current->fsgid,
 					0 /* dev */,
-					cifs_sb->local_nls, 
-					cifs_sb->mnt_cifs_flags & 
+					cifs_sb->local_nls,
+					cifs_sb->mnt_cifs_flags &
 						CIFS_MOUNT_MAP_SPECIAL_CHR);
 			} else {
 				CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode,
@@ -221,26 +224,28 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 					(__u64)-1,
 					0 /* dev */,
 					cifs_sb->local_nls,
-					cifs_sb->mnt_cifs_flags & 
+					cifs_sb->mnt_cifs_flags &
 						CIFS_MOUNT_MAP_SPECIAL_CHR);
 			}
-		else {
-			/* BB implement mode setting via Windows security descriptors */
-			/* eg CIFSSMBWinSetPerms(xid,pTcon,full_path,mode,-1,-1,local_nls);*/
-			/* could set r/o dos attribute if mode & 0222 == 0 */
+		} else {
+			/* BB implement mode setting via Windows security
+			   descriptors e.g. */
+			/* CIFSSMBWinSetPerms(xid,pTcon,path,mode,-1,-1,nls);*/
+
+			/* Could set r/o dos attribute if mode & 0222 == 0 */
 		}
 
 	/* BB server might mask mode so we have to query for Unix case*/
 		if (pTcon->ses->capabilities & CAP_UNIX)
 			rc = cifs_get_inode_info_unix(&newinode, full_path,
-						 inode->i_sb,xid);
+						 inode->i_sb, xid);
 		else {
 			rc = cifs_get_inode_info(&newinode, full_path,
-						 buf, inode->i_sb,xid);
-			if(newinode) {
+						 buf, inode->i_sb, xid);
+			if (newinode) {
 				newinode->i_mode = mode;
-				if((oplock & CIFS_CREATE_ACTION) &&
-				  (cifs_sb->mnt_cifs_flags & 
+				if ((oplock & CIFS_CREATE_ACTION) &&
+				    (cifs_sb->mnt_cifs_flags &
 				     CIFS_MOUNT_SET_UID)) {
 					newinode->i_uid = current->fsuid;
 					newinode->i_gid = current->fsgid;
@@ -259,14 +264,14 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 				direntry->d_op = &cifs_dentry_ops;
 			d_instantiate(direntry, newinode);
 		}
-		if((nd->flags & LOOKUP_OPEN) == FALSE) {
+		if ((nd->flags & LOOKUP_OPEN) == FALSE) {
 			/* mknod case - do not leave file open */
 			CIFSSMBClose(xid, pTcon, fileHandle);
-		} else if(newinode) {
+		} else if (newinode) {
 			pCifsFile =
 			   kzalloc(sizeof (struct cifsFileInfo), GFP_KERNEL);
-			
-			if(pCifsFile == NULL)
+
+			if (pCifsFile == NULL)
 				goto cifs_create_out;
 			pCifsFile->netfid = fileHandle;
 			pCifsFile->pid = current->tgid;
@@ -276,33 +281,33 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 			init_MUTEX(&pCifsFile->fh_sem);
 			mutex_init(&pCifsFile->lock_mutex);
 			INIT_LIST_HEAD(&pCifsFile->llist);
-			atomic_set(&pCifsFile->wrtPending,0);
+			atomic_set(&pCifsFile->wrtPending, 0);
 
-			/* set the following in open now 
+			/* set the following in open now
 				pCifsFile->pfile = file; */
 			write_lock(&GlobalSMBSeslock);
-			list_add(&pCifsFile->tlist,&pTcon->openFileList);
+			list_add(&pCifsFile->tlist, &pTcon->openFileList);
 			pCifsInode = CIFS_I(newinode);
-			if(pCifsInode) {
+			if (pCifsInode) {
 				/* if readable file instance put first in list*/
 				if (write_only == TRUE) {
-                                       	list_add_tail(&pCifsFile->flist,
+					list_add_tail(&pCifsFile->flist,
 						&pCifsInode->openFileList);
 				} else {
 					list_add(&pCifsFile->flist,
 						&pCifsInode->openFileList);
 				}
-				if((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
+				if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
 					pCifsInode->clientCanCacheAll = TRUE;
 					pCifsInode->clientCanCacheRead = TRUE;
-					cFYI(1,("Exclusive Oplock for inode %p",
+					cFYI(1, ("Exclusive Oplock inode %p",
 						newinode));
-				} else if((oplock & 0xF) == OPLOCK_READ)
+				} else if ((oplock & 0xF) == OPLOCK_READ)
 					pCifsInode->clientCanCacheRead = TRUE;
 			}
 			write_unlock(&GlobalSMBSeslock);
 		}
-	} 
+	}
 cifs_create_out:
 	kfree(buf);
 	kfree(full_path);
@@ -310,8 +315,8 @@ cifs_create_out:
 	return rc;
 }
 
-int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, 
-		dev_t device_number) 
+int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
+		dev_t device_number)
 {
 	int rc = -EPERM;
 	int xid;
@@ -329,43 +334,45 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
 	pTcon = cifs_sb->tcon;
 
 	full_path = build_path_from_dentry(direntry);
-	if(full_path == NULL)
+	if (full_path == NULL)
 		rc = -ENOMEM;
 	else if (pTcon->ses->capabilities & CAP_UNIX) {
-		if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
+		mode &= ~current->fs->umask;
+		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
 			rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path,
-				mode,(__u64)current->fsuid,(__u64)current->fsgid,
+				mode, (__u64)current->fsuid,
+				(__u64)current->fsgid,
 				device_number, cifs_sb->local_nls,
-				cifs_sb->mnt_cifs_flags & 
+				cifs_sb->mnt_cifs_flags &
 					CIFS_MOUNT_MAP_SPECIAL_CHR);
 		} else {
 			rc = CIFSSMBUnixSetPerms(xid, pTcon,
 				full_path, mode, (__u64)-1, (__u64)-1,
 				device_number, cifs_sb->local_nls,
-				cifs_sb->mnt_cifs_flags & 
+				cifs_sb->mnt_cifs_flags &
 					CIFS_MOUNT_MAP_SPECIAL_CHR);
 		}
 
-		if(!rc) {
+		if (!rc) {
 			rc = cifs_get_inode_info_unix(&newinode, full_path,
-						inode->i_sb,xid);
+						inode->i_sb, xid);
 			if (pTcon->nocase)
 				direntry->d_op = &cifs_ci_dentry_ops;
 			else
 				direntry->d_op = &cifs_dentry_ops;
-			if(rc == 0)
+			if (rc == 0)
 				d_instantiate(direntry, newinode);
 		}
 	} else {
-		if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
+		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
 			int oplock = 0;
 			u16 fileHandle;
 			FILE_ALL_INFO * buf;
 
-			cFYI(1,("sfu compat create special file"));
+			cFYI(1, ("sfu compat create special file"));
 
-			buf = kmalloc(sizeof(FILE_ALL_INFO),GFP_KERNEL);
-			if(buf == NULL) {
+			buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
+			if (buf == NULL) {
 				kfree(full_path);
 				FreeXid(xid);
 				return -ENOMEM;
@@ -373,39 +380,38 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
 
 			rc = CIFSSMBOpen(xid, pTcon, full_path,
 					 FILE_CREATE, /* fail if exists */
-					 GENERIC_WRITE /* BB would 
+					 GENERIC_WRITE /* BB would
 					  WRITE_OWNER | WRITE_DAC be better? */,
 					 /* Create a file and set the
 					    file attribute to SYSTEM */
 					 CREATE_NOT_DIR | CREATE_OPTION_SPECIAL,
 					 &fileHandle, &oplock, buf,
 					 cifs_sb->local_nls,
-					 cifs_sb->mnt_cifs_flags & 
+					 cifs_sb->mnt_cifs_flags &
 					    CIFS_MOUNT_MAP_SPECIAL_CHR);
 
 			/* BB FIXME - add handling for backlevel servers
 			   which need legacy open and check for all
-			   calls to SMBOpen for fallback to 
-			   SMBLeagcyOpen */
-			if(!rc) {
+			   calls to SMBOpen for fallback to SMBLeagcyOpen */
+			if (!rc) {
 				/* BB Do not bother to decode buf since no
 				   local inode yet to put timestamps in,
 				   but we can reuse it safely */
 				int bytes_written;
 				struct win_dev *pdev;
 				pdev = (struct win_dev *)buf;
-				if(S_ISCHR(mode)) {
+				if (S_ISCHR(mode)) {
 					memcpy(pdev->type, "IntxCHR", 8);
 					pdev->major =
 					      cpu_to_le64(MAJOR(device_number));
-					pdev->minor = 
+					pdev->minor =
 					      cpu_to_le64(MINOR(device_number));
 					rc = CIFSSMBWrite(xid, pTcon,
 						fileHandle,
 						sizeof(struct win_dev),
 						0, &bytes_written, (char *)pdev,
 						NULL, 0);
-				} else if(S_ISBLK(mode)) {
+				} else if (S_ISBLK(mode)) {
 					memcpy(pdev->type, "IntxBLK", 8);
 					pdev->major =
 					      cpu_to_le64(MAJOR(device_number));
@@ -432,7 +438,8 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
 
 
 struct dentry *
-cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct nameidata *nd)
+cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
+	    struct nameidata *nd)
 {
 	int xid;
 	int rc = 0; /* to get around spurious gcc warning, set to zero here */
@@ -447,8 +454,6 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name
 	     (" parent inode = 0x%p name is: %s and dentry = 0x%p",
 	      parent_dir_inode, direntry->d_name.name, direntry));
 
-	/* BB Add check of incoming data - e.g. frame not longer than maximum SMB - let server check the namelen BB */
-
 	/* check whether path exists */
 
 	cifs_sb = CIFS_SB(parent_dir_inode->i_sb);
@@ -472,7 +477,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name
 	deadlock in the cases (beginning of sys_rename itself)
 	in which we already have the sb rename sem */
 	full_path = build_path_from_dentry(direntry);
-	if(full_path == NULL) {
+	if (full_path == NULL) {
 		FreeXid(xid);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -487,10 +492,10 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name
 
 	if (pTcon->ses->capabilities & CAP_UNIX)
 		rc = cifs_get_inode_info_unix(&newInode, full_path,
-					      parent_dir_inode->i_sb,xid);
+					      parent_dir_inode->i_sb, xid);
 	else
 		rc = cifs_get_inode_info(&newInode, full_path, NULL,
-					 parent_dir_inode->i_sb,xid);
+					 parent_dir_inode->i_sb, xid);
 
 	if ((rc == 0) && (newInode != NULL)) {
 		if (pTcon->nocase)
@@ -499,7 +504,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name
 			direntry->d_op = &cifs_dentry_ops;
 		d_add(direntry, newInode);
 
-		/* since paths are not looked up by component - the parent 
+		/* since paths are not looked up by component - the parent
 		   directories are presumed to be good here */
 		renew_parental_timestamps(direntry);
 
@@ -511,13 +516,13 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name
 		else
 			direntry->d_op = &cifs_dentry_ops;
 		d_add(direntry, NULL);
-	/*	if it was once a directory (but how can we tell?) we could do  
-			shrink_dcache_parent(direntry); */
+	/*	if it was once a directory (but how can we tell?) we could do
+		shrink_dcache_parent(direntry); */
 	} else {
-		cERROR(1,("Error 0x%x on cifs_get_inode_info in lookup of %s",
-			   rc,full_path));
-		/* BB special case check for Access Denied - watch security 
-		exposure of returning dir info implicitly via different rc 
+		cERROR(1, ("Error 0x%x on cifs_get_inode_info in lookup of %s",
+			   rc, full_path));
+		/* BB special case check for Access Denied - watch security
+		exposure of returning dir info implicitly via different rc
 		if file exists or not but no access BB */
 	}
 
@@ -538,11 +543,11 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
 	} else {
 		cFYI(1, ("neg dentry 0x%p name = %s",
 			 direntry, direntry->d_name.name));
-		if(time_after(jiffies, direntry->d_time + HZ) || 
+		if (time_after(jiffies, direntry->d_time + HZ) ||
 			!lookupCacheEnabled) {
 			d_drop(direntry);
 			isValid = 0;
-		} 
+		}
 	}
 
 	return isValid;
@@ -559,8 +564,7 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
 
 struct dentry_operations cifs_dentry_ops = {
 	.d_revalidate = cifs_d_revalidate,
-/* d_delete:       cifs_d_delete,       *//* not needed except for debugging */
-	/* no need for d_hash, d_compare, d_release, d_iput ... yet. BB confirm this BB */
+/* d_delete:       cifs_d_delete,      */ /* not needed except for debugging */
 };
 
 static int cifs_ci_hash(struct dentry *dentry, struct qstr *q)
diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c
index da12b482ebe..8e375bb4b37 100644
--- a/fs/cifs/fcntl.c
+++ b/fs/cifs/fcntl.c
@@ -2,7 +2,7 @@
  *   fs/cifs/fcntl.c
  *
  *   vfs operations that deal with the file control API
- * 
+ *
  *   Copyright (C) International Business Machines  Corp., 2003,2004
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
@@ -35,35 +35,34 @@ static __u32 convert_to_cifs_notify_flags(unsigned long fcntl_notify_flags)
 
 	/* No way on Linux VFS to ask to monitor xattr
 	changes (and no stream support either */
-	if(fcntl_notify_flags & DN_ACCESS) {
+	if (fcntl_notify_flags & DN_ACCESS) {
 		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_ACCESS;
 	}
-	if(fcntl_notify_flags & DN_MODIFY) {
+	if (fcntl_notify_flags & DN_MODIFY) {
 		/* What does this mean on directories? */
 		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE |
 			FILE_NOTIFY_CHANGE_SIZE;
 	}
-	if(fcntl_notify_flags & DN_CREATE) {
-		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_CREATION | 
+	if (fcntl_notify_flags & DN_CREATE) {
+		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_CREATION |
 			FILE_NOTIFY_CHANGE_LAST_WRITE;
 	}
-	if(fcntl_notify_flags & DN_DELETE) {
+	if (fcntl_notify_flags & DN_DELETE) {
 		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE;
 	}
-	if(fcntl_notify_flags & DN_RENAME) {
+	if (fcntl_notify_flags & DN_RENAME) {
 		/* BB review this - checking various server behaviors */
-		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_DIR_NAME | 
+		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_DIR_NAME |
 			FILE_NOTIFY_CHANGE_FILE_NAME;
 	}
-	if(fcntl_notify_flags & DN_ATTRIB) {
-		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_SECURITY | 
+	if (fcntl_notify_flags & DN_ATTRIB) {
+		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_SECURITY |
 			FILE_NOTIFY_CHANGE_ATTRIBUTES;
 	}
-/*	if(fcntl_notify_flags & DN_MULTISHOT) {
+/*	if (fcntl_notify_flags & DN_MULTISHOT) {
 		cifs_ntfy_flags |= ;
 	} */ /* BB fixme - not sure how to handle this with CIFS yet */
 
-
 	return cifs_ntfy_flags;
 }
 
@@ -78,8 +77,7 @@ int cifs_dir_notify(struct file * file, unsigned long arg)
 	__u32 filter = FILE_NOTIFY_CHANGE_NAME | FILE_NOTIFY_CHANGE_ATTRIBUTES;
 	__u16 netfid;
 
-
-	if(experimEnabled == 0)
+	if (experimEnabled == 0)
 		return 0;
 
 	xid = GetXid();
@@ -88,21 +86,21 @@ int cifs_dir_notify(struct file * file, unsigned long arg)
 
 	full_path = build_path_from_dentry(file->f_path.dentry);
 
-	if(full_path == NULL) {
+	if (full_path == NULL) {
 		rc = -ENOMEM;
 	} else {
-		cFYI(1,("dir notify on file %s Arg 0x%lx",full_path,arg));
-		rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, 
+		cFYI(1, ("dir notify on file %s Arg 0x%lx", full_path, arg));
+		rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
 			GENERIC_READ | SYNCHRONIZE, 0 /* create options */,
-			&netfid, &oplock,NULL, cifs_sb->local_nls,
+			&netfid, &oplock, NULL, cifs_sb->local_nls,
 			cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
 		/* BB fixme - add this handle to a notify handle list */
-		if(rc) {
-			cFYI(1,("Could not open directory for notify"));
+		if (rc) {
+			cFYI(1, ("Could not open directory for notify"));
 		} else {
 			filter = convert_to_cifs_notify_flags(arg);
-			if(filter != 0) {
-				rc = CIFSSMBNotify(xid, pTcon, 
+			if (filter != 0) {
+				rc = CIFSSMBNotify(xid, pTcon,
 					0 /* no subdirs */, netfid,
 					filter, file, arg & DN_MULTISHOT,
 					cifs_sb->local_nls);
@@ -113,10 +111,10 @@ int cifs_dir_notify(struct file * file, unsigned long arg)
 			it would close automatically but may be a way
 			to do it easily when inode freed or when
 			notify info is cleared/changed */
-			cFYI(1,("notify rc %d",rc));
+			cFYI(1, ("notify rc %d", rc));
 		}
 	}
-	
+
 	FreeXid(xid);
 	return rc;
 }
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 3e87dad3367..f0ff12b3f39 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -986,7 +986,8 @@ mkdir_get_info:
 		  * failed to get it from the server or was set bogus */ 
 		if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2))
 				direntry->d_inode->i_nlink = 2; 
-		if (cifs_sb->tcon->ses->capabilities & CAP_UNIX)
+		if (cifs_sb->tcon->ses->capabilities & CAP_UNIX) {
+			mode &= ~current->fs->umask;
 			if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
 				CIFSSMBUnixSetPerms(xid, pTcon, full_path,
 						    mode,
@@ -1004,7 +1005,7 @@ mkdir_get_info:
 						    cifs_sb->mnt_cifs_flags & 
 						    CIFS_MOUNT_MAP_SPECIAL_CHR);
 			}
-		else {
+		} else {
 			/* BB to be implemented via Windows secrty descriptors
 			   eg CIFSSMBWinSetPerms(xid, pTcon, full_path, mode,
 						 -1, -1, local_nls); */
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index e34c7db00f6..a414f1775ae 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -30,7 +30,7 @@
 
 #define CIFS_IOC_CHECKUMOUNT _IO(0xCF, 2)
 
-int cifs_ioctl (struct inode * inode, struct file * filep, 
+int cifs_ioctl (struct inode * inode, struct file * filep,
 		unsigned int command, unsigned long arg)
 {
 	int rc = -ENOTTY; /* strange error - but the precedent */
@@ -47,13 +47,13 @@ int cifs_ioctl (struct inode * inode, struct file * filep,
 
 	xid = GetXid();
 
-        cFYI(1,("ioctl file %p  cmd %u  arg %lu",filep,command,arg));
+	cFYI(1, ("ioctl file %p  cmd %u  arg %lu", filep, command, arg));
 
 	cifs_sb = CIFS_SB(inode->i_sb);
 
 #ifdef CONFIG_CIFS_POSIX
 	tcon = cifs_sb->tcon;
-	if(tcon)
+	if (tcon)
 		caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
 	else {
 		rc = -EIO;
@@ -62,24 +62,24 @@ int cifs_ioctl (struct inode * inode, struct file * filep,
 	}
 #endif /* CONFIG_CIFS_POSIX */
 
-	switch(command) {
+	switch (command) {
 		case CIFS_IOC_CHECKUMOUNT:
-			cFYI(1,("User unmount attempted"));
-			if(cifs_sb->mnt_uid == current->uid)
+			cFYI(1, ("User unmount attempted"));
+			if (cifs_sb->mnt_uid == current->uid)
 				rc = 0;
 			else {
 				rc = -EACCES;
-				cFYI(1,("uids do not match"));
+				cFYI(1, ("uids do not match"));
 			}
 			break;
 #ifdef CONFIG_CIFS_POSIX
 		case FS_IOC_GETFLAGS:
-			if(CIFS_UNIX_EXTATTR_CAP & caps) {
+			if (CIFS_UNIX_EXTATTR_CAP & caps) {
 				if (pSMBFile == NULL)
 					break;
 				rc = CIFSGetExtAttr(xid, tcon, pSMBFile->netfid,
 					&ExtAttrBits, &ExtAttrMask);
-				if(rc == 0)
+				if (rc == 0)
 					rc = put_user(ExtAttrBits &
 						FS_FL_USER_VISIBLE,
 						(int __user *)arg);
@@ -87,8 +87,8 @@ int cifs_ioctl (struct inode * inode, struct file * filep,
 			break;
 
 		case FS_IOC_SETFLAGS:
-			if(CIFS_UNIX_EXTATTR_CAP & caps) {
-				if(get_user(ExtAttrBits,(int __user *)arg)) {
+			if (CIFS_UNIX_EXTATTR_CAP & caps) {
+				if (get_user(ExtAttrBits, (int __user *)arg)) {
 					rc = -EFAULT;
 					break;
 				}
@@ -96,16 +96,15 @@ int cifs_ioctl (struct inode * inode, struct file * filep,
 					break;
 				/* rc= CIFSGetExtAttr(xid,tcon,pSMBFile->netfid,
 					extAttrBits, &ExtAttrMask);*/
-				
 			}
-			cFYI(1,("set flags not implemented yet"));
+			cFYI(1, ("set flags not implemented yet"));
 			break;
 #endif /* CONFIG_CIFS_POSIX */
 		default:
-			cFYI(1,("unsupported ioctl"));
+			cFYI(1, ("unsupported ioctl"));
 			break;
 	}
 
 	FreeXid(xid);
 	return rc;
-} 
+}
diff --git a/fs/cifs/rfc1002pdu.h b/fs/cifs/rfc1002pdu.h
index aede606132a..8b69fcceb59 100644
--- a/fs/cifs/rfc1002pdu.h
+++ b/fs/cifs/rfc1002pdu.h
@@ -18,7 +18,7 @@
  *
  *   You should have received a copy of the GNU Lesser General Public License
  *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 /* NB: unlike smb/cifs packets, the RFC1002 structures are big endian */
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 9ddf5ed6216..898a86dde8f 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -470,7 +470,7 @@ int coda_readdir(struct file *coda_file, void *dirent, filldir_t filldir)
 
 		ret = -ENOENT;
 		if (!IS_DEADDIR(host_inode)) {
-			ret = host_file->f_op->readdir(host_file, filldir, dirent);
+			ret = host_file->f_op->readdir(host_file, dirent, filldir);
 			file_accessed(host_file);
 		}
 	}
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 5ef2b609ec7..99dbe866816 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -47,8 +47,9 @@ coda_file_read(struct file *coda_file, char __user *buf, size_t count, loff_t *p
 }
 
 static ssize_t
-coda_file_sendfile(struct file *coda_file, loff_t *ppos, size_t count,
-		   read_actor_t actor, void *target)
+coda_file_splice_read(struct file *coda_file, loff_t *ppos,
+		      struct pipe_inode_info *pipe, size_t count,
+		      unsigned int flags)
 {
 	struct coda_file_info *cfi;
 	struct file *host_file;
@@ -57,10 +58,10 @@ coda_file_sendfile(struct file *coda_file, loff_t *ppos, size_t count,
 	BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
 	host_file = cfi->cfi_container;
 
-	if (!host_file->f_op || !host_file->f_op->sendfile)
+	if (!host_file->f_op || !host_file->f_op->splice_read)
 		return -EINVAL;
 
-	return host_file->f_op->sendfile(host_file, ppos, count, actor, target);
+	return host_file->f_op->splice_read(host_file, ppos, pipe, count,flags);
 }
 
 static ssize_t
@@ -295,6 +296,6 @@ const struct file_operations coda_file_operations = {
 	.flush		= coda_flush,
 	.release	= coda_release,
 	.fsync		= coda_fsync,
-	.sendfile	= coda_file_sendfile,
+	.splice_read	= coda_file_splice_read,
 };
 
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 6b44cdc96fa..e440a7b95d0 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -63,6 +63,7 @@
 #include <linux/wireless.h>
 #include <linux/atalk.h>
 #include <linux/blktrace_api.h>
+#include <linux/loop.h>
 
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci.h>
@@ -3489,6 +3490,9 @@ HANDLE_IOCTL(LPSETTIMEOUT, lp_timeout_trans)
 
 IGNORE_IOCTL(VFAT_IOCTL_READDIR_BOTH32)
 IGNORE_IOCTL(VFAT_IOCTL_READDIR_SHORT32)
+
+/* loop */
+IGNORE_IOCTL(LOOP_CLR_FD)
 };
 
 #define IOCTL_HASHSIZE 256
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index ec8896b264d..1d533a2ec3a 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -368,6 +368,69 @@ void debugfs_remove(struct dentry *dentry)
 }
 EXPORT_SYMBOL_GPL(debugfs_remove);
 
+/**
+ * debugfs_rename - rename a file/directory in the debugfs filesystem
+ * @old_dir: a pointer to the parent dentry for the renamed object. This
+ *          should be a directory dentry.
+ * @old_dentry: dentry of an object to be renamed.
+ * @new_dir: a pointer to the parent dentry where the object should be
+ *          moved. This should be a directory dentry.
+ * @new_name: a pointer to a string containing the target name.
+ *
+ * This function renames a file/directory in debugfs.  The target must not
+ * exist for rename to succeed.
+ *
+ * This function will return a pointer to old_dentry (which is updated to
+ * reflect renaming) if it succeeds. If an error occurs, %NULL will be
+ * returned.
+ *
+ * If debugfs is not enabled in the kernel, the value -%ENODEV will be
+ * returned.
+ */
+struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
+		struct dentry *new_dir, const char *new_name)
+{
+	int error;
+	struct dentry *dentry = NULL, *trap;
+	const char *old_name;
+
+	trap = lock_rename(new_dir, old_dir);
+	/* Source or destination directories don't exist? */
+	if (!old_dir->d_inode || !new_dir->d_inode)
+		goto exit;
+	/* Source does not exist, cyclic rename, or mountpoint? */
+	if (!old_dentry->d_inode || old_dentry == trap ||
+	    d_mountpoint(old_dentry))
+		goto exit;
+	dentry = lookup_one_len(new_name, new_dir, strlen(new_name));
+	/* Lookup failed, cyclic rename or target exists? */
+	if (IS_ERR(dentry) || dentry == trap || dentry->d_inode)
+		goto exit;
+
+	old_name = fsnotify_oldname_init(old_dentry->d_name.name);
+
+	error = simple_rename(old_dir->d_inode, old_dentry, new_dir->d_inode,
+		dentry);
+	if (error) {
+		fsnotify_oldname_free(old_name);
+		goto exit;
+	}
+	d_move(old_dentry, dentry);
+	fsnotify_move(old_dir->d_inode, new_dir->d_inode, old_name,
+		old_dentry->d_name.name, S_ISDIR(old_dentry->d_inode->i_mode),
+		NULL, old_dentry->d_inode);
+	fsnotify_oldname_free(old_name);
+	unlock_rename(new_dir, old_dir);
+	dput(dentry);
+	return old_dentry;
+exit:
+	if (dentry && !IS_ERR(dentry))
+		dput(dentry);
+	unlock_rename(new_dir, old_dir);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(debugfs_rename);
+
 static decl_subsys(debug, NULL, NULL);
 
 static int __init debugfs_init(void)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 8593f3dfd29..52bb2638f7a 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1106,7 +1106,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 	spin_lock_irqsave(&dio->bio_lock, flags);
 	ret2 = --dio->refcount;
 	spin_unlock_irqrestore(&dio->bio_lock, flags);
-	BUG_ON(!dio->is_async && ret2 != 0);
+
 	if (ret2 == 0) {
 		ret = dio_complete(dio, offset, ret);
 		kfree(dio);
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
index 69a94690e49..54bcc00ec8d 100644
--- a/fs/dlm/Kconfig
+++ b/fs/dlm/Kconfig
@@ -3,7 +3,7 @@ menu "Distributed Lock Manager"
 
 config DLM
 	tristate "Distributed Lock Manager (DLM)"
-	depends on IPV6 || IPV6=n
+	depends on SYSFS && (IPV6 || IPV6=n)
 	select CONFIGFS_FS
 	select IP_SCTP
 	help
diff --git a/fs/dlm/Makefile b/fs/dlm/Makefile
index 604cf7dc5f3..d248e60951b 100644
--- a/fs/dlm/Makefile
+++ b/fs/dlm/Makefile
@@ -8,6 +8,7 @@ dlm-y :=			ast.o \
 				member.o \
 				memory.o \
 				midcomms.o \
+				netlink.o \
 				lowcomms.o \
 				rcom.o \
 				recover.o \
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 822abdcd143..5069b2cb5a1 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -90,6 +90,7 @@ struct cluster {
 	unsigned int cl_scan_secs;
 	unsigned int cl_log_debug;
 	unsigned int cl_protocol;
+	unsigned int cl_timewarn_cs;
 };
 
 enum {
@@ -103,6 +104,7 @@ enum {
 	CLUSTER_ATTR_SCAN_SECS,
 	CLUSTER_ATTR_LOG_DEBUG,
 	CLUSTER_ATTR_PROTOCOL,
+	CLUSTER_ATTR_TIMEWARN_CS,
 };
 
 struct cluster_attribute {
@@ -162,6 +164,7 @@ CLUSTER_ATTR(toss_secs, 1);
 CLUSTER_ATTR(scan_secs, 1);
 CLUSTER_ATTR(log_debug, 0);
 CLUSTER_ATTR(protocol, 0);
+CLUSTER_ATTR(timewarn_cs, 1);
 
 static struct configfs_attribute *cluster_attrs[] = {
 	[CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
@@ -174,6 +177,7 @@ static struct configfs_attribute *cluster_attrs[] = {
 	[CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr,
 	[CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
 	[CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
+	[CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
 	NULL,
 };
 
@@ -429,6 +433,8 @@ static struct config_group *make_cluster(struct config_group *g,
 	cl->cl_toss_secs = dlm_config.ci_toss_secs;
 	cl->cl_scan_secs = dlm_config.ci_scan_secs;
 	cl->cl_log_debug = dlm_config.ci_log_debug;
+	cl->cl_protocol = dlm_config.ci_protocol;
+	cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
 
 	space_list = &sps->ss_group;
 	comm_list = &cms->cs_group;
@@ -748,9 +754,16 @@ static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len)
 
 static struct space *get_space(char *name)
 {
+	struct config_item *i;
+
 	if (!space_list)
 		return NULL;
-	return to_space(config_group_find_obj(space_list, name));
+
+	down(&space_list->cg_subsys->su_sem);
+	i = config_group_find_obj(space_list, name);
+	up(&space_list->cg_subsys->su_sem);
+
+	return to_space(i);
 }
 
 static void put_space(struct space *sp)
@@ -776,20 +789,20 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr)
 			if (cm->nodeid != nodeid)
 				continue;
 			found = 1;
+			config_item_get(i);
 			break;
 		} else {
 			if (!cm->addr_count ||
 			    memcmp(cm->addr[0], addr, sizeof(*addr)))
 				continue;
 			found = 1;
+			config_item_get(i);
 			break;
 		}
 	}
 	up(&clusters_root.subsys.su_sem);
 
-	if (found)
-		config_item_get(i);
-	else
+	if (!found)
 		cm = NULL;
 	return cm;
 }
@@ -909,6 +922,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
 #define DEFAULT_SCAN_SECS          5
 #define DEFAULT_LOG_DEBUG          0
 #define DEFAULT_PROTOCOL           0
+#define DEFAULT_TIMEWARN_CS      500 /* 5 sec = 500 centiseconds */
 
 struct dlm_config_info dlm_config = {
 	.ci_tcp_port = DEFAULT_TCP_PORT,
@@ -920,6 +934,7 @@ struct dlm_config_info dlm_config = {
 	.ci_toss_secs = DEFAULT_TOSS_SECS,
 	.ci_scan_secs = DEFAULT_SCAN_SECS,
 	.ci_log_debug = DEFAULT_LOG_DEBUG,
-	.ci_protocol = DEFAULT_PROTOCOL
+	.ci_protocol = DEFAULT_PROTOCOL,
+	.ci_timewarn_cs = DEFAULT_TIMEWARN_CS
 };
 
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 967cc3d72e5..a3170fe2209 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -27,6 +27,7 @@ struct dlm_config_info {
 	int ci_scan_secs;
 	int ci_log_debug;
 	int ci_protocol;
+	int ci_timewarn_cs;
 };
 
 extern struct dlm_config_info dlm_config;
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 61ba670b9e0..12c3bfd5e66 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -17,6 +17,7 @@
 #include <linux/debugfs.h>
 
 #include "dlm_internal.h"
+#include "lock.h"
 
 #define DLM_DEBUG_BUF_LEN 4096
 static char debug_buf[DLM_DEBUG_BUF_LEN];
@@ -26,6 +27,8 @@ static struct dentry *dlm_root;
 
 struct rsb_iter {
 	int entry;
+	int locks;
+	int header;
 	struct dlm_ls *ls;
 	struct list_head *next;
 	struct dlm_rsb *rsb;
@@ -57,8 +60,8 @@ static char *print_lockmode(int mode)
 	}
 }
 
-static void print_lock(struct seq_file *s, struct dlm_lkb *lkb,
-		       struct dlm_rsb *res)
+static void print_resource_lock(struct seq_file *s, struct dlm_lkb *lkb,
+				struct dlm_rsb *res)
 {
 	seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_grmode));
 
@@ -85,6 +88,8 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
 	struct dlm_lkb *lkb;
 	int i, lvblen = res->res_ls->ls_lvblen, recover_list, root_list;
 
+	lock_rsb(res);
+
 	seq_printf(s, "\nResource %p Name (len=%d) \"", res, res->res_length);
 	for (i = 0; i < res->res_length; i++) {
 		if (isprint(res->res_name[i]))
@@ -129,15 +134,15 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
 	/* Print the locks attached to this resource */
 	seq_printf(s, "Granted Queue\n");
 	list_for_each_entry(lkb, &res->res_grantqueue, lkb_statequeue)
-		print_lock(s, lkb, res);
+		print_resource_lock(s, lkb, res);
 
 	seq_printf(s, "Conversion Queue\n");
 	list_for_each_entry(lkb, &res->res_convertqueue, lkb_statequeue)
-		print_lock(s, lkb, res);
+		print_resource_lock(s, lkb, res);
 
 	seq_printf(s, "Waiting Queue\n");
 	list_for_each_entry(lkb, &res->res_waitqueue, lkb_statequeue)
-		print_lock(s, lkb, res);
+		print_resource_lock(s, lkb, res);
 
 	if (list_empty(&res->res_lookup))
 		goto out;
@@ -151,6 +156,61 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
 		seq_printf(s, "\n");
 	}
  out:
+	unlock_rsb(res);
+	return 0;
+}
+
+static void print_lock(struct seq_file *s, struct dlm_lkb *lkb, struct dlm_rsb *r)
+{
+	struct dlm_user_args *ua;
+	unsigned int waiting = 0;
+	uint64_t xid = 0;
+
+	if (lkb->lkb_flags & DLM_IFL_USER) {
+		ua = (struct dlm_user_args *) lkb->lkb_astparam;
+		if (ua)
+			xid = ua->xid;
+	}
+
+	if (lkb->lkb_timestamp)
+		waiting = jiffies_to_msecs(jiffies - lkb->lkb_timestamp);
+
+	/* id nodeid remid pid xid exflags flags sts grmode rqmode time_ms
+	   r_nodeid r_len r_name */
+
+	seq_printf(s, "%x %d %x %u %llu %x %x %d %d %d %u %u %d \"%s\"\n",
+		   lkb->lkb_id,
+		   lkb->lkb_nodeid,
+		   lkb->lkb_remid,
+		   lkb->lkb_ownpid,
+		   (unsigned long long)xid,
+		   lkb->lkb_exflags,
+		   lkb->lkb_flags,
+		   lkb->lkb_status,
+		   lkb->lkb_grmode,
+		   lkb->lkb_rqmode,
+		   waiting,
+		   r->res_nodeid,
+		   r->res_length,
+		   r->res_name);
+}
+
+static int print_locks(struct dlm_rsb *r, struct seq_file *s)
+{
+	struct dlm_lkb *lkb;
+
+	lock_rsb(r);
+
+	list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue)
+		print_lock(s, lkb, r);
+
+	list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue)
+		print_lock(s, lkb, r);
+
+	list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue)
+		print_lock(s, lkb, r);
+
+	unlock_rsb(r);
 	return 0;
 }
 
@@ -166,6 +226,9 @@ static int rsb_iter_next(struct rsb_iter *ri)
 			read_lock(&ls->ls_rsbtbl[i].lock);
 			if (!list_empty(&ls->ls_rsbtbl[i].list)) {
 				ri->next = ls->ls_rsbtbl[i].list.next;
+				ri->rsb = list_entry(ri->next, struct dlm_rsb,
+							res_hashchain);
+				dlm_hold_rsb(ri->rsb);
 				read_unlock(&ls->ls_rsbtbl[i].lock);
 				break;
 			}
@@ -176,6 +239,7 @@ static int rsb_iter_next(struct rsb_iter *ri)
 		if (ri->entry >= ls->ls_rsbtbl_size)
 			return 1;
 	} else {
+		struct dlm_rsb *old = ri->rsb;
 		i = ri->entry;
 		read_lock(&ls->ls_rsbtbl[i].lock);
 		ri->next = ri->next->next;
@@ -184,11 +248,14 @@ static int rsb_iter_next(struct rsb_iter *ri)
 			ri->next = NULL;
 			ri->entry++;
 			read_unlock(&ls->ls_rsbtbl[i].lock);
+			dlm_put_rsb(old);
 			goto top;
                 }
+		ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain);
+		dlm_hold_rsb(ri->rsb);
 		read_unlock(&ls->ls_rsbtbl[i].lock);
+		dlm_put_rsb(old);
 	}
-	ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain);
 
 	return 0;
 }
@@ -202,7 +269,7 @@ static struct rsb_iter *rsb_iter_init(struct dlm_ls *ls)
 {
 	struct rsb_iter *ri;
 
-	ri = kmalloc(sizeof *ri, GFP_KERNEL);
+	ri = kzalloc(sizeof *ri, GFP_KERNEL);
 	if (!ri)
 		return NULL;
 
@@ -260,7 +327,17 @@ static int rsb_seq_show(struct seq_file *file, void *iter_ptr)
 {
 	struct rsb_iter *ri = iter_ptr;
 
-	print_resource(ri->rsb, file);
+	if (ri->locks) {
+		if (ri->header) {
+			seq_printf(file, "id nodeid remid pid xid exflags flags "
+					 "sts grmode rqmode time_ms r_nodeid "
+					 "r_len r_name\n");
+			ri->header = 0;
+		}
+		print_locks(ri->rsb, file);
+	} else {
+		print_resource(ri->rsb, file);
+	}
 
 	return 0;
 }
@@ -296,6 +373,83 @@ static const struct file_operations rsb_fops = {
 };
 
 /*
+ * Dump state in compact per-lock listing
+ */
+
+static struct rsb_iter *locks_iter_init(struct dlm_ls *ls, loff_t *pos)
+{
+	struct rsb_iter *ri;
+
+	ri = kzalloc(sizeof *ri, GFP_KERNEL);
+	if (!ri)
+		return NULL;
+
+	ri->ls = ls;
+	ri->entry = 0;
+	ri->next = NULL;
+	ri->locks = 1;
+
+	if (*pos == 0)
+		ri->header = 1;
+
+	if (rsb_iter_next(ri)) {
+		rsb_iter_free(ri);
+		return NULL;
+	}
+
+	return ri;
+}
+
+static void *locks_seq_start(struct seq_file *file, loff_t *pos)
+{
+	struct rsb_iter *ri;
+	loff_t n = *pos;
+
+	ri = locks_iter_init(file->private, pos);
+	if (!ri)
+		return NULL;
+
+	while (n--) {
+		if (rsb_iter_next(ri)) {
+			rsb_iter_free(ri);
+			return NULL;
+		}
+	}
+
+	return ri;
+}
+
+static struct seq_operations locks_seq_ops = {
+	.start = locks_seq_start,
+	.next  = rsb_seq_next,
+	.stop  = rsb_seq_stop,
+	.show  = rsb_seq_show,
+};
+
+static int locks_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int ret;
+
+	ret = seq_open(file, &locks_seq_ops);
+	if (ret)
+		return ret;
+
+	seq = file->private_data;
+	seq->private = inode->i_private;
+
+	return 0;
+}
+
+static const struct file_operations locks_fops = {
+	.owner   = THIS_MODULE,
+	.open    = locks_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release
+};
+
+/*
  * dump lkb's on the ls_waiters list
  */
 
@@ -362,6 +516,20 @@ int dlm_create_debug_file(struct dlm_ls *ls)
 		return -ENOMEM;
 	}
 
+	memset(name, 0, sizeof(name));
+	snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_locks", ls->ls_name);
+
+	ls->ls_debug_locks_dentry = debugfs_create_file(name,
+							S_IFREG | S_IRUGO,
+							dlm_root,
+							ls,
+							&locks_fops);
+	if (!ls->ls_debug_locks_dentry) {
+		debugfs_remove(ls->ls_debug_waiters_dentry);
+		debugfs_remove(ls->ls_debug_rsb_dentry);
+		return -ENOMEM;
+	}
+
 	return 0;
 }
 
@@ -371,6 +539,8 @@ void dlm_delete_debug_file(struct dlm_ls *ls)
 		debugfs_remove(ls->ls_debug_rsb_dentry);
 	if (ls->ls_debug_waiters_dentry)
 		debugfs_remove(ls->ls_debug_waiters_dentry);
+	if (ls->ls_debug_locks_dentry)
+		debugfs_remove(ls->ls_debug_locks_dentry);
 }
 
 int dlm_register_debugfs(void)
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 30994d68f6a..74901e981e1 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -151,6 +151,7 @@ struct dlm_args {
 	void			*bastaddr;
 	int			mode;
 	struct dlm_lksb		*lksb;
+	unsigned long		timeout;
 };
 
 
@@ -213,6 +214,9 @@ struct dlm_args {
 #define DLM_IFL_OVERLAP_UNLOCK  0x00080000
 #define DLM_IFL_OVERLAP_CANCEL  0x00100000
 #define DLM_IFL_ENDOFLIFE	0x00200000
+#define DLM_IFL_WATCH_TIMEWARN	0x00400000
+#define DLM_IFL_TIMEOUT_CANCEL	0x00800000
+#define DLM_IFL_DEADLOCK_CANCEL	0x01000000
 #define DLM_IFL_USER		0x00000001
 #define DLM_IFL_ORPHAN		0x00000002
 
@@ -243,6 +247,9 @@ struct dlm_lkb {
 	struct list_head	lkb_wait_reply;	/* waiting for remote reply */
 	struct list_head	lkb_astqueue;	/* need ast to be sent */
 	struct list_head	lkb_ownqueue;	/* list of locks for a process */
+	struct list_head	lkb_time_list;
+	unsigned long		lkb_timestamp;
+	unsigned long		lkb_timeout_cs;
 
 	char			*lkb_lvbptr;
 	struct dlm_lksb		*lkb_lksb;      /* caller's status block */
@@ -447,12 +454,16 @@ struct dlm_ls {
 	struct mutex		ls_orphans_mutex;
 	struct list_head	ls_orphans;
 
+	struct mutex		ls_timeout_mutex;
+	struct list_head	ls_timeout;
+
 	struct list_head	ls_nodes;	/* current nodes in ls */
 	struct list_head	ls_nodes_gone;	/* dead node list, recovery */
 	int			ls_num_nodes;	/* number of nodes in ls */
 	int			ls_low_nodeid;
 	int			ls_total_weight;
 	int			*ls_node_array;
+	gfp_t			ls_allocation;
 
 	struct dlm_rsb		ls_stub_rsb;	/* for returning errors */
 	struct dlm_lkb		ls_stub_lkb;	/* for returning errors */
@@ -460,9 +471,12 @@ struct dlm_ls {
 
 	struct dentry		*ls_debug_rsb_dentry; /* debugfs */
 	struct dentry		*ls_debug_waiters_dentry; /* debugfs */
+	struct dentry		*ls_debug_locks_dentry; /* debugfs */
 
 	wait_queue_head_t	ls_uevent_wait;	/* user part of join/leave */
 	int			ls_uevent_result;
+	struct completion	ls_members_done;
+	int			ls_members_result;
 
 	struct miscdevice       ls_device;
 
@@ -472,6 +486,7 @@ struct dlm_ls {
 	struct task_struct	*ls_recoverd_task;
 	struct mutex		ls_recoverd_active;
 	spinlock_t		ls_recover_lock;
+	unsigned long		ls_recover_begin; /* jiffies timestamp */
 	uint32_t		ls_recover_status; /* DLM_RS_ */
 	uint64_t		ls_recover_seq;
 	struct dlm_recover	*ls_recover_args;
@@ -501,6 +516,7 @@ struct dlm_ls {
 #define LSFL_RCOM_READY		3
 #define LSFL_RCOM_WAIT		4
 #define LSFL_UEVENT_WAIT	5
+#define LSFL_TIMEWARN		6
 
 /* much of this is just saving user space pointers associated with the
    lock that we pass back to the user lib with an ast */
@@ -518,6 +534,7 @@ struct dlm_user_args {
 	void __user		*castaddr;
 	void __user		*bastparam;
 	void __user		*bastaddr;
+	uint64_t		xid;
 };
 
 #define DLM_PROC_FLAGS_CLOSING 1
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index d8d6e729f96..b455919c199 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -82,10 +82,13 @@ static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode);
 static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb);
 static int send_remove(struct dlm_rsb *r);
 static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
+static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
 static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
 				    struct dlm_message *ms);
 static int receive_extralen(struct dlm_message *ms);
 static void do_purge(struct dlm_ls *ls, int nodeid, int pid);
+static void del_timeout(struct dlm_lkb *lkb);
+void dlm_timeout_warn(struct dlm_lkb *lkb);
 
 /*
  * Lock compatibilty matrix - thanks Steve
@@ -194,17 +197,17 @@ void dlm_dump_rsb(struct dlm_rsb *r)
 
 /* Threads cannot use the lockspace while it's being recovered */
 
-static inline void lock_recovery(struct dlm_ls *ls)
+static inline void dlm_lock_recovery(struct dlm_ls *ls)
 {
 	down_read(&ls->ls_in_recovery);
 }
 
-static inline void unlock_recovery(struct dlm_ls *ls)
+void dlm_unlock_recovery(struct dlm_ls *ls)
 {
 	up_read(&ls->ls_in_recovery);
 }
 
-static inline int lock_recovery_try(struct dlm_ls *ls)
+int dlm_lock_recovery_try(struct dlm_ls *ls)
 {
 	return down_read_trylock(&ls->ls_in_recovery);
 }
@@ -286,8 +289,22 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
 	if (is_master_copy(lkb))
 		return;
 
+	del_timeout(lkb);
+
 	DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb););
 
+	/* if the operation was a cancel, then return -DLM_ECANCEL, if a
+	   timeout caused the cancel then return -ETIMEDOUT */
+	if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_TIMEOUT_CANCEL)) {
+		lkb->lkb_flags &= ~DLM_IFL_TIMEOUT_CANCEL;
+		rv = -ETIMEDOUT;
+	}
+
+	if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_DEADLOCK_CANCEL)) {
+		lkb->lkb_flags &= ~DLM_IFL_DEADLOCK_CANCEL;
+		rv = -EDEADLK;
+	}
+
 	lkb->lkb_lksb->sb_status = rv;
 	lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags;
 
@@ -581,6 +598,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
 	kref_init(&lkb->lkb_ref);
 	INIT_LIST_HEAD(&lkb->lkb_ownqueue);
 	INIT_LIST_HEAD(&lkb->lkb_rsb_lookup);
+	INIT_LIST_HEAD(&lkb->lkb_time_list);
 
 	get_random_bytes(&bucket, sizeof(bucket));
 	bucket &= (ls->ls_lkbtbl_size - 1);
@@ -985,15 +1003,136 @@ void dlm_scan_rsbs(struct dlm_ls *ls)
 {
 	int i;
 
-	if (dlm_locking_stopped(ls))
-		return;
-
 	for (i = 0; i < ls->ls_rsbtbl_size; i++) {
 		shrink_bucket(ls, i);
+		if (dlm_locking_stopped(ls))
+			break;
 		cond_resched();
 	}
 }
 
+static void add_timeout(struct dlm_lkb *lkb)
+{
+	struct dlm_ls *ls = lkb->lkb_resource->res_ls;
+
+	if (is_master_copy(lkb)) {
+		lkb->lkb_timestamp = jiffies;
+		return;
+	}
+
+	if (test_bit(LSFL_TIMEWARN, &ls->ls_flags) &&
+	    !(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
+		lkb->lkb_flags |= DLM_IFL_WATCH_TIMEWARN;
+		goto add_it;
+	}
+	if (lkb->lkb_exflags & DLM_LKF_TIMEOUT)
+		goto add_it;
+	return;
+
+ add_it:
+	DLM_ASSERT(list_empty(&lkb->lkb_time_list), dlm_print_lkb(lkb););
+	mutex_lock(&ls->ls_timeout_mutex);
+	hold_lkb(lkb);
+	lkb->lkb_timestamp = jiffies;
+	list_add_tail(&lkb->lkb_time_list, &ls->ls_timeout);
+	mutex_unlock(&ls->ls_timeout_mutex);
+}
+
+static void del_timeout(struct dlm_lkb *lkb)
+{
+	struct dlm_ls *ls = lkb->lkb_resource->res_ls;
+
+	mutex_lock(&ls->ls_timeout_mutex);
+	if (!list_empty(&lkb->lkb_time_list)) {
+		list_del_init(&lkb->lkb_time_list);
+		unhold_lkb(lkb);
+	}
+	mutex_unlock(&ls->ls_timeout_mutex);
+}
+
+/* FIXME: is it safe to look at lkb_exflags, lkb_flags, lkb_timestamp, and
+   lkb_lksb_timeout without lock_rsb?  Note: we can't lock timeout_mutex
+   and then lock rsb because of lock ordering in add_timeout.  We may need
+   to specify some special timeout-related bits in the lkb that are just to
+   be accessed under the timeout_mutex. */
+
+void dlm_scan_timeout(struct dlm_ls *ls)
+{
+	struct dlm_rsb *r;
+	struct dlm_lkb *lkb;
+	int do_cancel, do_warn;
+
+	for (;;) {
+		if (dlm_locking_stopped(ls))
+			break;
+
+		do_cancel = 0;
+		do_warn = 0;
+		mutex_lock(&ls->ls_timeout_mutex);
+		list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) {
+
+			if ((lkb->lkb_exflags & DLM_LKF_TIMEOUT) &&
+			    time_after_eq(jiffies, lkb->lkb_timestamp +
+					  lkb->lkb_timeout_cs * HZ/100))
+				do_cancel = 1;
+
+			if ((lkb->lkb_flags & DLM_IFL_WATCH_TIMEWARN) &&
+			    time_after_eq(jiffies, lkb->lkb_timestamp +
+				   	   dlm_config.ci_timewarn_cs * HZ/100))
+				do_warn = 1;
+
+			if (!do_cancel && !do_warn)
+				continue;
+			hold_lkb(lkb);
+			break;
+		}
+		mutex_unlock(&ls->ls_timeout_mutex);
+
+		if (!do_cancel && !do_warn)
+			break;
+
+		r = lkb->lkb_resource;
+		hold_rsb(r);
+		lock_rsb(r);
+
+		if (do_warn) {
+			/* clear flag so we only warn once */
+			lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
+			if (!(lkb->lkb_exflags & DLM_LKF_TIMEOUT))
+				del_timeout(lkb);
+			dlm_timeout_warn(lkb);
+		}
+
+		if (do_cancel) {
+			log_debug(ls, "timeout cancel %x node %d %s",
+				  lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
+			lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
+			lkb->lkb_flags |= DLM_IFL_TIMEOUT_CANCEL;
+			del_timeout(lkb);
+			_cancel_lock(r, lkb);
+		}
+
+		unlock_rsb(r);
+		unhold_rsb(r);
+		dlm_put_lkb(lkb);
+	}
+}
+
+/* This is only called by dlm_recoverd, and we rely on dlm_ls_stop() stopping
+   dlm_recoverd before checking/setting ls_recover_begin. */
+
+void dlm_adjust_timeouts(struct dlm_ls *ls)
+{
+	struct dlm_lkb *lkb;
+	long adj = jiffies - ls->ls_recover_begin;
+
+	ls->ls_recover_begin = 0;
+	mutex_lock(&ls->ls_timeout_mutex);
+	list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
+		lkb->lkb_timestamp += adj;
+	mutex_unlock(&ls->ls_timeout_mutex);
+}
+
 /* lkb is master or local copy */
 
 static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
@@ -1275,10 +1414,8 @@ static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb)
  * queue for one resource.  The granted mode of each lock blocks the requested
  * mode of the other lock."
  *
- * Part 2: if the granted mode of lkb is preventing the first lkb in the
- * convert queue from being granted, then demote lkb (set grmode to NL).
- * This second form requires that we check for conv-deadlk even when
- * now == 0 in _can_be_granted().
+ * Part 2: if the granted mode of lkb is preventing an earlier lkb in the
+ * convert queue from being granted, then deadlk/demote lkb.
  *
  * Example:
  * Granted Queue: empty
@@ -1287,41 +1424,52 @@ static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb)
  *
  * The first lock can't be granted because of the granted mode of the second
  * lock and the second lock can't be granted because it's not first in the
- * list.  We demote the granted mode of the second lock (the lkb passed to this
- * function).
+ * list.  We either cancel lkb's conversion (PR->EX) and return EDEADLK, or we
+ * demote the granted mode of lkb (from PR to NL) if it has the CONVDEADLK
+ * flag set and return DEMOTED in the lksb flags.
+ *
+ * Originally, this function detected conv-deadlk in a more limited scope:
+ * - if !modes_compat(lkb1, lkb2) && !modes_compat(lkb2, lkb1), or
+ * - if lkb1 was the first entry in the queue (not just earlier), and was
+ *   blocked by the granted mode of lkb2, and there was nothing on the
+ *   granted queue preventing lkb1 from being granted immediately, i.e.
+ *   lkb2 was the only thing preventing lkb1 from being granted.
+ *
+ * That second condition meant we'd only say there was conv-deadlk if
+ * resolving it (by demotion) would lead to the first lock on the convert
+ * queue being granted right away.  It allowed conversion deadlocks to exist
+ * between locks on the convert queue while they couldn't be granted anyway.
  *
- * After the resolution, the "grant pending" function needs to go back and try
- * to grant locks on the convert queue again since the first lock can now be
- * granted.
+ * Now, we detect and take action on conversion deadlocks immediately when
+ * they're created, even if they may not be immediately consequential.  If
+ * lkb1 exists anywhere in the convert queue and lkb2 comes in with a granted
+ * mode that would prevent lkb1's conversion from being granted, we do a
+ * deadlk/demote on lkb2 right away and don't let it onto the convert queue.
+ * I think this means that the lkb_is_ahead condition below should always
+ * be zero, i.e. there will never be conv-deadlk between two locks that are
+ * both already on the convert queue.
  */
 
-static int conversion_deadlock_detect(struct dlm_rsb *rsb, struct dlm_lkb *lkb)
+static int conversion_deadlock_detect(struct dlm_rsb *r, struct dlm_lkb *lkb2)
 {
-	struct dlm_lkb *this, *first = NULL, *self = NULL;
+	struct dlm_lkb *lkb1;
+	int lkb_is_ahead = 0;
 
-	list_for_each_entry(this, &rsb->res_convertqueue, lkb_statequeue) {
-		if (!first)
-			first = this;
-		if (this == lkb) {
-			self = lkb;
+	list_for_each_entry(lkb1, &r->res_convertqueue, lkb_statequeue) {
+		if (lkb1 == lkb2) {
+			lkb_is_ahead = 1;
 			continue;
 		}
 
-		if (!modes_compat(this, lkb) && !modes_compat(lkb, this))
-			return 1;
-	}
-
-	/* if lkb is on the convert queue and is preventing the first
-	   from being granted, then there's deadlock and we demote lkb.
-	   multiple converting locks may need to do this before the first
-	   converting lock can be granted. */
-
-	if (self && self != first) {
-		if (!modes_compat(lkb, first) &&
-		    !queue_conflict(&rsb->res_grantqueue, first))
-			return 1;
+		if (!lkb_is_ahead) {
+			if (!modes_compat(lkb2, lkb1))
+				return 1;
+		} else {
+			if (!modes_compat(lkb2, lkb1) &&
+			    !modes_compat(lkb1, lkb2))
+				return 1;
+		}
 	}
-
 	return 0;
 }
 
@@ -1450,42 +1598,57 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
 	if (!now && !conv && list_empty(&r->res_convertqueue) &&
 	    first_in_list(lkb, &r->res_waitqueue))
 		return 1;
-
  out:
-	/*
-	 * The following, enabled by CONVDEADLK, departs from VMS.
-	 */
-
-	if (conv && (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) &&
-	    conversion_deadlock_detect(r, lkb)) {
-		lkb->lkb_grmode = DLM_LOCK_NL;
-		lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
-	}
-
 	return 0;
 }
 
-/*
- * The ALTPR and ALTCW flags aren't traditional lock manager flags, but are a
- * simple way to provide a big optimization to applications that can use them.
- */
-
-static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
+static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now,
+			  int *err)
 {
-	uint32_t flags = lkb->lkb_exflags;
 	int rv;
 	int8_t alt = 0, rqmode = lkb->lkb_rqmode;
+	int8_t is_convert = (lkb->lkb_grmode != DLM_LOCK_IV);
+
+	if (err)
+		*err = 0;
 
 	rv = _can_be_granted(r, lkb, now);
 	if (rv)
 		goto out;
 
-	if (lkb->lkb_sbflags & DLM_SBF_DEMOTED)
+	/*
+	 * The CONVDEADLK flag is non-standard and tells the dlm to resolve
+	 * conversion deadlocks by demoting grmode to NL, otherwise the dlm
+	 * cancels one of the locks.
+	 */
+
+	if (is_convert && can_be_queued(lkb) &&
+	    conversion_deadlock_detect(r, lkb)) {
+		if (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) {
+			lkb->lkb_grmode = DLM_LOCK_NL;
+			lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
+		} else if (!(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
+			if (err)
+				*err = -EDEADLK;
+			else {
+				log_print("can_be_granted deadlock %x now %d",
+					  lkb->lkb_id, now);
+				dlm_dump_rsb(r);
+			}
+		}
 		goto out;
+	}
 
-	if (rqmode != DLM_LOCK_PR && flags & DLM_LKF_ALTPR)
+	/*
+	 * The ALTPR and ALTCW flags are non-standard and tell the dlm to try
+	 * to grant a request in a mode other than the normal rqmode.  It's a
+	 * simple way to provide a big optimization to applications that can
+	 * use them.
+	 */
+
+	if (rqmode != DLM_LOCK_PR && (lkb->lkb_exflags & DLM_LKF_ALTPR))
 		alt = DLM_LOCK_PR;
-	else if (rqmode != DLM_LOCK_CW && flags & DLM_LKF_ALTCW)
+	else if (rqmode != DLM_LOCK_CW && (lkb->lkb_exflags & DLM_LKF_ALTCW))
 		alt = DLM_LOCK_CW;
 
 	if (alt) {
@@ -1500,10 +1663,20 @@ static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
 	return rv;
 }
 
+/* FIXME: I don't think that can_be_granted() can/will demote or find deadlock
+   for locks pending on the convert list.  Once verified (watch for these
+   log_prints), we should be able to just call _can_be_granted() and not
+   bother with the demote/deadlk cases here (and there's no easy way to deal
+   with a deadlk here, we'd have to generate something like grant_lock with
+   the deadlk error.) */
+
+/* returns the highest requested mode of all blocked conversions */
+
 static int grant_pending_convert(struct dlm_rsb *r, int high)
 {
 	struct dlm_lkb *lkb, *s;
 	int hi, demoted, quit, grant_restart, demote_restart;
+	int deadlk;
 
 	quit = 0;
  restart:
@@ -1513,14 +1686,29 @@ static int grant_pending_convert(struct dlm_rsb *r, int high)
 
 	list_for_each_entry_safe(lkb, s, &r->res_convertqueue, lkb_statequeue) {
 		demoted = is_demoted(lkb);
-		if (can_be_granted(r, lkb, 0)) {
+		deadlk = 0;
+
+		if (can_be_granted(r, lkb, 0, &deadlk)) {
 			grant_lock_pending(r, lkb);
 			grant_restart = 1;
-		} else {
-			hi = max_t(int, lkb->lkb_rqmode, hi);
-			if (!demoted && is_demoted(lkb))
-				demote_restart = 1;
+			continue;
 		}
+
+		if (!demoted && is_demoted(lkb)) {
+			log_print("WARN: pending demoted %x node %d %s",
+				  lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
+			demote_restart = 1;
+			continue;
+		}
+
+		if (deadlk) {
+			log_print("WARN: pending deadlock %x node %d %s",
+				  lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
+			dlm_dump_rsb(r);
+			continue;
+		}
+
+		hi = max_t(int, lkb->lkb_rqmode, hi);
 	}
 
 	if (grant_restart)
@@ -1538,7 +1726,7 @@ static int grant_pending_wait(struct dlm_rsb *r, int high)
 	struct dlm_lkb *lkb, *s;
 
 	list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) {
-		if (can_be_granted(r, lkb, 0))
+		if (can_be_granted(r, lkb, 0, NULL))
 			grant_lock_pending(r, lkb);
                 else
 			high = max_t(int, lkb->lkb_rqmode, high);
@@ -1733,7 +1921,7 @@ static void confirm_master(struct dlm_rsb *r, int error)
 }
 
 static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
-			 int namelen, uint32_t parent_lkid, void *ast,
+			 int namelen, unsigned long timeout_cs, void *ast,
 			 void *astarg, void *bast, struct dlm_args *args)
 {
 	int rv = -EINVAL;
@@ -1776,10 +1964,6 @@ static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
 	if (flags & DLM_LKF_VALBLK && !lksb->sb_lvbptr)
 		goto out;
 
-	/* parent/child locks not yet supported */
-	if (parent_lkid)
-		goto out;
-
 	if (flags & DLM_LKF_CONVERT && !lksb->sb_lkid)
 		goto out;
 
@@ -1791,6 +1975,7 @@ static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
 	args->astaddr = ast;
 	args->astparam = (long) astarg;
 	args->bastaddr = bast;
+	args->timeout = timeout_cs;
 	args->mode = mode;
 	args->lksb = lksb;
 	rv = 0;
@@ -1845,6 +2030,7 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
 	lkb->lkb_lksb = args->lksb;
 	lkb->lkb_lvbptr = args->lksb->sb_lvbptr;
 	lkb->lkb_ownpid = (int) current->pid;
+	lkb->lkb_timeout_cs = args->timeout;
 	rv = 0;
  out:
 	return rv;
@@ -1903,6 +2089,9 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
 		if (is_overlap(lkb))
 			goto out;
 
+		/* don't let scand try to do a cancel */
+		del_timeout(lkb);
+
 		if (lkb->lkb_flags & DLM_IFL_RESEND) {
 			lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL;
 			rv = -EBUSY;
@@ -1934,6 +2123,9 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
 		if (is_overlap_unlock(lkb))
 			goto out;
 
+		/* don't let scand try to do a cancel */
+		del_timeout(lkb);
+
 		if (lkb->lkb_flags & DLM_IFL_RESEND) {
 			lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK;
 			rv = -EBUSY;
@@ -1984,7 +2176,7 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
 {
 	int error = 0;
 
-	if (can_be_granted(r, lkb, 1)) {
+	if (can_be_granted(r, lkb, 1, NULL)) {
 		grant_lock(r, lkb);
 		queue_cast(r, lkb, 0);
 		goto out;
@@ -1994,6 +2186,7 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
 		error = -EINPROGRESS;
 		add_lkb(r, lkb, DLM_LKSTS_WAITING);
 		send_blocking_asts(r, lkb);
+		add_timeout(lkb);
 		goto out;
 	}
 
@@ -2009,16 +2202,32 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
 static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
 {
 	int error = 0;
+	int deadlk = 0;
 
 	/* changing an existing lock may allow others to be granted */
 
-	if (can_be_granted(r, lkb, 1)) {
+	if (can_be_granted(r, lkb, 1, &deadlk)) {
 		grant_lock(r, lkb);
 		queue_cast(r, lkb, 0);
 		grant_pending_locks(r);
 		goto out;
 	}
 
+	/* can_be_granted() detected that this lock would block in a conversion
+	   deadlock, so we leave it on the granted queue and return EDEADLK in
+	   the ast for the convert. */
+
+	if (deadlk) {
+		/* it's left on the granted queue */
+		log_debug(r->res_ls, "deadlock %x node %d sts%d g%d r%d %s",
+			  lkb->lkb_id, lkb->lkb_nodeid, lkb->lkb_status,
+			  lkb->lkb_grmode, lkb->lkb_rqmode, r->res_name);
+		revert_lock(r, lkb);
+		queue_cast(r, lkb, -EDEADLK);
+		error = -EDEADLK;
+		goto out;
+	}
+
 	/* is_demoted() means the can_be_granted() above set the grmode
 	   to NL, and left us on the granted queue.  This auto-demotion
 	   (due to CONVDEADLK) might mean other locks, and/or this lock, are
@@ -2041,6 +2250,7 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
 		del_lkb(r, lkb);
 		add_lkb(r, lkb, DLM_LKSTS_CONVERT);
 		send_blocking_asts(r, lkb);
+		add_timeout(lkb);
 		goto out;
 	}
 
@@ -2274,7 +2484,7 @@ int dlm_lock(dlm_lockspace_t *lockspace,
 	if (!ls)
 		return -EINVAL;
 
-	lock_recovery(ls);
+	dlm_lock_recovery(ls);
 
 	if (convert)
 		error = find_lkb(ls, lksb->sb_lkid, &lkb);
@@ -2284,7 +2494,7 @@ int dlm_lock(dlm_lockspace_t *lockspace,
 	if (error)
 		goto out;
 
-	error = set_lock_args(mode, lksb, flags, namelen, parent_lkid, ast,
+	error = set_lock_args(mode, lksb, flags, namelen, 0, ast,
 			      astarg, bast, &args);
 	if (error)
 		goto out_put;
@@ -2299,10 +2509,10 @@ int dlm_lock(dlm_lockspace_t *lockspace,
  out_put:
 	if (convert || error)
 		__put_lkb(ls, lkb);
-	if (error == -EAGAIN)
+	if (error == -EAGAIN || error == -EDEADLK)
 		error = 0;
  out:
-	unlock_recovery(ls);
+	dlm_unlock_recovery(ls);
 	dlm_put_lockspace(ls);
 	return error;
 }
@@ -2322,7 +2532,7 @@ int dlm_unlock(dlm_lockspace_t *lockspace,
 	if (!ls)
 		return -EINVAL;
 
-	lock_recovery(ls);
+	dlm_lock_recovery(ls);
 
 	error = find_lkb(ls, lkid, &lkb);
 	if (error)
@@ -2344,7 +2554,7 @@ int dlm_unlock(dlm_lockspace_t *lockspace,
  out_put:
 	dlm_put_lkb(lkb);
  out:
-	unlock_recovery(ls);
+	dlm_unlock_recovery(ls);
 	dlm_put_lockspace(ls);
 	return error;
 }
@@ -2384,7 +2594,7 @@ static int _create_message(struct dlm_ls *ls, int mb_len,
 	   pass into lowcomms_commit and a message buffer (mb) that we
 	   write our data into */
 
-	mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb);
+	mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb);
 	if (!mh)
 		return -ENOBUFS;
 
@@ -3111,9 +3321,10 @@ static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms)
 		lkb->lkb_remid = ms->m_lkid;
 		if (is_altmode(lkb))
 			munge_altmode(lkb, ms);
-		if (result)
+		if (result) {
 			add_lkb(r, lkb, DLM_LKSTS_WAITING);
-		else {
+			add_timeout(lkb);
+		} else {
 			grant_lock_pc(r, lkb, ms);
 			queue_cast(r, lkb, 0);
 		}
@@ -3172,6 +3383,12 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
 		queue_cast(r, lkb, -EAGAIN);
 		break;
 
+	case -EDEADLK:
+		receive_flags_reply(lkb, ms);
+		revert_lock_pc(r, lkb);
+		queue_cast(r, lkb, -EDEADLK);
+		break;
+
 	case -EINPROGRESS:
 		/* convert was queued on remote master */
 		receive_flags_reply(lkb, ms);
@@ -3179,6 +3396,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
 			munge_demoted(lkb, ms);
 		del_lkb(r, lkb);
 		add_lkb(r, lkb, DLM_LKSTS_CONVERT);
+		add_timeout(lkb);
 		break;
 
 	case 0:
@@ -3298,8 +3516,7 @@ static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
 	case -DLM_ECANCEL:
 		receive_flags_reply(lkb, ms);
 		revert_lock_pc(r, lkb);
-		if (ms->m_result)
-			queue_cast(r, lkb, -DLM_ECANCEL);
+		queue_cast(r, lkb, -DLM_ECANCEL);
 		break;
 	case 0:
 		break;
@@ -3424,7 +3641,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
 			}
 		}
 
-		if (lock_recovery_try(ls))
+		if (dlm_lock_recovery_try(ls))
 			break;
 		schedule();
 	}
@@ -3503,7 +3720,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
 		log_error(ls, "unknown message type %d", ms->m_type);
 	}
 
-	unlock_recovery(ls);
+	dlm_unlock_recovery(ls);
  out:
 	dlm_put_lockspace(ls);
 	dlm_astd_wake();
@@ -4034,13 +4251,13 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
 
 int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
 		     int mode, uint32_t flags, void *name, unsigned int namelen,
-		     uint32_t parent_lkid)
+		     unsigned long timeout_cs)
 {
 	struct dlm_lkb *lkb;
 	struct dlm_args args;
 	int error;
 
-	lock_recovery(ls);
+	dlm_lock_recovery(ls);
 
 	error = create_lkb(ls, &lkb);
 	if (error) {
@@ -4062,7 +4279,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
 	   When DLM_IFL_USER is set, the dlm knows that this is a userspace
 	   lock and that lkb_astparam is the dlm_user_args structure. */
 
-	error = set_lock_args(mode, &ua->lksb, flags, namelen, parent_lkid,
+	error = set_lock_args(mode, &ua->lksb, flags, namelen, timeout_cs,
 			      DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args);
 	lkb->lkb_flags |= DLM_IFL_USER;
 	ua->old_mode = DLM_LOCK_IV;
@@ -4094,19 +4311,20 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
 	list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks);
 	spin_unlock(&ua->proc->locks_spin);
  out:
-	unlock_recovery(ls);
+	dlm_unlock_recovery(ls);
 	return error;
 }
 
 int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
-		     int mode, uint32_t flags, uint32_t lkid, char *lvb_in)
+		     int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
+		     unsigned long timeout_cs)
 {
 	struct dlm_lkb *lkb;
 	struct dlm_args args;
 	struct dlm_user_args *ua;
 	int error;
 
-	lock_recovery(ls);
+	dlm_lock_recovery(ls);
 
 	error = find_lkb(ls, lkid, &lkb);
 	if (error)
@@ -4127,6 +4345,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 	if (lvb_in && ua->lksb.sb_lvbptr)
 		memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN);
 
+	ua->xid = ua_tmp->xid;
 	ua->castparam = ua_tmp->castparam;
 	ua->castaddr = ua_tmp->castaddr;
 	ua->bastparam = ua_tmp->bastparam;
@@ -4134,19 +4353,19 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 	ua->user_lksb = ua_tmp->user_lksb;
 	ua->old_mode = lkb->lkb_grmode;
 
-	error = set_lock_args(mode, &ua->lksb, flags, 0, 0, DLM_FAKE_USER_AST,
-			      ua, DLM_FAKE_USER_AST, &args);
+	error = set_lock_args(mode, &ua->lksb, flags, 0, timeout_cs,
+			      DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args);
 	if (error)
 		goto out_put;
 
 	error = convert_lock(ls, lkb, &args);
 
-	if (error == -EINPROGRESS || error == -EAGAIN)
+	if (error == -EINPROGRESS || error == -EAGAIN || error == -EDEADLK)
 		error = 0;
  out_put:
 	dlm_put_lkb(lkb);
  out:
-	unlock_recovery(ls);
+	dlm_unlock_recovery(ls);
 	kfree(ua_tmp);
 	return error;
 }
@@ -4159,7 +4378,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 	struct dlm_user_args *ua;
 	int error;
 
-	lock_recovery(ls);
+	dlm_lock_recovery(ls);
 
 	error = find_lkb(ls, lkid, &lkb);
 	if (error)
@@ -4194,7 +4413,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
  out_put:
 	dlm_put_lkb(lkb);
  out:
-	unlock_recovery(ls);
+	dlm_unlock_recovery(ls);
 	kfree(ua_tmp);
 	return error;
 }
@@ -4207,7 +4426,7 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 	struct dlm_user_args *ua;
 	int error;
 
-	lock_recovery(ls);
+	dlm_lock_recovery(ls);
 
 	error = find_lkb(ls, lkid, &lkb);
 	if (error)
@@ -4231,11 +4450,59 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
  out_put:
 	dlm_put_lkb(lkb);
  out:
-	unlock_recovery(ls);
+	dlm_unlock_recovery(ls);
 	kfree(ua_tmp);
 	return error;
 }
 
+int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid)
+{
+	struct dlm_lkb *lkb;
+	struct dlm_args args;
+	struct dlm_user_args *ua;
+	struct dlm_rsb *r;
+	int error;
+
+	dlm_lock_recovery(ls);
+
+	error = find_lkb(ls, lkid, &lkb);
+	if (error)
+		goto out;
+
+	ua = (struct dlm_user_args *)lkb->lkb_astparam;
+
+	error = set_unlock_args(flags, ua, &args);
+	if (error)
+		goto out_put;
+
+	/* same as cancel_lock(), but set DEADLOCK_CANCEL after lock_rsb */
+
+	r = lkb->lkb_resource;
+	hold_rsb(r);
+	lock_rsb(r);
+
+	error = validate_unlock_args(lkb, &args);
+	if (error)
+		goto out_r;
+	lkb->lkb_flags |= DLM_IFL_DEADLOCK_CANCEL;
+
+	error = _cancel_lock(r, lkb);
+ out_r:
+	unlock_rsb(r);
+	put_rsb(r);
+
+	if (error == -DLM_ECANCEL)
+		error = 0;
+	/* from validate_unlock_args() */
+	if (error == -EBUSY)
+		error = 0;
+ out_put:
+	dlm_put_lkb(lkb);
+ out:
+	dlm_unlock_recovery(ls);
+	return error;
+}
+
 /* lkb's that are removed from the waiters list by revert are just left on the
    orphans list with the granted orphan locks, to be freed by purge */
 
@@ -4314,12 +4581,13 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
 {
 	struct dlm_lkb *lkb, *safe;
 
-	lock_recovery(ls);
+	dlm_lock_recovery(ls);
 
 	while (1) {
 		lkb = del_proc_lock(ls, proc);
 		if (!lkb)
 			break;
+		del_timeout(lkb);
 		if (lkb->lkb_exflags & DLM_LKF_PERSISTENT)
 			orphan_proc_lock(ls, lkb);
 		else
@@ -4347,7 +4615,7 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
 	}
 
 	mutex_unlock(&ls->ls_clear_proc_locks);
-	unlock_recovery(ls);
+	dlm_unlock_recovery(ls);
 }
 
 static void purge_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
@@ -4429,12 +4697,12 @@ int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
 	if (nodeid != dlm_our_nodeid()) {
 		error = send_purge(ls, nodeid, pid);
 	} else {
-		lock_recovery(ls);
+		dlm_lock_recovery(ls);
 		if (pid == current->pid)
 			purge_proc_locks(ls, proc);
 		else
 			do_purge(ls, nodeid, pid);
-		unlock_recovery(ls);
+		dlm_unlock_recovery(ls);
 	}
 	return error;
 }
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 64fc4ec4066..1720313c22d 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -1,7 +1,7 @@
 /******************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -24,6 +24,10 @@ void dlm_put_rsb(struct dlm_rsb *r);
 void dlm_hold_rsb(struct dlm_rsb *r);
 int dlm_put_lkb(struct dlm_lkb *lkb);
 void dlm_scan_rsbs(struct dlm_ls *ls);
+int dlm_lock_recovery_try(struct dlm_ls *ls);
+void dlm_unlock_recovery(struct dlm_ls *ls);
+void dlm_scan_timeout(struct dlm_ls *ls);
+void dlm_adjust_timeouts(struct dlm_ls *ls);
 
 int dlm_purge_locks(struct dlm_ls *ls);
 void dlm_purge_mstcpy_locks(struct dlm_rsb *r);
@@ -34,15 +38,18 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
 int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
 
 int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode,
-	uint32_t flags, void *name, unsigned int namelen, uint32_t parent_lkid);
+	uint32_t flags, void *name, unsigned int namelen,
+	unsigned long timeout_cs);
 int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
-	int mode, uint32_t flags, uint32_t lkid, char *lvb_in);
+	int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
+	unsigned long timeout_cs);
 int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 	uint32_t flags, uint32_t lkid, char *lvb_in);
 int dlm_user_cancel(struct dlm_ls *ls,  struct dlm_user_args *ua_tmp,
 	uint32_t flags, uint32_t lkid);
 int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
 	int nodeid, int pid);
+int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid);
 void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc);
 
 static inline int is_master(struct dlm_rsb *r)
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index a677b2a5eed..1dc72105ab1 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -197,13 +197,24 @@ static int do_uevent(struct dlm_ls *ls, int in)
 	else
 		kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE);
 
+	log_debug(ls, "%s the lockspace group...", in ? "joining" : "leaving");
+
+	/* dlm_controld will see the uevent, do the necessary group management
+	   and then write to sysfs to wake us */
+
 	error = wait_event_interruptible(ls->ls_uevent_wait,
 			test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags));
+
+	log_debug(ls, "group event done %d %d", error, ls->ls_uevent_result);
+
 	if (error)
 		goto out;
 
 	error = ls->ls_uevent_result;
  out:
+	if (error)
+		log_error(ls, "group %s failed %d %d", in ? "join" : "leave",
+			  error, ls->ls_uevent_result);
 	return error;
 }
 
@@ -234,8 +245,13 @@ static int dlm_scand(void *data)
 	struct dlm_ls *ls;
 
 	while (!kthread_should_stop()) {
-		list_for_each_entry(ls, &lslist, ls_list)
-			dlm_scan_rsbs(ls);
+		list_for_each_entry(ls, &lslist, ls_list) {
+			if (dlm_lock_recovery_try(ls)) {
+				dlm_scan_rsbs(ls);
+				dlm_scan_timeout(ls);
+				dlm_unlock_recovery(ls);
+			}
+		}
 		schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
 	}
 	return 0;
@@ -395,6 +411,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 {
 	struct dlm_ls *ls;
 	int i, size, error = -ENOMEM;
+	int do_unreg = 0;
 
 	if (namelen > DLM_LOCKSPACE_LEN)
 		return -EINVAL;
@@ -417,11 +434,22 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 		goto out;
 	memcpy(ls->ls_name, name, namelen);
 	ls->ls_namelen = namelen;
-	ls->ls_exflags = flags;
 	ls->ls_lvblen = lvblen;
 	ls->ls_count = 0;
 	ls->ls_flags = 0;
 
+	if (flags & DLM_LSFL_TIMEWARN)
+		set_bit(LSFL_TIMEWARN, &ls->ls_flags);
+
+	if (flags & DLM_LSFL_FS)
+		ls->ls_allocation = GFP_NOFS;
+	else
+		ls->ls_allocation = GFP_KERNEL;
+
+	/* ls_exflags are forced to match among nodes, and we don't
+	   need to require all nodes to have TIMEWARN or FS set */
+	ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS));
+
 	size = dlm_config.ci_rsbtbl_size;
 	ls->ls_rsbtbl_size = size;
 
@@ -461,6 +489,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 	mutex_init(&ls->ls_waiters_mutex);
 	INIT_LIST_HEAD(&ls->ls_orphans);
 	mutex_init(&ls->ls_orphans_mutex);
+	INIT_LIST_HEAD(&ls->ls_timeout);
+	mutex_init(&ls->ls_timeout_mutex);
 
 	INIT_LIST_HEAD(&ls->ls_nodes);
 	INIT_LIST_HEAD(&ls->ls_nodes_gone);
@@ -477,6 +507,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 
 	init_waitqueue_head(&ls->ls_uevent_wait);
 	ls->ls_uevent_result = 0;
+	init_completion(&ls->ls_members_done);
+	ls->ls_members_result = -1;
 
 	ls->ls_recoverd_task = NULL;
 	mutex_init(&ls->ls_recoverd_active);
@@ -513,32 +545,49 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 	error = dlm_recoverd_start(ls);
 	if (error) {
 		log_error(ls, "can't start dlm_recoverd %d", error);
-		goto out_rcomfree;
+		goto out_delist;
 	}
 
-	dlm_create_debug_file(ls);
-
 	error = kobject_setup(ls);
 	if (error)
-		goto out_del;
+		goto out_stop;
 
 	error = kobject_register(&ls->ls_kobj);
 	if (error)
-		goto out_del;
+		goto out_stop;
+
+	/* let kobject handle freeing of ls if there's an error */
+	do_unreg = 1;
+
+	/* This uevent triggers dlm_controld in userspace to add us to the
+	   group of nodes that are members of this lockspace (managed by the
+	   cluster infrastructure.)  Once it's done that, it tells us who the
+	   current lockspace members are (via configfs) and then tells the
+	   lockspace to start running (via sysfs) in dlm_ls_start(). */
 
 	error = do_uevent(ls, 1);
 	if (error)
-		goto out_unreg;
+		goto out_stop;
+
+	wait_for_completion(&ls->ls_members_done);
+	error = ls->ls_members_result;
+	if (error)
+		goto out_members;
+
+	dlm_create_debug_file(ls);
+
+	log_debug(ls, "join complete");
 
 	*lockspace = ls;
 	return 0;
 
- out_unreg:
-	kobject_unregister(&ls->ls_kobj);
- out_del:
-	dlm_delete_debug_file(ls);
+ out_members:
+	do_uevent(ls, 0);
+	dlm_clear_members(ls);
+	kfree(ls->ls_node_array);
+ out_stop:
 	dlm_recoverd_stop(ls);
- out_rcomfree:
+ out_delist:
 	spin_lock(&lslist_lock);
 	list_del(&ls->ls_list);
 	spin_unlock(&lslist_lock);
@@ -550,7 +599,10 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
  out_rsbfree:
 	kfree(ls->ls_rsbtbl);
  out_lsfree:
-	kfree(ls);
+	if (do_unreg)
+		kobject_unregister(&ls->ls_kobj);
+	else
+		kfree(ls);
  out:
 	module_put(THIS_MODULE);
 	return error;
@@ -570,6 +622,8 @@ int dlm_new_lockspace(char *name, int namelen, void **lockspace,
 	error = new_lockspace(name, namelen, lockspace, flags, lvblen);
 	if (!error)
 		ls_count++;
+	else if (!ls_count)
+		threads_stop();
  out:
 	mutex_unlock(&ls_lock);
 	return error;
@@ -696,7 +750,7 @@ static int release_lockspace(struct dlm_ls *ls, int force)
 	dlm_clear_members_gone(ls);
 	kfree(ls->ls_node_array);
 	kobject_unregister(&ls->ls_kobj);
-        /* The ls structure will be freed when the kobject is done with */
+	/* The ls structure will be freed when the kobject is done with */
 
 	mutex_lock(&ls_lock);
 	ls_count--;
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 27970a58d29..0553a6158dc 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -260,7 +260,7 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
 static void lowcomms_data_ready(struct sock *sk, int count_unused)
 {
 	struct connection *con = sock2con(sk);
-	if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
+	if (con && !test_and_set_bit(CF_READ_PENDING, &con->flags))
 		queue_work(recv_workqueue, &con->rwork);
 }
 
@@ -268,7 +268,7 @@ static void lowcomms_write_space(struct sock *sk)
 {
 	struct connection *con = sock2con(sk);
 
-	if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
+	if (con && !test_and_set_bit(CF_WRITE_PENDING, &con->flags))
 		queue_work(send_workqueue, &con->swork);
 }
 
@@ -720,11 +720,17 @@ static int tcp_accept_from_sock(struct connection *con)
 			INIT_WORK(&othercon->rwork, process_recv_sockets);
 			set_bit(CF_IS_OTHERCON, &othercon->flags);
 			newcon->othercon = othercon;
+			othercon->sock = newsock;
+			newsock->sk->sk_user_data = othercon;
+			add_sock(newsock, othercon);
+			addcon = othercon;
+		}
+		else {
+			printk("Extra connection from node %d attempted\n", nodeid);
+			result = -EAGAIN;
+			mutex_unlock(&newcon->sock_mutex);
+			goto accept_err;
 		}
-		othercon->sock = newsock;
-		newsock->sk->sk_user_data = othercon;
-		add_sock(newsock, othercon);
-		addcon = othercon;
 	}
 	else {
 		newsock->sk->sk_user_data = newcon;
@@ -1400,8 +1406,11 @@ void dlm_lowcomms_stop(void)
 	down(&connections_lock);
 	for (i = 0; i <= max_nodeid; i++) {
 		con = __nodeid2con(i, 0);
-		if (con)
+		if (con) {
 			con->flags |= 0xFF;
+			if (con->sock)
+				con->sock->sk->sk_user_data = NULL;
+		}
 	}
 	up(&connections_lock);
 
diff --git a/fs/dlm/main.c b/fs/dlm/main.c
index 162fbae58fe..eca2907f238 100644
--- a/fs/dlm/main.c
+++ b/fs/dlm/main.c
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -25,6 +25,8 @@ void dlm_unregister_debugfs(void);
 static inline int dlm_register_debugfs(void) { return 0; }
 static inline void dlm_unregister_debugfs(void) { }
 #endif
+int dlm_netlink_init(void);
+void dlm_netlink_exit(void);
 
 static int __init init_dlm(void)
 {
@@ -50,10 +52,16 @@ static int __init init_dlm(void)
 	if (error)
 		goto out_debug;
 
+	error = dlm_netlink_init();
+	if (error)
+		goto out_user;
+
 	printk("DLM (built %s %s) installed\n", __DATE__, __TIME__);
 
 	return 0;
 
+ out_user:
+	dlm_user_exit();
  out_debug:
 	dlm_unregister_debugfs();
  out_config:
@@ -68,6 +76,7 @@ static int __init init_dlm(void)
 
 static void __exit exit_dlm(void)
 {
+	dlm_netlink_exit();
 	dlm_user_exit();
 	dlm_config_exit();
 	dlm_memory_exit();
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index 85e2897bd74..073599dced2 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -1,7 +1,7 @@
 /******************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -233,6 +233,12 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
 	*neg_out = neg;
 
 	error = ping_members(ls);
+	if (!error || error == -EPROTO) {
+		/* new_lockspace() may be waiting to know if the config
+		   is good or bad */
+		ls->ls_members_result = error;
+		complete(&ls->ls_members_done);
+	}
 	if (error)
 		goto out;
 
@@ -284,6 +290,9 @@ int dlm_ls_stop(struct dlm_ls *ls)
 	dlm_recoverd_suspend(ls);
 	ls->ls_recover_status = 0;
 	dlm_recoverd_resume(ls);
+
+	if (!ls->ls_recover_begin)
+		ls->ls_recover_begin = jiffies;
 	return 0;
 }
 
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
new file mode 100644
index 00000000000..863b87d0dc7
--- /dev/null
+++ b/fs/dlm/netlink.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2007 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <net/genetlink.h>
+#include <linux/dlm.h>
+#include <linux/dlm_netlink.h>
+
+#include "dlm_internal.h"
+
+static uint32_t dlm_nl_seqnum;
+static uint32_t listener_nlpid;
+
+static struct genl_family family = {
+	.id		= GENL_ID_GENERATE,
+	.name		= DLM_GENL_NAME,
+	.version	= DLM_GENL_VERSION,
+};
+
+static int prepare_data(u8 cmd, struct sk_buff **skbp, size_t size)
+{
+	struct sk_buff *skb;
+	void *data;
+
+	skb = genlmsg_new(size, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	/* add the message headers */
+	data = genlmsg_put(skb, 0, dlm_nl_seqnum++, &family, 0, cmd);
+	if (!data) {
+		nlmsg_free(skb);
+		return -EINVAL;
+	}
+
+	*skbp = skb;
+	return 0;
+}
+
+static struct dlm_lock_data *mk_data(struct sk_buff *skb)
+{
+	struct nlattr *ret;
+
+	ret = nla_reserve(skb, DLM_TYPE_LOCK, sizeof(struct dlm_lock_data));
+	if (!ret)
+		return NULL;
+	return nla_data(ret);
+}
+
+static int send_data(struct sk_buff *skb)
+{
+	struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
+	void *data = genlmsg_data(genlhdr);
+	int rv;
+
+	rv = genlmsg_end(skb, data);
+	if (rv < 0) {
+		nlmsg_free(skb);
+		return rv;
+	}
+
+	return genlmsg_unicast(skb, listener_nlpid);
+}
+
+static int user_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+	listener_nlpid = info->snd_pid;
+	printk("user_cmd nlpid %u\n", listener_nlpid);
+	return 0;
+}
+
+static struct genl_ops dlm_nl_ops = {
+	.cmd		= DLM_CMD_HELLO,
+	.doit		= user_cmd,
+};
+
+int dlm_netlink_init(void)
+{
+	int rv;
+
+	rv = genl_register_family(&family);
+	if (rv)
+		return rv;
+
+	rv = genl_register_ops(&family, &dlm_nl_ops);
+	if (rv < 0)
+		goto err;
+	return 0;
+ err:
+	genl_unregister_family(&family);
+	return rv;
+}
+
+void dlm_netlink_exit(void)
+{
+	genl_unregister_ops(&family, &dlm_nl_ops);
+	genl_unregister_family(&family);
+}
+
+static void fill_data(struct dlm_lock_data *data, struct dlm_lkb *lkb)
+{
+	struct dlm_rsb *r = lkb->lkb_resource;
+	struct dlm_user_args *ua = (struct dlm_user_args *) lkb->lkb_astparam;
+
+	memset(data, 0, sizeof(struct dlm_lock_data));
+
+	data->version = DLM_LOCK_DATA_VERSION;
+	data->nodeid = lkb->lkb_nodeid;
+	data->ownpid = lkb->lkb_ownpid;
+	data->id = lkb->lkb_id;
+	data->remid = lkb->lkb_remid;
+	data->status = lkb->lkb_status;
+	data->grmode = lkb->lkb_grmode;
+	data->rqmode = lkb->lkb_rqmode;
+	data->timestamp = lkb->lkb_timestamp;
+	if (ua)
+		data->xid = ua->xid;
+	if (r) {
+		data->lockspace_id = r->res_ls->ls_global_id;
+		data->resource_namelen = r->res_length;
+		memcpy(data->resource_name, r->res_name, r->res_length);
+	}
+}
+
+void dlm_timeout_warn(struct dlm_lkb *lkb)
+{
+	struct dlm_lock_data *data;
+	struct sk_buff *send_skb;
+	size_t size;
+	int rv;
+
+	size = nla_total_size(sizeof(struct dlm_lock_data)) +
+	       nla_total_size(0); /* why this? */
+
+	rv = prepare_data(DLM_CMD_TIMEOUT, &send_skb, size);
+	if (rv < 0)
+		return;
+
+	data = mk_data(send_skb);
+	if (!data) {
+		nlmsg_free(send_skb);
+		return;
+	}
+
+	fill_data(data, lkb);
+
+	send_data(send_skb);
+}
+
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 6bfbd615380..e3a1527cbdb 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -38,7 +38,7 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
 	char *mb;
 	int mb_len = sizeof(struct dlm_rcom) + len;
 
-	mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb);
+	mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb);
 	if (!mh) {
 		log_print("create_rcom to %d type %d len %d ENOBUFS",
 			  to_nodeid, type, len);
@@ -90,7 +90,7 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
 		log_error(ls, "version mismatch: %x nodeid %d: %x",
 			  DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid,
 			  rc->rc_header.h_version);
-		return -EINVAL;
+		return -EPROTO;
 	}
 
 	if (rf->rf_lvblen != ls->ls_lvblen ||
@@ -98,7 +98,7 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
 		log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x",
 			  ls->ls_lvblen, ls->ls_exflags,
 			  nodeid, rf->rf_lvblen, rf->rf_lsflags);
-		return -EINVAL;
+		return -EPROTO;
 	}
 	return 0;
 }
@@ -386,7 +386,8 @@ static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 	dlm_recover_process_copy(ls, rc_in);
 }
 
-static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
+static int send_ls_not_ready(struct dlm_ls *ls, int nodeid,
+			     struct dlm_rcom *rc_in)
 {
 	struct dlm_rcom *rc;
 	struct rcom_config *rf;
@@ -394,7 +395,7 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
 	char *mb;
 	int mb_len = sizeof(struct dlm_rcom) + sizeof(struct rcom_config);
 
-	mh = dlm_lowcomms_get_buffer(nodeid, mb_len, GFP_KERNEL, &mb);
+	mh = dlm_lowcomms_get_buffer(nodeid, mb_len, ls->ls_allocation, &mb);
 	if (!mh)
 		return -ENOBUFS;
 	memset(mb, 0, mb_len);
@@ -464,7 +465,7 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
 		log_print("lockspace %x from %d type %x not found",
 			  hd->h_lockspace, nodeid, rc->rc_type);
 		if (rc->rc_type == DLM_RCOM_STATUS)
-			send_ls_not_ready(nodeid, rc);
+			send_ls_not_ready(ls, nodeid, rc);
 		return;
 	}
 
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 3cb636d6024..66575997861 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -190,6 +190,8 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
 
 	dlm_clear_members_gone(ls);
 
+	dlm_adjust_timeouts(ls);
+
 	error = enable_locking(ls, rv->seq);
 	if (error) {
 		log_debug(ls, "enable_locking failed %d", error);
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index b0201ec325a..6438941ab1f 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -33,16 +33,17 @@ static const struct file_operations device_fops;
 struct dlm_lock_params32 {
 	__u8 mode;
 	__u8 namelen;
-	__u16 flags;
+	__u16 unused;
+	__u32 flags;
 	__u32 lkid;
 	__u32 parent;
-
+	__u64 xid;
+	__u64 timeout;
 	__u32 castparam;
 	__u32 castaddr;
 	__u32 bastparam;
 	__u32 bastaddr;
 	__u32 lksb;
-
 	char lvb[DLM_USER_LVB_LEN];
 	char name[0];
 };
@@ -68,6 +69,7 @@ struct dlm_lksb32 {
 };
 
 struct dlm_lock_result32 {
+	__u32 version[3];
 	__u32 length;
 	__u32 user_astaddr;
 	__u32 user_astparam;
@@ -102,6 +104,8 @@ static void compat_input(struct dlm_write_request *kb,
 		kb->i.lock.flags = kb32->i.lock.flags;
 		kb->i.lock.lkid = kb32->i.lock.lkid;
 		kb->i.lock.parent = kb32->i.lock.parent;
+		kb->i.lock.xid = kb32->i.lock.xid;
+		kb->i.lock.timeout = kb32->i.lock.timeout;
 		kb->i.lock.castparam = (void *)(long)kb32->i.lock.castparam;
 		kb->i.lock.castaddr = (void *)(long)kb32->i.lock.castaddr;
 		kb->i.lock.bastparam = (void *)(long)kb32->i.lock.bastparam;
@@ -115,6 +119,10 @@ static void compat_input(struct dlm_write_request *kb,
 static void compat_output(struct dlm_lock_result *res,
 			  struct dlm_lock_result32 *res32)
 {
+	res32->version[0] = res->version[0];
+	res32->version[1] = res->version[1];
+	res32->version[2] = res->version[2];
+
 	res32->user_astaddr = (__u32)(long)res->user_astaddr;
 	res32->user_astparam = (__u32)(long)res->user_astparam;
 	res32->user_lksb = (__u32)(long)res->user_lksb;
@@ -130,6 +138,36 @@ static void compat_output(struct dlm_lock_result *res,
 }
 #endif
 
+/* Figure out if this lock is at the end of its life and no longer
+   available for the application to use.  The lkb still exists until
+   the final ast is read.  A lock becomes EOL in three situations:
+     1. a noqueue request fails with EAGAIN
+     2. an unlock completes with EUNLOCK
+     3. a cancel of a waiting request completes with ECANCEL/EDEADLK
+   An EOL lock needs to be removed from the process's list of locks.
+   And we can't allow any new operation on an EOL lock.  This is
+   not related to the lifetime of the lkb struct which is managed
+   entirely by refcount. */
+
+static int lkb_is_endoflife(struct dlm_lkb *lkb, int sb_status, int type)
+{
+	switch (sb_status) {
+	case -DLM_EUNLOCK:
+		return 1;
+	case -DLM_ECANCEL:
+	case -ETIMEDOUT:
+	case -EDEADLK:
+		if (lkb->lkb_grmode == DLM_LOCK_IV)
+			return 1;
+		break;
+	case -EAGAIN:
+		if (type == AST_COMP && lkb->lkb_grmode == DLM_LOCK_IV)
+			return 1;
+		break;
+	}
+	return 0;
+}
+
 /* we could possibly check if the cancel of an orphan has resulted in the lkb
    being removed and then remove that lkb from the orphans list and free it */
 
@@ -176,25 +214,7 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
 		log_debug(ls, "ast overlap %x status %x %x",
 			  lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags);
 
-	/* Figure out if this lock is at the end of its life and no longer
-	   available for the application to use.  The lkb still exists until
-	   the final ast is read.  A lock becomes EOL in three situations:
-	     1. a noqueue request fails with EAGAIN
-	     2. an unlock completes with EUNLOCK
-	     3. a cancel of a waiting request completes with ECANCEL
-	   An EOL lock needs to be removed from the process's list of locks.
-	   And we can't allow any new operation on an EOL lock.  This is
-	   not related to the lifetime of the lkb struct which is managed
-	   entirely by refcount. */
-
-	if (type == AST_COMP &&
-	    lkb->lkb_grmode == DLM_LOCK_IV &&
-	    ua->lksb.sb_status == -EAGAIN)
-		eol = 1;
-	else if (ua->lksb.sb_status == -DLM_EUNLOCK ||
-	    (ua->lksb.sb_status == -DLM_ECANCEL &&
-	     lkb->lkb_grmode == DLM_LOCK_IV))
-		eol = 1;
+	eol = lkb_is_endoflife(lkb, ua->lksb.sb_status, type);
 	if (eol) {
 		lkb->lkb_ast_type &= ~AST_BAST;
 		lkb->lkb_flags |= DLM_IFL_ENDOFLIFE;
@@ -252,16 +272,18 @@ static int device_user_lock(struct dlm_user_proc *proc,
 	ua->castaddr = params->castaddr;
 	ua->bastparam = params->bastparam;
 	ua->bastaddr = params->bastaddr;
+	ua->xid = params->xid;
 
 	if (params->flags & DLM_LKF_CONVERT)
 		error = dlm_user_convert(ls, ua,
 				         params->mode, params->flags,
-				         params->lkid, params->lvb);
+				         params->lkid, params->lvb,
+					 (unsigned long) params->timeout);
 	else {
 		error = dlm_user_request(ls, ua,
 					 params->mode, params->flags,
 					 params->name, params->namelen,
-					 params->parent);
+					 (unsigned long) params->timeout);
 		if (!error)
 			error = ua->lksb.sb_lkid;
 	}
@@ -299,6 +321,22 @@ static int device_user_unlock(struct dlm_user_proc *proc,
 	return error;
 }
 
+static int device_user_deadlock(struct dlm_user_proc *proc,
+				struct dlm_lock_params *params)
+{
+	struct dlm_ls *ls;
+	int error;
+
+	ls = dlm_find_lockspace_local(proc->lockspace);
+	if (!ls)
+		return -ENOENT;
+
+	error = dlm_user_deadlock(ls, params->flags, params->lkid);
+
+	dlm_put_lockspace(ls);
+	return error;
+}
+
 static int create_misc_device(struct dlm_ls *ls, char *name)
 {
 	int error, len;
@@ -348,7 +386,7 @@ static int device_create_lockspace(struct dlm_lspace_params *params)
 		return -EPERM;
 
 	error = dlm_new_lockspace(params->name, strlen(params->name),
-				  &lockspace, 0, DLM_USER_LVB_LEN);
+				  &lockspace, params->flags, DLM_USER_LVB_LEN);
 	if (error)
 		return error;
 
@@ -524,6 +562,14 @@ static ssize_t device_write(struct file *file, const char __user *buf,
 		error = device_user_unlock(proc, &kbuf->i.lock);
 		break;
 
+	case DLM_USER_DEADLOCK:
+		if (!proc) {
+			log_print("no locking on control device");
+			goto out_sig;
+		}
+		error = device_user_deadlock(proc, &kbuf->i.lock);
+		break;
+
 	case DLM_USER_CREATE_LOCKSPACE:
 		if (proc) {
 			log_print("create/remove only on control device");
@@ -641,6 +687,9 @@ static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type,
 	int struct_len;
 
 	memset(&result, 0, sizeof(struct dlm_lock_result));
+	result.version[0] = DLM_DEVICE_VERSION_MAJOR;
+	result.version[1] = DLM_DEVICE_VERSION_MINOR;
+	result.version[2] = DLM_DEVICE_VERSION_PATCH;
 	memcpy(&result.lksb, &ua->lksb, sizeof(struct dlm_lksb));
 	result.user_lksb = ua->user_lksb;
 
@@ -699,6 +748,20 @@ static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type,
 	return error;
 }
 
+static int copy_version_to_user(char __user *buf, size_t count)
+{
+	struct dlm_device_version ver;
+
+	memset(&ver, 0, sizeof(struct dlm_device_version));
+	ver.version[0] = DLM_DEVICE_VERSION_MAJOR;
+	ver.version[1] = DLM_DEVICE_VERSION_MINOR;
+	ver.version[2] = DLM_DEVICE_VERSION_PATCH;
+
+	if (copy_to_user(buf, &ver, sizeof(struct dlm_device_version)))
+		return -EFAULT;
+	return sizeof(struct dlm_device_version);
+}
+
 /* a read returns a single ast described in a struct dlm_lock_result */
 
 static ssize_t device_read(struct file *file, char __user *buf, size_t count,
@@ -710,6 +773,16 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
 	DECLARE_WAITQUEUE(wait, current);
 	int error, type=0, bmode=0, removed = 0;
 
+	if (count == sizeof(struct dlm_device_version)) {
+		error = copy_version_to_user(buf, count);
+		return error;
+	}
+
+	if (!proc) {
+		log_print("non-version read from control device %zu", count);
+		return -EINVAL;
+	}
+
 #ifdef CONFIG_COMPAT
 	if (count < sizeof(struct dlm_lock_result32))
 #else
@@ -747,11 +820,6 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
 		}
 	}
 
-	if (list_empty(&proc->asts)) {
-		spin_unlock(&proc->asts_spin);
-		return -EAGAIN;
-	}
-
 	/* there may be both completion and blocking asts to return for
 	   the lkb, don't remove lkb from asts list unless no asts remain */
 
@@ -823,6 +891,7 @@ static const struct file_operations device_fops = {
 static const struct file_operations ctl_device_fops = {
 	.open    = ctl_device_open,
 	.release = ctl_device_close,
+	.read    = device_read,
 	.write   = device_write,
 	.owner   = THIS_MODULE,
 };
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 03ea7696fe3..59375efcf39 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -20,7 +20,7 @@ static void drop_pagecache_sb(struct super_block *sb)
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 		if (inode->i_state & (I_FREEING|I_WILL_FREE))
 			continue;
-		invalidate_mapping_pages(inode->i_mapping, 0, -1);
+		__invalidate_mapping_pages(inode->i_mapping, 0, -1, true);
 	}
 	spin_unlock(&inode_lock);
 }
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 403e3bad145..1b9dd9a96f1 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -580,5 +580,7 @@ void
 ecryptfs_write_header_metadata(char *virt,
 			       struct ecryptfs_crypt_stat *crypt_stat,
 			       size_t *written);
+int ecryptfs_write_zeros(struct file *file, pgoff_t index, int start,
+			 int num_zeros);
 
 #endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 59288d81707..94f456fe4d9 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -338,16 +338,17 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag)
 	return rc;
 }
 
-static ssize_t ecryptfs_sendfile(struct file *file, loff_t * ppos,
-				 size_t count, read_actor_t actor, void *target)
+static ssize_t ecryptfs_splice_read(struct file *file, loff_t * ppos,
+				    struct pipe_inode_info *pipe, size_t count,
+				    unsigned int flags)
 {
 	struct file *lower_file = NULL;
 	int rc = -EINVAL;
 
 	lower_file = ecryptfs_file_to_lower(file);
-	if (lower_file->f_op && lower_file->f_op->sendfile)
-		rc = lower_file->f_op->sendfile(lower_file, ppos, count,
-						actor, target);
+	if (lower_file->f_op && lower_file->f_op->splice_read)
+		rc = lower_file->f_op->splice_read(lower_file, ppos, pipe,
+						count, flags);
 
 	return rc;
 }
@@ -364,7 +365,7 @@ const struct file_operations ecryptfs_dir_fops = {
 	.release = ecryptfs_release,
 	.fsync = ecryptfs_fsync,
 	.fasync = ecryptfs_fasync,
-	.sendfile = ecryptfs_sendfile,
+	.splice_read = ecryptfs_splice_read,
 };
 
 const struct file_operations ecryptfs_main_fops = {
@@ -381,7 +382,7 @@ const struct file_operations ecryptfs_main_fops = {
 	.release = ecryptfs_release,
 	.fsync = ecryptfs_fsync,
 	.fasync = ecryptfs_fasync,
-	.sendfile = ecryptfs_sendfile,
+	.splice_read = ecryptfs_splice_read,
 };
 
 static int
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 1548be26b5e..83e94fedd4e 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -800,6 +800,25 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
 			goto out_fput;
 		}
 	} else { /* new_length < i_size_read(inode) */
+		pgoff_t index = 0;
+		int end_pos_in_page = -1;
+
+		if (new_length != 0) {
+			index = ((new_length - 1) >> PAGE_CACHE_SHIFT);
+			end_pos_in_page = ((new_length - 1) & ~PAGE_CACHE_MASK);
+		}
+		if (end_pos_in_page != (PAGE_CACHE_SIZE - 1)) {
+			if ((rc = ecryptfs_write_zeros(&fake_ecryptfs_file,
+						       index,
+						       (end_pos_in_page + 1),
+						       ((PAGE_CACHE_SIZE - 1)
+							- end_pos_in_page)))) {
+				printk(KERN_ERR "Error attempting to zero out "
+				       "the remainder of the end page on "
+				       "reducing truncate; rc = [%d]\n", rc);
+				goto out_fput;
+			}
+		}
 		vmtruncate(inode, new_length);
 		rc = ecryptfs_write_inode_size_to_metadata(
 			lower_file, lower_dentry->d_inode, inode, dentry,
@@ -875,9 +894,54 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
 	struct ecryptfs_crypt_stat *crypt_stat;
 
 	crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
-	lower_dentry = ecryptfs_dentry_to_lower(dentry);
+	if (!(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED))
+		ecryptfs_init_crypt_stat(crypt_stat);
 	inode = dentry->d_inode;
 	lower_inode = ecryptfs_inode_to_lower(inode);
+	lower_dentry = ecryptfs_dentry_to_lower(dentry);
+	mutex_lock(&crypt_stat->cs_mutex);
+	if (S_ISDIR(dentry->d_inode->i_mode))
+		crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
+	else if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)
+		 || !(crypt_stat->flags & ECRYPTFS_KEY_VALID)) {
+		struct vfsmount *lower_mnt;
+		struct file *lower_file = NULL;
+		struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
+		int lower_flags;
+
+		lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
+		lower_flags = O_RDONLY;
+		if ((rc = ecryptfs_open_lower_file(&lower_file, lower_dentry,
+						   lower_mnt, lower_flags))) {
+			printk(KERN_ERR
+			       "Error opening lower file; rc = [%d]\n", rc);
+			mutex_unlock(&crypt_stat->cs_mutex);
+			goto out;
+		}
+		mount_crypt_stat = &ecryptfs_superblock_to_private(
+			dentry->d_sb)->mount_crypt_stat;
+		if ((rc = ecryptfs_read_metadata(dentry, lower_file))) {
+			if (!(mount_crypt_stat->flags
+			      & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) {
+				rc = -EIO;
+				printk(KERN_WARNING "Attempt to read file that "
+				       "is not in a valid eCryptfs format, "
+				       "and plaintext passthrough mode is not "
+				       "enabled; returning -EIO\n");
+
+				mutex_unlock(&crypt_stat->cs_mutex);
+				fput(lower_file);
+				goto out;
+			}
+			rc = 0;
+			crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
+			mutex_unlock(&crypt_stat->cs_mutex);
+			fput(lower_file);
+			goto out;
+		}
+		fput(lower_file);
+	}
+	mutex_unlock(&crypt_stat->cs_mutex);
 	if (ia->ia_valid & ATTR_SIZE) {
 		ecryptfs_printk(KERN_DEBUG,
 				"ia->ia_valid = [0x%x] ATTR_SIZE" " = [0x%x]\n",
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 606128f5c92..02ca6f1e55d 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -840,8 +840,6 @@ static int __init ecryptfs_init(void)
 		goto out;
 	}
 	kobj_set_kset_s(&ecryptfs_subsys, fs_subsys);
-	sysfs_attr_version.attr.owner = THIS_MODULE;
-	sysfs_attr_version_str.attr.owner = THIS_MODULE;
 	rc = do_sysfs_registration();
 	if (rc) {
 		printk(KERN_ERR "sysfs registration failed\n");
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 55cec98a84e..7d5a43cb0d5 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -56,9 +56,6 @@ static struct page *ecryptfs_get1page(struct file *file, int index)
 	return read_mapping_page(mapping, index, (void *)file);
 }
 
-static
-int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros);
-
 /**
  * ecryptfs_fill_zeros
  * @file: The ecryptfs file
@@ -101,10 +98,13 @@ int ecryptfs_fill_zeros(struct file *file, loff_t new_length)
 	if (old_end_page_index == new_end_page_index) {
 		/* Start and end are in the same page; we just need to
 		 * set a portion of the existing page to zero's */
-		rc = write_zeros(file, index, (old_end_pos_in_page + 1),
-				 (new_end_pos_in_page - old_end_pos_in_page));
+		rc = ecryptfs_write_zeros(file, index,
+					  (old_end_pos_in_page + 1),
+					  (new_end_pos_in_page
+					   - old_end_pos_in_page));
 		if (rc)
-			ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], "
+			ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros("
+					"file=[%p], "
 					"index=[0x%.16x], "
 					"old_end_pos_in_page=[d], "
 					"(PAGE_CACHE_SIZE - new_end_pos_in_page"
@@ -117,10 +117,10 @@ int ecryptfs_fill_zeros(struct file *file, loff_t new_length)
 		goto out;
 	}
 	/* Fill the remainder of the previous last page with zeros */
-	rc = write_zeros(file, index, (old_end_pos_in_page + 1),
+	rc = ecryptfs_write_zeros(file, index, (old_end_pos_in_page + 1),
 			 ((PAGE_CACHE_SIZE - 1) - old_end_pos_in_page));
 	if (rc) {
-		ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], "
+		ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros(file=[%p], "
 				"index=[0x%.16x], old_end_pos_in_page=[d], "
 				"(PAGE_CACHE_SIZE - old_end_pos_in_page)=[d]) "
 				"returned [%d]\n", file, index,
@@ -131,9 +131,10 @@ int ecryptfs_fill_zeros(struct file *file, loff_t new_length)
 	index++;
 	while (index < new_end_page_index) {
 		/* Fill all intermediate pages with zeros */
-		rc = write_zeros(file, index, 0, PAGE_CACHE_SIZE);
+		rc = ecryptfs_write_zeros(file, index, 0, PAGE_CACHE_SIZE);
 		if (rc) {
-			ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], "
+			ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros("
+					"file=[%p], "
 					"index=[0x%.16x], "
 					"old_end_pos_in_page=[d], "
 					"(PAGE_CACHE_SIZE - new_end_pos_in_page"
@@ -149,9 +150,9 @@ int ecryptfs_fill_zeros(struct file *file, loff_t new_length)
 	}
 	/* Fill the portion at the beginning of the last new page with
 	 * zero's */
-	rc = write_zeros(file, index, 0, (new_end_pos_in_page + 1));
+	rc = ecryptfs_write_zeros(file, index, 0, (new_end_pos_in_page + 1));
 	if (rc) {
-		ecryptfs_printk(KERN_ERR, "write_zeros(file="
+		ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros(file="
 				"[%p], index=[0x%.16x], 0, "
 				"new_end_pos_in_page=[%d]"
 				"returned [%d]\n", file, index,
@@ -400,7 +401,6 @@ out:
 static int ecryptfs_prepare_write(struct file *file, struct page *page,
 				  unsigned from, unsigned to)
 {
-	loff_t pos;
 	int rc = 0;
 
 	if (from == 0 && to == PAGE_CACHE_SIZE)
@@ -408,15 +408,22 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
 				   up to date. */
 	if (!PageUptodate(page))
 		rc = ecryptfs_do_readpage(file, page, page->index);
-	pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
-	if (pos > i_size_read(page->mapping->host)) {
-		rc = ecryptfs_truncate(file->f_path.dentry, pos);
-		if (rc) {
-			printk(KERN_ERR "Error on attempt to "
-			       "truncate to (higher) offset [%lld];"
-			       " rc = [%d]\n", pos, rc);
-			goto out;
+	if (page->index != 0) {
+		loff_t end_of_prev_pg_pos =
+			(((loff_t)page->index << PAGE_CACHE_SHIFT) - 1);
+
+		if (end_of_prev_pg_pos > i_size_read(page->mapping->host)) {
+			rc = ecryptfs_truncate(file->f_path.dentry,
+					       end_of_prev_pg_pos);
+			if (rc) {
+				printk(KERN_ERR "Error on attempt to "
+				       "truncate to (higher) offset [%lld];"
+				       " rc = [%d]\n", end_of_prev_pg_pos, rc);
+				goto out;
+			}
 		}
+		if (end_of_prev_pg_pos + 1 > i_size_read(page->mapping->host))
+			zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
 	}
 out:
 	return rc;
@@ -753,7 +760,7 @@ out:
 }
 
 /**
- * write_zeros
+ * ecryptfs_write_zeros
  * @file: The ecryptfs file
  * @index: The index in which we are writing
  * @start: The position after the last block of data
@@ -763,8 +770,8 @@ out:
  *
  * (start + num_zeros) must be less than or equal to PAGE_CACHE_SIZE
  */
-static
-int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros)
+int
+ecryptfs_write_zeros(struct file *file, pgoff_t index, int start, int num_zeros)
 {
 	int rc = 0;
 	struct page *tmp_page;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 566d4e2d385..ab7961260c4 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -24,9 +24,9 @@
 #include "acl.h"
 
 /*
- * Called when an inode is released. Note that this is different
- * from ext2_open_file: open gets called at every open, but release
- * gets called only when /all/ the files are closed.
+ * Called when filp is released. This happens when all file descriptors
+ * for a single struct file are closed. Note that different open() calls
+ * for the same file yield different struct file structures.
  */
 static int ext2_release_file (struct inode * inode, struct file * filp)
 {
@@ -53,7 +53,6 @@ const struct file_operations ext2_file_operations = {
 	.open		= generic_file_open,
 	.release	= ext2_release_file,
 	.fsync		= ext2_sync_file,
-	.sendfile	= generic_file_sendfile,
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= generic_file_splice_write,
 };
@@ -71,7 +70,6 @@ const struct file_operations ext2_xip_file_operations = {
 	.open		= generic_file_open,
 	.release	= ext2_release_file,
 	.fsync		= ext2_sync_file,
-	.sendfile	= xip_file_sendfile,
 };
 #endif
 
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 16337bff027..b2efd9083b9 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1038,6 +1038,15 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
 	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
 		((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
 
+	ext2_xip_verify_sb(sb); /* see if bdev supports xip, unset
+				    EXT2_MOUNT_XIP if not */
+
+	if ((ext2_use_xip(sb)) && (sb->s_blocksize != PAGE_SIZE)) {
+		printk("XIP: Unsupported blocksize\n");
+		err = -EINVAL;
+		goto restore_opts;
+	}
+
 	es = sbi->s_es;
 	if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) !=
 	    (old_mount_opt & EXT2_MOUNT_XIP)) &&
@@ -1090,15 +1099,18 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
 	struct super_block *sb = dentry->d_sb;
 	struct ext2_sb_info *sbi = EXT2_SB(sb);
 	struct ext2_super_block *es = sbi->s_es;
-	unsigned long overhead;
-	int i;
 	u64 fsid;
 
 	if (test_opt (sb, MINIX_DF))
-		overhead = 0;
-	else {
+		sbi->s_overhead_last = 0;
+	else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) {
+		unsigned long i, overhead = 0;
+		smp_rmb();
+
 		/*
-		 * Compute the overhead (FS structures)
+		 * Compute the overhead (FS structures). This is constant
+		 * for a given filesystem unless the number of block groups
+		 * changes so we cache the previous value until it does.
 		 */
 
 		/*
@@ -1122,17 +1134,22 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
 		 */
 		overhead += (sbi->s_groups_count *
 			     (2 + sbi->s_itb_per_group));
+		sbi->s_overhead_last = overhead;
+		smp_wmb();
+		sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count);
 	}
 
 	buf->f_type = EXT2_SUPER_MAGIC;
 	buf->f_bsize = sb->s_blocksize;
-	buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead;
+	buf->f_blocks = le32_to_cpu(es->s_blocks_count) - sbi->s_overhead_last;
 	buf->f_bfree = ext2_count_free_blocks(sb);
+	es->s_free_blocks_count = cpu_to_le32(buf->f_bfree);
 	buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count);
 	if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count))
 		buf->f_bavail = 0;
 	buf->f_files = le32_to_cpu(es->s_inodes_count);
 	buf->f_ffree = ext2_count_free_inodes(sb);
+	es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
 	buf->f_namelen = EXT2_NAME_LEN;
 	fsid = le64_to_cpup((void *)es->s_uuid) ^
 	       le64_to_cpup((void *)es->s_uuid + sizeof(u64));
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 1e6f1386453..acc4913d301 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -120,7 +120,6 @@ const struct file_operations ext3_file_operations = {
 	.open		= generic_file_open,
 	.release	= ext3_release_file,
 	.fsync		= ext3_sync_file,
-	.sendfile	= generic_file_sendfile,
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= generic_file_splice_write,
 };
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index a6cb6171c3a..de4e3161e47 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2677,8 +2677,10 @@ void ext3_read_inode(struct inode * inode)
 		 */
 		ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
 		if (EXT3_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
-		    EXT3_INODE_SIZE(inode->i_sb))
+		    EXT3_INODE_SIZE(inode->i_sb)) {
+			brelse (bh);
 			goto bad_inode;
+		}
 		if (ei->i_extra_isize == 0) {
 			/* The extra space is currently unused. Use it. */
 			ei->i_extra_isize = sizeof(struct ext3_inode) -
@@ -3193,7 +3195,7 @@ int ext3_change_inode_journal_flag(struct inode *inode, int val)
 	 */
 
 	journal = EXT3_JOURNAL(inode);
-	if (is_journal_aborted(journal) || IS_RDONLY(inode))
+	if (is_journal_aborted(journal))
 		return -EROFS;
 
 	journal_lock_updates(journal);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 9bb046df827..1586807b817 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1019,6 +1019,11 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
 
 		if (!inode)
 			return ERR_PTR(-EACCES);
+
+		if (is_bad_inode(inode)) {
+			iput(inode);
+			return ERR_PTR(-ENOENT);
+		}
 	}
 	return d_splice_alias(inode, dentry);
 }
@@ -1054,6 +1059,11 @@ struct dentry *ext3_get_parent(struct dentry *child)
 	if (!inode)
 		return ERR_PTR(-EACCES);
 
+	if (is_bad_inode(inode)) {
+		iput(inode);
+		return ERR_PTR(-ENOENT);
+	}
+
 	parent = d_alloc_anon(inode);
 	if (!parent) {
 		iput(inode);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 6e3062913a9..51d1c456cda 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -35,6 +35,7 @@
 #include <linux/namei.h>
 #include <linux/quotaops.h>
 #include <linux/seq_file.h>
+#include <linux/log2.h>
 
 #include <asm/uaccess.h>
 
@@ -459,6 +460,14 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
 
 static void ext3_destroy_inode(struct inode *inode)
 {
+	if (!list_empty(&(EXT3_I(inode)->i_orphan))) {
+		printk("EXT3 Inode %p: orphan list check failed!\n",
+			EXT3_I(inode));
+		print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
+				EXT3_I(inode), sizeof(struct ext3_inode_info),
+				false);
+		dump_stack();
+	}
 	kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
 }
 
@@ -1566,7 +1575,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 		sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
 		sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
 		if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) ||
-		    (sbi->s_inode_size & (sbi->s_inode_size - 1)) ||
+		    (!is_power_of_2(sbi->s_inode_size)) ||
 		    (sbi->s_inode_size > blocksize)) {
 			printk (KERN_ERR
 				"EXT3-fs: unsupported inode size: %d\n",
@@ -2075,6 +2084,7 @@ static int ext3_create_journal(struct super_block * sb,
 			       unsigned int journal_inum)
 {
 	journal_t *journal;
+	int err;
 
 	if (sb->s_flags & MS_RDONLY) {
 		printk(KERN_ERR "EXT3-fs: readonly filesystem when trying to "
@@ -2082,13 +2092,15 @@ static int ext3_create_journal(struct super_block * sb,
 		return -EROFS;
 	}
 
-	if (!(journal = ext3_get_journal(sb, journal_inum)))
+	journal = ext3_get_journal(sb, journal_inum);
+	if (!journal)
 		return -EINVAL;
 
 	printk(KERN_INFO "EXT3-fs: creating new journal on inode %u\n",
 	       journal_inum);
 
-	if (journal_create(journal)) {
+	err = journal_create(journal);
+	if (err) {
 		printk(KERN_ERR "EXT3-fs: error creating journal.\n");
 		journal_destroy(journal);
 		return -EIO;
@@ -2139,12 +2151,14 @@ static void ext3_mark_recovery_complete(struct super_block * sb,
 
 	journal_lock_updates(journal);
 	journal_flush(journal);
+	lock_super(sb);
 	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
 	    sb->s_flags & MS_RDONLY) {
 		EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
 		sb->s_dirt = 0;
 		ext3_commit_super(sb, es, 1);
 	}
+	unlock_super(sb);
 	journal_unlock_updates(journal);
 }
 
@@ -2333,7 +2347,13 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
 			    (sbi->s_mount_state & EXT3_VALID_FS))
 				es->s_state = cpu_to_le16(sbi->s_mount_state);
 
+			/*
+			 * We have to unlock super so that we can wait for
+			 * transactions.
+			 */
+			unlock_super(sb);
 			ext3_mark_recovery_complete(sb, es);
+			lock_super(sb);
 		} else {
 			__le32 ret;
 			if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb,
@@ -2406,19 +2426,19 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
 	struct super_block *sb = dentry->d_sb;
 	struct ext3_sb_info *sbi = EXT3_SB(sb);
 	struct ext3_super_block *es = sbi->s_es;
-	ext3_fsblk_t overhead;
-	int i;
 	u64 fsid;
 
-	if (test_opt (sb, MINIX_DF))
-		overhead = 0;
-	else {
-		unsigned long ngroups;
-		ngroups = EXT3_SB(sb)->s_groups_count;
+	if (test_opt(sb, MINIX_DF)) {
+		sbi->s_overhead_last = 0;
+	} else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) {
+		unsigned long ngroups = sbi->s_groups_count, i;
+		ext3_fsblk_t overhead = 0;
 		smp_rmb();
 
 		/*
-		 * Compute the overhead (FS structures)
+		 * Compute the overhead (FS structures).  This is constant
+		 * for a given filesystem unless the number of block groups
+		 * changes so we cache the previous value until it does.
 		 */
 
 		/*
@@ -2442,18 +2462,23 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
 		 * Every block group has an inode bitmap, a block
 		 * bitmap, and an inode table.
 		 */
-		overhead += (ngroups * (2 + EXT3_SB(sb)->s_itb_per_group));
+		overhead += ngroups * (2 + sbi->s_itb_per_group);
+		sbi->s_overhead_last = overhead;
+		smp_wmb();
+		sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count);
 	}
 
 	buf->f_type = EXT3_SUPER_MAGIC;
 	buf->f_bsize = sb->s_blocksize;
-	buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead;
+	buf->f_blocks = le32_to_cpu(es->s_blocks_count) - sbi->s_overhead_last;
 	buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter);
+	es->s_free_blocks_count = cpu_to_le32(buf->f_bfree);
 	buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count);
 	if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count))
 		buf->f_bavail = 0;
 	buf->f_files = le32_to_cpu(es->s_inodes_count);
 	buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter);
+	es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
 	buf->f_namelen = EXT3_NAME_LEN;
 	fsid = le64_to_cpup((void *)es->s_uuid) ^
 	       le64_to_cpup((void *)es->s_uuid + sizeof(u64));
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 3b64bb16c72..9de54ae48de 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1585,7 +1585,7 @@ allocated:
 	ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no);
 
 	if (in_range(ext4_block_bitmap(sb, gdp), ret_block, num) ||
-	    in_range(ext4_block_bitmap(sb, gdp), ret_block, num) ||
+	    in_range(ext4_inode_bitmap(sb, gdp), ret_block, num) ||
 	    in_range(ret_block, ext4_inode_table(sb, gdp),
 		     EXT4_SB(sb)->s_itb_per_group) ||
 	    in_range(ret_block + num - 1, ext4_inode_table(sb, gdp),
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 3c6c1fd2be9..d4c8186aed6 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -120,7 +120,6 @@ const struct file_operations ext4_file_operations = {
 	.open		= generic_file_open,
 	.release	= ext4_release_file,
 	.fsync		= ext4_sync_file,
-	.sendfile	= generic_file_sendfile,
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= generic_file_splice_write,
 };
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0bcf62a750f..8416fa28c42 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2673,8 +2673,10 @@ void ext4_read_inode(struct inode * inode)
 		 */
 		ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
 		if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
-		    EXT4_INODE_SIZE(inode->i_sb))
+		    EXT4_INODE_SIZE(inode->i_sb)) {
+			brelse (bh);
 			goto bad_inode;
+		}
 		if (ei->i_extra_isize == 0) {
 			/* The extra space is currently unused. Use it. */
 			ei->i_extra_isize = sizeof(struct ext4_inode) -
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 2811e5720ad..2de339dd755 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1017,6 +1017,11 @@ static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, str
 
 		if (!inode)
 			return ERR_PTR(-EACCES);
+
+		if (is_bad_inode(inode)) {
+			iput(inode);
+			return ERR_PTR(-ENOENT);
+		}
 	}
 	return d_splice_alias(inode, dentry);
 }
@@ -1052,6 +1057,11 @@ struct dentry *ext4_get_parent(struct dentry *child)
 	if (!inode)
 		return ERR_PTR(-EACCES);
 
+	if (is_bad_inode(inode)) {
+		iput(inode);
+		return ERR_PTR(-ENOENT);
+	}
+
 	parent = d_alloc_anon(inode);
 	if (!parent) {
 		iput(inode);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 175b68c6096..d0d8c76c7ed 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -510,6 +510,14 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 
 static void ext4_destroy_inode(struct inode *inode)
 {
+	if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
+		printk("EXT4 Inode %p: orphan list check failed!\n",
+			EXT4_I(inode));
+		print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
+				EXT4_I(inode), sizeof(struct ext4_inode_info),
+				true);
+		dump_stack();
+	}
 	kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
 }
 
@@ -2150,6 +2158,7 @@ static int ext4_create_journal(struct super_block * sb,
 			       unsigned int journal_inum)
 {
 	journal_t *journal;
+	int err;
 
 	if (sb->s_flags & MS_RDONLY) {
 		printk(KERN_ERR "EXT4-fs: readonly filesystem when trying to "
@@ -2157,13 +2166,15 @@ static int ext4_create_journal(struct super_block * sb,
 		return -EROFS;
 	}
 
-	if (!(journal = ext4_get_journal(sb, journal_inum)))
+	journal = ext4_get_journal(sb, journal_inum);
+	if (!journal)
 		return -EINVAL;
 
 	printk(KERN_INFO "EXT4-fs: creating new journal on inode %u\n",
 	       journal_inum);
 
-	if (jbd2_journal_create(journal)) {
+	err = jbd2_journal_create(journal);
+	if (err) {
 		printk(KERN_ERR "EXT4-fs: error creating journal.\n");
 		jbd2_journal_destroy(journal);
 		return -EIO;
@@ -2214,12 +2225,14 @@ static void ext4_mark_recovery_complete(struct super_block * sb,
 
 	jbd2_journal_lock_updates(journal);
 	jbd2_journal_flush(journal);
+	lock_super(sb);
 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
 	    sb->s_flags & MS_RDONLY) {
 		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 		sb->s_dirt = 0;
 		ext4_commit_super(sb, es, 1);
 	}
+	unlock_super(sb);
 	jbd2_journal_unlock_updates(journal);
 }
 
@@ -2408,7 +2421,13 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
 			    (sbi->s_mount_state & EXT4_VALID_FS))
 				es->s_state = cpu_to_le16(sbi->s_mount_state);
 
+			/*
+			 * We have to unlock super so that we can wait for
+			 * transactions.
+			 */
+			unlock_super(sb);
 			ext4_mark_recovery_complete(sb, es);
+			lock_super(sb);
 		} else {
 			__le32 ret;
 			if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb,
@@ -2481,19 +2500,19 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
 	struct super_block *sb = dentry->d_sb;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_super_block *es = sbi->s_es;
-	ext4_fsblk_t overhead;
-	int i;
 	u64 fsid;
 
-	if (test_opt (sb, MINIX_DF))
-		overhead = 0;
-	else {
-		unsigned long ngroups;
-		ngroups = EXT4_SB(sb)->s_groups_count;
+	if (test_opt(sb, MINIX_DF)) {
+		sbi->s_overhead_last = 0;
+	} else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) {
+		unsigned long ngroups = sbi->s_groups_count, i;
+		ext4_fsblk_t overhead = 0;
 		smp_rmb();
 
 		/*
-		 * Compute the overhead (FS structures)
+		 * Compute the overhead (FS structures).  This is constant
+		 * for a given filesystem unless the number of block groups
+		 * changes so we cache the previous value until it does.
 		 */
 
 		/*
@@ -2517,18 +2536,23 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
 		 * Every block group has an inode bitmap, a block
 		 * bitmap, and an inode table.
 		 */
-		overhead += (ngroups * (2 + EXT4_SB(sb)->s_itb_per_group));
+		overhead += ngroups * (2 + sbi->s_itb_per_group);
+		sbi->s_overhead_last = overhead;
+		smp_wmb();
+		sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count);
 	}
 
 	buf->f_type = EXT4_SUPER_MAGIC;
 	buf->f_bsize = sb->s_blocksize;
-	buf->f_blocks = ext4_blocks_count(es) - overhead;
+	buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
 	buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter);
+	es->s_free_blocks_count = cpu_to_le32(buf->f_bfree);
 	buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
 	if (buf->f_bfree < ext4_r_blocks_count(es))
 		buf->f_bavail = 0;
 	buf->f_files = le32_to_cpu(es->s_inodes_count);
 	buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter);
+	es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
 	buf->f_namelen = EXT4_NAME_LEN;
 	fsid = le64_to_cpup((void *)es->s_uuid) ^
 	       le64_to_cpup((void *)es->s_uuid + sizeof(u64));
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index ccf161dffb6..72cbcd61bd9 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -313,7 +313,7 @@ int fat_search_long(struct inode *inode, const unsigned char *name,
 	wchar_t bufuname[14];
 	unsigned char xlate_len, nr_slots;
 	wchar_t *unicode = NULL;
-	unsigned char work[8], bufname[260];	/* 256 + 4 */
+	unsigned char work[MSDOS_NAME], bufname[260];	/* 256 + 4 */
 	int uni_xlate = sbi->options.unicode_xlate;
 	int utf8 = sbi->options.utf8;
 	int anycase = (sbi->options.name_check != 's');
@@ -351,7 +351,8 @@ parse_record:
 		if (work[0] == 0x05)
 			work[0] = 0xE5;
 		for (i = 0, j = 0, last_u = 0; i < 8;) {
-			if (!work[i]) break;
+			if (!work[i])
+				break;
 			chl = fat_shortname2uni(nls_disk, &work[i], 8 - i,
 						&bufuname[j++], opt_shortname,
 						de->lcase & CASE_LOWER_BASE);
@@ -365,13 +366,15 @@ parse_record:
 		}
 		j = last_u;
 		fat_short2uni(nls_disk, ".", 1, &bufuname[j++]);
-		for (i = 0; i < 3;) {
-			if (!de->ext[i]) break;
-			chl = fat_shortname2uni(nls_disk, &de->ext[i], 3 - i,
+		for (i = 8; i < MSDOS_NAME;) {
+			if (!work[i])
+				break;
+			chl = fat_shortname2uni(nls_disk, &work[i],
+						MSDOS_NAME - i,
 						&bufuname[j++], opt_shortname,
 						de->lcase & CASE_LOWER_EXT);
 			if (chl <= 1) {
-				if (de->ext[i] != ' ')
+				if (work[i] != ' ')
 					last_u = j;
 			} else {
 				last_u = j;
@@ -445,7 +448,7 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent,
 	int fill_len;
 	wchar_t bufuname[14];
 	wchar_t *unicode = NULL;
-	unsigned char c, work[8], bufname[56], *ptname = bufname;
+	unsigned char c, work[MSDOS_NAME], bufname[56], *ptname = bufname;
 	unsigned long lpos, dummy, *furrfu = &lpos;
 	int uni_xlate = sbi->options.unicode_xlate;
 	int isvfat = sbi->options.isvfat;
@@ -527,7 +530,8 @@ parse_record:
 	if (work[0] == 0x05)
 		work[0] = 0xE5;
 	for (i = 0, j = 0, last = 0, last_u = 0; i < 8;) {
-		if (!(c = work[i])) break;
+		if (!(c = work[i]))
+			break;
 		chl = fat_shortname2uni(nls_disk, &work[i], 8 - i,
 					&bufuname[j++], opt_shortname,
 					de->lcase & CASE_LOWER_BASE);
@@ -549,9 +553,10 @@ parse_record:
 	j = last_u;
 	fat_short2uni(nls_disk, ".", 1, &bufuname[j++]);
 	ptname[i++] = '.';
-	for (i2 = 0; i2 < 3;) {
-		if (!(c = de->ext[i2])) break;
-		chl = fat_shortname2uni(nls_disk, &de->ext[i2], 3 - i2,
+	for (i2 = 8; i2 < MSDOS_NAME;) {
+		if (!(c = work[i2]))
+			break;
+		chl = fat_shortname2uni(nls_disk, &work[i2], MSDOS_NAME - i2,
 					&bufuname[j++], opt_shortname,
 					de->lcase & CASE_LOWER_EXT);
 		if (chl <= 1) {
@@ -563,8 +568,8 @@ parse_record:
 			}
 		} else {
 			last_u = j;
-			for (chi = 0; chi < chl && i2 < 3; chi++) {
-				ptname[i++] = de->ext[i2++];
+			for (chi = 0; chi < chl && i2 < MSDOS_NAME; chi++) {
+				ptname[i++] = work[i2++];
 				last = i;
 			}
 		}
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index ab171ea8e86..2c1b73fb82a 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -17,6 +17,8 @@ struct fatent_operations {
 	int (*ent_next)(struct fat_entry *);
 };
 
+static DEFINE_SPINLOCK(fat12_entry_lock);
+
 static void fat12_ent_blocknr(struct super_block *sb, int entry,
 			      int *offset, sector_t *blocknr)
 {
@@ -116,10 +118,13 @@ static int fat12_ent_get(struct fat_entry *fatent)
 	u8 **ent12_p = fatent->u.ent12_p;
 	int next;
 
+	spin_lock(&fat12_entry_lock);
 	if (fatent->entry & 1)
 		next = (*ent12_p[0] >> 4) | (*ent12_p[1] << 4);
 	else
 		next = (*ent12_p[1] << 8) | *ent12_p[0];
+	spin_unlock(&fat12_entry_lock);
+
 	next &= 0x0fff;
 	if (next >= BAD_FAT12)
 		next = FAT_ENT_EOF;
@@ -151,6 +156,7 @@ static void fat12_ent_put(struct fat_entry *fatent, int new)
 	if (new == FAT_ENT_EOF)
 		new = EOF_FAT12;
 
+	spin_lock(&fat12_entry_lock);
 	if (fatent->entry & 1) {
 		*ent12_p[0] = (new << 4) | (*ent12_p[0] & 0x0f);
 		*ent12_p[1] = new >> 4;
@@ -158,6 +164,7 @@ static void fat12_ent_put(struct fat_entry *fatent, int new)
 		*ent12_p[0] = new & 0xff;
 		*ent12_p[1] = (*ent12_p[1] & 0xf0) | (new >> 8);
 	}
+	spin_unlock(&fat12_entry_lock);
 
 	mark_buffer_dirty(fatent->bhs[0]);
 	if (fatent->nr_bhs == 2)
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 55d3c7461c5..69a83b59dce 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -134,7 +134,7 @@ const struct file_operations fat_file_operations = {
 	.release	= fat_file_release,
 	.ioctl		= fat_generic_ioctl,
 	.fsync		= file_fsync,
-	.sendfile	= generic_file_sendfile,
+	.splice_read	= generic_file_splice_read,
 };
 
 static int fat_cont_expand(struct inode *inode, loff_t size)
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 479722d8966..cfaf5877d98 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -354,8 +354,7 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
 	} else { /* not a directory */
 		inode->i_generation |= 1;
 		inode->i_mode = MSDOS_MKMODE(de->attr,
-		    ((sbi->options.showexec &&
-			!is_exec(de->ext))
+		    ((sbi->options.showexec && !is_exec(de->name + 8))
 			? S_IRUGO|S_IWUGO : S_IRWXUGO)
 		    & ~sbi->options.fs_fmask) | S_IFREG;
 		MSDOS_I(inode)->i_start = le16_to_cpu(de->start);
diff --git a/fs/freevxfs/vxfs_dir.h b/fs/freevxfs/vxfs_dir.h
index 8a4dfef1dda..3c96d6e6397 100644
--- a/fs/freevxfs/vxfs_dir.h
+++ b/fs/freevxfs/vxfs_dir.h
@@ -80,7 +80,7 @@ struct vxfs_direct {
  *	a d_name with size len.
  */
 #define VXFS_DIRPAD		4
-#define VXFS_NAMEMIN		((int)((struct vxfs_direct *)0)->d_name)
+#define VXFS_NAMEMIN		offsetof(struct vxfs_direct, d_name)
 #define VXFS_DIRROUND(len)	((VXFS_DIRPAD + (len) - 1) & ~(VXFS_DIRPAD -1))
 #define VXFS_DIRLEN(len)	(VXFS_DIRROUND(VXFS_NAMEMIN + (len)))
 
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index adf7995232b..f79de7c8cdf 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -802,7 +802,7 @@ static const struct file_operations fuse_file_operations = {
 	.release	= fuse_release,
 	.fsync		= fuse_fsync,
 	.lock		= fuse_file_lock,
-	.sendfile	= generic_file_sendfile,
+	.splice_read	= generic_file_splice_read,
 };
 
 static const struct file_operations fuse_direct_io_file_operations = {
@@ -814,7 +814,7 @@ static const struct file_operations fuse_direct_io_file_operations = {
 	.release	= fuse_release,
 	.fsync		= fuse_fsync,
 	.lock		= fuse_file_lock,
-	/* no mmap and sendfile */
+	/* no mmap and splice_read */
 };
 
 static const struct address_space_operations fuse_file_aops  = {
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 9804c0cdcb4..cc5efc13496 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -655,10 +655,9 @@ static int fuse_get_sb_blk(struct file_system_type *fs_type,
 static struct file_system_type fuseblk_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "fuseblk",
-	.fs_flags	= FS_HAS_SUBTYPE,
 	.get_sb		= fuse_get_sb_blk,
 	.kill_sb	= kill_block_super,
-	.fs_flags	= FS_REQUIRES_DEV,
+	.fs_flags	= FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
 };
 
 static inline int register_fuseblk(void)
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index e3f1ada643a..04ad0caebed 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,7 +1,7 @@
 obj-$(CONFIG_GFS2_FS) += gfs2.o
 gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \
 	glops.o inode.o lm.o log.o lops.o locking.o main.o meta_io.o \
-	mount.o ondisk.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
+	mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
 	ops_fstype.o ops_inode.o ops_super.o ops_vm.o quota.o \
 	recovery.o rgrp.o super.o sys.o trans.o util.o
 
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index c53a5d2d059..cd805a66880 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -718,7 +718,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 	for (x = 0; x < rlist.rl_rgrps; x++) {
 		struct gfs2_rgrpd *rgd;
 		rgd = rlist.rl_ghs[x].gh_gl->gl_object;
-		rg_blocks += rgd->rd_ri.ri_length;
+		rg_blocks += rgd->rd_length;
 	}
 
 	error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
@@ -772,7 +772,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 			gfs2_free_data(ip, bstart, blen);
 	}
 
-	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 
 	gfs2_dinode_out(ip, dibh->b_data);
 
@@ -824,7 +824,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
 		goto out_gunlock_q;
 
 	error = gfs2_trans_begin(sdp,
-			sdp->sd_max_height + al->al_rgd->rd_ri.ri_length +
+			sdp->sd_max_height + al->al_rgd->rd_length +
 			RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0);
 	if (error)
 		goto out_ipres;
@@ -847,7 +847,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
 	}
 
 	ip->i_di.di_size = size;
-	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (error)
@@ -885,7 +885,6 @@ static int gfs2_block_truncate_page(struct address_space *mapping)
 	unsigned blocksize, iblock, length, pos;
 	struct buffer_head *bh;
 	struct page *page;
-	void *kaddr;
 	int err;
 
 	page = grab_cache_page(mapping, index);
@@ -928,15 +927,13 @@ static int gfs2_block_truncate_page(struct address_space *mapping)
 		/* Uhhuh. Read error. Complain and punt. */
 		if (!buffer_uptodate(bh))
 			goto unlock;
+		err = 0;
 	}
 
 	if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
 		gfs2_trans_add_bh(ip->i_gl, bh, 0);
 
-	kaddr = kmap_atomic(page, KM_USER0);
-	memset(kaddr + offset, 0, length);
-	flush_dcache_page(page);
-	kunmap_atomic(kaddr, KM_USER0);
+	zero_user_page(page, offset, length, KM_USER0);
 
 unlock:
 	unlock_page(page);
@@ -962,7 +959,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
 
 	if (gfs2_is_stuffed(ip)) {
 		ip->i_di.di_size = size;
-		ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+		ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 		gfs2_dinode_out(ip, dibh->b_data);
 		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size);
@@ -974,7 +971,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
 
 		if (!error) {
 			ip->i_di.di_size = size;
-			ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+			ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 			ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
 			gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 			gfs2_dinode_out(ip, dibh->b_data);
@@ -1044,10 +1041,10 @@ static int trunc_end(struct gfs2_inode *ip)
 		ip->i_di.di_height = 0;
 		ip->i_di.di_goal_meta =
 			ip->i_di.di_goal_data =
-			ip->i_num.no_addr;
+			ip->i_no_addr;
 		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
 	}
-	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 	ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c
index 683cb5bda87..3548d9f31e0 100644
--- a/fs/gfs2/daemon.c
+++ b/fs/gfs2/daemon.c
@@ -16,6 +16,7 @@
 #include <linux/delay.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/lm_interface.h>
+#include <linux/freezer.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -49,6 +50,8 @@ int gfs2_scand(void *data)
 	while (!kthread_should_stop()) {
 		gfs2_scand_internal(sdp);
 		t = gfs2_tune_get(sdp, gt_scand_secs) * HZ;
+		if (freezing(current))
+			refrigerator();
 		schedule_timeout_interruptible(t);
 	}
 
@@ -74,6 +77,8 @@ int gfs2_glockd(void *data)
 		wait_event_interruptible(sdp->sd_reclaim_wq,
 					 (atomic_read(&sdp->sd_reclaim_count) ||
 					 kthread_should_stop()));
+		if (freezing(current))
+			refrigerator();
 	}
 
 	return 0;
@@ -93,6 +98,8 @@ int gfs2_recoverd(void *data)
 	while (!kthread_should_stop()) {
 		gfs2_check_journals(sdp);
 		t = gfs2_tune_get(sdp,  gt_recoverd_secs) * HZ;
+		if (freezing(current))
+			refrigerator();
 		schedule_timeout_interruptible(t);
 	}
 
@@ -141,6 +148,8 @@ int gfs2_logd(void *data)
 		}
 
 		t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
+		if (freezing(current))
+			refrigerator();
 		schedule_timeout_interruptible(t);
 	}
 
@@ -191,6 +200,8 @@ int gfs2_quotad(void *data)
 		gfs2_quota_scan(sdp);
 
 		t = gfs2_tune_get(sdp, gt_quotad_secs) * HZ;
+		if (freezing(current))
+			refrigerator();
 		schedule_timeout_interruptible(t);
 	}
 
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index a96fa07b3f3..2beb2f401aa 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -130,7 +130,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
 	memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
 	if (ip->i_di.di_size < offset + size)
 		ip->i_di.di_size = offset + size;
-	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 	gfs2_dinode_out(ip, dibh->b_data);
 
 	brelse(dibh);
@@ -228,7 +228,7 @@ out:
 
 	if (ip->i_di.di_size < offset + copied)
 		ip->i_di.di_size = offset + copied;
-	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	gfs2_dinode_out(ip, dibh->b_data);
@@ -1456,7 +1456,7 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
 		if (dip->i_di.di_entries != g.offset) {
 			fs_warn(sdp, "Number of entries corrupt in dir %llu, "
 				"ip->i_di.di_entries (%u) != g.offset (%u)\n",
-				(unsigned long long)dip->i_num.no_addr,
+				(unsigned long long)dip->i_no_addr,
 				dip->i_di.di_entries,
 				g.offset);
 			error = -EIO;
@@ -1488,24 +1488,55 @@ out:
  * Returns: errno
  */
 
-int gfs2_dir_search(struct inode *dir, const struct qstr *name,
-		    struct gfs2_inum_host *inum, unsigned int *type)
+struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name)
 {
 	struct buffer_head *bh;
 	struct gfs2_dirent *dent;
+	struct inode *inode;
+
+	dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
+	if (dent) {
+		if (IS_ERR(dent))
+			return ERR_PTR(PTR_ERR(dent));
+		inode = gfs2_inode_lookup(dir->i_sb, 
+				be16_to_cpu(dent->de_type),
+				be64_to_cpu(dent->de_inum.no_addr),
+				be64_to_cpu(dent->de_inum.no_formal_ino));
+		brelse(bh);
+		return inode;
+	}
+	return ERR_PTR(-ENOENT);
+}
+
+int gfs2_dir_check(struct inode *dir, const struct qstr *name,
+		   const struct gfs2_inode *ip)
+{
+	struct buffer_head *bh;
+	struct gfs2_dirent *dent;
+	int ret = -ENOENT;
 
 	dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
 	if (dent) {
 		if (IS_ERR(dent))
 			return PTR_ERR(dent);
-		if (inum)
-			gfs2_inum_in(inum, (char *)&dent->de_inum);
-		if (type)
-			*type = be16_to_cpu(dent->de_type);
+		if (ip) {
+			if (be64_to_cpu(dent->de_inum.no_addr) != ip->i_no_addr)
+				goto out;
+			if (be64_to_cpu(dent->de_inum.no_formal_ino) !=
+			    ip->i_no_formal_ino)
+				goto out;
+			if (unlikely(IF2DT(ip->i_inode.i_mode) !=
+			    be16_to_cpu(dent->de_type))) {
+				gfs2_consist_inode(GFS2_I(dir));
+				ret = -EIO;
+				goto out;
+			}
+		}
+		ret = 0;
+out:
 		brelse(bh);
-		return 0;
 	}
-	return -ENOENT;
+	return ret;
 }
 
 static int dir_new_leaf(struct inode *inode, const struct qstr *name)
@@ -1565,7 +1596,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
  */
 
 int gfs2_dir_add(struct inode *inode, const struct qstr *name,
-		 const struct gfs2_inum_host *inum, unsigned type)
+		 const struct gfs2_inode *nip, unsigned type)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct buffer_head *bh;
@@ -1580,7 +1611,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
 			if (IS_ERR(dent))
 				return PTR_ERR(dent);
 			dent = gfs2_init_dirent(inode, dent, name, bh);
-			gfs2_inum_out(inum, (char *)&dent->de_inum);
+			gfs2_inum_out(nip, dent);
 			dent->de_type = cpu_to_be16(type);
 			if (ip->i_di.di_flags & GFS2_DIF_EXHASH) {
 				leaf = (struct gfs2_leaf *)bh->b_data;
@@ -1592,7 +1623,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
 				break;
 			gfs2_trans_add_bh(ip->i_gl, bh, 1);
 			ip->i_di.di_entries++;
-			ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+			ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 			gfs2_dinode_out(ip, bh->b_data);
 			brelse(bh);
 			error = 0;
@@ -1678,7 +1709,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
 		gfs2_consist_inode(dip);
 	gfs2_trans_add_bh(dip->i_gl, bh, 1);
 	dip->i_di.di_entries--;
-	dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC;
+	dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
 	gfs2_dinode_out(dip, bh->b_data);
 	brelse(bh);
 	mark_inode_dirty(&dip->i_inode);
@@ -1700,7 +1731,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
  */
 
 int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
-		   struct gfs2_inum_host *inum, unsigned int new_type)
+		   const struct gfs2_inode *nip, unsigned int new_type)
 {
 	struct buffer_head *bh;
 	struct gfs2_dirent *dent;
@@ -1715,7 +1746,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
 		return PTR_ERR(dent);
 
 	gfs2_trans_add_bh(dip->i_gl, bh, 1);
-	gfs2_inum_out(inum, (char *)&dent->de_inum);
+	gfs2_inum_out(nip, dent);
 	dent->de_type = cpu_to_be16(new_type);
 
 	if (dip->i_di.di_flags & GFS2_DIF_EXHASH) {
@@ -1726,7 +1757,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
 		gfs2_trans_add_bh(dip->i_gl, bh, 1);
 	}
 
-	dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC;
+	dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
 	gfs2_dinode_out(dip, bh->b_data);
 	brelse(bh);
 	return 0;
@@ -1867,7 +1898,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
 	for (x = 0; x < rlist.rl_rgrps; x++) {
 		struct gfs2_rgrpd *rgd;
 		rgd = rlist.rl_ghs[x].gh_gl->gl_object;
-		rg_blocks += rgd->rd_ri.ri_length;
+		rg_blocks += rgd->rd_length;
 	}
 
 	error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index 48fe89046bb..8a468cac932 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -16,15 +16,16 @@ struct inode;
 struct gfs2_inode;
 struct gfs2_inum;
 
-int gfs2_dir_search(struct inode *dir, const struct qstr *filename,
-		    struct gfs2_inum_host *inum, unsigned int *type);
+struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *filename);
+int gfs2_dir_check(struct inode *dir, const struct qstr *filename,
+		   const struct gfs2_inode *ip);
 int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
-		 const struct gfs2_inum_host *inum, unsigned int type);
+		 const struct gfs2_inode *ip, unsigned int type);
 int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename);
 int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
 		  filldir_t filldir);
 int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
-		   struct gfs2_inum_host *new_inum, unsigned int new_type);
+		   const struct gfs2_inode *nip, unsigned int new_type);
 
 int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip);
 
diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c
index c1f44009853..1ab3e9d7388 100644
--- a/fs/gfs2/eaops.c
+++ b/fs/gfs2/eaops.c
@@ -11,6 +11,7 @@
 #include <linux/spinlock.h>
 #include <linux/completion.h>
 #include <linux/buffer_head.h>
+#include <linux/capability.h>
 #include <linux/xattr.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/lm_interface.h>
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index 5b83ca6acab..2a7435b5c4d 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -254,7 +254,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
 	if (error)
 		return error;
 
-	error = gfs2_trans_begin(sdp, rgd->rd_ri.ri_length + RES_DINODE +
+	error = gfs2_trans_begin(sdp, rgd->rd_length + RES_DINODE +
 				 RES_EATTR + RES_STATFS + RES_QUOTA, blks);
 	if (error)
 		goto out_gunlock;
@@ -300,7 +300,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (!error) {
-		ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+		ip->i_inode.i_ctime = CURRENT_TIME;
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
@@ -700,7 +700,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 		goto out_gunlock_q;
 
 	error = gfs2_trans_begin(GFS2_SB(&ip->i_inode),
-				 blks + al->al_rgd->rd_ri.ri_length +
+				 blks + al->al_rgd->rd_length +
 				 RES_DINODE + RES_STATFS + RES_QUOTA, 0);
 	if (error)
 		goto out_ipres;
@@ -717,7 +717,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 					    (er->er_mode & S_IFMT));
 			ip->i_inode.i_mode = er->er_mode;
 		}
-		ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+		ip->i_inode.i_ctime = CURRENT_TIME;
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
@@ -852,7 +852,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
 			(ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT));
 		ip->i_inode.i_mode = er->er_mode;
 	}
-	ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+	ip->i_inode.i_ctime = CURRENT_TIME;
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	gfs2_dinode_out(ip, dibh->b_data);
 	brelse(dibh);
@@ -1133,7 +1133,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (!error) {
-		ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+		ip->i_inode.i_ctime = CURRENT_TIME;
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
@@ -1352,7 +1352,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
 	for (x = 0; x < rlist.rl_rgrps; x++) {
 		struct gfs2_rgrpd *rgd;
 		rgd = rlist.rl_ghs[x].gh_gl->gl_object;
-		rg_blocks += rgd->rd_ri.ri_length;
+		rg_blocks += rgd->rd_length;
 	}
 
 	error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 1815429a297..3f0974e1afe 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -422,11 +422,11 @@ void gfs2_holder_uninit(struct gfs2_holder *gh)
 static void gfs2_holder_wake(struct gfs2_holder *gh)
 {
 	clear_bit(HIF_WAIT, &gh->gh_iflags);
-	smp_mb();
+	smp_mb__after_clear_bit();
 	wake_up_bit(&gh->gh_iflags, HIF_WAIT);
 }
 
-static int holder_wait(void *word)
+static int just_schedule(void *word)
 {
         schedule();
         return 0;
@@ -435,7 +435,20 @@ static int holder_wait(void *word)
 static void wait_on_holder(struct gfs2_holder *gh)
 {
 	might_sleep();
-	wait_on_bit(&gh->gh_iflags, HIF_WAIT, holder_wait, TASK_UNINTERRUPTIBLE);
+	wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE);
+}
+
+static void gfs2_demote_wake(struct gfs2_glock *gl)
+{
+        clear_bit(GLF_DEMOTE, &gl->gl_flags);
+        smp_mb__after_clear_bit();
+        wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
+}
+
+static void wait_on_demote(struct gfs2_glock *gl)
+{
+	might_sleep();
+	wait_on_bit(&gl->gl_flags, GLF_DEMOTE, just_schedule, TASK_UNINTERRUPTIBLE);
 }
 
 /**
@@ -528,7 +541,7 @@ static int rq_demote(struct gfs2_glock *gl)
 
 	if (gl->gl_state == gl->gl_demote_state ||
 	    gl->gl_state == LM_ST_UNLOCKED) {
-		clear_bit(GLF_DEMOTE, &gl->gl_flags);
+		gfs2_demote_wake(gl);
 		return 0;
 	}
 	set_bit(GLF_LOCK, &gl->gl_flags);
@@ -666,12 +679,22 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl)
  * practise: LM_ST_SHARED and LM_ST_UNLOCKED
  */
 
-static void handle_callback(struct gfs2_glock *gl, unsigned int state)
+static void handle_callback(struct gfs2_glock *gl, unsigned int state, int remote)
 {
 	spin_lock(&gl->gl_spin);
 	if (test_and_set_bit(GLF_DEMOTE, &gl->gl_flags) == 0) {
 		gl->gl_demote_state = state;
 		gl->gl_demote_time = jiffies;
+		if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN &&
+		    gl->gl_object) {
+			struct inode *inode = igrab(gl->gl_object);
+			spin_unlock(&gl->gl_spin);
+			if (inode) {
+				d_prune_aliases(inode);
+				iput(inode);
+			}
+			return;
+		}
 	} else if (gl->gl_demote_state != LM_ST_UNLOCKED) {
 		gl->gl_demote_state = state;
 	}
@@ -740,7 +763,7 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
 		if (ret & LM_OUT_CANCELED)
 			op_done = 0;
 		else
-			clear_bit(GLF_DEMOTE, &gl->gl_flags);
+			gfs2_demote_wake(gl);
 	} else {
 		spin_lock(&gl->gl_spin);
 		list_del_init(&gh->gh_list);
@@ -848,7 +871,7 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
 	gfs2_assert_warn(sdp, !ret);
 
 	state_change(gl, LM_ST_UNLOCKED);
-	clear_bit(GLF_DEMOTE, &gl->gl_flags);
+	gfs2_demote_wake(gl);
 
 	if (glops->go_inval)
 		glops->go_inval(gl, DIO_METADATA);
@@ -1174,7 +1197,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
 	const struct gfs2_glock_operations *glops = gl->gl_ops;
 
 	if (gh->gh_flags & GL_NOCACHE)
-		handle_callback(gl, LM_ST_UNLOCKED);
+		handle_callback(gl, LM_ST_UNLOCKED, 0);
 
 	gfs2_glmutex_lock(gl);
 
@@ -1196,6 +1219,13 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
 	spin_unlock(&gl->gl_spin);
 }
 
+void gfs2_glock_dq_wait(struct gfs2_holder *gh)
+{
+	struct gfs2_glock *gl = gh->gh_gl;
+	gfs2_glock_dq(gh);
+	wait_on_demote(gl);
+}
+
 /**
  * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it
  * @gh: the holder structure
@@ -1297,10 +1327,6 @@ static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
  * @num_gh: the number of structures
  * @ghs: an array of struct gfs2_holder structures
  *
- * Figure out how big an impact this function has.  Either:
- * 1) Replace this code with code that calls gfs2_glock_prefetch()
- * 2) Forget async stuff and just call nq_m_sync()
- * 3) Leave it like it is
  *
  * Returns: 0 on success (all glocks acquired),
  *          errno on failure (no glocks acquired)
@@ -1308,62 +1334,28 @@ static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
 
 int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
 {
-	int *e;
-	unsigned int x;
-	int borked = 0, serious = 0;
+	struct gfs2_holder *tmp[4];
+	struct gfs2_holder **pph = tmp;
 	int error = 0;
 
-	if (!num_gh)
+	switch(num_gh) {
+	case 0:
 		return 0;
-
-	if (num_gh == 1) {
+	case 1:
 		ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
 		return gfs2_glock_nq(ghs);
-	}
-
-	e = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL);
-	if (!e)
-		return -ENOMEM;
-
-	for (x = 0; x < num_gh; x++) {
-		ghs[x].gh_flags |= LM_FLAG_TRY | GL_ASYNC;
-		error = gfs2_glock_nq(&ghs[x]);
-		if (error) {
-			borked = 1;
-			serious = error;
-			num_gh = x;
+	default:
+		if (num_gh <= 4)
 			break;
-		}
-	}
-
-	for (x = 0; x < num_gh; x++) {
-		error = e[x] = glock_wait_internal(&ghs[x]);
-		if (error) {
-			borked = 1;
-			if (error != GLR_TRYFAILED && error != GLR_CANCELED)
-				serious = error;
-		}
+		pph = kmalloc(num_gh * sizeof(struct gfs2_holder *), GFP_NOFS);
+		if (!pph)
+			return -ENOMEM;
 	}
 
-	if (!borked) {
-		kfree(e);
-		return 0;
-	}
-
-	for (x = 0; x < num_gh; x++)
-		if (!e[x])
-			gfs2_glock_dq(&ghs[x]);
-
-	if (serious)
-		error = serious;
-	else {
-		for (x = 0; x < num_gh; x++)
-			gfs2_holder_reinit(ghs[x].gh_state, ghs[x].gh_flags,
-					  &ghs[x]);
-		error = nq_m_sync(num_gh, ghs, (struct gfs2_holder **)e);
-	}
+	error = nq_m_sync(num_gh, ghs, pph);
 
-	kfree(e);
+	if (pph != tmp)
+		kfree(pph);
 
 	return error;
 }
@@ -1456,7 +1448,7 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
 	if (!gl)
 		return;
 
-	handle_callback(gl, state);
+	handle_callback(gl, state, 1);
 
 	spin_lock(&gl->gl_spin);
 	run_queue(gl);
@@ -1596,7 +1588,7 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
 	if (gfs2_glmutex_trylock(gl)) {
 		if (list_empty(&gl->gl_holders) &&
 		    gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
-			handle_callback(gl, LM_ST_UNLOCKED);
+			handle_callback(gl, LM_ST_UNLOCKED, 0);
 		gfs2_glmutex_unlock(gl);
 	}
 
@@ -1709,7 +1701,7 @@ static void clear_glock(struct gfs2_glock *gl)
 	if (gfs2_glmutex_trylock(gl)) {
 		if (list_empty(&gl->gl_holders) &&
 		    gl->gl_state != LM_ST_UNLOCKED)
-			handle_callback(gl, LM_ST_UNLOCKED);
+			handle_callback(gl, LM_ST_UNLOCKED, 0);
 		gfs2_glmutex_unlock(gl);
 	}
 }
@@ -1823,7 +1815,8 @@ static int dump_inode(struct glock_iter *gi, struct gfs2_inode *ip)
 
 	print_dbg(gi, "  Inode:\n");
 	print_dbg(gi, "    num = %llu/%llu\n",
-		    ip->i_num.no_formal_ino, ip->i_num.no_addr);
+		  (unsigned long long)ip->i_no_formal_ino,
+		  (unsigned long long)ip->i_no_addr);
 	print_dbg(gi, "    type = %u\n", IF2DT(ip->i_inode.i_mode));
 	print_dbg(gi, "    i_flags =");
 	for (x = 0; x < 32; x++)
@@ -1909,8 +1902,8 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl)
 	}
 	if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
 		print_dbg(gi, "  Demotion req to state %u (%llu uS ago)\n",
-			  gl->gl_demote_state,
-			  (u64)(jiffies - gl->gl_demote_time)*(1000000/HZ));
+			  gl->gl_demote_state, (unsigned long long)
+			  (jiffies - gl->gl_demote_time)*(1000000/HZ));
 	}
 	if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) {
 		if (!test_bit(GLF_LOCK, &gl->gl_flags) &&
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index b3e152db70c..7721ca3fff9 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -87,6 +87,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh);
 int gfs2_glock_poll(struct gfs2_holder *gh);
 int gfs2_glock_wait(struct gfs2_holder *gh);
 void gfs2_glock_dq(struct gfs2_holder *gh);
+void gfs2_glock_dq_wait(struct gfs2_holder *gh);
 
 void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
 int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 7b82657a991..777ca46010e 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -156,9 +156,9 @@ static void inode_go_sync(struct gfs2_glock *gl)
 		ip = NULL;
 
 	if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
-		gfs2_log_flush(gl->gl_sbd, gl);
 		if (ip)
 			filemap_fdatawrite(ip->i_inode.i_mapping);
+		gfs2_log_flush(gl->gl_sbd, gl);
 		gfs2_meta_sync(gl);
 		if (ip) {
 			struct address_space *mapping = ip->i_inode.i_mapping;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index d995441373a..170ba93829c 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -28,6 +28,14 @@ struct gfs2_sbd;
 
 typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret);
 
+struct gfs2_log_header_host {
+	u64 lh_sequence;	/* Sequence number of this transaction */
+	u32 lh_flags;		/* GFS2_LOG_HEAD_... */
+	u32 lh_tail;		/* Block number of log tail */
+	u32 lh_blkno;
+	u32 lh_hash;
+};
+
 /*
  * Structure of operations that are associated with each
  * type of element in the log.
@@ -60,12 +68,23 @@ struct gfs2_bitmap {
 	u32 bi_len;
 };
 
+struct gfs2_rgrp_host {
+	u32 rg_flags;
+	u32 rg_free;
+	u32 rg_dinodes;
+	u64 rg_igeneration;
+};
+
 struct gfs2_rgrpd {
 	struct list_head rd_list;	/* Link with superblock */
 	struct list_head rd_list_mru;
 	struct list_head rd_recent;	/* Recently used rgrps */
 	struct gfs2_glock *rd_gl;	/* Glock for this rgrp */
-	struct gfs2_rindex_host rd_ri;
+	u64 rd_addr;			/* grp block disk address */
+	u64 rd_data0;			/* first data location */
+	u32 rd_length;			/* length of rgrp header in fs blocks */
+	u32 rd_data;			/* num of data blocks in rgrp */
+	u32 rd_bitbytes;		/* number of bytes in data bitmaps */
 	struct gfs2_rgrp_host rd_rg;
 	u64 rd_rg_vn;
 	struct gfs2_bitmap *rd_bits;
@@ -76,6 +95,8 @@ struct gfs2_rgrpd {
 	u32 rd_last_alloc_data;
 	u32 rd_last_alloc_meta;
 	struct gfs2_sbd *rd_sbd;
+	unsigned long rd_flags;
+#define GFS2_RDF_CHECK        0x0001          /* Need to check for unlinked inodes */
 };
 
 enum gfs2_state_bits {
@@ -211,10 +232,24 @@ enum {
 	GIF_SW_PAGED		= 3,
 };
 
+struct gfs2_dinode_host {
+	u64 di_size;		/* number of bytes in file */
+	u64 di_blocks;		/* number of blocks in file */
+	u64 di_goal_meta;	/* rgrp to alloc from next */
+	u64 di_goal_data;	/* data block goal */
+	u64 di_generation;	/* generation number for NFS */
+	u32 di_flags;		/* GFS2_DIF_... */
+	u16 di_height;		/* height of metadata */
+	/* These only apply to directories  */
+	u16 di_depth;		/* Number of bits in the table */
+	u32 di_entries;		/* The number of entries in the directory */
+	u64 di_eattr;		/* extended attribute block number */
+};
+
 struct gfs2_inode {
 	struct inode i_inode;
-	struct gfs2_inum_host i_num;
-
+	u64 i_no_addr;
+	u64 i_no_formal_ino;
 	unsigned long i_flags;		/* GIF_... */
 
 	struct gfs2_dinode_host i_di; /* To be replaced by ref to block */
@@ -275,14 +310,6 @@ enum {
 	QDF_LOCKED		= 2,
 };
 
-struct gfs2_quota_lvb {
-        __be32 qb_magic;
-        u32 __pad;
-        __be64 qb_limit;      /* Hard limit of # blocks to alloc */
-        __be64 qb_warn;       /* Warn user when alloc is above this # */
-        __be64 qb_value;       /* Current # blocks allocated */
-};
-
 struct gfs2_quota_data {
 	struct list_head qd_list;
 	unsigned int qd_count;
@@ -327,7 +354,9 @@ struct gfs2_trans {
 
 	unsigned int tr_num_buf;
 	unsigned int tr_num_buf_new;
+	unsigned int tr_num_databuf_new;
 	unsigned int tr_num_buf_rm;
+	unsigned int tr_num_databuf_rm;
 	struct list_head tr_list_buf;
 
 	unsigned int tr_num_revoke;
@@ -354,6 +383,12 @@ struct gfs2_jdesc {
 	unsigned int jd_blocks;
 };
 
+struct gfs2_statfs_change_host {
+	s64 sc_total;
+	s64 sc_free;
+	s64 sc_dinodes;
+};
+
 #define GFS2_GLOCKD_DEFAULT	1
 #define GFS2_GLOCKD_MAX		16
 
@@ -426,6 +461,28 @@ enum {
 
 #define GFS2_FSNAME_LEN		256
 
+struct gfs2_inum_host {
+	u64 no_formal_ino;
+	u64 no_addr;
+};
+
+struct gfs2_sb_host {
+	u32 sb_magic;
+	u32 sb_type;
+	u32 sb_format;
+
+	u32 sb_fs_format;
+	u32 sb_multihost_format;
+	u32 sb_bsize;
+	u32 sb_bsize_shift;
+
+	struct gfs2_inum_host sb_master_dir;
+	struct gfs2_inum_host sb_root_dir;
+
+	char sb_lockproto[GFS2_LOCKNAME_LEN];
+	char sb_locktable[GFS2_LOCKNAME_LEN];
+};
+
 struct gfs2_sbd {
 	struct super_block *sd_vfs;
 	struct super_block *sd_vfs_meta;
@@ -544,6 +601,7 @@ struct gfs2_sbd {
 
 	unsigned int sd_log_blks_reserved;
 	unsigned int sd_log_commited_buf;
+	unsigned int sd_log_commited_databuf;
 	unsigned int sd_log_commited_revoke;
 
 	unsigned int sd_log_num_gl;
@@ -552,7 +610,6 @@ struct gfs2_sbd {
 	unsigned int sd_log_num_rg;
 	unsigned int sd_log_num_databuf;
 	unsigned int sd_log_num_jdata;
-	unsigned int sd_log_num_hdrs;
 
 	struct list_head sd_log_le_gl;
 	struct list_head sd_log_le_buf;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index df0b8b3018b..34f7bcdea1e 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -38,12 +38,17 @@
 #include "trans.h"
 #include "util.h"
 
+struct gfs2_inum_range_host {
+	u64 ir_start;
+	u64 ir_length;
+};
+
 static int iget_test(struct inode *inode, void *opaque)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_inum_host *inum = opaque;
+	u64 *no_addr = opaque;
 
-	if (ip->i_num.no_addr == inum->no_addr &&
+	if (ip->i_no_addr == *no_addr &&
 	    inode->i_private != NULL)
 		return 1;
 
@@ -53,37 +58,70 @@ static int iget_test(struct inode *inode, void *opaque)
 static int iget_set(struct inode *inode, void *opaque)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_inum_host *inum = opaque;
+	u64 *no_addr = opaque;
 
-	ip->i_num = *inum;
-	inode->i_ino = inum->no_addr;
+	inode->i_ino = (unsigned long)*no_addr;
+	ip->i_no_addr = *no_addr;
 	return 0;
 }
 
-struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum)
+struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr)
+{
+	unsigned long hash = (unsigned long)no_addr;
+	return ilookup5(sb, hash, iget_test, &no_addr);
+}
+
+static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr)
 {
-	return ilookup5(sb, (unsigned long)inum->no_addr,
-			iget_test, inum);
+	unsigned long hash = (unsigned long)no_addr;
+	return iget5_locked(sb, hash, iget_test, iget_set, &no_addr);
 }
 
-static struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum_host *inum)
+/**
+ * GFS2 lookup code fills in vfs inode contents based on info obtained
+ * from directory entry inside gfs2_inode_lookup(). This has caused issues
+ * with NFS code path since its get_dentry routine doesn't have the relevant
+ * directory entry when gfs2_inode_lookup() is invoked. Part of the code
+ * segment inside gfs2_inode_lookup code needs to get moved around.
+ *
+ * Clean up I_LOCK and I_NEW as well.
+ **/
+
+void gfs2_set_iop(struct inode *inode)
 {
-	return iget5_locked(sb, (unsigned long)inum->no_addr,
-		     iget_test, iget_set, inum);
+	umode_t mode = inode->i_mode;
+
+	if (S_ISREG(mode)) {
+		inode->i_op = &gfs2_file_iops;
+		inode->i_fop = &gfs2_file_fops;
+		inode->i_mapping->a_ops = &gfs2_file_aops;
+	} else if (S_ISDIR(mode)) {
+		inode->i_op = &gfs2_dir_iops;
+		inode->i_fop = &gfs2_dir_fops;
+	} else if (S_ISLNK(mode)) {
+		inode->i_op = &gfs2_symlink_iops;
+	} else {
+		inode->i_op = &gfs2_dev_iops;
+	}
+
+	unlock_new_inode(inode);
 }
 
 /**
  * gfs2_inode_lookup - Lookup an inode
  * @sb: The super block
- * @inum: The inode number
+ * @no_addr: The inode number
  * @type: The type of the inode
  *
  * Returns: A VFS inode, or an error
  */
 
-struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned int type)
+struct inode *gfs2_inode_lookup(struct super_block *sb, 
+				unsigned int type,
+				u64 no_addr,
+				u64 no_formal_ino)
 {
-	struct inode *inode = gfs2_iget(sb, inum);
+	struct inode *inode = gfs2_iget(sb, no_addr);
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_glock *io_gl;
 	int error;
@@ -93,29 +131,15 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *i
 
 	if (inode->i_state & I_NEW) {
 		struct gfs2_sbd *sdp = GFS2_SB(inode);
-		umode_t mode = DT2IF(type);
 		inode->i_private = ip;
-		inode->i_mode = mode;
-
-		if (S_ISREG(mode)) {
-			inode->i_op = &gfs2_file_iops;
-			inode->i_fop = &gfs2_file_fops;
-			inode->i_mapping->a_ops = &gfs2_file_aops;
-		} else if (S_ISDIR(mode)) {
-			inode->i_op = &gfs2_dir_iops;
-			inode->i_fop = &gfs2_dir_fops;
-		} else if (S_ISLNK(mode)) {
-			inode->i_op = &gfs2_symlink_iops;
-		} else {
-			inode->i_op = &gfs2_dev_iops;
-		}
+		ip->i_no_formal_ino = no_formal_ino;
 
-		error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
+		error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
 		if (unlikely(error))
 			goto fail;
 		ip->i_gl->gl_object = ip;
 
-		error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
+		error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
 		if (unlikely(error))
 			goto fail_put;
 
@@ -123,12 +147,38 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *i
 		error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
 		if (unlikely(error))
 			goto fail_iopen;
+		ip->i_iopen_gh.gh_gl->gl_object = ip;
 
 		gfs2_glock_put(io_gl);
-		unlock_new_inode(inode);
+
+		if ((type == DT_UNKNOWN) && (no_formal_ino == 0))
+			goto gfs2_nfsbypass;
+
+		inode->i_mode = DT2IF(type);
+
+		/*
+		 * We must read the inode in order to work out its type in
+		 * this case. Note that this doesn't happen often as we normally
+		 * know the type beforehand. This code path only occurs during
+		 * unlinked inode recovery (where it is safe to do this glock,
+		 * which is not true in the general case).
+		 */
+		if (type == DT_UNKNOWN) {
+			struct gfs2_holder gh;
+			error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+			if (unlikely(error))
+				goto fail_glock;
+			/* Inode is now uptodate */
+			gfs2_glock_dq_uninit(&gh);
+		}
+
+		gfs2_set_iop(inode);
 	}
 
+gfs2_nfsbypass:
 	return inode;
+fail_glock:
+	gfs2_glock_dq(&ip->i_iopen_gh);
 fail_iopen:
 	gfs2_glock_put(io_gl);
 fail_put:
@@ -144,14 +194,12 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	struct gfs2_dinode_host *di = &ip->i_di;
 	const struct gfs2_dinode *str = buf;
 
-	if (ip->i_num.no_addr != be64_to_cpu(str->di_num.no_addr)) {
+	if (ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)) {
 		if (gfs2_consist_inode(ip))
 			gfs2_dinode_print(ip);
 		return -EIO;
 	}
-	if (ip->i_num.no_formal_ino != be64_to_cpu(str->di_num.no_formal_ino))
-		return -ESTALE;
-
+	ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
 	ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
 	ip->i_inode.i_rdev = 0;
 	switch (ip->i_inode.i_mode & S_IFMT) {
@@ -175,11 +223,11 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	di->di_blocks = be64_to_cpu(str->di_blocks);
 	gfs2_set_inode_blocks(&ip->i_inode);
 	ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime);
-	ip->i_inode.i_atime.tv_nsec = 0;
+	ip->i_inode.i_atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
 	ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
-	ip->i_inode.i_mtime.tv_nsec = 0;
+	ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
 	ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
-	ip->i_inode.i_ctime.tv_nsec = 0;
+	ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
 
 	di->di_goal_meta = be64_to_cpu(str->di_goal_meta);
 	di->di_goal_data = be64_to_cpu(str->di_goal_data);
@@ -247,7 +295,7 @@ int gfs2_dinode_dealloc(struct gfs2_inode *ip)
 	if (error)
 		goto out_qs;
 
-	rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
+	rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
 	if (!rgd) {
 		gfs2_consist_inode(ip);
 		error = -EIO;
@@ -314,7 +362,7 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
 	else
 		drop_nlink(&ip->i_inode);
 
-	ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+	ip->i_inode.i_ctime = CURRENT_TIME;
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	gfs2_dinode_out(ip, dibh->b_data);
@@ -366,9 +414,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
 	struct super_block *sb = dir->i_sb;
 	struct gfs2_inode *dip = GFS2_I(dir);
 	struct gfs2_holder d_gh;
-	struct gfs2_inum_host inum;
-	unsigned int type;
-	int error;
+	int error = 0;
 	struct inode *inode = NULL;
 	int unlock = 0;
 
@@ -395,12 +441,9 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
 			goto out;
 	}
 
-	error = gfs2_dir_search(dir, name, &inum, &type);
-	if (error)
-		goto out;
-
-	inode = gfs2_inode_lookup(sb, &inum, type);
-
+	inode = gfs2_dir_search(dir, name);
+	if (IS_ERR(inode))
+		error = PTR_ERR(inode);
 out:
 	if (unlock)
 		gfs2_glock_dq_uninit(&d_gh);
@@ -409,6 +452,22 @@ out:
 	return inode ? inode : ERR_PTR(error);
 }
 
+static void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf)
+{
+	const struct gfs2_inum_range *str = buf;
+
+	ir->ir_start = be64_to_cpu(str->ir_start);
+	ir->ir_length = be64_to_cpu(str->ir_length);
+}
+
+static void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf)
+{
+	struct gfs2_inum_range *str = buf;
+
+	str->ir_start = cpu_to_be64(ir->ir_start);
+	str->ir_length = cpu_to_be64(ir->ir_length);
+}
+
 static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino)
 {
 	struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode);
@@ -548,7 +607,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
 	if (!dip->i_inode.i_nlink)
 		return -EPERM;
 
-	error = gfs2_dir_search(&dip->i_inode, name, NULL, NULL);
+	error = gfs2_dir_check(&dip->i_inode, name, NULL);
 	switch (error) {
 	case -ENOENT:
 		error = 0;
@@ -588,8 +647,7 @@ static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode,
 		*gid = current->fsgid;
 }
 
-static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum_host *inum,
-			u64 *generation)
+static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
 	int error;
@@ -605,7 +663,7 @@ static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum_host *inum,
 	if (error)
 		goto out_ipreserv;
 
-	inum->no_addr = gfs2_alloc_di(dip, generation);
+	*no_addr = gfs2_alloc_di(dip, generation);
 
 	gfs2_trans_end(sdp);
 
@@ -635,6 +693,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
 	struct gfs2_dinode *di;
 	struct buffer_head *dibh;
+	struct timespec tv = CURRENT_TIME;
 
 	dibh = gfs2_meta_new(gl, inum->no_addr);
 	gfs2_trans_add_bh(gl, dibh, 1);
@@ -650,7 +709,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 	di->di_nlink = 0;
 	di->di_size = 0;
 	di->di_blocks = cpu_to_be64(1);
-	di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(get_seconds());
+	di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec);
 	di->di_major = cpu_to_be32(MAJOR(dev));
 	di->di_minor = cpu_to_be32(MINOR(dev));
 	di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr);
@@ -680,6 +739,9 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 	di->di_entries = 0;
 	memset(&di->__pad4, 0, sizeof(di->__pad4));
 	di->di_eattr = 0;
+	di->di_atime_nsec = cpu_to_be32(tv.tv_nsec);
+	di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec);
+	di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec);
 	memset(&di->di_reserved, 0, sizeof(di->di_reserved));
 
 	brelse(dibh);
@@ -749,7 +811,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
 			goto fail_quota_locks;
 
 		error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
-					 al->al_rgd->rd_ri.ri_length +
+					 al->al_rgd->rd_length +
 					 2 * RES_DINODE +
 					 RES_STATFS + RES_QUOTA, 0);
 		if (error)
@@ -760,7 +822,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
 			goto fail_quota_locks;
 	}
 
-	error = gfs2_dir_add(&dip->i_inode, name, &ip->i_num, IF2DT(ip->i_inode.i_mode));
+	error = gfs2_dir_add(&dip->i_inode, name, ip, IF2DT(ip->i_inode.i_mode));
 	if (error)
 		goto fail_end_trans;
 
@@ -840,11 +902,11 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip)
 struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
 			   unsigned int mode, dev_t dev)
 {
-	struct inode *inode;
+	struct inode *inode = NULL;
 	struct gfs2_inode *dip = ghs->gh_gl->gl_object;
 	struct inode *dir = &dip->i_inode;
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
-	struct gfs2_inum_host inum;
+	struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
 	int error;
 	u64 generation;
 
@@ -864,7 +926,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
 	if (error)
 		goto fail_gunlock;
 
-	error = alloc_dinode(dip, &inum, &generation);
+	error = alloc_dinode(dip, &inum.no_addr, &generation);
 	if (error)
 		goto fail_gunlock;
 
@@ -877,34 +939,36 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
 	if (error)
 		goto fail_gunlock2;
 
-	inode = gfs2_inode_lookup(dir->i_sb, &inum, IF2DT(mode));
+	inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode),
+					inum.no_addr,
+					inum.no_formal_ino);
 	if (IS_ERR(inode))
 		goto fail_gunlock2;
 
 	error = gfs2_inode_refresh(GFS2_I(inode));
 	if (error)
-		goto fail_iput;
+		goto fail_gunlock2;
 
 	error = gfs2_acl_create(dip, GFS2_I(inode));
 	if (error)
-		goto fail_iput;
+		goto fail_gunlock2;
 
 	error = gfs2_security_init(dip, GFS2_I(inode));
 	if (error)
-		goto fail_iput;
+		goto fail_gunlock2;
 
 	error = link_dinode(dip, name, GFS2_I(inode));
 	if (error)
-		goto fail_iput;
+		goto fail_gunlock2;
 
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 	return inode;
 
-fail_iput:
-	iput(inode);
 fail_gunlock2:
 	gfs2_glock_dq_uninit(ghs + 1);
+	if (inode)
+		iput(inode);
 fail_gunlock:
 	gfs2_glock_dq(ghs);
 fail:
@@ -976,10 +1040,8 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
  */
 
 int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
-		   struct gfs2_inode *ip)
+		   const struct gfs2_inode *ip)
 {
-	struct gfs2_inum_host inum;
-	unsigned int type;
 	int error;
 
 	if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
@@ -997,18 +1059,10 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
 	if (error)
 		return error;
 
-	error = gfs2_dir_search(&dip->i_inode, name, &inum, &type);
+	error = gfs2_dir_check(&dip->i_inode, name, ip);
 	if (error)
 		return error;
 
-	if (!gfs2_inum_equal(&inum, &ip->i_num))
-		return -ENOENT;
-
-	if (IF2DT(ip->i_inode.i_mode) != type) {
-		gfs2_consist_inode(dip);
-		return -EIO;
-	}
-
 	return 0;
 }
 
@@ -1132,10 +1186,11 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
 	struct gfs2_glock *gl = gh->gh_gl;
 	struct gfs2_sbd *sdp = gl->gl_sbd;
 	struct gfs2_inode *ip = gl->gl_object;
-	s64 curtime, quantum = gfs2_tune_get(sdp, gt_atime_quantum);
+	s64 quantum = gfs2_tune_get(sdp, gt_atime_quantum);
 	unsigned int state;
 	int flags;
 	int error;
+	struct timespec tv = CURRENT_TIME;
 
 	if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) ||
 	    gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) ||
@@ -1153,8 +1208,7 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
 	    (sdp->sd_vfs->s_flags & MS_RDONLY))
 		return 0;
 
-	curtime = get_seconds();
-	if (curtime - ip->i_inode.i_atime.tv_sec >= quantum) {
+	if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) {
 		gfs2_glock_dq(gh);
 		gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY,
 				   gh);
@@ -1165,8 +1219,8 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
 		/* Verify that atime hasn't been updated while we were
 		   trying to get exclusive lock. */
 
-		curtime = get_seconds();
-		if (curtime - ip->i_inode.i_atime.tv_sec >= quantum) {
+		tv = CURRENT_TIME;
+		if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) {
 			struct buffer_head *dibh;
 			struct gfs2_dinode *di;
 
@@ -1180,11 +1234,12 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
 			if (error)
 				goto fail_end_trans;
 
-			ip->i_inode.i_atime.tv_sec = curtime;
+			ip->i_inode.i_atime = tv;
 
 			gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 			di = (struct gfs2_dinode *)dibh->b_data;
 			di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
+			di->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
 			brelse(dibh);
 
 			gfs2_trans_end(sdp);
@@ -1252,3 +1307,66 @@ int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
 	return error;
 }
 
+void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
+{
+	const struct gfs2_dinode_host *di = &ip->i_di;
+	struct gfs2_dinode *str = buf;
+
+	str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
+	str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
+	str->di_header.__pad0 = 0;
+	str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
+	str->di_header.__pad1 = 0;
+	str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
+	str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
+	str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
+	str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
+	str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
+	str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
+	str->di_size = cpu_to_be64(di->di_size);
+	str->di_blocks = cpu_to_be64(di->di_blocks);
+	str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
+	str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
+	str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
+
+	str->di_goal_meta = cpu_to_be64(di->di_goal_meta);
+	str->di_goal_data = cpu_to_be64(di->di_goal_data);
+	str->di_generation = cpu_to_be64(di->di_generation);
+
+	str->di_flags = cpu_to_be32(di->di_flags);
+	str->di_height = cpu_to_be16(di->di_height);
+	str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
+					     !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ?
+					     GFS2_FORMAT_DE : 0);
+	str->di_depth = cpu_to_be16(di->di_depth);
+	str->di_entries = cpu_to_be32(di->di_entries);
+
+	str->di_eattr = cpu_to_be64(di->di_eattr);
+	str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
+	str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
+	str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
+}
+
+void gfs2_dinode_print(const struct gfs2_inode *ip)
+{
+	const struct gfs2_dinode_host *di = &ip->i_di;
+
+	printk(KERN_INFO "  no_formal_ino = %llu\n",
+	       (unsigned long long)ip->i_no_formal_ino);
+	printk(KERN_INFO "  no_addr = %llu\n",
+	       (unsigned long long)ip->i_no_addr);
+	printk(KERN_INFO "  di_size = %llu\n", (unsigned long long)di->di_size);
+	printk(KERN_INFO "  di_blocks = %llu\n",
+	       (unsigned long long)di->di_blocks);
+	printk(KERN_INFO "  di_goal_meta = %llu\n",
+	       (unsigned long long)di->di_goal_meta);
+	printk(KERN_INFO "  di_goal_data = %llu\n",
+	       (unsigned long long)di->di_goal_data);
+	printk(KERN_INFO "  di_flags = 0x%.8X\n", di->di_flags);
+	printk(KERN_INFO "  di_height = %u\n", di->di_height);
+	printk(KERN_INFO "  di_depth = %u\n", di->di_depth);
+	printk(KERN_INFO "  di_entries = %u\n", di->di_entries);
+	printk(KERN_INFO "  di_eattr = %llu\n",
+	       (unsigned long long)di->di_eattr);
+}
+
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index b57f448b15b..4517ac82c01 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -10,17 +10,17 @@
 #ifndef __INODE_DOT_H__
 #define __INODE_DOT_H__
 
-static inline int gfs2_is_stuffed(struct gfs2_inode *ip)
+static inline int gfs2_is_stuffed(const struct gfs2_inode *ip)
 {
 	return !ip->i_di.di_height;
 }
 
-static inline int gfs2_is_jdata(struct gfs2_inode *ip)
+static inline int gfs2_is_jdata(const struct gfs2_inode *ip)
 {
 	return ip->i_di.di_flags & GFS2_DIF_JDATA;
 }
 
-static inline int gfs2_is_dir(struct gfs2_inode *ip)
+static inline int gfs2_is_dir(const struct gfs2_inode *ip)
 {
 	return S_ISDIR(ip->i_inode.i_mode);
 }
@@ -32,9 +32,25 @@ static inline void gfs2_set_inode_blocks(struct inode *inode)
 		(GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
 }
 
+static inline int gfs2_check_inum(const struct gfs2_inode *ip, u64 no_addr,
+				  u64 no_formal_ino)
+{
+	return ip->i_no_addr == no_addr && ip->i_no_formal_ino == no_formal_ino;
+}
+
+static inline void gfs2_inum_out(const struct gfs2_inode *ip,
+				 struct gfs2_dirent *dent)
+{
+	dent->de_inum.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
+	dent->de_inum.no_addr = cpu_to_be64(ip->i_no_addr);
+}
+
+
 void gfs2_inode_attr_in(struct gfs2_inode *ip);
-struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned type);
-struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum);
+void gfs2_set_iop(struct inode *inode);
+struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 
+				u64 no_addr, u64 no_formal_ino);
+struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
 
 int gfs2_inode_refresh(struct gfs2_inode *ip);
 
@@ -47,12 +63,14 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
 int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
 		struct gfs2_inode *ip);
 int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
-		   struct gfs2_inode *ip);
+		   const struct gfs2_inode *ip);
 int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to);
 int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
 int gfs2_glock_nq_atime(struct gfs2_holder *gh);
 int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
 struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
+void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
+void gfs2_dinode_print(const struct gfs2_inode *ip);
 
 #endif /* __INODE_DOT_H__ */
 
diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c
index c305255bfe8..542a797ac89 100644
--- a/fs/gfs2/locking/dlm/lock.c
+++ b/fs/gfs2/locking/dlm/lock.c
@@ -174,7 +174,6 @@ static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
 	lp->cur = DLM_LOCK_IV;
 	lp->lvb = NULL;
 	lp->hold_null = NULL;
-	init_completion(&lp->ast_wait);
 	INIT_LIST_HEAD(&lp->clist);
 	INIT_LIST_HEAD(&lp->blist);
 	INIT_LIST_HEAD(&lp->delay_list);
@@ -399,6 +398,12 @@ static void gdlm_del_lvb(struct gdlm_lock *lp)
 	lp->lksb.sb_lvbptr = NULL;
 }
 
+static int gdlm_ast_wait(void *word)
+{
+	schedule();
+	return 0;
+}
+
 /* This can do a synchronous dlm request (requiring a lock_dlm thread to get
    the completion) because gfs won't call hold_lvb() during a callback (from
    the context of a lock_dlm thread). */
@@ -424,10 +429,10 @@ static int hold_null_lock(struct gdlm_lock *lp)
 	lpn->lkf = DLM_LKF_VALBLK | DLM_LKF_EXPEDITE;
 	set_bit(LFL_NOBAST, &lpn->flags);
 	set_bit(LFL_INLOCK, &lpn->flags);
+	set_bit(LFL_AST_WAIT, &lpn->flags);
 
-	init_completion(&lpn->ast_wait);
 	gdlm_do_lock(lpn);
-	wait_for_completion(&lpn->ast_wait);
+	wait_on_bit(&lpn->flags, LFL_AST_WAIT, gdlm_ast_wait, TASK_UNINTERRUPTIBLE);
 	error = lpn->lksb.sb_status;
 	if (error) {
 		printk(KERN_INFO "lock_dlm: hold_null_lock dlm error %d\n",
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h
index d074c6e6f9b..24d70f73b65 100644
--- a/fs/gfs2/locking/dlm/lock_dlm.h
+++ b/fs/gfs2/locking/dlm/lock_dlm.h
@@ -101,6 +101,7 @@ enum {
 	LFL_NOBAST		= 10,
 	LFL_HEADQUE		= 11,
 	LFL_UNLOCK_DELETE	= 12,
+	LFL_AST_WAIT		= 13,
 };
 
 struct gdlm_lock {
@@ -117,7 +118,6 @@ struct gdlm_lock {
 	unsigned long		flags;		/* lock_dlm flags LFL_ */
 
 	int			bast_mode;	/* protected by async_lock */
-	struct completion	ast_wait;
 
 	struct list_head	clist;		/* complete */
 	struct list_head	blist;		/* blocking */
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
index 1d8faa3da8a..41c5b04caab 100644
--- a/fs/gfs2/locking/dlm/mount.c
+++ b/fs/gfs2/locking/dlm/mount.c
@@ -147,7 +147,7 @@ static int gdlm_mount(char *table_name, char *host_data,
 
 	error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname),
 				  &ls->dlm_lockspace,
-				  nodir ? DLM_LSFL_NODIR : 0,
+				  DLM_LSFL_FS | (nodir ? DLM_LSFL_NODIR : 0),
 				  GDLM_LVB_SIZE);
 	if (error) {
 		log_error("dlm_new_lockspace error %d", error);
diff --git a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c
index f82495e18c2..fba1f1d87e4 100644
--- a/fs/gfs2/locking/dlm/plock.c
+++ b/fs/gfs2/locking/dlm/plock.c
@@ -242,7 +242,7 @@ int gdlm_plock_get(void *lockspace, struct lm_lockname *name,
 	op->info.number		= name->ln_number;
 	op->info.start		= fl->fl_start;
 	op->info.end		= fl->fl_end;
-
+	op->info.owner		= (__u64)(long) fl->fl_owner;
 
 	send_op(op);
 	wait_event(recv_wq, (op->done != 0));
@@ -254,16 +254,20 @@ int gdlm_plock_get(void *lockspace, struct lm_lockname *name,
 	}
 	spin_unlock(&ops_lock);
 
+	/* info.rv from userspace is 1 for conflict, 0 for no-conflict,
+	   -ENOENT if there are no locks on the file */
+
 	rv = op->info.rv;
 
 	fl->fl_type = F_UNLCK;
 	if (rv == -ENOENT)
 		rv = 0;
-	else if (rv == 0 && op->info.pid != fl->fl_pid) {
+	else if (rv > 0) {
 		fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
 		fl->fl_pid = op->info.pid;
 		fl->fl_start = op->info.start;
 		fl->fl_end = op->info.end;
+		rv = 0;
 	}
 
 	kfree(op);
diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c
index 9cf1f168eaf..1aca51e4509 100644
--- a/fs/gfs2/locking/dlm/thread.c
+++ b/fs/gfs2/locking/dlm/thread.c
@@ -44,6 +44,13 @@ static void process_blocking(struct gdlm_lock *lp, int bast_mode)
 	ls->fscb(ls->sdp, cb, &lp->lockname);
 }
 
+static void wake_up_ast(struct gdlm_lock *lp)
+{
+	clear_bit(LFL_AST_WAIT, &lp->flags);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&lp->flags, LFL_AST_WAIT);
+}
+
 static void process_complete(struct gdlm_lock *lp)
 {
 	struct gdlm_ls *ls = lp->ls;
@@ -136,7 +143,7 @@ static void process_complete(struct gdlm_lock *lp)
 	 */
 
 	if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
-		complete(&lp->ast_wait);
+		wake_up_ast(lp);
 		return;
 	}
 
@@ -214,7 +221,7 @@ out:
 	if (test_bit(LFL_INLOCK, &lp->flags)) {
 		clear_bit(LFL_NOBLOCK, &lp->flags);
 		lp->cur = lp->req;
-		complete(&lp->ast_wait);
+		wake_up_ast(lp);
 		return;
 	}
 
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 291415ddfe5..f49a12e2408 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -83,6 +83,11 @@ static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
 
 			gfs2_assert(sdp, bd->bd_ail == ai);
 
+			if (!bh){
+				list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
+                                continue;
+                        }
+
 			if (!buffer_busy(bh)) {
 				if (!buffer_uptodate(bh)) {
 					gfs2_log_unlock(sdp);
@@ -125,6 +130,11 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
 					 bd_ail_st_list) {
 		bh = bd->bd_bh;
 
+		if (!bh){
+			list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
+			continue;
+		}
+
 		gfs2_assert(sdp, bd->bd_ail == ai);
 
 		if (buffer_busy(bh)) {
@@ -262,8 +272,8 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
  * @sdp: The GFS2 superblock
  * @blks: The number of blocks to reserve
  *
- * Note that we never give out the last 6 blocks of the journal. Thats
- * due to the fact that there is are a small number of header blocks
+ * Note that we never give out the last few blocks of the journal. Thats
+ * due to the fact that there is a small number of header blocks
  * associated with each log flush. The exact number can't be known until
  * flush time, so we ensure that we have just enough free blocks at all
  * times to avoid running out during a log flush.
@@ -274,6 +284,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
 int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
 {
 	unsigned int try = 0;
+	unsigned reserved_blks = 6 * (4096 / sdp->sd_vfs->s_blocksize);
 
 	if (gfs2_assert_warn(sdp, blks) ||
 	    gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
@@ -281,7 +292,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
 
 	mutex_lock(&sdp->sd_log_reserve_mutex);
 	gfs2_log_lock(sdp);
-	while(sdp->sd_log_blks_free <= (blks + 6)) {
+	while(sdp->sd_log_blks_free <= (blks + reserved_blks)) {
 		gfs2_log_unlock(sdp);
 		gfs2_ail1_empty(sdp, 0);
 		gfs2_log_flush(sdp, NULL);
@@ -357,6 +368,58 @@ static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer
 	return dist;
 }
 
+/**
+ * calc_reserved - Calculate the number of blocks to reserve when
+ *                 refunding a transaction's unused buffers.
+ * @sdp: The GFS2 superblock
+ *
+ * This is complex.  We need to reserve room for all our currently used
+ * metadata buffers (e.g. normal file I/O rewriting file time stamps) and 
+ * all our journaled data buffers for journaled files (e.g. files in the 
+ * meta_fs like rindex, or files for which chattr +j was done.)
+ * If we don't reserve enough space, gfs2_log_refund and gfs2_log_flush
+ * will count it as free space (sd_log_blks_free) and corruption will follow.
+ *
+ * We can have metadata bufs and jdata bufs in the same journal.  So each
+ * type gets its own log header, for which we need to reserve a block.
+ * In fact, each type has the potential for needing more than one header 
+ * in cases where we have more buffers than will fit on a journal page.
+ * Metadata journal entries take up half the space of journaled buffer entries.
+ * Thus, metadata entries have buf_limit (502) and journaled buffers have
+ * databuf_limit (251) before they cause a wrap around.
+ *
+ * Also, we need to reserve blocks for revoke journal entries and one for an
+ * overall header for the lot.
+ *
+ * Returns: the number of blocks reserved
+ */
+static unsigned int calc_reserved(struct gfs2_sbd *sdp)
+{
+	unsigned int reserved = 0;
+	unsigned int mbuf_limit, metabufhdrs_needed;
+	unsigned int dbuf_limit, databufhdrs_needed;
+	unsigned int revokes = 0;
+
+	mbuf_limit = buf_limit(sdp);
+	metabufhdrs_needed = (sdp->sd_log_commited_buf +
+			      (mbuf_limit - 1)) / mbuf_limit;
+	dbuf_limit = databuf_limit(sdp);
+	databufhdrs_needed = (sdp->sd_log_commited_databuf +
+			      (dbuf_limit - 1)) / dbuf_limit;
+
+	if (sdp->sd_log_commited_revoke)
+		revokes = gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
+					  sizeof(u64));
+
+	reserved = sdp->sd_log_commited_buf + metabufhdrs_needed +
+		sdp->sd_log_commited_databuf + databufhdrs_needed +
+		revokes;
+	/* One for the overall header */
+	if (reserved)
+		reserved++;
+	return reserved;
+}
+
 static unsigned int current_tail(struct gfs2_sbd *sdp)
 {
 	struct gfs2_ail *ai;
@@ -447,14 +510,14 @@ struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
 	return bh;
 }
 
-static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail, int pull)
+static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
 {
 	unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
 
 	ail2_empty(sdp, new_tail);
 
 	gfs2_log_lock(sdp);
-	sdp->sd_log_blks_free += dist - (pull ? 1 : 0);
+	sdp->sd_log_blks_free += dist;
 	gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
 	gfs2_log_unlock(sdp);
 
@@ -504,7 +567,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
 	brelse(bh);
 
 	if (sdp->sd_log_tail != tail)
-		log_pull_tail(sdp, tail, pull);
+		log_pull_tail(sdp, tail);
 	else
 		gfs2_assert_withdraw(sdp, !pull);
 
@@ -517,6 +580,7 @@ static void log_flush_commit(struct gfs2_sbd *sdp)
 	struct list_head *head = &sdp->sd_log_flush_list;
 	struct gfs2_log_buf *lb;
 	struct buffer_head *bh;
+	int flushcount = 0;
 
 	while (!list_empty(head)) {
 		lb = list_entry(head->next, struct gfs2_log_buf, lb_list);
@@ -533,9 +597,20 @@ static void log_flush_commit(struct gfs2_sbd *sdp)
 		} else
 			brelse(bh);
 		kfree(lb);
+		flushcount++;
 	}
 
-	log_write_header(sdp, 0, 0);
+	/* If nothing was journaled, the header is unplanned and unwanted. */
+	if (flushcount) {
+		log_write_header(sdp, 0, 0);
+	} else {
+		unsigned int tail;
+		tail = current_tail(sdp);
+
+		gfs2_ail1_empty(sdp, 0);
+		if (sdp->sd_log_tail != tail)
+			log_pull_tail(sdp, tail);
+	}
 }
 
 /**
@@ -565,7 +640,10 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
 	INIT_LIST_HEAD(&ai->ai_ail1_list);
 	INIT_LIST_HEAD(&ai->ai_ail2_list);
 
-	gfs2_assert_withdraw(sdp, sdp->sd_log_num_buf == sdp->sd_log_commited_buf);
+	gfs2_assert_withdraw(sdp,
+			     sdp->sd_log_num_buf + sdp->sd_log_num_jdata ==
+			     sdp->sd_log_commited_buf +
+			     sdp->sd_log_commited_databuf);
 	gfs2_assert_withdraw(sdp,
 			sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);
 
@@ -576,16 +654,19 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
 	lops_before_commit(sdp);
 	if (!list_empty(&sdp->sd_log_flush_list))
 		log_flush_commit(sdp);
-	else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle)
+	else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
+		gfs2_log_lock(sdp);
+		sdp->sd_log_blks_free--; /* Adjust for unreserved buffer */
+		gfs2_log_unlock(sdp);
 		log_write_header(sdp, 0, PULL);
+	}
 	lops_after_commit(sdp, ai);
 
 	gfs2_log_lock(sdp);
 	sdp->sd_log_head = sdp->sd_log_flush_head;
-	sdp->sd_log_blks_free -= sdp->sd_log_num_hdrs;
 	sdp->sd_log_blks_reserved = 0;
 	sdp->sd_log_commited_buf = 0;
-	sdp->sd_log_num_hdrs = 0;
+	sdp->sd_log_commited_databuf = 0;
 	sdp->sd_log_commited_revoke = 0;
 
 	if (!list_empty(&ai->ai_ail1_list)) {
@@ -602,32 +683,26 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
 
 static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 {
-	unsigned int reserved = 0;
+	unsigned int reserved;
 	unsigned int old;
 
 	gfs2_log_lock(sdp);
 
 	sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm;
-	gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_buf) >= 0);
+	sdp->sd_log_commited_databuf += tr->tr_num_databuf_new -
+		tr->tr_num_databuf_rm;
+	gfs2_assert_withdraw(sdp, (((int)sdp->sd_log_commited_buf) >= 0) ||
+			     (((int)sdp->sd_log_commited_databuf) >= 0));
 	sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
 	gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0);
-
-	if (sdp->sd_log_commited_buf)
-		reserved += sdp->sd_log_commited_buf;
-	if (sdp->sd_log_commited_revoke)
-		reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
-					    sizeof(u64));
-	if (reserved)
-		reserved++;
-
+	reserved = calc_reserved(sdp);
 	old = sdp->sd_log_blks_free;
 	sdp->sd_log_blks_free += tr->tr_reserved -
 				 (reserved - sdp->sd_log_blks_reserved);
 
 	gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free >= old);
-	gfs2_assert_withdraw(sdp,
-			     sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks +
-			     sdp->sd_log_num_hdrs);
+	gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <=
+			     sdp->sd_jdesc->jd_blocks);
 
 	sdp->sd_log_blks_reserved = reserved;
 
@@ -673,13 +748,13 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp)
 	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
 	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
 	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);
-	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_hdrs);
 	gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));
 
 	sdp->sd_log_flush_head = sdp->sd_log_head;
 	sdp->sd_log_flush_wrapped = 0;
 
-	log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, 0);
+	log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT,
+			 (sdp->sd_log_tail == current_tail(sdp)) ? 0 : PULL);
 
 	gfs2_assert_warn(sdp, sdp->sd_log_blks_free == sdp->sd_jdesc->jd_blocks);
 	gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index f82d84d05d2..aff70f0698f 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -17,6 +17,7 @@
 
 #include "gfs2.h"
 #include "incore.h"
+#include "inode.h"
 #include "glock.h"
 #include "log.h"
 #include "lops.h"
@@ -117,15 +118,13 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
 	struct gfs2_log_descriptor *ld;
 	struct gfs2_bufdata *bd1 = NULL, *bd2;
 	unsigned int total = sdp->sd_log_num_buf;
-	unsigned int offset = sizeof(struct gfs2_log_descriptor);
+	unsigned int offset = BUF_OFFSET;
 	unsigned int limit;
 	unsigned int num;
 	unsigned n;
 	__be64 *ptr;
 
-	offset += sizeof(__be64) - 1;
-	offset &= ~(sizeof(__be64) - 1);
-	limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
+	limit = buf_limit(sdp);
 	/* for 4k blocks, limit = 503 */
 
 	bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
@@ -134,7 +133,6 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
 		if (total > limit)
 			num = limit;
 		bh = gfs2_log_get_buf(sdp);
-		sdp->sd_log_num_hdrs++;
 		ld = (struct gfs2_log_descriptor *)bh->b_data;
 		ptr = (__be64 *)(bh->b_data + offset);
 		ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
@@ -469,25 +467,28 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
 	struct gfs2_inode *ip = GFS2_I(mapping->host);
 
 	gfs2_log_lock(sdp);
+	if (!list_empty(&bd->bd_list_tr)) {
+		gfs2_log_unlock(sdp);
+		return;
+	}
 	tr->tr_touched = 1;
-	if (list_empty(&bd->bd_list_tr) &&
-	    (ip->i_di.di_flags & GFS2_DIF_JDATA)) {
+	if (gfs2_is_jdata(ip)) {
 		tr->tr_num_buf++;
 		list_add(&bd->bd_list_tr, &tr->tr_list_buf);
-		gfs2_log_unlock(sdp);
-		gfs2_pin(sdp, bd->bd_bh);
-		tr->tr_num_buf_new++;
-	} else {
-		gfs2_log_unlock(sdp);
 	}
+	gfs2_log_unlock(sdp);
+	if (!list_empty(&le->le_list))
+		return;
+
 	gfs2_trans_add_gl(bd->bd_gl);
-	gfs2_log_lock(sdp);
-	if (list_empty(&le->le_list)) {
-		if (ip->i_di.di_flags & GFS2_DIF_JDATA)
-			sdp->sd_log_num_jdata++;
-		sdp->sd_log_num_databuf++;
-		list_add(&le->le_list, &sdp->sd_log_le_databuf);
+	if (gfs2_is_jdata(ip)) {
+		sdp->sd_log_num_jdata++;
+		gfs2_pin(sdp, bd->bd_bh);
+		tr->tr_num_databuf_new++;
 	}
+	sdp->sd_log_num_databuf++;
+	gfs2_log_lock(sdp);
+	list_add(&le->le_list, &sdp->sd_log_le_databuf);
 	gfs2_log_unlock(sdp);
 }
 
@@ -520,7 +521,6 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
 	LIST_HEAD(started);
 	struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
 	struct buffer_head *bh = NULL,*bh1 = NULL;
-	unsigned int offset = sizeof(struct gfs2_log_descriptor);
 	struct gfs2_log_descriptor *ld;
 	unsigned int limit;
 	unsigned int total_dbuf = sdp->sd_log_num_databuf;
@@ -528,9 +528,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
 	unsigned int num, n;
 	__be64 *ptr = NULL;
 
-	offset += 2*sizeof(__be64) - 1;
-	offset &= ~(2*sizeof(__be64) - 1);
-	limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
+	limit = databuf_limit(sdp);
 
 	/*
 	 * Start writing ordered buffers, write journaled buffers
@@ -581,10 +579,10 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
 				gfs2_log_unlock(sdp);
 				if (!bh) {
 					bh = gfs2_log_get_buf(sdp);
-					sdp->sd_log_num_hdrs++;
 					ld = (struct gfs2_log_descriptor *)
 					     bh->b_data;
-					ptr = (__be64 *)(bh->b_data + offset);
+					ptr = (__be64 *)(bh->b_data +
+							 DATABUF_OFFSET);
 					ld->ld_header.mh_magic =
 						cpu_to_be32(GFS2_MAGIC);
 					ld->ld_header.mh_type =
@@ -605,7 +603,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
 				if (unlikely(magic != 0))
 					set_buffer_escaped(bh1);
 				gfs2_log_lock(sdp);
-				if (n++ > num)
+				if (++n >= num)
 					break;
 			} else if (!bh1) {
 				total_dbuf--;
@@ -622,6 +620,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
 		}
 		gfs2_log_unlock(sdp);
 		if (bh) {
+			set_buffer_mapped(bh);
 			set_buffer_dirty(bh);
 			ll_rw_block(WRITE, 1, &bh);
 			bh = NULL;
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index 965bc65c7c6..41a00df7558 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -13,6 +13,13 @@
 #include <linux/list.h>
 #include "incore.h"
 
+#define BUF_OFFSET \
+	((sizeof(struct gfs2_log_descriptor) + sizeof(__be64) - 1) & \
+	 ~(sizeof(__be64) - 1))
+#define DATABUF_OFFSET \
+	((sizeof(struct gfs2_log_descriptor) + (2 * sizeof(__be64) - 1)) & \
+	 ~(2 * sizeof(__be64) - 1))
+
 extern const struct gfs2_log_operations gfs2_glock_lops;
 extern const struct gfs2_log_operations gfs2_buf_lops;
 extern const struct gfs2_log_operations gfs2_revoke_lops;
@@ -21,6 +28,22 @@ extern const struct gfs2_log_operations gfs2_databuf_lops;
 
 extern const struct gfs2_log_operations *gfs2_log_ops[];
 
+static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
+{
+	unsigned int limit;
+
+	limit = (sdp->sd_sb.sb_bsize - BUF_OFFSET) / sizeof(__be64);
+	return limit;
+}
+
+static inline unsigned int databuf_limit(struct gfs2_sbd *sdp)
+{
+	unsigned int limit;
+
+	limit = (sdp->sd_sb.sb_bsize - DATABUF_OFFSET) / (2 * sizeof(__be64));
+	return limit;
+}
+
 static inline void lops_init_le(struct gfs2_log_element *le,
 				const struct gfs2_log_operations *lops)
 {
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index e62d4f620c5..8da343b34ae 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -387,12 +387,18 @@ void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
 
 			if (test_clear_buffer_pinned(bh)) {
 				struct gfs2_trans *tr = current->journal_info;
+				struct gfs2_inode *bh_ip =
+					GFS2_I(bh->b_page->mapping->host);
+
 				gfs2_log_lock(sdp);
 				list_del_init(&bd->bd_le.le_list);
 				gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
 				sdp->sd_log_num_buf--;
 				gfs2_log_unlock(sdp);
-				tr->tr_num_buf_rm++;
+				if (bh_ip->i_inode.i_private != NULL)
+					tr->tr_num_databuf_rm++;
+				else
+					tr->tr_num_buf_rm++;
 				brelse(bh);
 			}
 			if (bd) {
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index e037425bc04..527bf19d969 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -63,7 +63,7 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
 static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip,
 					 struct buffer_head **bhp)
 {
-	return gfs2_meta_indirect_buffer(ip, 0, ip->i_num.no_addr, 0, bhp);
+	return gfs2_meta_indirect_buffer(ip, 0, ip->i_no_addr, 0, bhp);
 }
 
 struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen);
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
index 4864659555d..6f006a804db 100644
--- a/fs/gfs2/mount.c
+++ b/fs/gfs2/mount.c
@@ -82,20 +82,19 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
 	char *options, *o, *v;
 	int error = 0;
 
-	if (!remount) {
-		/*  If someone preloaded options, use those instead  */
-		spin_lock(&gfs2_sys_margs_lock);
-		if (gfs2_sys_margs) {
-			data = gfs2_sys_margs;
-			gfs2_sys_margs = NULL;
-		}
-		spin_unlock(&gfs2_sys_margs_lock);
-
-		/*  Set some defaults  */
-		args->ar_num_glockd = GFS2_GLOCKD_DEFAULT;
-		args->ar_quota = GFS2_QUOTA_DEFAULT;
-		args->ar_data = GFS2_DATA_DEFAULT;
+	/*  If someone preloaded options, use those instead  */
+	spin_lock(&gfs2_sys_margs_lock);
+	if (!remount && gfs2_sys_margs) {
+		data = gfs2_sys_margs;
+		gfs2_sys_margs = NULL;
 	}
+	spin_unlock(&gfs2_sys_margs_lock);
+
+	/*  Set some defaults  */
+	memset(args, 0, sizeof(struct gfs2_args));
+	args->ar_num_glockd = GFS2_GLOCKD_DEFAULT;
+	args->ar_quota = GFS2_QUOTA_DEFAULT;
+	args->ar_data = GFS2_DATA_DEFAULT;
 
 	/* Split the options into tokens with the "," character and
 	   process them */
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
deleted file mode 100644
index d9ecfd23a49..00000000000
--- a/fs/gfs2/ondisk.c
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/completion.h>
-#include <linux/buffer_head.h>
-
-#include "gfs2.h"
-#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
-#include "incore.h"
-
-#define pv(struct, member, fmt) printk(KERN_INFO "  "#member" = "fmt"\n", \
-				       struct->member);
-
-/*
- * gfs2_xxx_in - read in an xxx struct
- * first arg: the cpu-order structure
- * buf: the disk-order buffer
- *
- * gfs2_xxx_out - write out an xxx struct
- * first arg: the cpu-order structure
- * buf: the disk-order buffer
- *
- * gfs2_xxx_print - print out an xxx struct
- * first arg: the cpu-order structure
- */
-
-void gfs2_inum_in(struct gfs2_inum_host *no, const void *buf)
-{
-	const struct gfs2_inum *str = buf;
-
-	no->no_formal_ino = be64_to_cpu(str->no_formal_ino);
-	no->no_addr = be64_to_cpu(str->no_addr);
-}
-
-void gfs2_inum_out(const struct gfs2_inum_host *no, void *buf)
-{
-	struct gfs2_inum *str = buf;
-
-	str->no_formal_ino = cpu_to_be64(no->no_formal_ino);
-	str->no_addr = cpu_to_be64(no->no_addr);
-}
-
-static void gfs2_inum_print(const struct gfs2_inum_host *no)
-{
-	printk(KERN_INFO "  no_formal_ino = %llu\n", (unsigned long long)no->no_formal_ino);
-	printk(KERN_INFO "  no_addr = %llu\n", (unsigned long long)no->no_addr);
-}
-
-static void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *buf)
-{
-	const struct gfs2_meta_header *str = buf;
-
-	mh->mh_magic = be32_to_cpu(str->mh_magic);
-	mh->mh_type = be32_to_cpu(str->mh_type);
-	mh->mh_format = be32_to_cpu(str->mh_format);
-}
-
-void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
-{
-	const struct gfs2_sb *str = buf;
-
-	gfs2_meta_header_in(&sb->sb_header, buf);
-
-	sb->sb_fs_format = be32_to_cpu(str->sb_fs_format);
-	sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format);
-	sb->sb_bsize = be32_to_cpu(str->sb_bsize);
-	sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift);
-
-	gfs2_inum_in(&sb->sb_master_dir, (char *)&str->sb_master_dir);
-	gfs2_inum_in(&sb->sb_root_dir, (char *)&str->sb_root_dir);
-
-	memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
-	memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
-}
-
-void gfs2_rindex_in(struct gfs2_rindex_host *ri, const void *buf)
-{
-	const struct gfs2_rindex *str = buf;
-
-	ri->ri_addr = be64_to_cpu(str->ri_addr);
-	ri->ri_length = be32_to_cpu(str->ri_length);
-	ri->ri_data0 = be64_to_cpu(str->ri_data0);
-	ri->ri_data = be32_to_cpu(str->ri_data);
-	ri->ri_bitbytes = be32_to_cpu(str->ri_bitbytes);
-
-}
-
-void gfs2_rindex_print(const struct gfs2_rindex_host *ri)
-{
-	printk(KERN_INFO "  ri_addr = %llu\n", (unsigned long long)ri->ri_addr);
-	pv(ri, ri_length, "%u");
-
-	printk(KERN_INFO "  ri_data0 = %llu\n", (unsigned long long)ri->ri_data0);
-	pv(ri, ri_data, "%u");
-
-	pv(ri, ri_bitbytes, "%u");
-}
-
-void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf)
-{
-	const struct gfs2_rgrp *str = buf;
-
-	rg->rg_flags = be32_to_cpu(str->rg_flags);
-	rg->rg_free = be32_to_cpu(str->rg_free);
-	rg->rg_dinodes = be32_to_cpu(str->rg_dinodes);
-	rg->rg_igeneration = be64_to_cpu(str->rg_igeneration);
-}
-
-void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf)
-{
-	struct gfs2_rgrp *str = buf;
-
-	str->rg_flags = cpu_to_be32(rg->rg_flags);
-	str->rg_free = cpu_to_be32(rg->rg_free);
-	str->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
-	str->__pad = cpu_to_be32(0);
-	str->rg_igeneration = cpu_to_be64(rg->rg_igeneration);
-	memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
-}
-
-void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf)
-{
-	const struct gfs2_quota *str = buf;
-
-	qu->qu_limit = be64_to_cpu(str->qu_limit);
-	qu->qu_warn = be64_to_cpu(str->qu_warn);
-	qu->qu_value = be64_to_cpu(str->qu_value);
-}
-
-void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
-{
-	const struct gfs2_dinode_host *di = &ip->i_di;
-	struct gfs2_dinode *str = buf;
-
-	str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
-	str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
-	str->di_header.__pad0 = 0;
-	str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
-	str->di_header.__pad1 = 0;
-
-	gfs2_inum_out(&ip->i_num, &str->di_num);
-
-	str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
-	str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
-	str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
-	str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
-	str->di_size = cpu_to_be64(di->di_size);
-	str->di_blocks = cpu_to_be64(di->di_blocks);
-	str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
-	str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
-	str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
-
-	str->di_goal_meta = cpu_to_be64(di->di_goal_meta);
-	str->di_goal_data = cpu_to_be64(di->di_goal_data);
-	str->di_generation = cpu_to_be64(di->di_generation);
-
-	str->di_flags = cpu_to_be32(di->di_flags);
-	str->di_height = cpu_to_be16(di->di_height);
-	str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
-					     !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ?
-					     GFS2_FORMAT_DE : 0);
-	str->di_depth = cpu_to_be16(di->di_depth);
-	str->di_entries = cpu_to_be32(di->di_entries);
-
-	str->di_eattr = cpu_to_be64(di->di_eattr);
-}
-
-void gfs2_dinode_print(const struct gfs2_inode *ip)
-{
-	const struct gfs2_dinode_host *di = &ip->i_di;
-
-	gfs2_inum_print(&ip->i_num);
-
-	printk(KERN_INFO "  di_size = %llu\n", (unsigned long long)di->di_size);
-	printk(KERN_INFO "  di_blocks = %llu\n", (unsigned long long)di->di_blocks);
-	printk(KERN_INFO "  di_goal_meta = %llu\n", (unsigned long long)di->di_goal_meta);
-	printk(KERN_INFO "  di_goal_data = %llu\n", (unsigned long long)di->di_goal_data);
-
-	pv(di, di_flags, "0x%.8X");
-	pv(di, di_height, "%u");
-
-	pv(di, di_depth, "%u");
-	pv(di, di_entries, "%u");
-
-	printk(KERN_INFO "  di_eattr = %llu\n", (unsigned long long)di->di_eattr);
-}
-
-void gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
-{
-	const struct gfs2_log_header *str = buf;
-
-	gfs2_meta_header_in(&lh->lh_header, buf);
-	lh->lh_sequence = be64_to_cpu(str->lh_sequence);
-	lh->lh_flags = be32_to_cpu(str->lh_flags);
-	lh->lh_tail = be32_to_cpu(str->lh_tail);
-	lh->lh_blkno = be32_to_cpu(str->lh_blkno);
-	lh->lh_hash = be32_to_cpu(str->lh_hash);
-}
-
-void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf)
-{
-	const struct gfs2_inum_range *str = buf;
-
-	ir->ir_start = be64_to_cpu(str->ir_start);
-	ir->ir_length = be64_to_cpu(str->ir_length);
-}
-
-void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf)
-{
-	struct gfs2_inum_range *str = buf;
-
-	str->ir_start = cpu_to_be64(ir->ir_start);
-	str->ir_length = cpu_to_be64(ir->ir_length);
-}
-
-void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
-{
-	const struct gfs2_statfs_change *str = buf;
-
-	sc->sc_total = be64_to_cpu(str->sc_total);
-	sc->sc_free = be64_to_cpu(str->sc_free);
-	sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
-}
-
-void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
-{
-	struct gfs2_statfs_change *str = buf;
-
-	str->sc_total = cpu_to_be64(sc->sc_total);
-	str->sc_free = cpu_to_be64(sc->sc_free);
-	str->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
-}
-
-void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf)
-{
-	const struct gfs2_quota_change *str = buf;
-
-	qc->qc_change = be64_to_cpu(str->qc_change);
-	qc->qc_flags = be32_to_cpu(str->qc_flags);
-	qc->qc_id = be32_to_cpu(str->qc_id);
-}
-
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 30c15622174..26c888890c2 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
  *
  * This copyrighted material is made available to anyone wishing to use,
  * modify, copy, or redistribute it subject to the terms and conditions
@@ -32,6 +32,7 @@
 #include "trans.h"
 #include "rgrp.h"
 #include "ops_file.h"
+#include "super.h"
 #include "util.h"
 #include "glops.h"
 
@@ -49,6 +50,8 @@ static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
 		end = start + bsize;
 		if (end <= from || start >= to)
 			continue;
+		if (gfs2_is_jdata(ip))
+			set_buffer_uptodate(bh);
 		gfs2_trans_add_bh(ip->i_gl, bh, 0);
 	}
 }
@@ -134,7 +137,9 @@ static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
 		return 0; /* don't care */
 	}
 
-	if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) {
+	if ((sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) &&
+	    PageChecked(page)) {
+		ClearPageChecked(page);
 		error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
 		if (error)
 			goto out_ignore;
@@ -203,11 +208,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
 	 * so we need to supply one here. It doesn't happen often.
 	 */
 	if (unlikely(page->index)) {
-		kaddr = kmap_atomic(page, KM_USER0);
-		memset(kaddr, 0, PAGE_CACHE_SIZE);
-		kunmap_atomic(kaddr, KM_USER0);
-		flush_dcache_page(page);
-		SetPageUptodate(page);
+		zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
 		return 0;
 	}
 
@@ -450,6 +451,31 @@ out_uninit:
 }
 
 /**
+ * adjust_fs_space - Adjusts the free space available due to gfs2_grow
+ * @inode: the rindex inode
+ */
+static void adjust_fs_space(struct inode *inode)
+{
+	struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
+	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
+	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
+	u64 fs_total, new_free;
+
+	/* Total up the file system space, according to the latest rindex. */
+	fs_total = gfs2_ri_total(sdp);
+
+	spin_lock(&sdp->sd_statfs_spin);
+	if (fs_total > (m_sc->sc_total + l_sc->sc_total))
+		new_free = fs_total - (m_sc->sc_total + l_sc->sc_total);
+	else
+		new_free = 0;
+	spin_unlock(&sdp->sd_statfs_spin);
+	fs_warn(sdp, "File system extended by %llu blocks.\n",
+		(unsigned long long)new_free);
+	gfs2_statfs_change(sdp, new_free, new_free, 0);
+}
+
+/**
  * gfs2_commit_write - Commit write to a file
  * @file: The file to write to
  * @page: The page containing the data
@@ -511,6 +537,9 @@ static int gfs2_commit_write(struct file *file, struct page *page,
 		di->di_size = cpu_to_be64(inode->i_size);
 	}
 
+	if (inode == sdp->sd_rindex)
+		adjust_fs_space(inode);
+
 	brelse(dibh);
 	gfs2_trans_end(sdp);
 	if (al->al_requested) {
@@ -543,6 +572,23 @@ fail_nounlock:
 }
 
 /**
+ * gfs2_set_page_dirty - Page dirtying function
+ * @page: The page to dirty
+ *
+ * Returns: 1 if it dirtyed the page, or 0 otherwise
+ */
+ 
+static int gfs2_set_page_dirty(struct page *page)
+{
+	struct gfs2_inode *ip = GFS2_I(page->mapping->host);
+	struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
+
+	if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
+		SetPageChecked(page);
+	return __set_page_dirty_buffers(page);
+}
+
+/**
  * gfs2_bmap - Block map function
  * @mapping: Address space info
  * @lblock: The block to map
@@ -578,6 +624,8 @@ static void discard_buffer(struct gfs2_sbd *sdp, struct buffer_head *bh)
 	if (bd) {
 		bd->bd_bh = NULL;
 		bh->b_private = NULL;
+		if (!bd->bd_ail && list_empty(&bd->bd_le.le_list))
+			kmem_cache_free(gfs2_bufdata_cachep, bd);
 	}
 	gfs2_log_unlock(sdp);
 
@@ -598,6 +646,8 @@ static void gfs2_invalidatepage(struct page *page, unsigned long offset)
 	unsigned int curr_off = 0;
 
 	BUG_ON(!PageLocked(page));
+	if (offset == 0)
+		ClearPageChecked(page);
 	if (!page_has_buffers(page))
 		return;
 
@@ -728,8 +778,8 @@ static unsigned limit = 0;
 			return;
 
 		fs_warn(sdp, "ip = %llu %llu\n",
-			(unsigned long long)ip->i_num.no_formal_ino,
-			(unsigned long long)ip->i_num.no_addr);
+			(unsigned long long)ip->i_no_formal_ino,
+			(unsigned long long)ip->i_no_addr);
 
 		for (x = 0; x < GFS2_MAX_META_HEIGHT; x++)
 			fs_warn(sdp, "ip->i_cache[%u] = %s\n",
@@ -810,6 +860,7 @@ const struct address_space_operations gfs2_file_aops = {
 	.sync_page = block_sync_page,
 	.prepare_write = gfs2_prepare_write,
 	.commit_write = gfs2_commit_write,
+	.set_page_dirty = gfs2_set_page_dirty,
 	.bmap = gfs2_bmap,
 	.invalidatepage = gfs2_invalidatepage,
 	.releasepage = gfs2_releasepage,
diff --git a/fs/gfs2/ops_address.h b/fs/gfs2/ops_address.h
index 35aaee4aa7e..fa1b5b3d28b 100644
--- a/fs/gfs2/ops_address.h
+++ b/fs/gfs2/ops_address.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
  *
  * This copyrighted material is made available to anyone wishing to use,
  * modify, copy, or redistribute it subject to the terms and conditions
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
index a6fdc52f554..793e334d098 100644
--- a/fs/gfs2/ops_dentry.c
+++ b/fs/gfs2/ops_dentry.c
@@ -21,6 +21,7 @@
 #include "glock.h"
 #include "ops_dentry.h"
 #include "util.h"
+#include "inode.h"
 
 /**
  * gfs2_drevalidate - Check directory lookup consistency
@@ -40,14 +41,15 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
 	struct gfs2_inode *dip = GFS2_I(parent->d_inode);
 	struct inode *inode = dentry->d_inode;
 	struct gfs2_holder d_gh;
-	struct gfs2_inode *ip;
-	struct gfs2_inum_host inum;
-	unsigned int type;
+	struct gfs2_inode *ip = NULL;
 	int error;
 	int had_lock=0;
 
-	if (inode && is_bad_inode(inode))
-		goto invalid;
+	if (inode) {
+		if (is_bad_inode(inode))
+			goto invalid;
+		ip = GFS2_I(inode);
+	}
 
 	if (sdp->sd_args.ar_localcaching)
 		goto valid;
@@ -59,7 +61,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
 			goto fail;
 	} 
 
-	error = gfs2_dir_search(parent->d_inode, &dentry->d_name, &inum, &type);
+	error = gfs2_dir_check(parent->d_inode, &dentry->d_name, ip);
 	switch (error) {
 	case 0:
 		if (!inode)
@@ -73,16 +75,6 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
 		goto fail_gunlock;
 	}
 
-	ip = GFS2_I(inode);
-
-	if (!gfs2_inum_equal(&ip->i_num, &inum))
-		goto invalid_gunlock;
-
-	if (IF2DT(ip->i_inode.i_mode) != type) {
-		gfs2_consist_inode(dip);
-		goto fail_gunlock;
-	}
-
 valid_gunlock:
 	if (!had_lock)
 		gfs2_glock_dq_uninit(&d_gh);
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index aad918337a4..99ea5659bc2 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -22,10 +22,14 @@
 #include "glops.h"
 #include "inode.h"
 #include "ops_dentry.h"
-#include "ops_export.h"
+#include "ops_fstype.h"
 #include "rgrp.h"
 #include "util.h"
 
+#define GFS2_SMALL_FH_SIZE 4
+#define GFS2_LARGE_FH_SIZE 8
+#define GFS2_OLD_FH_SIZE 10
+
 static struct dentry *gfs2_decode_fh(struct super_block *sb,
 				     __u32 *p,
 				     int fh_len,
@@ -35,31 +39,28 @@ static struct dentry *gfs2_decode_fh(struct super_block *sb,
 				     void *context)
 {
 	__be32 *fh = (__force __be32 *)p;
-	struct gfs2_fh_obj fh_obj;
-	struct gfs2_inum_host *this, parent;
+	struct gfs2_inum_host inum, parent;
 
-	this 		= &fh_obj.this;
-	fh_obj.imode 	= DT_UNKNOWN;
 	memset(&parent, 0, sizeof(struct gfs2_inum));
 
 	switch (fh_len) {
 	case GFS2_LARGE_FH_SIZE:
+	case GFS2_OLD_FH_SIZE:
 		parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32;
 		parent.no_formal_ino |= be32_to_cpu(fh[5]);
 		parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32;
 		parent.no_addr |= be32_to_cpu(fh[7]);
-		fh_obj.imode = be32_to_cpu(fh[8]);
 	case GFS2_SMALL_FH_SIZE:
-		this->no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32;
-		this->no_formal_ino |= be32_to_cpu(fh[1]);
-		this->no_addr = ((u64)be32_to_cpu(fh[2])) << 32;
-		this->no_addr |= be32_to_cpu(fh[3]);
+		inum.no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32;
+		inum.no_formal_ino |= be32_to_cpu(fh[1]);
+		inum.no_addr = ((u64)be32_to_cpu(fh[2])) << 32;
+		inum.no_addr |= be32_to_cpu(fh[3]);
 		break;
 	default:
 		return NULL;
 	}
 
-	return gfs2_export_ops.find_exported_dentry(sb, &fh_obj, &parent,
+	return gfs2_export_ops.find_exported_dentry(sb, &inum, &parent,
 						    acceptable, context);
 }
 
@@ -75,10 +76,10 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
 	    (connectable && *len < GFS2_LARGE_FH_SIZE))
 		return 255;
 
-	fh[0] = cpu_to_be32(ip->i_num.no_formal_ino >> 32);
-	fh[1] = cpu_to_be32(ip->i_num.no_formal_ino & 0xFFFFFFFF);
-	fh[2] = cpu_to_be32(ip->i_num.no_addr >> 32);
-	fh[3] = cpu_to_be32(ip->i_num.no_addr & 0xFFFFFFFF);
+	fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32);
+	fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
+	fh[2] = cpu_to_be32(ip->i_no_addr >> 32);
+	fh[3] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF);
 	*len = GFS2_SMALL_FH_SIZE;
 
 	if (!connectable || inode == sb->s_root->d_inode)
@@ -90,13 +91,10 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
 	igrab(inode);
 	spin_unlock(&dentry->d_lock);
 
-	fh[4] = cpu_to_be32(ip->i_num.no_formal_ino >> 32);
-	fh[5] = cpu_to_be32(ip->i_num.no_formal_ino & 0xFFFFFFFF);
-	fh[6] = cpu_to_be32(ip->i_num.no_addr >> 32);
-	fh[7] = cpu_to_be32(ip->i_num.no_addr & 0xFFFFFFFF);
-
-	fh[8]  = cpu_to_be32(inode->i_mode);
-	fh[9]  = 0;	/* pad to double word */
+	fh[4] = cpu_to_be32(ip->i_no_formal_ino >> 32);
+	fh[5] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
+	fh[6] = cpu_to_be32(ip->i_no_addr >> 32);
+	fh[7] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF);
 	*len = GFS2_LARGE_FH_SIZE;
 
 	iput(inode);
@@ -144,7 +142,8 @@ static int gfs2_get_name(struct dentry *parent, char *name,
 	ip = GFS2_I(inode);
 
 	*name = 0;
-	gnfd.inum = ip->i_num;
+	gnfd.inum.no_addr = ip->i_no_addr;
+	gnfd.inum.no_formal_ino = ip->i_no_formal_ino;
 	gnfd.name = name;
 
 	error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh);
@@ -192,8 +191,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child)
 static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
-	struct gfs2_fh_obj *fh_obj = (struct gfs2_fh_obj *)inum_obj;
-	struct gfs2_inum_host *inum = &fh_obj->this;
+	struct gfs2_inum_host *inum = inum_obj;
 	struct gfs2_holder i_gh, ri_gh, rgd_gh;
 	struct gfs2_rgrpd *rgd;
 	struct inode *inode;
@@ -202,9 +200,9 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
 
 	/* System files? */
 
-	inode = gfs2_ilookup(sb, inum);
+	inode = gfs2_ilookup(sb, inum->no_addr);
 	if (inode) {
-		if (GFS2_I(inode)->i_num.no_formal_ino != inum->no_formal_ino) {
+		if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
 			iput(inode);
 			return ERR_PTR(-ESTALE);
 		}
@@ -236,7 +234,9 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
 	gfs2_glock_dq_uninit(&rgd_gh);
 	gfs2_glock_dq_uninit(&ri_gh);
 
-	inode = gfs2_inode_lookup(sb, inum, fh_obj->imode);
+	inode = gfs2_inode_lookup(sb, DT_UNKNOWN,
+					inum->no_addr,
+					0);
 	if (!inode)
 		goto fail;
 	if (IS_ERR(inode)) {
@@ -250,6 +250,15 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
 		goto fail;
 	}
 
+	/* Pick up the works we bypass in gfs2_inode_lookup */
+	if (inode->i_state & I_NEW) 
+		gfs2_set_iop(inode);
+
+	if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
+		iput(inode);
+		goto fail;
+	}
+
 	error = -EIO;
 	if (GFS2_I(inode)->i_di.di_flags & GFS2_DIF_SYSTEM) {
 		iput(inode);
diff --git a/fs/gfs2/ops_export.h b/fs/gfs2/ops_export.h
deleted file mode 100644
index f925a955b3b..00000000000
--- a/fs/gfs2/ops_export.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#ifndef __OPS_EXPORT_DOT_H__
-#define __OPS_EXPORT_DOT_H__
-
-#define GFS2_SMALL_FH_SIZE 4
-#define GFS2_LARGE_FH_SIZE 10
-
-extern struct export_operations gfs2_export_ops;
-struct gfs2_fh_obj {
-	struct gfs2_inum_host this;
-	__u32            imode;
-};
-
-#endif /* __OPS_EXPORT_DOT_H__ */
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 064df880458..196d83266e3 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -502,7 +502,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
 	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
 	struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
 	struct lm_lockname name =
-		{ .ln_number = ip->i_num.no_addr,
+		{ .ln_number = ip->i_no_addr,
 		  .ln_type = LM_TYPE_PLOCK };
 
 	if (!(fl->fl_flags & FL_POSIX))
@@ -557,7 +557,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
 		gfs2_glock_dq_uninit(fl_gh);
 	} else {
 		error = gfs2_glock_get(GFS2_SB(&ip->i_inode),
-				      ip->i_num.no_addr, &gfs2_flock_glops,
+				      ip->i_no_addr, &gfs2_flock_glops,
 				      CREATE, &gl);
 		if (error)
 			goto out;
@@ -635,7 +635,6 @@ const struct file_operations gfs2_file_fops = {
 	.release	= gfs2_close,
 	.fsync		= gfs2_fsync,
 	.lock		= gfs2_lock,
-	.sendfile	= generic_file_sendfile,
 	.flock		= gfs2_flock,
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= generic_file_splice_write,
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 2c5f8e7def0..cf5aa505054 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -27,7 +27,6 @@
 #include "inode.h"
 #include "lm.h"
 #include "mount.h"
-#include "ops_export.h"
 #include "ops_fstype.h"
 #include "ops_super.h"
 #include "recovery.h"
@@ -105,6 +104,7 @@ static void init_vfs(struct super_block *sb, unsigned noatime)
 	sb->s_magic = GFS2_MAGIC;
 	sb->s_op = &gfs2_super_ops;
 	sb->s_export_op = &gfs2_export_ops;
+	sb->s_time_gran = 1;
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
 
 	if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME))
@@ -116,7 +116,6 @@ static void init_vfs(struct super_block *sb, unsigned noatime)
 
 static int init_names(struct gfs2_sbd *sdp, int silent)
 {
-	struct page *page;
 	char *proto, *table;
 	int error = 0;
 
@@ -126,14 +125,9 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
 	/*  Try to autodetect  */
 
 	if (!proto[0] || !table[0]) {
-		struct gfs2_sb *sb;
-		page = gfs2_read_super(sdp->sd_vfs, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
-		if (!page)
-			return -ENOBUFS;
-		sb = kmap(page);
-		gfs2_sb_in(&sdp->sd_sb, sb);
-		kunmap(page);
-		__free_page(page);
+		error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
+		if (error)
+			return error;
 
 		error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
 		if (error)
@@ -151,6 +145,9 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
 	snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto);
 	snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table);
 
+	while ((table = strchr(sdp->sd_table_name, '/')))
+		*table = '_';
+
 out:
 	return error;
 }
@@ -236,17 +233,17 @@ fail:
 	return error;
 }
 
-static struct inode *gfs2_lookup_root(struct super_block *sb,
-				      struct gfs2_inum_host *inum)
+static inline struct inode *gfs2_lookup_root(struct super_block *sb,
+					     u64 no_addr)
 {
-	return gfs2_inode_lookup(sb, inum, DT_DIR);
+	return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0);
 }
 
 static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
 {
 	struct super_block *sb = sdp->sd_vfs;
 	struct gfs2_holder sb_gh;
-	struct gfs2_inum_host *inum;
+	u64 no_addr;
 	struct inode *inode;
 	int error = 0;
 
@@ -289,10 +286,10 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
 	sb_set_blocksize(sb, sdp->sd_sb.sb_bsize);
 
 	/* Get the root inode */
-	inum = &sdp->sd_sb.sb_root_dir;
+	no_addr = sdp->sd_sb.sb_root_dir.no_addr;
 	if (sb->s_type == &gfs2meta_fs_type)
-		inum = &sdp->sd_sb.sb_master_dir;
-	inode = gfs2_lookup_root(sb, inum);
+		no_addr = sdp->sd_sb.sb_master_dir.no_addr;
+	inode = gfs2_lookup_root(sb, no_addr);
 	if (IS_ERR(inode)) {
 		error = PTR_ERR(inode);
 		fs_err(sdp, "can't read in root inode: %d\n", error);
@@ -449,7 +446,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
 	if (undo)
 		goto fail_qinode;
 
-	inode = gfs2_lookup_root(sdp->sd_vfs, &sdp->sd_sb.sb_master_dir);
+	inode = gfs2_lookup_root(sdp->sd_vfs, sdp->sd_sb.sb_master_dir.no_addr);
 	if (IS_ERR(inode)) {
 		error = PTR_ERR(inode);
 		fs_err(sdp, "can't read in master directory: %d\n", error);
diff --git a/fs/gfs2/ops_fstype.h b/fs/gfs2/ops_fstype.h
index 7cc2c296271..407029b3b2b 100644
--- a/fs/gfs2/ops_fstype.h
+++ b/fs/gfs2/ops_fstype.h
@@ -14,5 +14,6 @@
 
 extern struct file_system_type gfs2_fs_type;
 extern struct file_system_type gfs2meta_fs_type;
+extern struct export_operations gfs2_export_ops;
 
 #endif /* __OPS_FSTYPE_DOT_H__ */
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index d85f6e05cb9..911c115b5c6 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -157,7 +157,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 	if (error)
 		goto out_gunlock;
 
-	error = gfs2_dir_search(dir, &dentry->d_name, NULL, NULL);
+	error = gfs2_dir_check(dir, &dentry->d_name, NULL);
 	switch (error) {
 	case -ENOENT:
 		break;
@@ -206,7 +206,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 			goto out_gunlock_q;
 
 		error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
-					 al->al_rgd->rd_ri.ri_length +
+					 al->al_rgd->rd_length +
 					 2 * RES_DINODE + RES_STATFS +
 					 RES_QUOTA, 0);
 		if (error)
@@ -217,8 +217,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 			goto out_ipres;
 	}
 
-	error = gfs2_dir_add(dir, &dentry->d_name, &ip->i_num,
-			     IF2DT(inode->i_mode));
+	error = gfs2_dir_add(dir, &dentry->d_name, ip, IF2DT(inode->i_mode));
 	if (error)
 		goto out_end_trans;
 
@@ -275,7 +274,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
 	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
 	gfs2_holder_init(ip->i_gl,  LM_ST_EXCLUSIVE, 0, ghs + 1);
 
-	rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
+	rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
 	gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
 
 
@@ -420,7 +419,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 		dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
 		gfs2_qstr2dirent(&str, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);
 
-		gfs2_inum_out(&dip->i_num, &dent->de_inum);
+		gfs2_inum_out(dip, dent);
 		dent->de_type = cpu_to_be16(DT_DIR);
 
 		gfs2_dinode_out(ip, di);
@@ -472,7 +471,7 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
 	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
 
-	rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
+	rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
 	gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
 
 	error = gfs2_glock_nq_m(3, ghs);
@@ -614,7 +613,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 		 * this is the case of the target file already existing
 		 * so we unlink before doing the rename
 		 */
-		nrgd = gfs2_blk2rgrpd(sdp, nip->i_num.no_addr);
+		nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr);
 		if (nrgd)
 			gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
 	}
@@ -653,7 +652,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 		if (error)
 			goto out_gunlock;
 
-		error = gfs2_dir_search(ndir, &ndentry->d_name, NULL, NULL);
+		error = gfs2_dir_check(ndir, &ndentry->d_name, NULL);
 		switch (error) {
 		case -ENOENT:
 			error = 0;
@@ -712,7 +711,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 			goto out_gunlock_q;
 
 		error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
-					 al->al_rgd->rd_ri.ri_length +
+					 al->al_rgd->rd_length +
 					 4 * RES_DINODE + 4 * RES_LEAF +
 					 RES_STATFS + RES_QUOTA + 4, 0);
 		if (error)
@@ -750,7 +749,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 		if (error)
 			goto out_end_trans;
 
-		error = gfs2_dir_mvino(ip, &name, &ndip->i_num, DT_DIR);
+		error = gfs2_dir_mvino(ip, &name, ndip, DT_DIR);
 		if (error)
 			goto out_end_trans;
 	} else {
@@ -758,7 +757,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 		error = gfs2_meta_inode_buffer(ip, &dibh);
 		if (error)
 			goto out_end_trans;
-		ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+		ip->i_inode.i_ctime = CURRENT_TIME;
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
@@ -768,8 +767,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 	if (error)
 		goto out_end_trans;
 
-	error = gfs2_dir_add(ndir, &ndentry->d_name, &ip->i_num,
-			     IF2DT(ip->i_inode.i_mode));
+	error = gfs2_dir_add(ndir, &ndentry->d_name, ip, IF2DT(ip->i_inode.i_mode));
 	if (error)
 		goto out_end_trans;
 
@@ -905,8 +903,8 @@ static int setattr_size(struct inode *inode, struct iattr *attr)
 	}
 
 	error = gfs2_truncatei(ip, attr->ia_size);
-	if (error)
-		return error;
+	if (error && (inode->i_size != ip->i_di.di_size))
+		i_size_write(inode, ip->i_di.di_size);
 
 	return error;
 }
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 485ce3d4992..603d940f115 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -326,8 +326,10 @@ static void gfs2_clear_inode(struct inode *inode)
 		gfs2_glock_schedule_for_reclaim(ip->i_gl);
 		gfs2_glock_put(ip->i_gl);
 		ip->i_gl = NULL;
-		if (ip->i_iopen_gh.gh_gl)
+		if (ip->i_iopen_gh.gh_gl) {
+			ip->i_iopen_gh.gh_gl->gl_object = NULL;
 			gfs2_glock_dq_uninit(&ip->i_iopen_gh);
+		}
 	}
 }
 
@@ -422,13 +424,13 @@ static void gfs2_delete_inode(struct inode *inode)
 	if (!inode->i_private)
 		goto out;
 
-	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB, &gh);
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
 	if (unlikely(error)) {
 		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
 		goto out;
 	}
 
-	gfs2_glock_dq(&ip->i_iopen_gh);
+	gfs2_glock_dq_wait(&ip->i_iopen_gh);
 	gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
 	error = gfs2_glock_nq(&ip->i_iopen_gh);
 	if (error)
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c
index aa0dbd2aac1..404b7cc9f8c 100644
--- a/fs/gfs2/ops_vm.c
+++ b/fs/gfs2/ops_vm.c
@@ -66,7 +66,7 @@ static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
 	if (error)
 		goto out_gunlock_q;
 
-	error = gfs2_trans_begin(sdp, al->al_rgd->rd_ri.ri_length +
+	error = gfs2_trans_begin(sdp, al->al_rgd->rd_length +
 				 ind_blocks + RES_DINODE +
 				 RES_STATFS + RES_QUOTA, 0);
 	if (error)
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index c186857e48a..6e546ee8f3d 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -66,6 +66,18 @@
 #define QUOTA_USER 1
 #define QUOTA_GROUP 0
 
+struct gfs2_quota_host {
+	u64 qu_limit;
+	u64 qu_warn;
+	s64 qu_value;
+};
+
+struct gfs2_quota_change_host {
+	u64 qc_change;
+	u32 qc_flags; /* GFS2_QCF_... */
+	u32 qc_id;
+};
+
 static u64 qd2offset(struct gfs2_quota_data *qd)
 {
 	u64 offset;
@@ -561,6 +573,25 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change)
 	mutex_unlock(&sdp->sd_quota_mutex);
 }
 
+static void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf)
+{
+	const struct gfs2_quota *str = buf;
+
+	qu->qu_limit = be64_to_cpu(str->qu_limit);
+	qu->qu_warn = be64_to_cpu(str->qu_warn);
+	qu->qu_value = be64_to_cpu(str->qu_value);
+}
+
+static void gfs2_quota_out(const struct gfs2_quota_host *qu, void *buf)
+{
+	struct gfs2_quota *str = buf;
+
+	str->qu_limit = cpu_to_be64(qu->qu_limit);
+	str->qu_warn = cpu_to_be64(qu->qu_warn);
+	str->qu_value = cpu_to_be64(qu->qu_value);
+	memset(&str->qu_reserved, 0, sizeof(str->qu_reserved));
+}
+
 /**
  * gfs2_adjust_quota
  *
@@ -573,12 +604,13 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
 	struct inode *inode = &ip->i_inode;
 	struct address_space *mapping = inode->i_mapping;
 	unsigned long index = loc >> PAGE_CACHE_SHIFT;
-	unsigned offset = loc & (PAGE_CACHE_SHIFT - 1);
+	unsigned offset = loc & (PAGE_CACHE_SIZE - 1);
 	unsigned blocksize, iblock, pos;
 	struct buffer_head *bh;
 	struct page *page;
 	void *kaddr;
-	__be64 *ptr;
+	char *ptr;
+	struct gfs2_quota_host qp;
 	s64 value;
 	int err = -EIO;
 
@@ -620,13 +652,17 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
 
 	kaddr = kmap_atomic(page, KM_USER0);
 	ptr = kaddr + offset;
-	value = (s64)be64_to_cpu(*ptr) + change;
-	*ptr = cpu_to_be64(value);
+	gfs2_quota_in(&qp, ptr);
+	qp.qu_value += change;
+	value = qp.qu_value;
+	gfs2_quota_out(&qp, ptr);
 	flush_dcache_page(page);
 	kunmap_atomic(kaddr, KM_USER0);
 	err = 0;
 	qd->qd_qb.qb_magic = cpu_to_be32(GFS2_MAGIC);
 	qd->qd_qb.qb_value = cpu_to_be64(value);
+	((struct gfs2_quota_lvb*)(qd->qd_gl->gl_lvb))->qb_magic = cpu_to_be32(GFS2_MAGIC);
+	((struct gfs2_quota_lvb*)(qd->qd_gl->gl_lvb))->qb_value = cpu_to_be64(value);
 unlock:
 	unlock_page(page);
 	page_cache_release(page);
@@ -689,7 +725,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
 			goto out_alloc;
 
 		error = gfs2_trans_begin(sdp,
-					 al->al_rgd->rd_ri.ri_length +
+					 al->al_rgd->rd_length +
 					 num_qd * data_blocks +
 					 nalloc * ind_blocks +
 					 RES_DINODE + num_qd +
@@ -709,7 +745,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
 		offset = qd2offset(qd);
 		error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync,
 					  (struct gfs2_quota_data *)
-					  qd->qd_gl->gl_lvb);
+					  qd);
 		if (error)
 			goto out_end_trans;
 
@@ -1050,6 +1086,15 @@ int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id)
 	return error;
 }
 
+static void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf)
+{
+	const struct gfs2_quota_change *str = buf;
+
+	qc->qc_change = be64_to_cpu(str->qc_change);
+	qc->qc_flags = be32_to_cpu(str->qc_flags);
+	qc->qc_id = be32_to_cpu(str->qc_id);
+}
+
 int gfs2_quota_init(struct gfs2_sbd *sdp)
 {
 	struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 8bc182c7e2e..5ada38c99a2 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -116,6 +116,22 @@ void gfs2_revoke_clean(struct gfs2_sbd *sdp)
 	}
 }
 
+static int gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
+{
+	const struct gfs2_log_header *str = buf;
+
+	if (str->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) ||
+	    str->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH))
+		return 1;
+
+	lh->lh_sequence = be64_to_cpu(str->lh_sequence);
+	lh->lh_flags = be32_to_cpu(str->lh_flags);
+	lh->lh_tail = be32_to_cpu(str->lh_tail);
+	lh->lh_blkno = be32_to_cpu(str->lh_blkno);
+	lh->lh_hash = be32_to_cpu(str->lh_hash);
+	return 0;
+}
+
 /**
  * get_log_header - read the log header for a given segment
  * @jd: the journal
@@ -147,12 +163,10 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
 					     sizeof(u32));
 	hash = crc32_le(hash, (unsigned char const *)&nothing, sizeof(nothing));
 	hash ^= (u32)~0;
-	gfs2_log_header_in(&lh, bh->b_data);
+	error = gfs2_log_header_in(&lh, bh->b_data);
 	brelse(bh);
 
-	if (lh.lh_header.mh_magic != GFS2_MAGIC ||
-	    lh.lh_header.mh_type != GFS2_METATYPE_LH ||
-	    lh.lh_blkno != blk || lh.lh_hash != hash)
+	if (error || lh.lh_blkno != blk || lh.lh_hash != hash)
 		return 1;
 
 	*head = lh;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 1727f5012ef..e4e04062515 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
  *
  * This copyrighted material is made available to anyone wishing to use,
  * modify, copy, or redistribute it subject to the terms and conditions
@@ -28,6 +28,7 @@
 #include "ops_file.h"
 #include "util.h"
 #include "log.h"
+#include "inode.h"
 
 #define BFITNOENT ((u32)~0)
 
@@ -50,6 +51,9 @@ static const char valid_change[16] = {
 	        1, 0, 0, 0
 };
 
+static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
+                        unsigned char old_state, unsigned char new_state);
+
 /**
  * gfs2_setbit - Set a bit in the bitmaps
  * @buffer: the buffer that holds the bitmaps
@@ -204,7 +208,7 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
 {
 	struct gfs2_sbd *sdp = rgd->rd_sbd;
 	struct gfs2_bitmap *bi = NULL;
-	u32 length = rgd->rd_ri.ri_length;
+	u32 length = rgd->rd_length;
 	u32 count[4], tmp;
 	int buf, x;
 
@@ -227,7 +231,7 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
 		return;
 	}
 
-	tmp = rgd->rd_ri.ri_data -
+	tmp = rgd->rd_data -
 		rgd->rd_rg.rg_free -
 		rgd->rd_rg.rg_dinodes;
 	if (count[1] + count[2] != tmp) {
@@ -253,10 +257,10 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
 
 }
 
-static inline int rgrp_contains_block(struct gfs2_rindex_host *ri, u64 block)
+static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
 {
-	u64 first = ri->ri_data0;
-	u64 last = first + ri->ri_data;
+	u64 first = rgd->rd_data0;
+	u64 last = first + rgd->rd_data;
 	return first <= block && block < last;
 }
 
@@ -275,7 +279,7 @@ struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk)
 	spin_lock(&sdp->sd_rindex_spin);
 
 	list_for_each_entry(rgd, &sdp->sd_rindex_mru_list, rd_list_mru) {
-		if (rgrp_contains_block(&rgd->rd_ri, blk)) {
+		if (rgrp_contains_block(rgd, blk)) {
 			list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
 			spin_unlock(&sdp->sd_rindex_spin);
 			return rgd;
@@ -354,6 +358,15 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
 	mutex_unlock(&sdp->sd_rindex_mutex);
 }
 
+static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
+{
+	printk(KERN_INFO "  ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
+	printk(KERN_INFO "  ri_length = %u\n", rgd->rd_length);
+	printk(KERN_INFO "  ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
+	printk(KERN_INFO "  ri_data = %u\n", rgd->rd_data);
+	printk(KERN_INFO "  ri_bitbytes = %u\n", rgd->rd_bitbytes);
+}
+
 /**
  * gfs2_compute_bitstructs - Compute the bitmap sizes
  * @rgd: The resource group descriptor
@@ -367,7 +380,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
 {
 	struct gfs2_sbd *sdp = rgd->rd_sbd;
 	struct gfs2_bitmap *bi;
-	u32 length = rgd->rd_ri.ri_length; /* # blocks in hdr & bitmap */
+	u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
 	u32 bytes_left, bytes;
 	int x;
 
@@ -378,7 +391,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
 	if (!rgd->rd_bits)
 		return -ENOMEM;
 
-	bytes_left = rgd->rd_ri.ri_bitbytes;
+	bytes_left = rgd->rd_bitbytes;
 
 	for (x = 0; x < length; x++) {
 		bi = rgd->rd_bits + x;
@@ -399,14 +412,14 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
 		} else if (x + 1 == length) {
 			bytes = bytes_left;
 			bi->bi_offset = sizeof(struct gfs2_meta_header);
-			bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
+			bi->bi_start = rgd->rd_bitbytes - bytes_left;
 			bi->bi_len = bytes;
 		/* other blocks */
 		} else {
 			bytes = sdp->sd_sb.sb_bsize -
 				sizeof(struct gfs2_meta_header);
 			bi->bi_offset = sizeof(struct gfs2_meta_header);
-			bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
+			bi->bi_start = rgd->rd_bitbytes - bytes_left;
 			bi->bi_len = bytes;
 		}
 
@@ -418,9 +431,9 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
 		return -EIO;
 	}
 	bi = rgd->rd_bits + (length - 1);
-	if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_ri.ri_data) {
+	if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) {
 		if (gfs2_consist_rgrpd(rgd)) {
-			gfs2_rindex_print(&rgd->rd_ri);
+			gfs2_rindex_print(rgd);
 			fs_err(sdp, "start=%u len=%u offset=%u\n",
 			       bi->bi_start, bi->bi_len, bi->bi_offset);
 		}
@@ -431,9 +444,104 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
 }
 
 /**
- * gfs2_ri_update - Pull in a new resource index from the disk
+ * gfs2_ri_total - Total up the file system space, according to the rindex.
+ *
+ */
+u64 gfs2_ri_total(struct gfs2_sbd *sdp)
+{
+	u64 total_data = 0;	
+	struct inode *inode = sdp->sd_rindex;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	char buf[sizeof(struct gfs2_rindex)];
+	struct file_ra_state ra_state;
+	int error, rgrps;
+
+	mutex_lock(&sdp->sd_rindex_mutex);
+	file_ra_state_init(&ra_state, inode->i_mapping);
+	for (rgrps = 0;; rgrps++) {
+		loff_t pos = rgrps * sizeof(struct gfs2_rindex);
+
+		if (pos + sizeof(struct gfs2_rindex) >= ip->i_di.di_size)
+			break;
+		error = gfs2_internal_read(ip, &ra_state, buf, &pos,
+					   sizeof(struct gfs2_rindex));
+		if (error != sizeof(struct gfs2_rindex))
+			break;
+		total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
+	}
+	mutex_unlock(&sdp->sd_rindex_mutex);
+	return total_data;
+}
+
+static void gfs2_rindex_in(struct gfs2_rgrpd *rgd, const void *buf)
+{
+	const struct gfs2_rindex *str = buf;
+
+	rgd->rd_addr = be64_to_cpu(str->ri_addr);
+	rgd->rd_length = be32_to_cpu(str->ri_length);
+	rgd->rd_data0 = be64_to_cpu(str->ri_data0);
+	rgd->rd_data = be32_to_cpu(str->ri_data);
+	rgd->rd_bitbytes = be32_to_cpu(str->ri_bitbytes);
+}
+
+/**
+ * read_rindex_entry - Pull in a new resource index entry from the disk
  * @gl: The glock covering the rindex inode
  *
+ * Returns: 0 on success, error code otherwise
+ */
+
+static int read_rindex_entry(struct gfs2_inode *ip,
+			     struct file_ra_state *ra_state)
+{
+	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+	loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
+	char buf[sizeof(struct gfs2_rindex)];
+	int error;
+	struct gfs2_rgrpd *rgd;
+
+	error = gfs2_internal_read(ip, ra_state, buf, &pos,
+				   sizeof(struct gfs2_rindex));
+	if (!error)
+		return 0;
+	if (error != sizeof(struct gfs2_rindex)) {
+		if (error > 0)
+			error = -EIO;
+		return error;
+	}
+
+	rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_NOFS);
+	error = -ENOMEM;
+	if (!rgd)
+		return error;
+
+	mutex_init(&rgd->rd_mutex);
+	lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
+	rgd->rd_sbd = sdp;
+
+	list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
+	list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
+
+	gfs2_rindex_in(rgd, buf);
+	error = compute_bitstructs(rgd);
+	if (error)
+		return error;
+
+	error = gfs2_glock_get(sdp, rgd->rd_addr,
+			       &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
+	if (error)
+		return error;
+
+	rgd->rd_gl->gl_object = rgd;
+	rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1;
+	rgd->rd_flags |= GFS2_RDF_CHECK;
+	return error;
+}
+
+/**
+ * gfs2_ri_update - Pull in a new resource index from the disk
+ * @ip: pointer to the rindex inode
+ *
  * Returns: 0 on successful update, error code otherwise
  */
 
@@ -441,13 +549,11 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct inode *inode = &ip->i_inode;
-	struct gfs2_rgrpd *rgd;
-	char buf[sizeof(struct gfs2_rindex)];
 	struct file_ra_state ra_state;
-	u64 junk = ip->i_di.di_size;
+	u64 rgrp_count = ip->i_di.di_size;
 	int error;
 
-	if (do_div(junk, sizeof(struct gfs2_rindex))) {
+	if (do_div(rgrp_count, sizeof(struct gfs2_rindex))) {
 		gfs2_consist_inode(ip);
 		return -EIO;
 	}
@@ -455,50 +561,50 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
 	clear_rgrpdi(sdp);
 
 	file_ra_state_init(&ra_state, inode->i_mapping);
-	for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
-		loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
-		error = gfs2_internal_read(ip, &ra_state, buf, &pos,
-					    sizeof(struct gfs2_rindex));
-		if (!error)
-			break;
-		if (error != sizeof(struct gfs2_rindex)) {
-			if (error > 0)
-				error = -EIO;
-			goto fail;
+	for (sdp->sd_rgrps = 0; sdp->sd_rgrps < rgrp_count; sdp->sd_rgrps++) {
+		error = read_rindex_entry(ip, &ra_state);
+		if (error) {
+			clear_rgrpdi(sdp);
+			return error;
 		}
+	}
 
-		rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_NOFS);
-		error = -ENOMEM;
-		if (!rgd)
-			goto fail;
-
-		mutex_init(&rgd->rd_mutex);
-		lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
-		rgd->rd_sbd = sdp;
-
-		list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
-		list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
-
-		gfs2_rindex_in(&rgd->rd_ri, buf);
-		error = compute_bitstructs(rgd);
-		if (error)
-			goto fail;
+	sdp->sd_rindex_vn = ip->i_gl->gl_vn;
+	return 0;
+}
 
-		error = gfs2_glock_get(sdp, rgd->rd_ri.ri_addr,
-				       &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
-		if (error)
-			goto fail;
+/**
+ * gfs2_ri_update_special - Pull in a new resource index from the disk
+ *
+ * This is a special version that's safe to call from gfs2_inplace_reserve_i.
+ * In this case we know that we don't have any resource groups in memory yet.
+ *
+ * @ip: pointer to the rindex inode
+ *
+ * Returns: 0 on successful update, error code otherwise
+ */
+static int gfs2_ri_update_special(struct gfs2_inode *ip)
+{
+	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+	struct inode *inode = &ip->i_inode;
+	struct file_ra_state ra_state;
+	int error;
 
-		rgd->rd_gl->gl_object = rgd;
-		rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1;
+	file_ra_state_init(&ra_state, inode->i_mapping);
+	for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
+		/* Ignore partials */
+		if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) >
+		    ip->i_di.di_size)
+			break;
+		error = read_rindex_entry(ip, &ra_state);
+		if (error) {
+			clear_rgrpdi(sdp);
+			return error;
+		}
 	}
 
 	sdp->sd_rindex_vn = ip->i_gl->gl_vn;
 	return 0;
-
-fail:
-	clear_rgrpdi(sdp);
-	return error;
 }
 
 /**
@@ -543,6 +649,28 @@ int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh)
 	return error;
 }
 
+static void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf)
+{
+	const struct gfs2_rgrp *str = buf;
+
+	rg->rg_flags = be32_to_cpu(str->rg_flags);
+	rg->rg_free = be32_to_cpu(str->rg_free);
+	rg->rg_dinodes = be32_to_cpu(str->rg_dinodes);
+	rg->rg_igeneration = be64_to_cpu(str->rg_igeneration);
+}
+
+static void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf)
+{
+	struct gfs2_rgrp *str = buf;
+
+	str->rg_flags = cpu_to_be32(rg->rg_flags);
+	str->rg_free = cpu_to_be32(rg->rg_free);
+	str->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
+	str->__pad = cpu_to_be32(0);
+	str->rg_igeneration = cpu_to_be64(rg->rg_igeneration);
+	memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
+}
+
 /**
  * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
  * @rgd: the struct gfs2_rgrpd describing the RG to read in
@@ -557,7 +685,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
 {
 	struct gfs2_sbd *sdp = rgd->rd_sbd;
 	struct gfs2_glock *gl = rgd->rd_gl;
-	unsigned int length = rgd->rd_ri.ri_length;
+	unsigned int length = rgd->rd_length;
 	struct gfs2_bitmap *bi;
 	unsigned int x, y;
 	int error;
@@ -575,7 +703,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
 
 	for (x = 0; x < length; x++) {
 		bi = rgd->rd_bits + x;
-		error = gfs2_meta_read(gl, rgd->rd_ri.ri_addr + x, 0, &bi->bi_bh);
+		error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
 		if (error)
 			goto fail;
 	}
@@ -637,7 +765,7 @@ void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd)
 void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
 {
 	struct gfs2_sbd *sdp = rgd->rd_sbd;
-	int x, length = rgd->rd_ri.ri_length;
+	int x, length = rgd->rd_length;
 
 	spin_lock(&sdp->sd_rindex_spin);
 	gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
@@ -660,7 +788,7 @@ void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
 void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
 {
 	struct gfs2_sbd *sdp = rgd->rd_sbd;
-	unsigned int length = rgd->rd_ri.ri_length;
+	unsigned int length = rgd->rd_length;
 	unsigned int x;
 
 	for (x = 0; x < length; x++) {
@@ -722,6 +850,38 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
 }
 
 /**
+ * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
+ * @rgd: The rgrp
+ *
+ * Returns: The inode, if one has been found
+ */
+
+static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked)
+{
+	struct inode *inode;
+	u32 goal = 0;
+	u64 no_addr;
+
+	for(;;) {
+		goal = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
+				    GFS2_BLKST_UNLINKED);
+		if (goal == 0)
+			return 0;
+		no_addr = goal + rgd->rd_data0;
+		if (no_addr <= *last_unlinked)
+			continue;
+		*last_unlinked = no_addr;
+		inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN,
+					no_addr, -1);
+		if (!IS_ERR(inode))
+			return inode;
+	}
+
+	rgd->rd_flags &= ~GFS2_RDF_CHECK;
+	return NULL;
+}
+
+/**
  * recent_rgrp_first - get first RG from "recent" list
  * @sdp: The GFS2 superblock
  * @rglast: address of the rgrp used last
@@ -743,7 +903,7 @@ static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp,
 		goto first;
 
 	list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
-		if (rgd->rd_ri.ri_addr == rglast)
+		if (rgd->rd_addr == rglast)
 			goto out;
 	}
 
@@ -882,8 +1042,9 @@ static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd)
  * Returns: errno
  */
 
-static int get_local_rgrp(struct gfs2_inode *ip)
+static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
 {
+	struct inode *inode = NULL;
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_rgrpd *rgd, *begin = NULL;
 	struct gfs2_alloc *al = &ip->i_alloc;
@@ -903,7 +1064,11 @@ static int get_local_rgrp(struct gfs2_inode *ip)
 		case 0:
 			if (try_rgrp_fit(rgd, al))
 				goto out;
+			if (rgd->rd_flags & GFS2_RDF_CHECK)
+				inode = try_rgrp_unlink(rgd, last_unlinked);
 			gfs2_glock_dq_uninit(&al->al_rgd_gh);
+			if (inode)
+				return inode;
 			rgd = recent_rgrp_next(rgd, 1);
 			break;
 
@@ -912,7 +1077,7 @@ static int get_local_rgrp(struct gfs2_inode *ip)
 			break;
 
 		default:
-			return error;
+			return ERR_PTR(error);
 		}
 	}
 
@@ -927,7 +1092,11 @@ static int get_local_rgrp(struct gfs2_inode *ip)
 		case 0:
 			if (try_rgrp_fit(rgd, al))
 				goto out;
+			if (rgd->rd_flags & GFS2_RDF_CHECK)
+				inode = try_rgrp_unlink(rgd, last_unlinked);
 			gfs2_glock_dq_uninit(&al->al_rgd_gh);
+			if (inode)
+				return inode;
 			break;
 
 		case GLR_TRYFAILED:
@@ -935,7 +1104,7 @@ static int get_local_rgrp(struct gfs2_inode *ip)
 			break;
 
 		default:
-			return error;
+			return ERR_PTR(error);
 		}
 
 		rgd = gfs2_rgrpd_get_next(rgd);
@@ -944,7 +1113,7 @@ static int get_local_rgrp(struct gfs2_inode *ip)
 
 		if (rgd == begin) {
 			if (++loops >= 3)
-				return -ENOSPC;
+				return ERR_PTR(-ENOSPC);
 			if (!skipped)
 				loops++;
 			flags = 0;
@@ -954,7 +1123,7 @@ static int get_local_rgrp(struct gfs2_inode *ip)
 	}
 
 out:
-	ip->i_last_rg_alloc = rgd->rd_ri.ri_addr;
+	ip->i_last_rg_alloc = rgd->rd_addr;
 
 	if (begin) {
 		recent_rgrp_add(rgd);
@@ -964,7 +1133,7 @@ out:
 		forward_rgrp_set(sdp, rgd);
 	}
 
-	return 0;
+	return NULL;
 }
 
 /**
@@ -978,19 +1147,33 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_alloc *al = &ip->i_alloc;
-	int error;
+	struct inode *inode;
+	int error = 0;
+	u64 last_unlinked = 0;
 
 	if (gfs2_assert_warn(sdp, al->al_requested))
 		return -EINVAL;
 
-	error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
+try_again:
+	/* We need to hold the rindex unless the inode we're using is
+	   the rindex itself, in which case it's already held. */
+	if (ip != GFS2_I(sdp->sd_rindex))
+		error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
+	else if (!sdp->sd_rgrps) /* We may not have the rindex read in, so: */
+		error = gfs2_ri_update_special(ip);
+
 	if (error)
 		return error;
 
-	error = get_local_rgrp(ip);
-	if (error) {
-		gfs2_glock_dq_uninit(&al->al_ri_gh);
-		return error;
+	inode = get_local_rgrp(ip, &last_unlinked);
+	if (inode) {
+		if (ip != GFS2_I(sdp->sd_rindex))
+			gfs2_glock_dq_uninit(&al->al_ri_gh);
+		if (IS_ERR(inode))
+			return PTR_ERR(inode);
+		iput(inode);
+		gfs2_log_flush(sdp, NULL);
+		goto try_again;
 	}
 
 	al->al_file = file;
@@ -1019,7 +1202,8 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
 
 	al->al_rgd = NULL;
 	gfs2_glock_dq_uninit(&al->al_rgd_gh);
-	gfs2_glock_dq_uninit(&al->al_ri_gh);
+	if (ip != GFS2_I(sdp->sd_rindex))
+		gfs2_glock_dq_uninit(&al->al_ri_gh);
 }
 
 /**
@@ -1037,8 +1221,8 @@ unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
 	unsigned int buf;
 	unsigned char type;
 
-	length = rgd->rd_ri.ri_length;
-	rgrp_block = block - rgd->rd_ri.ri_data0;
+	length = rgd->rd_length;
+	rgrp_block = block - rgd->rd_data0;
 
 	for (buf = 0; buf < length; buf++) {
 		bi = rgd->rd_bits + buf;
@@ -1077,10 +1261,10 @@ unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
  */
 
 static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
-			     unsigned char old_state, unsigned char new_state)
+			unsigned char old_state, unsigned char new_state)
 {
 	struct gfs2_bitmap *bi = NULL;
-	u32 length = rgd->rd_ri.ri_length;
+	u32 length = rgd->rd_length;
 	u32 blk = 0;
 	unsigned int buf, x;
 
@@ -1118,17 +1302,18 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
 		goal = 0;
 	}
 
-	if (gfs2_assert_withdraw(rgd->rd_sbd, x <= length))
-		blk = 0;
+	if (old_state != new_state) {
+		gfs2_assert_withdraw(rgd->rd_sbd, blk != BFITNOENT);
 
-	gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
-	gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
-		    bi->bi_len, blk, new_state);
-	if (bi->bi_clone)
-		gfs2_setbit(rgd, bi->bi_clone + bi->bi_offset,
+		gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
+		gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
 			    bi->bi_len, blk, new_state);
+		if (bi->bi_clone)
+			gfs2_setbit(rgd, bi->bi_clone + bi->bi_offset,
+				    bi->bi_len, blk, new_state);
+	}
 
-	return bi->bi_start * GFS2_NBBY + blk;
+	return (blk == BFITNOENT) ? 0 : (bi->bi_start * GFS2_NBBY) + blk;
 }
 
 /**
@@ -1156,9 +1341,9 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
 		return NULL;
 	}
 
-	length = rgd->rd_ri.ri_length;
+	length = rgd->rd_length;
 
-	rgrp_blk = bstart - rgd->rd_ri.ri_data0;
+	rgrp_blk = bstart - rgd->rd_data0;
 
 	while (blen--) {
 		for (buf = 0; buf < length; buf++) {
@@ -1202,15 +1387,15 @@ u64 gfs2_alloc_data(struct gfs2_inode *ip)
 	u32 goal, blk;
 	u64 block;
 
-	if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_data))
-		goal = ip->i_di.di_goal_data - rgd->rd_ri.ri_data0;
+	if (rgrp_contains_block(rgd, ip->i_di.di_goal_data))
+		goal = ip->i_di.di_goal_data - rgd->rd_data0;
 	else
 		goal = rgd->rd_last_alloc_data;
 
 	blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED);
 	rgd->rd_last_alloc_data = blk;
 
-	block = rgd->rd_ri.ri_data0 + blk;
+	block = rgd->rd_data0 + blk;
 	ip->i_di.di_goal_data = block;
 
 	gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
@@ -1246,15 +1431,15 @@ u64 gfs2_alloc_meta(struct gfs2_inode *ip)
 	u32 goal, blk;
 	u64 block;
 
-	if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_meta))
-		goal = ip->i_di.di_goal_meta - rgd->rd_ri.ri_data0;
+	if (rgrp_contains_block(rgd, ip->i_di.di_goal_meta))
+		goal = ip->i_di.di_goal_meta - rgd->rd_data0;
 	else
 		goal = rgd->rd_last_alloc_meta;
 
 	blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED);
 	rgd->rd_last_alloc_meta = blk;
 
-	block = rgd->rd_ri.ri_data0 + blk;
+	block = rgd->rd_data0 + blk;
 	ip->i_di.di_goal_meta = block;
 
 	gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
@@ -1296,7 +1481,7 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
 
 	rgd->rd_last_alloc_meta = blk;
 
-	block = rgd->rd_ri.ri_data0 + blk;
+	block = rgd->rd_data0 + blk;
 
 	gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
 	rgd->rd_rg.rg_free--;
@@ -1379,7 +1564,7 @@ void gfs2_unlink_di(struct inode *inode)
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct gfs2_rgrpd *rgd;
-	u64 blkno = ip->i_num.no_addr;
+	u64 blkno = ip->i_no_addr;
 
 	rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
 	if (!rgd)
@@ -1414,9 +1599,9 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
 
 void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
 {
-	gfs2_free_uninit_di(rgd, ip->i_num.no_addr);
+	gfs2_free_uninit_di(rgd, ip->i_no_addr);
 	gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
-	gfs2_meta_wipe(ip, ip->i_num.no_addr, 1);
+	gfs2_meta_wipe(ip, ip->i_no_addr, 1);
 }
 
 /**
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index b01e0cfc99b..b4c6adfc6f2 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -65,5 +65,6 @@ void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
 void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state,
 		      int flags);
 void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
+u64 gfs2_ri_total(struct gfs2_sbd *sdp);
 
 #endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 4fdda974dc8..f916b9740c7 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -95,8 +95,8 @@ int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent)
 {
 	unsigned int x;
 
-	if (sb->sb_header.mh_magic != GFS2_MAGIC ||
-	    sb->sb_header.mh_type != GFS2_METATYPE_SB) {
+	if (sb->sb_magic != GFS2_MAGIC ||
+	    sb->sb_type != GFS2_METATYPE_SB) {
 		if (!silent)
 			printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n");
 		return -EINVAL;
@@ -174,10 +174,31 @@ static int end_bio_io_page(struct bio *bio, unsigned int bytes_done, int error)
 	return 0;
 }
 
+static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
+{
+	const struct gfs2_sb *str = buf;
+
+	sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic);
+	sb->sb_type = be32_to_cpu(str->sb_header.mh_type);
+	sb->sb_format = be32_to_cpu(str->sb_header.mh_format);
+	sb->sb_fs_format = be32_to_cpu(str->sb_fs_format);
+	sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format);
+	sb->sb_bsize = be32_to_cpu(str->sb_bsize);
+	sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift);
+	sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr);
+	sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino);
+	sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr);
+	sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino);
+
+	memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
+	memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
+}
+
 /**
  * gfs2_read_super - Read the gfs2 super block from disk
- * @sb: The VFS super block
+ * @sdp: The GFS2 super block
  * @sector: The location of the super block
+ * @error: The error code to return
  *
  * This uses the bio functions to read the super block from disk
  * because we want to be 100% sure that we never read cached data.
@@ -189,17 +210,19 @@ static int end_bio_io_page(struct bio *bio, unsigned int bytes_done, int error)
  * the master directory (contains pointers to journals etc) and the
  * root directory.
  *
- * Returns: A page containing the sb or NULL
+ * Returns: 0 on success or error
  */
 
-struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
+int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
 {
+	struct super_block *sb = sdp->sd_vfs;
+	struct gfs2_sb *p;
 	struct page *page;
 	struct bio *bio;
 
 	page = alloc_page(GFP_KERNEL);
 	if (unlikely(!page))
-		return NULL;
+		return -ENOBUFS;
 
 	ClearPageUptodate(page);
 	ClearPageDirty(page);
@@ -208,7 +231,7 @@ struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
 	bio = bio_alloc(GFP_KERNEL, 1);
 	if (unlikely(!bio)) {
 		__free_page(page);
-		return NULL;
+		return -ENOBUFS;
 	}
 
 	bio->bi_sector = sector * (sb->s_blocksize >> 9);
@@ -222,9 +245,13 @@ struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
 	bio_put(bio);
 	if (!PageUptodate(page)) {
 		__free_page(page);
-		return NULL;
+		return -EIO;
 	}
-	return page;
+	p = kmap(page);
+	gfs2_sb_in(&sdp->sd_sb, p);
+	kunmap(page);
+	__free_page(page);
+	return 0;
 }
 
 /**
@@ -241,19 +268,13 @@ int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent)
 	u32 tmp_blocks;
 	unsigned int x;
 	int error;
-	struct page *page;
-	char *sb;
 
-	page = gfs2_read_super(sdp->sd_vfs, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
-	if (!page) {
+	error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
+	if (error) {
 		if (!silent)
 			fs_err(sdp, "can't read superblock\n");
-		return -EIO;
+		return error;
 	}
-	sb = kmap(page);
-	gfs2_sb_in(&sdp->sd_sb, sb);
-	kunmap(page);
-	__free_page(page);
 
 	error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
 	if (error)
@@ -360,7 +381,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
 		name.len = sprintf(buf, "journal%u", sdp->sd_journals);
 		name.hash = gfs2_disk_hash(name.name, name.len);
 
-		error = gfs2_dir_search(sdp->sd_jindex, &name, NULL, NULL);
+		error = gfs2_dir_check(sdp->sd_jindex, &name, NULL);
 		if (error == -ENOENT) {
 			error = 0;
 			break;
@@ -593,6 +614,24 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
 	return error;
 }
 
+static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
+{
+	const struct gfs2_statfs_change *str = buf;
+
+	sc->sc_total = be64_to_cpu(str->sc_total);
+	sc->sc_free = be64_to_cpu(str->sc_free);
+	sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
+}
+
+static void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
+{
+	struct gfs2_statfs_change *str = buf;
+
+	str->sc_total = cpu_to_be64(sc->sc_total);
+	str->sc_free = cpu_to_be64(sc->sc_free);
+	str->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
+}
+
 int gfs2_statfs_init(struct gfs2_sbd *sdp)
 {
 	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
@@ -772,7 +811,7 @@ static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
 			    struct gfs2_statfs_change_host *sc)
 {
 	gfs2_rgrp_verify(rgd);
-	sc->sc_total += rgd->rd_ri.ri_data;
+	sc->sc_total += rgd->rd_data;
 	sc->sc_free += rgd->rd_rg.rg_free;
 	sc->sc_dinodes += rgd->rd_rg.rg_dinodes;
 	return 0;
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index e590b2df11d..60a870e430b 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -16,7 +16,7 @@ void gfs2_tune_init(struct gfs2_tune *gt);
 
 int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent);
 int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent);
-struct page *gfs2_read_super(struct super_block *sb, sector_t sector);
+int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector);
 
 static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
 {
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 601eaa1b9ed..424a0774eda 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -115,8 +115,8 @@ int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
 		"GFS2: fsid=%s:   inode = %llu %llu\n"
 		"GFS2: fsid=%s:   function = %s, file = %s, line = %u\n",
 		sdp->sd_fsname,
-		sdp->sd_fsname, (unsigned long long)ip->i_num.no_formal_ino,
-		(unsigned long long)ip->i_num.no_addr,
+		sdp->sd_fsname, (unsigned long long)ip->i_no_formal_ino,
+		(unsigned long long)ip->i_no_addr,
 		sdp->sd_fsname, function, file, line);
 	return rv;
 }
@@ -137,7 +137,7 @@ int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
 		"GFS2: fsid=%s:   RG = %llu\n"
 		"GFS2: fsid=%s:   function = %s, file = %s, line = %u\n",
 		sdp->sd_fsname,
-		sdp->sd_fsname, (unsigned long long)rgd->rd_ri.ri_addr,
+		sdp->sd_fsname, (unsigned long long)rgd->rd_addr,
 		sdp->sd_fsname, function, file, line);
 	return rv;
 }
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 9a934db0bd8..bc835f272a6 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -607,7 +607,7 @@ static const struct file_operations hfs_file_operations = {
 	.write		= do_sync_write,
 	.aio_write	= generic_file_aio_write,
 	.mmap		= generic_file_mmap,
-	.sendfile	= generic_file_sendfile,
+	.splice_read	= generic_file_splice_read,
 	.fsync		= file_fsync,
 	.open		= hfs_file_open,
 	.release	= hfs_file_release,
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index 90ebab753d3..050d29c0a5b 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -62,8 +62,10 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
 		if ((HFSPLUS_SB(sb).flags & HFSPLUS_SB_HFSX) &&
 		    (head->key_type == HFSPLUS_KEY_BINARY))
 			tree->keycmp = hfsplus_cat_bin_cmp_key;
-		else
+		else {
 			tree->keycmp = hfsplus_cat_case_cmp_key;
+			HFSPLUS_SB(sb).flags |= HFSPLUS_SB_CASEFOLD;
+		}
 	} else {
 		printk(KERN_ERR "hfs: unknown B*Tree requested\n");
 		goto fail_page;
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 80b5682a227..1955ee61251 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -36,6 +36,8 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry,
 	u16 type;
 
 	sb = dir->i_sb;
+
+	dentry->d_op = &hfsplus_dentry_operations;
 	dentry->d_fsdata = NULL;
 	hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd);
 	hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name);
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 3915635b447..d9f5eda6d03 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -150,6 +150,7 @@ struct hfsplus_sb_info {
 #define HFSPLUS_SB_NODECOMPOSE	0x0002
 #define HFSPLUS_SB_FORCE	0x0004
 #define HFSPLUS_SB_HFSX		0x0008
+#define HFSPLUS_SB_CASEFOLD	0x0010
 
 
 struct hfsplus_inode_info {
@@ -321,6 +322,7 @@ void hfsplus_file_truncate(struct inode *);
 /* inode.c */
 extern const struct address_space_operations hfsplus_aops;
 extern const struct address_space_operations hfsplus_btree_aops;
+extern struct dentry_operations hfsplus_dentry_operations;
 
 void hfsplus_inode_read_fork(struct inode *, struct hfsplus_fork_raw *);
 void hfsplus_inode_write_fork(struct inode *, struct hfsplus_fork_raw *);
@@ -353,6 +355,8 @@ int hfsplus_strcasecmp(const struct hfsplus_unistr *, const struct hfsplus_unist
 int hfsplus_strcmp(const struct hfsplus_unistr *, const struct hfsplus_unistr *);
 int hfsplus_uni2asc(struct super_block *, const struct hfsplus_unistr *, char *, int *);
 int hfsplus_asc2uni(struct super_block *, struct hfsplus_unistr *, const char *, int);
+int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str);
+int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2);
 
 /* wrapper.c */
 int hfsplus_read_wrapper(struct super_block *);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 45dab5d6cc1..6f7c662174d 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -131,6 +131,11 @@ const struct address_space_operations hfsplus_aops = {
 	.writepages	= hfsplus_writepages,
 };
 
+struct dentry_operations hfsplus_dentry_operations = {
+	.d_hash       = hfsplus_hash_dentry,
+	.d_compare    = hfsplus_compare_dentry,
+};
+
 static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dentry,
 					  struct nameidata *nd)
 {
@@ -288,7 +293,7 @@ static const struct file_operations hfsplus_file_operations = {
 	.write		= do_sync_write,
 	.aio_write	= generic_file_aio_write,
 	.mmap		= generic_file_mmap,
-	.sendfile	= generic_file_sendfile,
+	.splice_read	= generic_file_splice_read,
 	.fsync		= file_fsync,
 	.open		= hfsplus_file_open,
 	.release	= hfsplus_file_release,
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index ebd1b380cbb..6d87a2a9534 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -283,11 +283,10 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
 	struct nls_table *nls = NULL;
 	int err = -EINVAL;
 
-	sbi = kmalloc(sizeof(struct hfsplus_sb_info), GFP_KERNEL);
+	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
 	if (!sbi)
 		return -ENOMEM;
 
-	memset(sbi, 0, sizeof(HFSPLUS_SB(sb)));
 	sb->s_fs_info = sbi;
 	INIT_HLIST_HEAD(&sbi->rsrc_inodes);
 	hfsplus_fill_defaults(sbi);
@@ -381,6 +380,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
 		iput(root);
 		goto cleanup;
 	}
+	sb->s_root->d_op = &hfsplus_dentry_operations;
 
 	str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1;
 	str.name = HFSP_HIDDENDIR_NAME;
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index 689c8bd721f..9e10f9444b6 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -239,61 +239,201 @@ out:
 	return res;
 }
 
-int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, const char *astr, int len)
+/*
+ * Convert one or more ASCII characters into a single unicode character.
+ * Returns the number of ASCII characters corresponding to the unicode char.
+ */
+static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
+			      wchar_t *uc)
 {
-	struct nls_table *nls = HFSPLUS_SB(sb).nls;
-	int size, off, decompose;
+	int size = HFSPLUS_SB(sb).nls->char2uni(astr, len, uc);
+	if (size <= 0) {
+		*uc = '?';
+		size = 1;
+	}
+	switch (*uc) {
+	case 0x2400:
+		*uc = 0;
+		break;
+	case ':':
+		*uc = '/';
+		break;
+	}
+	return size;
+}
+
+/* Decomposes a single unicode character. */
+static inline u16 *decompose_unichar(wchar_t uc, int *size)
+{
+	int off;
+
+	off = hfsplus_decompose_table[(uc >> 12) & 0xf];
+	if (off == 0 || off == 0xffff)
+		return NULL;
+
+	off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
+	if (!off)
+		return NULL;
+
+	off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
+	if (!off)
+		return NULL;
+
+	off = hfsplus_decompose_table[off + (uc & 0xf)];
+	*size = off & 3;
+	if (*size == 0)
+		return NULL;
+	return hfsplus_decompose_table + (off / 4);
+}
+
+int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
+		    const char *astr, int len)
+{
+	int size, dsize, decompose;
+	u16 *dstr, outlen = 0;
 	wchar_t c;
-	u16 outlen = 0;
 
 	decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE);
-
 	while (outlen < HFSPLUS_MAX_STRLEN && len > 0) {
-		size = nls->char2uni(astr, len, &c);
-		if (size <= 0) {
-			c = '?';
-			size = 1;
-		}
-		astr += size;
-		len -= size;
-		switch (c) {
-		case 0x2400:
-			c = 0;
-			break;
-		case ':':
-			c = '/';
-			break;
-		}
-		if (c >= 0xc0 && decompose) {
-			off = hfsplus_decompose_table[(c >> 12) & 0xf];
-			if (!off)
-				goto done;
-			if (off == 0xffff) {
-				goto done;
-			}
-			off = hfsplus_decompose_table[off + ((c >> 8) & 0xf)];
-			if (!off)
-				goto done;
-			off = hfsplus_decompose_table[off + ((c >> 4) & 0xf)];
-			if (!off)
-				goto done;
-			off = hfsplus_decompose_table[off + (c & 0xf)];
-			size = off & 3;
-			if (!size)
-				goto done;
-			off /= 4;
-			if (outlen + size > HFSPLUS_MAX_STRLEN)
+		size = asc2unichar(sb, astr, len, &c);
+
+		if (decompose && (dstr = decompose_unichar(c, &dsize))) {
+			if (outlen + dsize > HFSPLUS_MAX_STRLEN)
 				break;
 			do {
-				ustr->unicode[outlen++] = cpu_to_be16(hfsplus_decompose_table[off++]);
-			} while (--size > 0);
-			continue;
-		}
-	done:
-		ustr->unicode[outlen++] = cpu_to_be16(c);
+				ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
+			} while (--dsize > 0);
+		} else
+			ustr->unicode[outlen++] = cpu_to_be16(c);
+
+		astr += size;
+		len -= size;
 	}
 	ustr->length = cpu_to_be16(outlen);
 	if (len > 0)
 		return -ENAMETOOLONG;
 	return 0;
 }
+
+/*
+ * Hash a string to an integer as appropriate for the HFS+ filesystem.
+ * Composed unicode characters are decomposed and case-folding is performed
+ * if the appropriate bits are (un)set on the superblock.
+ */
+int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str)
+{
+	struct super_block *sb = dentry->d_sb;
+	const char *astr;
+	const u16 *dstr;
+	int casefold, decompose, size, dsize, len;
+	unsigned long hash;
+	wchar_t c;
+	u16 c2;
+
+	casefold = (HFSPLUS_SB(sb).flags & HFSPLUS_SB_CASEFOLD);
+	decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE);
+	hash = init_name_hash();
+	astr = str->name;
+	len = str->len;
+	while (len > 0) {
+		size = asc2unichar(sb, astr, len, &c);
+		astr += size;
+		len -= size;
+
+		if (decompose && (dstr = decompose_unichar(c, &dsize))) {
+			do {
+				c2 = *dstr++;
+				if (!casefold || (c2 = case_fold(c2)))
+					hash = partial_name_hash(c2, hash);
+			} while (--dsize > 0);
+		} else {
+			c2 = c;
+			if (!casefold || (c2 = case_fold(c2)))
+				hash = partial_name_hash(c2, hash);
+		}
+	}
+	str->hash = end_name_hash(hash);
+
+	return 0;
+}
+
+/*
+ * Compare strings with HFS+ filename ordering.
+ * Composed unicode characters are decomposed and case-folding is performed
+ * if the appropriate bits are (un)set on the superblock.
+ */
+int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2)
+{
+	struct super_block *sb = dentry->d_sb;
+	int casefold, decompose, size;
+	int dsize1, dsize2, len1, len2;
+	const u16 *dstr1, *dstr2;
+	const char *astr1, *astr2;
+	u16 c1, c2;
+	wchar_t c;
+
+	casefold = (HFSPLUS_SB(sb).flags & HFSPLUS_SB_CASEFOLD);
+	decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE);
+	astr1 = s1->name;
+	len1 = s1->len;
+	astr2 = s2->name;
+	len2 = s2->len;
+	dsize1 = dsize2 = 0;
+	dstr1 = dstr2 = NULL;
+
+	while (len1 > 0 && len2 > 0) {
+		if (!dsize1) {
+			size = asc2unichar(sb, astr1, len1, &c);
+			astr1 += size;
+			len1 -= size;
+
+			if (!decompose || !(dstr1 = decompose_unichar(c, &dsize1))) {
+				c1 = c;
+				dstr1 = &c1;
+				dsize1 = 1;
+			}
+		}
+
+		if (!dsize2) {
+			size = asc2unichar(sb, astr2, len2, &c);
+			astr2 += size;
+			len2 -= size;
+
+			if (!decompose || !(dstr2 = decompose_unichar(c, &dsize2))) {
+				c2 = c;
+				dstr2 = &c2;
+				dsize2 = 1;
+			}
+		}
+
+		c1 = *dstr1;
+		c2 = *dstr2;
+		if (casefold) {
+			if  (!(c1 = case_fold(c1))) {
+				dstr1++;
+				dsize1--;
+				continue;
+			}
+			if (!(c2 = case_fold(c2))) {
+				dstr2++;
+				dsize2--;
+				continue;
+			}
+		}
+		if (c1 < c2)
+			return -1;
+		else if (c1 > c2)
+			return 1;
+
+		dstr1++;
+		dsize1--;
+		dstr2++;
+		dsize2--;
+	}
+
+	if (len1 < len2)
+		return -1;
+	if (len1 > len2)
+		return 1;
+	return 0;
+}
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 8286491dbf3..c77862032e8 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -390,7 +390,7 @@ int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync)
 static const struct file_operations hostfs_file_fops = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
-	.sendfile	= generic_file_sendfile,
+	.splice_read	= generic_file_splice_read,
 	.aio_read	= generic_file_aio_read,
 	.aio_write	= generic_file_aio_write,
 	.write		= do_sync_write,
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index b4eafc0f1e5..5b53e5c5d8d 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -129,7 +129,7 @@ const struct file_operations hpfs_file_ops =
 	.mmap		= generic_file_mmap,
 	.release	= hpfs_file_release,
 	.fsync		= hpfs_file_fsync,
-	.sendfile	= generic_file_sendfile,
+	.splice_read	= generic_file_splice_read,
 };
 
 const struct inode_operations hpfs_file_iops =
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index aa083dd34e9..d145cb79c30 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -13,15 +13,18 @@
 #include <linux/fs.h>
 #include <linux/mount.h>
 #include <linux/file.h>
+#include <linux/kernel.h>
 #include <linux/writeback.h>
 #include <linux/pagemap.h>
 #include <linux/highmem.h>
 #include <linux/init.h>
 #include <linux/string.h>
 #include <linux/capability.h>
+#include <linux/ctype.h>
 #include <linux/backing-dev.h>
 #include <linux/hugetlb.h>
 #include <linux/pagevec.h>
+#include <linux/parser.h>
 #include <linux/mman.h>
 #include <linux/quotaops.h>
 #include <linux/slab.h>
@@ -47,6 +50,21 @@ static struct backing_dev_info hugetlbfs_backing_dev_info = {
 
 int sysctl_hugetlb_shm_group;
 
+enum {
+	Opt_size, Opt_nr_inodes,
+	Opt_mode, Opt_uid, Opt_gid,
+	Opt_err,
+};
+
+static match_table_t tokens = {
+	{Opt_size,	"size=%s"},
+	{Opt_nr_inodes,	"nr_inodes=%s"},
+	{Opt_mode,	"mode=%o"},
+	{Opt_uid,	"uid=%u"},
+	{Opt_gid,	"gid=%u"},
+	{Opt_err,	NULL},
+};
+
 static void huge_pagevec_release(struct pagevec *pvec)
 {
 	int i;
@@ -594,46 +612,73 @@ static const struct super_operations hugetlbfs_ops = {
 static int
 hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
 {
-	char *opt, *value, *rest;
+	char *p, *rest;
+	substring_t args[MAX_OPT_ARGS];
+	int option;
 
 	if (!options)
 		return 0;
-	while ((opt = strsep(&options, ",")) != NULL) {
-		if (!*opt)
+
+	while ((p = strsep(&options, ",")) != NULL) {
+		int token;
+		if (!*p)
 			continue;
 
-		value = strchr(opt, '=');
-		if (!value || !*value)
-			return -EINVAL;
-		else
-			*value++ = '\0';
-
-		if (!strcmp(opt, "uid"))
-			pconfig->uid = simple_strtoul(value, &value, 0);
-		else if (!strcmp(opt, "gid"))
-			pconfig->gid = simple_strtoul(value, &value, 0);
-		else if (!strcmp(opt, "mode"))
-			pconfig->mode = simple_strtoul(value,&value,0) & 0777U;
-		else if (!strcmp(opt, "size")) {
-			unsigned long long size = memparse(value, &rest);
+		token = match_token(p, tokens, args);
+		switch (token) {
+		case Opt_uid:
+			if (match_int(&args[0], &option))
+ 				goto bad_val;
+			pconfig->uid = option;
+			break;
+
+		case Opt_gid:
+			if (match_int(&args[0], &option))
+ 				goto bad_val;
+			pconfig->gid = option;
+			break;
+
+		case Opt_mode:
+			if (match_octal(&args[0], &option))
+ 				goto bad_val;
+			pconfig->mode = option & 0777U;
+			break;
+
+		case Opt_size: {
+ 			unsigned long long size;
+			/* memparse() will accept a K/M/G without a digit */
+			if (!isdigit(*args[0].from))
+				goto bad_val;
+			size = memparse(args[0].from, &rest);
 			if (*rest == '%') {
 				size <<= HPAGE_SHIFT;
 				size *= max_huge_pages;
 				do_div(size, 100);
-				rest++;
 			}
 			pconfig->nr_blocks = (size >> HPAGE_SHIFT);
-			value = rest;
-		} else if (!strcmp(opt,"nr_inodes")) {
-			pconfig->nr_inodes = memparse(value, &rest);
-			value = rest;
-		} else
-			return -EINVAL;
+			break;
+		}
+
+		case Opt_nr_inodes:
+			/* memparse() will accept a K/M/G without a digit */
+			if (!isdigit(*args[0].from))
+				goto bad_val;
+			pconfig->nr_inodes = memparse(args[0].from, &rest);
+			break;
 
-		if (*value)
+		default:
+			printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n",
+				 p);
 			return -EINVAL;
+			break;
+		}
 	}
 	return 0;
+
+bad_val:
+ 	printk(KERN_ERR "hugetlbfs: Bad value '%s' for mount option '%s'\n",
+	       args[0].from, p);
+ 	return 1;
 }
 
 static int
@@ -651,7 +696,6 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
 	config.gid = current->fsgid;
 	config.mode = 0755;
 	ret = hugetlbfs_parse_options(data, &config);
-
 	if (ret)
 		return ret;
 
@@ -736,15 +780,13 @@ static int can_do_hugetlb_shm(void)
 			can_do_mlock());
 }
 
-struct file *hugetlb_zero_setup(size_t size)
+struct file *hugetlb_file_setup(const char *name, size_t size)
 {
 	int error = -ENOMEM;
 	struct file *file;
 	struct inode *inode;
 	struct dentry *dentry, *root;
 	struct qstr quick_string;
-	char buf[16];
-	static atomic_t counter;
 
 	if (!hugetlbfs_vfsmount)
 		return ERR_PTR(-ENOENT);
@@ -756,8 +798,7 @@ struct file *hugetlb_zero_setup(size_t size)
 		return ERR_PTR(-ENOMEM);
 
 	root = hugetlbfs_vfsmount->mnt_root;
-	snprintf(buf, 16, "%u", atomic_inc_return(&counter));
-	quick_string.name = buf;
+	quick_string.name = name;
 	quick_string.len = strlen(quick_string.name);
 	quick_string.hash = 0;
 	dentry = d_alloc(root, &quick_string);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 8c90cbc903f..c2a773e8620 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -12,7 +12,6 @@
 #include <linux/fs.h>
 #include <linux/security.h>
 #include <linux/module.h>
-#include <linux/kallsyms.h>
 
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
@@ -21,7 +20,6 @@ static long do_ioctl(struct file *filp, unsigned int cmd,
 		unsigned long arg)
 {
 	int error = -ENOTTY;
-	void *f;
 
 	if (!filp->f_op)
 		goto out;
@@ -31,16 +29,10 @@ static long do_ioctl(struct file *filp, unsigned int cmd,
 		if (error == -ENOIOCTLCMD)
 			error = -EINVAL;
 		goto out;
-	} else if ((f = filp->f_op->ioctl)) {
+	} else if (filp->f_op->ioctl) {
 		lock_kernel();
-		if (!filp->f_op->ioctl) {
-			printk("%s: ioctl %p disappeared\n", __FUNCTION__, f);
-			print_symbol("symbol: %s\n", (unsigned long)f);
-			dump_stack();
-		} else {
-			error = filp->f_op->ioctl(filp->f_path.dentry->d_inode,
-						  filp, cmd, arg);
-		}
+		error = filp->f_op->ioctl(filp->f_path.dentry->d_inode,
+					  filp, cmd, arg);
 		unlock_kernel();
 	}
 
@@ -182,11 +174,3 @@ asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
  out:
 	return error;
 }
-
-/*
- * Platforms implementing 32 bit compatibility ioctl handlers in
- * modules need this exported
- */
-#ifdef CONFIG_COMPAT
-EXPORT_SYMBOL(sys_ioctl);
-#endif
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 0e94c31cad9..1ba407c64df 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -7,34 +7,18 @@
  *
  *  Steve Beynon		       : Missing last directory entries fixed
  *  (stephen@askone.demon.co.uk)      : 21st June 1996
- * 
+ *
  *  isofs directory handling functions
  */
 #include <linux/smp_lock.h>
 #include "isofs.h"
 
-static int isofs_readdir(struct file *, void *, filldir_t);
-
-const struct file_operations isofs_dir_operations =
-{
-	.read		= generic_read_dir,
-	.readdir	= isofs_readdir,
-};
-
-/*
- * directories can handle most operations...
- */
-const struct inode_operations isofs_dir_inode_operations =
-{
-	.lookup		= isofs_lookup,
-};
-
 int isofs_name_translate(struct iso_directory_record *de, char *new, struct inode *inode)
 {
 	char * old = de->name;
 	int len = de->name_len[0];
 	int i;
-			
+
 	for (i = 0; i < len; i++) {
 		unsigned char c = old[i];
 		if (!c)
@@ -62,22 +46,27 @@ int isofs_name_translate(struct iso_directory_record *de, char *new, struct inod
 }
 
 /* Acorn extensions written by Matthew Wilcox <willy@bofh.ai> 1998 */
-int get_acorn_filename(struct iso_directory_record * de,
-			    char * retname, struct inode * inode)
+int get_acorn_filename(struct iso_directory_record *de,
+			    char *retname, struct inode *inode)
 {
 	int std;
-	unsigned char * chr;
+	unsigned char *chr;
 	int retnamlen = isofs_name_translate(de, retname, inode);
-	if (retnamlen == 0) return 0;
+
+	if (retnamlen == 0)
+		return 0;
 	std = sizeof(struct iso_directory_record) + de->name_len[0];
-	if (std & 1) std++;
-	if ((*((unsigned char *) de) - std) != 32) return retnamlen;
+	if (std & 1)
+		std++;
+	if ((*((unsigned char *) de) - std) != 32)
+		return retnamlen;
 	chr = ((unsigned char *) de) + std;
-	if (strncmp(chr, "ARCHIMEDES", 10)) return retnamlen;
-	if ((*retname == '_') && ((chr[19] & 1) == 1)) *retname = '!';
+	if (strncmp(chr, "ARCHIMEDES", 10))
+		return retnamlen;
+	if ((*retname == '_') && ((chr[19] & 1) == 1))
+		*retname = '!';
 	if (((de->flags[0] & 2) == 0) && (chr[13] == 0xff)
-		&& ((chr[12] & 0xf0) == 0xf0))
-	{
+		&& ((chr[12] & 0xf0) == 0xf0)) {
 		retname[retnamlen] = ',';
 		sprintf(retname+retnamlen+1, "%3.3x",
 			((chr[12] & 0xf) << 8) | chr[11]);
@@ -91,7 +80,7 @@ int get_acorn_filename(struct iso_directory_record * de,
  */
 static int do_isofs_readdir(struct inode *inode, struct file *filp,
 		void *dirent, filldir_t filldir,
-		char * tmpname, struct iso_directory_record * tmpde)
+		char *tmpname, struct iso_directory_record *tmpde)
 {
 	unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
 	unsigned char bufbits = ISOFS_BUFFER_BITS(inode);
@@ -121,9 +110,11 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp,
 
 		de_len = *(unsigned char *) de;
 
-		/* If the length byte is zero, we should move on to the next
-		   CDROM sector.  If we are at the end of the directory, we
-		   kick out of the while loop. */
+		/*
+		 * If the length byte is zero, we should move on to the next
+		 * CDROM sector.  If we are at the end of the directory, we
+		 * kick out of the while loop.
+		 */
 
 		if (de_len == 0) {
 			brelse(bh);
@@ -157,11 +148,10 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp,
 
 		if (first_de) {
 			isofs_normalize_block_and_offset(de,
-							 &block_saved,
-							 &offset_saved);
+							&block_saved,
+							&offset_saved);
 			inode_number = isofs_get_ino(block_saved,
-						     offset_saved,
-						     bufbits);
+							offset_saved, bufbits);
 		}
 
 		if (de->flags[-sbi->s_high_sierra] & 0x80) {
@@ -199,7 +189,7 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp,
 		 */
 		if ((sbi->s_hide == 'y' &&
 				(de->flags[-sbi->s_high_sierra] & 1)) ||
-		      (sbi->s_showassoc =='n' &&
+				(sbi->s_showassoc =='n' &&
 				(de->flags[-sbi->s_high_sierra] & 4))) {
 			filp->f_pos += de_len;
 			continue;
@@ -240,7 +230,8 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp,
 
 		continue;
 	}
-	if (bh) brelse(bh);
+	if (bh)
+		brelse(bh);
 	return 0;
 }
 
@@ -253,8 +244,8 @@ static int isofs_readdir(struct file *filp,
 		void *dirent, filldir_t filldir)
 {
 	int result;
-	char * tmpname;
-	struct iso_directory_record * tmpde;
+	char *tmpname;
+	struct iso_directory_record *tmpde;
 	struct inode *inode = filp->f_path.dentry->d_inode;
 
 	tmpname = (char *)__get_free_page(GFP_KERNEL);
@@ -270,3 +261,19 @@ static int isofs_readdir(struct file *filp,
 	unlock_kernel();
 	return result;
 }
+
+const struct file_operations isofs_dir_operations =
+{
+	.read = generic_read_dir,
+	.readdir = isofs_readdir,
+};
+
+/*
+ * directories can handle most operations...
+ */
+const struct inode_operations isofs_dir_inode_operations =
+{
+	.lookup = isofs_lookup,
+};
+
+
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 5c3eecf7542..4f5418be059 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -73,20 +73,20 @@ static void isofs_destroy_inode(struct inode *inode)
 	kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode));
 }
 
-static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags)
+static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
 {
 	struct iso_inode_info *ei = foo;
 
 	inode_init_once(&ei->vfs_inode);
 }
- 
+
 static int init_inodecache(void)
 {
 	isofs_inode_cachep = kmem_cache_create("isofs_inode_cache",
-					     sizeof(struct iso_inode_info),
-					     0, (SLAB_RECLAIM_ACCOUNT|
-						SLAB_MEM_SPREAD),
-					     init_once, NULL);
+					sizeof(struct iso_inode_info),
+					0, (SLAB_RECLAIM_ACCOUNT|
+					SLAB_MEM_SPREAD),
+					init_once, NULL);
 	if (isofs_inode_cachep == NULL)
 		return -ENOMEM;
 	return 0;
@@ -150,9 +150,9 @@ struct iso9660_options{
 	uid_t uid;
 	char *iocharset;
 	unsigned char utf8;
-        /* LVE */
-        s32 session;
-        s32 sbsector;
+	/* LVE */
+	s32 session;
+	s32 sbsector;
 };
 
 /*
@@ -197,7 +197,7 @@ isofs_hashi_common(struct dentry *dentry, struct qstr *qstr, int ms)
 	hash = init_name_hash();
 	while (len--) {
 		c = tolower(*name++);
-		hash = partial_name_hash(tolower(c), hash);
+		hash = partial_name_hash(c, hash);
 	}
 	qstr->hash = end_name_hash(hash);
 
@@ -360,10 +360,12 @@ static int parse_options(char *options, struct iso9660_options *popt)
 	popt->check = 'u';		/* unset */
 	popt->nocompress = 0;
 	popt->blocksize = 1024;
-	popt->mode = S_IRUGO | S_IXUGO; /* r-x for all.  The disc could
-					   be shared with DOS machines so
-					   virtually anything could be
-					   a valid executable. */
+	popt->mode = S_IRUGO | S_IXUGO; /*
+					 * r-x for all.  The disc could
+					 * be shared with DOS machines so
+					 * virtually anything could be
+					 * a valid executable.
+					 */
 	popt->gid = 0;
 	popt->uid = 0;
 	popt->iocharset = NULL;
@@ -503,30 +505,30 @@ static unsigned int isofs_get_last_session(struct super_block *sb, s32 session)
 		Te.cdte_format=CDROM_LBA;
 		i = ioctl_by_bdev(bdev, CDROMREADTOCENTRY, (unsigned long) &Te);
 		if (!i) {
-			printk(KERN_DEBUG "Session %d start %d type %d\n",
-			       session, Te.cdte_addr.lba,
-			       Te.cdte_ctrl&CDROM_DATA_TRACK);
+			printk(KERN_DEBUG "ISOFS: Session %d start %d type %d\n",
+				session, Te.cdte_addr.lba,
+				Te.cdte_ctrl&CDROM_DATA_TRACK);
 			if ((Te.cdte_ctrl&CDROM_DATA_TRACK) == 4)
 				return Te.cdte_addr.lba;
 		}
-			
-		printk(KERN_ERR "Invalid session number or type of track\n");
+
+		printk(KERN_ERR "ISOFS: Invalid session number or type of track\n");
 	}
 	i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long) &ms_info);
 	if (session > 0)
-		printk(KERN_ERR "Invalid session number\n");
+		printk(KERN_ERR "ISOFS: Invalid session number\n");
 #if 0
-	printk("isofs.inode: CDROMMULTISESSION: rc=%d\n",i);
+	printk(KERN_DEBUG "isofs.inode: CDROMMULTISESSION: rc=%d\n",i);
 	if (i==0) {
-		printk("isofs.inode: XA disk: %s\n",ms_info.xa_flag?"yes":"no");
-		printk("isofs.inode: vol_desc_start = %d\n", ms_info.addr.lba);
+		printk(KERN_DEBUG "isofs.inode: XA disk: %s\n",ms_info.xa_flag?"yes":"no");
+		printk(KERN_DEBUG "isofs.inode: vol_desc_start = %d\n", ms_info.addr.lba);
 	}
 #endif
 	if (i==0)
 #if WE_OBEY_THE_WRITTEN_STANDARDS
-        if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */
+		if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */
 #endif
-		vol_desc_start=ms_info.addr.lba;
+			vol_desc_start=ms_info.addr.lba;
 	return vol_desc_start;
 }
 
@@ -538,20 +540,20 @@ static unsigned int isofs_get_last_session(struct super_block *sb, s32 session)
  */
 static int isofs_fill_super(struct super_block *s, void *data, int silent)
 {
-	struct buffer_head	      * bh = NULL, *pri_bh = NULL;
-	struct hs_primary_descriptor  * h_pri = NULL;
-	struct iso_primary_descriptor * pri = NULL;
+	struct buffer_head *bh = NULL, *pri_bh = NULL;
+	struct hs_primary_descriptor *h_pri = NULL;
+	struct iso_primary_descriptor *pri = NULL;
 	struct iso_supplementary_descriptor *sec = NULL;
-	struct iso_directory_record   * rootp;
-	int				joliet_level = 0;
-	int				iso_blknum, block;
-	int				orig_zonesize;
-	int				table;
-	unsigned int			vol_desc_start;
-	unsigned long			first_data_zone;
-	struct inode		      * inode;
-	struct iso9660_options		opt;
-	struct isofs_sb_info	      * sbi;
+	struct iso_directory_record *rootp;
+	struct inode *inode;
+	struct iso9660_options opt;
+	struct isofs_sb_info *sbi;
+	unsigned long first_data_zone;
+	int joliet_level = 0;
+	int iso_blknum, block;
+	int orig_zonesize;
+	int table;
+	unsigned int vol_desc_start;
 
 	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
 	if (!sbi)
@@ -577,72 +579,73 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
 	vol_desc_start = (opt.sbsector != -1) ?
 		opt.sbsector : isofs_get_last_session(s,opt.session);
 
-  	for (iso_blknum = vol_desc_start+16;
-             iso_blknum < vol_desc_start+100; iso_blknum++)
-	{
-	    struct hs_volume_descriptor   * hdp;
-	    struct iso_volume_descriptor  * vdp;
-
-	    block = iso_blknum << (ISOFS_BLOCK_BITS - s->s_blocksize_bits);
-	    if (!(bh = sb_bread(s, block)))
-		goto out_no_read;
-
-	    vdp = (struct iso_volume_descriptor *)bh->b_data;
-	    hdp = (struct hs_volume_descriptor *)bh->b_data;
-	    
-	    /* Due to the overlapping physical location of the descriptors, 
-	     * ISO CDs can match hdp->id==HS_STANDARD_ID as well. To ensure 
-	     * proper identification in this case, we first check for ISO.
-	     */
-	    if (strncmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) == 0) {
-		if (isonum_711 (vdp->type) == ISO_VD_END)
-		    break;
-		if (isonum_711 (vdp->type) == ISO_VD_PRIMARY) {
-		    if (pri == NULL) {
-			pri = (struct iso_primary_descriptor *)vdp;
-			/* Save the buffer in case we need it ... */
-			pri_bh = bh;
-			bh = NULL;
-		    }
-		}
+	for (iso_blknum = vol_desc_start+16;
+		iso_blknum < vol_desc_start+100; iso_blknum++) {
+		struct hs_volume_descriptor *hdp;
+		struct iso_volume_descriptor  *vdp;
+
+		block = iso_blknum << (ISOFS_BLOCK_BITS - s->s_blocksize_bits);
+		if (!(bh = sb_bread(s, block)))
+			goto out_no_read;
+
+		vdp = (struct iso_volume_descriptor *)bh->b_data;
+		hdp = (struct hs_volume_descriptor *)bh->b_data;
+
+		/*
+		 * Due to the overlapping physical location of the descriptors,
+		 * ISO CDs can match hdp->id==HS_STANDARD_ID as well. To ensure
+		 * proper identification in this case, we first check for ISO.
+		 */
+		if (strncmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) == 0) {
+			if (isonum_711(vdp->type) == ISO_VD_END)
+				break;
+			if (isonum_711(vdp->type) == ISO_VD_PRIMARY) {
+				if (pri == NULL) {
+					pri = (struct iso_primary_descriptor *)vdp;
+					/* Save the buffer in case we need it ... */
+					pri_bh = bh;
+					bh = NULL;
+				}
+			}
 #ifdef CONFIG_JOLIET
-		else if (isonum_711 (vdp->type) == ISO_VD_SUPPLEMENTARY) {
-		    sec = (struct iso_supplementary_descriptor *)vdp;
-		    if (sec->escape[0] == 0x25 && sec->escape[1] == 0x2f) {
-			if (opt.joliet == 'y') {
-			    if (sec->escape[2] == 0x40) {
-				joliet_level = 1;
-			    } else if (sec->escape[2] == 0x43) {
-				joliet_level = 2;
-			    } else if (sec->escape[2] == 0x45) {
-				joliet_level = 3;
-			    }
-			    printk(KERN_DEBUG"ISO 9660 Extensions: Microsoft Joliet Level %d\n",
-				   joliet_level);
+			else if (isonum_711(vdp->type) == ISO_VD_SUPPLEMENTARY) {
+				sec = (struct iso_supplementary_descriptor *)vdp;
+				if (sec->escape[0] == 0x25 && sec->escape[1] == 0x2f) {
+					if (opt.joliet == 'y') {
+						if (sec->escape[2] == 0x40)
+							joliet_level = 1;
+						else if (sec->escape[2] == 0x43)
+							joliet_level = 2;
+						else if (sec->escape[2] == 0x45)
+							joliet_level = 3;
+
+						printk(KERN_DEBUG "ISO 9660 Extensions: "
+							"Microsoft Joliet Level %d\n",
+							joliet_level);
+					}
+					goto root_found;
+				} else {
+				/* Unknown supplementary volume descriptor */
+				sec = NULL;
+				}
 			}
-			goto root_found;
-		    } else {
-			/* Unknown supplementary volume descriptor */
-			sec = NULL;
-		    }
-		}
 #endif
-	    } else {
-	        if (strncmp (hdp->id, HS_STANDARD_ID, sizeof hdp->id) == 0) {
-		    if (isonum_711 (hdp->type) != ISO_VD_PRIMARY)
-		        goto out_freebh;
-		
-		    sbi->s_high_sierra = 1;
-		    opt.rock = 'n';
-		    h_pri = (struct hs_primary_descriptor *)vdp;
-		    goto root_found;
+		} else {
+			if (strncmp (hdp->id, HS_STANDARD_ID, sizeof hdp->id) == 0) {
+				if (isonum_711(hdp->type) != ISO_VD_PRIMARY)
+					goto out_freebh;
+
+				sbi->s_high_sierra = 1;
+				opt.rock = 'n';
+				h_pri = (struct hs_primary_descriptor *)vdp;
+				goto root_found;
+			}
 		}
-	    }
 
-            /* Just skip any volume descriptors we don't recognize */
+		/* Just skip any volume descriptors we don't recognize */
 
-	    brelse(bh);
-	    bh = NULL;
+		brelse(bh);
+		bh = NULL;
 	}
 	/*
 	 * If we fall through, either no volume descriptor was found,
@@ -657,24 +660,24 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
 root_found:
 
 	if (joliet_level && (pri == NULL || opt.rock == 'n')) {
-	    /* This is the case of Joliet with the norock mount flag.
-	     * A disc with both Joliet and Rock Ridge is handled later
-	     */
-	    pri = (struct iso_primary_descriptor *) sec;
+		/* This is the case of Joliet with the norock mount flag.
+		 * A disc with both Joliet and Rock Ridge is handled later
+		 */
+		pri = (struct iso_primary_descriptor *) sec;
 	}
 
 	if(sbi->s_high_sierra){
-	  rootp = (struct iso_directory_record *) h_pri->root_directory_record;
-	  sbi->s_nzones = isonum_733 (h_pri->volume_space_size);
-	  sbi->s_log_zone_size = isonum_723 (h_pri->logical_block_size);
-	  sbi->s_max_size = isonum_733(h_pri->volume_space_size);
+		rootp = (struct iso_directory_record *) h_pri->root_directory_record;
+		sbi->s_nzones = isonum_733(h_pri->volume_space_size);
+		sbi->s_log_zone_size = isonum_723(h_pri->logical_block_size);
+		sbi->s_max_size = isonum_733(h_pri->volume_space_size);
 	} else {
-	  if (!pri)
-	    goto out_freebh;
-	  rootp = (struct iso_directory_record *) pri->root_directory_record;
-	  sbi->s_nzones = isonum_733 (pri->volume_space_size);
-	  sbi->s_log_zone_size = isonum_723 (pri->logical_block_size);
-	  sbi->s_max_size = isonum_733(pri->volume_space_size);
+		if (!pri)
+			goto out_freebh;
+		rootp = (struct iso_directory_record *) pri->root_directory_record;
+		sbi->s_nzones = isonum_733(pri->volume_space_size);
+		sbi->s_log_zone_size = isonum_723(pri->logical_block_size);
+		sbi->s_max_size = isonum_733(pri->volume_space_size);
 	}
 
 	sbi->s_ninodes = 0; /* No way to figure this out easily */
@@ -687,42 +690,43 @@ root_found:
 	 * blocks that were 512 bytes (which should only very rarely
 	 * happen.)
 	 */
-	if(orig_zonesize < opt.blocksize)
+	if (orig_zonesize < opt.blocksize)
 		goto out_bad_size;
 
 	/* RDE: convert log zone size to bit shift */
-	switch (sbi->s_log_zone_size)
-	  { case  512: sbi->s_log_zone_size =  9; break;
-	    case 1024: sbi->s_log_zone_size = 10; break;
-	    case 2048: sbi->s_log_zone_size = 11; break;
+	switch (sbi->s_log_zone_size) {
+	case  512: sbi->s_log_zone_size =  9; break;
+	case 1024: sbi->s_log_zone_size = 10; break;
+	case 2048: sbi->s_log_zone_size = 11; break;
 
-	    default:
+	default:
 		goto out_bad_zone_size;
-	  }
+	}
 
 	s->s_magic = ISOFS_SUPER_MAGIC;
 	s->s_maxbytes = 0xffffffff; /* We can handle files up to 4 GB */
 
-	/* The CDROM is read-only, has no nodes (devices) on it, and since
-	   all of the files appear to be owned by root, we really do not want
-	   to allow suid.  (suid or devices will not show up unless we have
-	   Rock Ridge extensions) */
+	/*
+	 * The CDROM is read-only, has no nodes (devices) on it, and since
+	 * all of the files appear to be owned by root, we really do not want
+	 * to allow suid.  (suid or devices will not show up unless we have
+	 * Rock Ridge extensions)
+	 */
 
 	s->s_flags |= MS_RDONLY /* | MS_NODEV | MS_NOSUID */;
 
 	/* Set this for reference. Its not currently used except on write
 	   which we don't have .. */
-	   
-	first_data_zone = isonum_733 (rootp->extent) +
-			  isonum_711 (rootp->ext_attr_length);
+
+	first_data_zone = isonum_733(rootp->extent) +
+			  isonum_711(rootp->ext_attr_length);
 	sbi->s_firstdatazone = first_data_zone;
 #ifndef BEQUIET
-	printk(KERN_DEBUG "Max size:%ld   Log zone size:%ld\n",
-	       sbi->s_max_size,
-	       1UL << sbi->s_log_zone_size);
-	printk(KERN_DEBUG "First datazone:%ld\n", sbi->s_firstdatazone);
+	printk(KERN_DEBUG "ISOFS: Max size:%ld   Log zone size:%ld\n",
+		sbi->s_max_size, 1UL << sbi->s_log_zone_size);
+	printk(KERN_DEBUG "ISOFS: First datazone:%ld\n", sbi->s_firstdatazone);
 	if(sbi->s_high_sierra)
-		printk(KERN_DEBUG "Disc in High Sierra format.\n");
+		printk(KERN_DEBUG "ISOFS: Disc in High Sierra format.\n");
 #endif
 
 	/*
@@ -737,8 +741,8 @@ root_found:
 		pri = (struct iso_primary_descriptor *) sec;
 		rootp = (struct iso_directory_record *)
 			pri->root_directory_record;
-		first_data_zone = isonum_733 (rootp->extent) +
-			  	isonum_711 (rootp->ext_attr_length);
+		first_data_zone = isonum_733(rootp->extent) +
+				isonum_711(rootp->ext_attr_length);
 	}
 
 	/*
@@ -771,7 +775,7 @@ root_found:
 
 #ifdef CONFIG_JOLIET
 	if (joliet_level && opt.utf8 == 0) {
-		char * p = opt.iocharset ? opt.iocharset : CONFIG_NLS_DEFAULT;
+		char *p = opt.iocharset ? opt.iocharset : CONFIG_NLS_DEFAULT;
 		sbi->s_nls_iocharset = load_nls(p);
 		if (! sbi->s_nls_iocharset) {
 			/* Fail only if explicit charset specified */
@@ -821,7 +825,7 @@ root_found:
 		sbi->s_rock = 0;
 		if (sbi->s_firstdatazone != first_data_zone) {
 			sbi->s_firstdatazone = first_data_zone;
-			printk(KERN_DEBUG 
+			printk(KERN_DEBUG
 				"ISOFS: changing to secondary root\n");
 			iput(inode);
 			inode = isofs_iget(s, sbi->s_firstdatazone, 0);
@@ -830,8 +834,10 @@ root_found:
 
 	if (opt.check == 'u') {
 		/* Only Joliet is case insensitive by default */
-		if (joliet_level) opt.check = 'r';
-		else opt.check = 's';
+		if (joliet_level)
+			opt.check = 'r';
+		else
+			opt.check = 's';
 	}
 	sbi->s_joliet_level = joliet_level;
 
@@ -846,8 +852,10 @@ root_found:
 		goto out_no_root;
 
 	table = 0;
-	if (joliet_level) table += 2;
-	if (opt.check == 'r') table++;
+	if (joliet_level)
+		table += 2;
+	if (opt.check == 'r')
+		table++;
 	s->s_root->d_op = &isofs_dentry_ops[table];
 
 	kfree(opt.iocharset);
@@ -858,10 +866,10 @@ root_found:
 	 * Display error messages and free resources.
 	 */
 out_bad_root:
-	printk(KERN_WARNING "isofs_fill_super: root inode not initialized\n");
+	printk(KERN_WARNING "%s: root inode not initialized\n", __func__);
 	goto out_iput;
 out_no_root:
-	printk(KERN_WARNING "isofs_fill_super: get root inode failed\n");
+	printk(KERN_WARNING "%s: get root inode failed\n", __func__);
 out_iput:
 	iput(inode);
 #ifdef CONFIG_JOLIET
@@ -870,21 +878,20 @@ out_iput:
 #endif
 	goto out_freesbi;
 out_no_read:
-	printk(KERN_WARNING "isofs_fill_super: "
-		"bread failed, dev=%s, iso_blknum=%d, block=%d\n",
-		s->s_id, iso_blknum, block);
+	printk(KERN_WARNING "%s: bread failed, dev=%s, iso_blknum=%d, block=%d\n",
+		__func__, s->s_id, iso_blknum, block);
 	goto out_freesbi;
 out_bad_zone_size:
-	printk(KERN_WARNING "Bad logical zone size %ld\n",
+	printk(KERN_WARNING "ISOFS: Bad logical zone size %ld\n",
 		sbi->s_log_zone_size);
 	goto out_freebh;
 out_bad_size:
-	printk(KERN_WARNING "Logical zone size(%d) < hardware blocksize(%u)\n",
+	printk(KERN_WARNING "ISOFS: Logical zone size(%d) < hardware blocksize(%u)\n",
 		orig_zonesize, opt.blocksize);
 	goto out_freebh;
 out_unknown_format:
 	if (!silent)
-		printk(KERN_WARNING "Unable to identify CD-ROM format.\n");
+		printk(KERN_WARNING "ISOFS: Unable to identify CD-ROM format.\n");
 
 out_freebh:
 	brelse(bh);
@@ -902,7 +909,7 @@ static int isofs_statfs (struct dentry *dentry, struct kstatfs *buf)
 	buf->f_type = ISOFS_SUPER_MAGIC;
 	buf->f_bsize = sb->s_blocksize;
 	buf->f_blocks = (ISOFS_SB(sb)->s_nzones
-                  << (ISOFS_SB(sb)->s_log_zone_size - sb->s_blocksize_bits));
+		<< (ISOFS_SB(sb)->s_log_zone_size - sb->s_blocksize_bits));
 	buf->f_bfree = 0;
 	buf->f_bavail = 0;
 	buf->f_files = ISOFS_SB(sb)->s_ninodes;
@@ -931,20 +938,20 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s,
 
 	rv = 0;
 	if (iblock < 0 || iblock != iblock_s) {
-		printk("isofs_get_blocks: block number too large\n");
+		printk(KERN_DEBUG "%s: block number too large\n", __func__);
 		goto abort;
 	}
 
 	b_off = iblock;
-	
-	offset    = 0;
-	firstext  = ei->i_first_extent;
+
+	offset = 0;
+	firstext = ei->i_first_extent;
 	sect_size = ei->i_section_size >> ISOFS_BUFFER_BITS(inode);
-	nextblk   = ei->i_next_section_block;
-	nextoff   = ei->i_next_section_offset;
-	section   = 0;
+	nextblk = ei->i_next_section_block;
+	nextoff = ei->i_next_section_offset;
+	section = 0;
 
-	while ( nblocks ) {
+	while (nblocks) {
 		/* If we are *way* beyond the end of the file, print a message.
 		 * Access beyond the end of the file up to the next page boundary
 		 * is normal, however because of the way the page cache works.
@@ -953,11 +960,11 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s,
 		 * I/O errors.
 		 */
 		if (b_off > ((inode->i_size + PAGE_CACHE_SIZE - 1) >> ISOFS_BUFFER_BITS(inode))) {
-			printk("isofs_get_blocks: block >= EOF (%ld, %ld)\n",
-			       iblock, (unsigned long) inode->i_size);
+			printk(KERN_DEBUG "%s: block >= EOF (%ld, %ld)\n",
+				__func__, iblock, (unsigned long) inode->i_size);
 			goto abort;
 		}
-		
+
 		/* On the last section, nextblk == 0, section size is likely to
 		 * exceed sect_size by a partial block, and access beyond the
 		 * end of the file will reach beyond the section size, too.
@@ -976,20 +983,21 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s,
 			iput(ninode);
 
 			if (++section > 100) {
-				printk("isofs_get_blocks: More than 100 file sections ?!?, aborting...\n");
-				printk("isofs_get_blocks: block=%ld firstext=%u sect_size=%u "
-				       "nextblk=%lu nextoff=%lu\n",
-				       iblock, firstext, (unsigned) sect_size,
-				       nextblk, nextoff);
+				printk(KERN_DEBUG "%s: More than 100 file sections ?!?"
+					" aborting...\n", __func__);
+				printk(KERN_DEBUG "%s: block=%ld firstext=%u sect_size=%u "
+					"nextblk=%lu nextoff=%lu\n", __func__,
+					iblock, firstext, (unsigned) sect_size,
+					nextblk, nextoff);
 				goto abort;
 			}
 		}
-		
-		if ( *bh ) {
+
+		if (*bh) {
 			map_bh(*bh, inode->i_sb, firstext + b_off - offset);
 		} else {
 			*bh = sb_getblk(inode->i_sb, firstext+b_off-offset);
-			if ( !*bh )
+			if (!*bh)
 				goto abort;
 		}
 		bh++;	/* Next buffer head */
@@ -1010,7 +1018,7 @@ static int isofs_get_block(struct inode *inode, sector_t iblock,
 		    struct buffer_head *bh_result, int create)
 {
 	if (create) {
-		printk("isofs_get_block: Kernel tries to allocate a block\n");
+		printk(KERN_DEBUG "%s: Kernel tries to allocate a block\n", __func__);
 		return -EROFS;
 	}
 
@@ -1070,11 +1078,11 @@ static int isofs_read_level3_size(struct inode *inode)
 {
 	unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
 	int high_sierra = ISOFS_SB(inode->i_sb)->s_high_sierra;
-	struct buffer_head * bh = NULL;
+	struct buffer_head *bh = NULL;
 	unsigned long block, offset, block_saved, offset_saved;
 	int i = 0;
 	int more_entries = 0;
-	struct iso_directory_record * tmpde = NULL;
+	struct iso_directory_record *tmpde = NULL;
 	struct iso_inode_info *ei = ISOFS_I(inode);
 
 	inode->i_size = 0;
@@ -1089,7 +1097,7 @@ static int isofs_read_level3_size(struct inode *inode)
 	offset = ei->i_iget5_offset;
 
 	do {
-		struct iso_directory_record * de;
+		struct iso_directory_record *de;
 		unsigned int de_len;
 
 		if (!bh) {
@@ -1163,10 +1171,9 @@ out_noread:
 	return -EIO;
 
 out_toomany:
-	printk(KERN_INFO "isofs_read_level3_size: "
-		"More than 100 file sections ?!?, aborting...\n"
-	  	"isofs_read_level3_size: inode=%lu\n",
-		inode->i_ino);
+	printk(KERN_INFO "%s: More than 100 file sections ?!?, aborting...\n"
+		"isofs_read_level3_size: inode=%lu\n",
+		__func__, inode->i_ino);
 	goto out;
 }
 
@@ -1177,9 +1184,9 @@ static void isofs_read_inode(struct inode *inode)
 	unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
 	unsigned long block;
 	int high_sierra = sbi->s_high_sierra;
-	struct buffer_head * bh = NULL;
-	struct iso_directory_record * de;
-	struct iso_directory_record * tmpde = NULL;
+	struct buffer_head *bh = NULL;
+	struct iso_directory_record *de;
+	struct iso_directory_record *tmpde = NULL;
 	unsigned int de_len;
 	unsigned long offset;
 	struct iso_inode_info *ei = ISOFS_I(inode);
@@ -1199,7 +1206,7 @@ static void isofs_read_inode(struct inode *inode)
 
 		tmpde = kmalloc(de_len, GFP_KERNEL);
 		if (tmpde == NULL) {
-			printk(KERN_INFO "isofs_read_inode: out of memory\n");
+			printk(KERN_INFO "%s: out of memory\n", __func__);
 			goto fail;
 		}
 		memcpy(tmpde, bh->b_data + offset, frag1);
@@ -1212,24 +1219,26 @@ static void isofs_read_inode(struct inode *inode)
 	}
 
 	inode->i_ino = isofs_get_ino(ei->i_iget5_block,
-				     ei->i_iget5_offset,
-				     ISOFS_BUFFER_BITS(inode));
+					ei->i_iget5_offset,
+					ISOFS_BUFFER_BITS(inode));
 
 	/* Assume it is a normal-format file unless told otherwise */
 	ei->i_file_format = isofs_file_normal;
 
 	if (de->flags[-high_sierra] & 2) {
 		inode->i_mode = S_IRUGO | S_IXUGO | S_IFDIR;
-		inode->i_nlink = 1; /* Set to 1.  We know there are 2, but
-				       the find utility tries to optimize
-				       if it is 2, and it screws up.  It is
-				       easier to give 1 which tells find to
-				       do it the hard way. */
+		inode->i_nlink = 1;	/*
+					 * Set to 1.  We know there are 2, but
+					 * the find utility tries to optimize
+					 * if it is 2, and it screws up.  It is
+					 * easier to give 1 which tells find to
+					 * do it the hard way.
+					 */
 	} else {
- 		/* Everybody gets to read the file. */
+		/* Everybody gets to read the file. */
 		inode->i_mode = sbi->s_mode;
 		inode->i_nlink = 1;
-	        inode->i_mode |= S_IFREG;
+		inode->i_mode |= S_IFREG;
 	}
 	inode->i_uid = sbi->s_uid;
 	inode->i_gid = sbi->s_gid;
@@ -1239,13 +1248,14 @@ static void isofs_read_inode(struct inode *inode)
 	ei->i_format_parm[1] = 0;
 	ei->i_format_parm[2] = 0;
 
-	ei->i_section_size = isonum_733 (de->size);
+	ei->i_section_size = isonum_733(de->size);
 	if (de->flags[-high_sierra] & 0x80) {
-		if(isofs_read_level3_size(inode)) goto fail;
+		if(isofs_read_level3_size(inode))
+			goto fail;
 	} else {
 		ei->i_next_section_block = 0;
 		ei->i_next_section_offset = 0;
-		inode->i_size = isonum_733 (de->size);
+		inode->i_size = isonum_733(de->size);
 	}
 
 	/*
@@ -1258,23 +1268,24 @@ static void isofs_read_inode(struct inode *inode)
 		inode->i_size &= 0x00ffffff;
 
 	if (de->interleave[0]) {
-		printk("Interleaved files not (yet) supported.\n");
+		printk(KERN_DEBUG "ISOFS: Interleaved files not (yet) supported.\n");
 		inode->i_size = 0;
 	}
 
 	/* I have no idea what file_unit_size is used for, so
 	   we will flag it for now */
 	if (de->file_unit_size[0] != 0) {
-		printk("File unit size != 0 for ISO file (%ld).\n",
-		       inode->i_ino);
+		printk(KERN_DEBUG "ISOFS: File unit size != 0 for ISO file (%ld).\n",
+			inode->i_ino);
 	}
 
 	/* I have no idea what other flag bits are used for, so
 	   we will flag it for now */
 #ifdef DEBUG
 	if((de->flags[-high_sierra] & ~2)!= 0){
-		printk("Unusual flag settings for ISO file (%ld %x).\n",
-		       inode->i_ino, de->flags[-high_sierra]);
+		printk(KERN_DEBUG "ISOFS: Unusual flag settings for ISO file "
+				"(%ld %x).\n",
+			inode->i_ino, de->flags[-high_sierra]);
 	}
 #endif
 
@@ -1285,11 +1296,11 @@ static void isofs_read_inode(struct inode *inode)
 	inode->i_atime.tv_nsec =
 	inode->i_ctime.tv_nsec = 0;
 
-	ei->i_first_extent = (isonum_733 (de->extent) +
-			      isonum_711 (de->ext_attr_length));
+	ei->i_first_extent = (isonum_733(de->extent) +
+			isonum_711(de->ext_attr_length));
 
 	/* Set the number of blocks for stat() - should be done before RR */
-	inode->i_blocks  = (inode->i_size + 511) >> 9;
+	inode->i_blocks = (inode->i_size + 511) >> 9;
 
 	/*
 	 * Now test for possible Rock Ridge extensions which will override
@@ -1306,7 +1317,7 @@ static void isofs_read_inode(struct inode *inode)
 	/* Install the inode operations vector */
 	if (S_ISREG(inode->i_mode)) {
 		inode->i_fop = &generic_ro_fops;
-		switch ( ei->i_file_format ) {
+		switch (ei->i_file_format) {
 #ifdef CONFIG_ZISOFS
 		case isofs_file_compressed:
 			inode->i_data.a_ops = &zisofs_aops;
@@ -1350,7 +1361,7 @@ static int isofs_iget5_test(struct inode *ino, void *data)
 	struct isofs_iget5_callback_data *d =
 		(struct isofs_iget5_callback_data*)data;
 	return (i->i_iget5_block == d->block)
-	       && (i->i_iget5_offset == d->offset);
+		&& (i->i_iget5_offset == d->offset);
 }
 
 static int isofs_iget5_set(struct inode *ino, void *data)
@@ -1384,7 +1395,7 @@ struct inode *isofs_iget(struct super_block *sb,
 	hashval = (block << sb->s_blocksize_bits) | offset;
 
 	inode = iget5_locked(sb, hashval, &isofs_iget5_test,
-			     &isofs_iget5_set, &data);
+				&isofs_iget5_set, &data);
 
 	if (inode && (inode->i_state & I_NEW)) {
 		sb->s_op->read_inode(inode);
@@ -1398,7 +1409,7 @@ static int isofs_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
 	return get_sb_bdev(fs_type, flags, dev_name, data, isofs_fill_super,
-			   mnt);
+				mnt);
 }
 
 static struct file_system_type iso9660_fs_type = {
diff --git a/fs/isofs/joliet.c b/fs/isofs/joliet.c
index fb8fe7a9ddc..92c14b850e9 100644
--- a/fs/isofs/joliet.c
+++ b/fs/isofs/joliet.c
@@ -80,22 +80,20 @@ get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, st
 
 	if (utf8) {
 		len = wcsntombs_be(outname, de->name,
-				   de->name_len[0] >> 1, PAGE_SIZE);
+				de->name_len[0] >> 1, PAGE_SIZE);
 	} else {
 		len = uni16_to_x8(outname, (__be16 *) de->name,
-				  de->name_len[0] >> 1, nls);
+				de->name_len[0] >> 1, nls);
 	}
-	if ((len > 2) && (outname[len-2] == ';') && (outname[len-1] == '1')) {
+	if ((len > 2) && (outname[len-2] == ';') && (outname[len-1] == '1'))
 		len -= 2;
-	}
 
 	/*
 	 * Windows doesn't like periods at the end of a name,
 	 * so neither do we
 	 */
-	while (len >= 2 && (outname[len-1] == '.')) {
+	while (len >= 2 && (outname[len-1] == '.'))
 		len--;
-	}
 
 	return len;
 }
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index c04b3a14a3e..c8c7e5138a0 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -15,7 +15,7 @@
  * some sanity tests.
  */
 static int
-isofs_cmp(struct dentry * dentry, const char * compare, int dlen)
+isofs_cmp(struct dentry *dentry, const char *compare, int dlen)
 {
 	struct qstr qstr;
 
@@ -48,24 +48,24 @@ isofs_cmp(struct dentry * dentry, const char * compare, int dlen)
  */
 static unsigned long
 isofs_find_entry(struct inode *dir, struct dentry *dentry,
-	unsigned long *block_rv, unsigned long* offset_rv,
-	char * tmpname, struct iso_directory_record * tmpde)
+	unsigned long *block_rv, unsigned long *offset_rv,
+	char *tmpname, struct iso_directory_record *tmpde)
 {
 	unsigned long bufsize = ISOFS_BUFFER_SIZE(dir);
 	unsigned char bufbits = ISOFS_BUFFER_BITS(dir);
 	unsigned long block, f_pos, offset, block_saved, offset_saved;
-	struct buffer_head * bh = NULL;
+	struct buffer_head *bh = NULL;
 	struct isofs_sb_info *sbi = ISOFS_SB(dir->i_sb);
 
 	if (!ISOFS_I(dir)->i_first_extent)
 		return 0;
-  
+
 	f_pos = 0;
 	offset = 0;
 	block = 0;
 
 	while (f_pos < dir->i_size) {
-		struct iso_directory_record * de;
+		struct iso_directory_record *de;
 		int de_len, match, i, dlen;
 		char *dpnt;
 
@@ -114,7 +114,7 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
 
 		if (sbi->s_rock &&
 		    ((i = get_rock_ridge_filename(de, tmpname, dir)))) {
-			dlen = i; 	/* possibly -1 */
+			dlen = i;	/* possibly -1 */
 			dpnt = tmpname;
 #ifdef CONFIG_JOLIET
 		} else if (sbi->s_joliet_level) {
@@ -145,8 +145,8 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
 			isofs_normalize_block_and_offset(de,
 							 &block_saved,
 							 &offset_saved);
-                        *block_rv = block_saved;
-                        *offset_rv = offset_saved;
+			*block_rv = block_saved;
+			*offset_rv = offset_saved;
 			brelse(bh);
 			return 1;
 		}
@@ -155,7 +155,7 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
 	return 0;
 }
 
-struct dentry *isofs_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
+struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
 	int found;
 	unsigned long block, offset;
@@ -170,9 +170,9 @@ struct dentry *isofs_lookup(struct inode * dir, struct dentry * dentry, struct n
 
 	lock_kernel();
 	found = isofs_find_entry(dir, dentry,
-				 &block, &offset,
-				 page_address(page),
-				 1024 + page_address(page));
+				&block, &offset,
+				page_address(page),
+				1024 + page_address(page));
 	__free_page(page);
 
 	inode = NULL;
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 1facfaff97c..a003d50edcd 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -887,7 +887,8 @@ restart_loop:
 	journal->j_committing_transaction = NULL;
 	spin_unlock(&journal->j_state_lock);
 
-	if (commit_transaction->t_checkpoint_list == NULL) {
+	if (commit_transaction->t_checkpoint_list == NULL &&
+	    commit_transaction->t_checkpoint_io_list == NULL) {
 		__journal_drop_transaction(journal, commit_transaction);
 	} else {
 		if (journal->j_checkpoint_transactions == NULL) {
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index 824e3b7d4ec..8db2fa25170 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -68,6 +68,7 @@
 #include <linux/list.h>
 #include <linux/init.h>
 #endif
+#include <linux/log2.h>
 
 static struct kmem_cache *revoke_record_cache;
 static struct kmem_cache *revoke_table_cache;
@@ -211,7 +212,7 @@ int journal_init_revoke(journal_t *journal, int hash_size)
 	journal->j_revoke = journal->j_revoke_table[0];
 
 	/* Check that the hash_size is a power of two */
-	J_ASSERT ((hash_size & (hash_size-1)) == 0);
+	J_ASSERT(is_power_of_2(hash_size));
 
 	journal->j_revoke->hash_size = hash_size;
 
@@ -238,7 +239,7 @@ int journal_init_revoke(journal_t *journal, int hash_size)
 	journal->j_revoke = journal->j_revoke_table[1];
 
 	/* Check that the hash_size is a power of two */
-	J_ASSERT ((hash_size & (hash_size-1)) == 0);
+	J_ASSERT(is_power_of_2(hash_size));
 
 	journal->j_revoke->hash_size = hash_size;
 
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 2856e1100a5..c0f59d1b13d 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -896,7 +896,8 @@ restart_loop:
 	journal->j_committing_transaction = NULL;
 	spin_unlock(&journal->j_state_lock);
 
-	if (commit_transaction->t_checkpoint_list == NULL) {
+	if (commit_transaction->t_checkpoint_list == NULL &&
+	    commit_transaction->t_checkpoint_io_list == NULL) {
 		__jbd2_journal_drop_transaction(journal, commit_transaction);
 	} else {
 		if (journal->j_checkpoint_transactions == NULL) {
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 9246e763da7..28cac049a56 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -68,6 +68,7 @@
 #include <linux/list.h>
 #include <linux/init.h>
 #endif
+#include <linux/log2.h>
 
 static struct kmem_cache *jbd2_revoke_record_cache;
 static struct kmem_cache *jbd2_revoke_table_cache;
@@ -212,7 +213,7 @@ int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
 	journal->j_revoke = journal->j_revoke_table[0];
 
 	/* Check that the hash_size is a power of two */
-	J_ASSERT ((hash_size & (hash_size-1)) == 0);
+	J_ASSERT(is_power_of_2(hash_size));
 
 	journal->j_revoke->hash_size = hash_size;
 
@@ -239,7 +240,7 @@ int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
 	journal->j_revoke = journal->j_revoke_table[1];
 
 	/* Check that the hash_size is a power of two */
-	J_ASSERT ((hash_size & (hash_size-1)) == 0);
+	J_ASSERT(is_power_of_2(hash_size));
 
 	journal->j_revoke->hash_size = hash_size;
 
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 99871279a1e..c2530197be0 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -47,7 +47,7 @@ const struct file_operations jffs2_file_operations =
 	.ioctl =	jffs2_ioctl,
 	.mmap =		generic_file_readonly_mmap,
 	.fsync =	jffs2_fsync,
-	.sendfile =	generic_file_sendfile
+	.splice_read =	generic_file_splice_read,
 };
 
 /* jffs2_file_inode_operations */
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 12e83f67eee..7b363786c2d 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -210,8 +210,7 @@ static void jffs2_kill_tn(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info *
  * offset, and the one with the smallest length will come first in the
  * ordering.
  *
- * Returns 0 if the node was inserted
- *         1 if the node is obsolete (because we can't mark it so yet)
+ * Returns 0 if the node was handled (including marking it obsolete)
  *         < 0 an if error occurred
  */
 static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c,
@@ -572,8 +571,7 @@ static struct jffs2_raw_node_ref *jffs2_first_valid_node(struct jffs2_raw_node_r
  * Helper function for jffs2_get_inode_nodes().
  * It is called every time an directory entry node is found.
  *
- * Returns: 0 on succes;
- * 	    1 if the node should be marked obsolete;
+ * Returns: 0 on success;
  * 	    negative error code on failure.
  */
 static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
@@ -680,8 +678,7 @@ static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_r
  * Helper function for jffs2_get_inode_nodes().
  * It is called every time an inode node is found.
  *
- * Returns: 0 on success;
- * 	    1 if the node should be marked obsolete;
+ * Returns: 0 on success (possibly after marking a bad node obsolete);
  * 	    negative error code on failure.
  */
 static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
@@ -690,7 +687,7 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
 {
 	struct jffs2_tmp_dnode_info *tn;
 	uint32_t len, csize;
-	int ret = 1;
+	int ret = 0;
 	uint32_t crc;
 
 	/* Obsoleted. This cannot happen, surely? dwmw2 20020308 */
@@ -719,8 +716,9 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
 		/* Sanity checks */
 		if (unlikely(je32_to_cpu(rd->offset) > je32_to_cpu(rd->isize)) ||
 		    unlikely(PAD(je32_to_cpu(rd->csize) + sizeof(*rd)) != PAD(je32_to_cpu(rd->totlen)))) {
-				JFFS2_WARNING("inode node header CRC is corrupted at %#08x\n", ref_offset(ref));
-				jffs2_dbg_dump_node(c, ref_offset(ref));
+			JFFS2_WARNING("inode node header CRC is corrupted at %#08x\n", ref_offset(ref));
+			jffs2_dbg_dump_node(c, ref_offset(ref));
+			jffs2_mark_node_obsolete(c, ref);
 			goto free_out;
 		}
 
@@ -775,6 +773,7 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
 			if (len >= csize && unlikely(tn->partial_crc != je32_to_cpu(rd->data_crc))) {
 				JFFS2_NOTICE("wrong data CRC in data node at 0x%08x: read %#08x, calculated %#08x.\n",
 					ref_offset(ref), tn->partial_crc, je32_to_cpu(rd->data_crc));
+				jffs2_mark_node_obsolete(c, ref);
 				goto free_out;
 			}
 
@@ -854,7 +853,6 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
  * It is called every time an unknown node is found.
  *
  * Returns: 0 on success;
- * 	    1 if the node should be marked obsolete;
  * 	    negative error code on failure.
  */
 static inline int read_unknown(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, struct jffs2_unknown_node *un)
@@ -1088,10 +1086,7 @@ static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_inf
 			}
 
 			err = read_unknown(c, ref, &node->u);
-			if (err == 1) {
-				jffs2_mark_node_obsolete(c, ref);
-				break;
-			} else if (unlikely(err))
+			if (unlikely(err))
 				goto free_out;
 
 		}
diff --git a/fs/jfs/endian24.h b/fs/jfs/endian24.h
index 79494c4f2b1..fa92f7f1d0d 100644
--- a/fs/jfs/endian24.h
+++ b/fs/jfs/endian24.h
@@ -29,7 +29,7 @@
 	__u32 __x = (x); \
 	((__u32)( \
 		((__x & (__u32)0x000000ffUL) << 16) | \
-		 (__x & (__u32)0x0000ff00UL)        | \
+		 (__x & (__u32)0x0000ff00UL)	    | \
 		((__x & (__u32)0x00ff0000UL) >> 16) )); \
 })
 
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index f7f8eff19b7..87eb93694af 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -108,7 +108,6 @@ const struct file_operations jfs_file_operations = {
 	.aio_read	= generic_file_aio_read,
 	.aio_write	= generic_file_aio_write,
 	.mmap		= generic_file_mmap,
-	.sendfile	= generic_file_sendfile,
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= generic_file_splice_write,
 	.fsync		= jfs_fsync,
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c
index 9c5d59632aa..887f5759e53 100644
--- a/fs/jfs/jfs_debug.c
+++ b/fs/jfs/jfs_debug.c
@@ -26,34 +26,6 @@
 #include "jfs_filsys.h"
 #include "jfs_debug.h"
 
-#ifdef CONFIG_JFS_DEBUG
-void dump_mem(char *label, void *data, int length)
-{
-	int i, j;
-	int *intptr = data;
-	char *charptr = data;
-	char buf[10], line[80];
-
-	printk("%s: dump of %d bytes of data at 0x%p\n\n", label, length,
-	       data);
-	for (i = 0; i < length; i += 16) {
-		line[0] = 0;
-		for (j = 0; (j < 4) && (i + j * 4 < length); j++) {
-			sprintf(buf, " %08x", intptr[i / 4 + j]);
-			strcat(line, buf);
-		}
-		buf[0] = ' ';
-		buf[2] = 0;
-		for (j = 0; (j < 16) && (i + j < length); j++) {
-			buf[1] =
-			    isprint(charptr[i + j]) ? charptr[i + j] : '.';
-			strcat(line, buf);
-		}
-		printk("%s\n", line);
-	}
-}
-#endif
-
 #ifdef PROC_FS_JFS /* see jfs_debug.h */
 
 static struct proc_dir_entry *base;
diff --git a/fs/jfs/jfs_debug.h b/fs/jfs/jfs_debug.h
index 7378798f0b2..044c1e654cc 100644
--- a/fs/jfs/jfs_debug.h
+++ b/fs/jfs/jfs_debug.h
@@ -62,7 +62,6 @@ extern void jfs_proc_clean(void);
 
 extern int jfsloglevel;
 
-extern void dump_mem(char *label, void *data, int length);
 extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *);
 
 /* information message: e.g., configuration, major event */
@@ -94,7 +93,6 @@ extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *);
  *	---------
  */
 #else				/* CONFIG_JFS_DEBUG */
-#define dump_mem(label,data,length) do {} while (0)
 #define ASSERT(p) do {} while (0)
 #define jfs_info(fmt, arg...) do {} while (0)
 #define jfs_debug(fmt, arg...) do {} while (0)
diff --git a/fs/jfs/jfs_dinode.h b/fs/jfs/jfs_dinode.h
index 40b20111383..c387540d342 100644
--- a/fs/jfs/jfs_dinode.h
+++ b/fs/jfs/jfs_dinode.h
@@ -19,23 +19,23 @@
 #define _H_JFS_DINODE
 
 /*
- *      jfs_dinode.h: on-disk inode manager
+ *	jfs_dinode.h: on-disk inode manager
  */
 
-#define INODESLOTSIZE           128
-#define L2INODESLOTSIZE         7
-#define log2INODESIZE           9	/* log2(bytes per dinode) */
+#define INODESLOTSIZE		128
+#define L2INODESLOTSIZE		7
+#define log2INODESIZE		9	/* log2(bytes per dinode) */
 
 
 /*
- *      on-disk inode : 512 bytes
+ *	on-disk inode : 512 bytes
  *
  * note: align 64-bit fields on 8-byte boundary.
  */
 struct dinode {
 	/*
-	 *      I. base area (128 bytes)
-	 *      ------------------------
+	 *	I. base area (128 bytes)
+	 *	------------------------
 	 *
 	 * define generic/POSIX attributes
 	 */
@@ -70,16 +70,16 @@ struct dinode {
 	__le32 di_acltype;	/* 4: Type of ACL */
 
 	/*
-	 *      Extension Areas.
+	 *	Extension Areas.
 	 *
-	 *      Historically, the inode was partitioned into 4 128-byte areas,
-	 *      the last 3 being defined as unions which could have multiple
-	 *      uses.  The first 96 bytes had been completely unused until
-	 *      an index table was added to the directory.  It is now more
-	 *      useful to describe the last 3/4 of the inode as a single
-	 *      union.  We would probably be better off redesigning the
-	 *      entire structure from scratch, but we don't want to break
-	 *      commonality with OS/2's JFS at this time.
+	 *	Historically, the inode was partitioned into 4 128-byte areas,
+	 *	the last 3 being defined as unions which could have multiple
+	 *	uses.  The first 96 bytes had been completely unused until
+	 *	an index table was added to the directory.  It is now more
+	 *	useful to describe the last 3/4 of the inode as a single
+	 *	union.  We would probably be better off redesigning the
+	 *	entire structure from scratch, but we don't want to break
+	 *	commonality with OS/2's JFS at this time.
 	 */
 	union {
 		struct {
@@ -95,7 +95,7 @@ struct dinode {
 		} _dir;					/* (384) */
 #define di_dirtable	u._dir._table
 #define di_dtroot	u._dir._dtroot
-#define di_parent       di_dtroot.header.idotdot
+#define di_parent	di_dtroot.header.idotdot
 #define di_DASD		di_dtroot.header.DASD
 
 		struct {
@@ -127,14 +127,14 @@ struct dinode {
 #define di_inlinedata	u._file._u2._special._u
 #define di_rdev		u._file._u2._special._u._rdev
 #define di_fastsymlink	u._file._u2._special._u._fastsymlink
-#define di_inlineea     u._file._u2._special._inlineea
+#define di_inlineea	u._file._u2._special._inlineea
 	} u;
 };
 
 /* extended mode bits (on-disk inode di_mode) */
-#define IFJOURNAL       0x00010000	/* journalled file */
-#define ISPARSE         0x00020000	/* sparse file enabled */
-#define INLINEEA        0x00040000	/* inline EA area free */
+#define IFJOURNAL	0x00010000	/* journalled file */
+#define ISPARSE		0x00020000	/* sparse file enabled */
+#define INLINEEA	0x00040000	/* inline EA area free */
 #define ISWAPFILE	0x00800000	/* file open for pager swap space */
 
 /* more extended mode bits: attributes for OS/2 */
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index f3b1ebb2228..e1985066b1c 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -154,12 +154,12 @@ static const s8 budtab[256] = {
  *		the in-core descriptor is initialized from disk.
  *
  * PARAMETERS:
- *      ipbmap	-  pointer to in-core inode for the block map.
+ *	ipbmap	- pointer to in-core inode for the block map.
  *
  * RETURN VALUES:
- *      0	- success
- *      -ENOMEM	- insufficient memory
- *      -EIO	- i/o error
+ *	0	- success
+ *	-ENOMEM	- insufficient memory
+ *	-EIO	- i/o error
  */
 int dbMount(struct inode *ipbmap)
 {
@@ -232,11 +232,11 @@ int dbMount(struct inode *ipbmap)
  *		the memory for this descriptor is freed.
  *
  * PARAMETERS:
- *      ipbmap	-  pointer to in-core inode for the block map.
+ *	ipbmap	- pointer to in-core inode for the block map.
  *
  * RETURN VALUES:
- *      0	- success
- *      -EIO	- i/o error
+ *	0	- success
+ *	-EIO	- i/o error
  */
 int dbUnmount(struct inode *ipbmap, int mounterror)
 {
@@ -320,13 +320,13 @@ int dbSync(struct inode *ipbmap)
  *		at a time.
  *
  * PARAMETERS:
- *      ip	-  pointer to in-core inode;
- *      blkno	-  starting block number to be freed.
- *      nblocks	-  number of blocks to be freed.
+ *	ip	- pointer to in-core inode;
+ *	blkno	- starting block number to be freed.
+ *	nblocks	- number of blocks to be freed.
  *
  * RETURN VALUES:
- *      0	- success
- *      -EIO	- i/o error
+ *	0	- success
+ *	-EIO	- i/o error
  */
 int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
 {
@@ -395,23 +395,23 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
 /*
  * NAME:	dbUpdatePMap()
  *
- * FUNCTION:    update the allocation state (free or allocate) of the
+ * FUNCTION:	update the allocation state (free or allocate) of the
  *		specified block range in the persistent block allocation map.
  *
  *		the blocks will be updated in the persistent map one
  *		dmap at a time.
  *
  * PARAMETERS:
- *      ipbmap	-  pointer to in-core inode for the block map.
- *      free	-  'true' if block range is to be freed from the persistent
- *		   map; 'false' if it is to   be allocated.
- *      blkno	-  starting block number of the range.
- *      nblocks	-  number of contiguous blocks in the range.
- *      tblk	-  transaction block;
+ *	ipbmap	- pointer to in-core inode for the block map.
+ *	free	- 'true' if block range is to be freed from the persistent
+ *		  map; 'false' if it is to be allocated.
+ *	blkno	- starting block number of the range.
+ *	nblocks	- number of contiguous blocks in the range.
+ *	tblk	- transaction block;
  *
  * RETURN VALUES:
- *      0	- success
- *      -EIO	- i/o error
+ *	0	- success
+ *	-EIO	- i/o error
  */
 int
 dbUpdatePMap(struct inode *ipbmap,
@@ -573,7 +573,7 @@ dbUpdatePMap(struct inode *ipbmap,
 /*
  * NAME:	dbNextAG()
  *
- * FUNCTION:    find the preferred allocation group for new allocations.
+ * FUNCTION:	find the preferred allocation group for new allocations.
  *
  *		Within the allocation groups, we maintain a preferred
  *		allocation group which consists of a group with at least
@@ -589,10 +589,10 @@ dbUpdatePMap(struct inode *ipbmap,
  *		empty ags around for large allocations.
  *
  * PARAMETERS:
- *      ipbmap	-  pointer to in-core inode for the block map.
+ *	ipbmap	- pointer to in-core inode for the block map.
  *
  * RETURN VALUES:
- *      the preferred allocation group number.
+ *	the preferred allocation group number.
  */
 int dbNextAG(struct inode *ipbmap)
 {
@@ -656,7 +656,7 @@ unlock:
 /*
  * NAME:	dbAlloc()
  *
- * FUNCTION:    attempt to allocate a specified number of contiguous free
+ * FUNCTION:	attempt to allocate a specified number of contiguous free
  *		blocks from the working allocation block map.
  *
  *		the block allocation policy uses hints and a multi-step
@@ -680,16 +680,16 @@ unlock:
  *		size or requests that specify no hint value.
  *
  * PARAMETERS:
- *      ip	-  pointer to in-core inode;
- *      hint	- allocation hint.
- *      nblocks	- number of contiguous blocks in the range.
- *      results	- on successful return, set to the starting block number
+ *	ip	- pointer to in-core inode;
+ *	hint	- allocation hint.
+ *	nblocks	- number of contiguous blocks in the range.
+ *	results	- on successful return, set to the starting block number
  *		  of the newly allocated contiguous range.
  *
  * RETURN VALUES:
- *      0	- success
- *      -ENOSPC	- insufficient disk resources
- *      -EIO	- i/o error
+ *	0	- success
+ *	-ENOSPC	- insufficient disk resources
+ *	-EIO	- i/o error
  */
 int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
 {
@@ -706,12 +706,6 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
 	/* assert that nblocks is valid */
 	assert(nblocks > 0);
 
-#ifdef _STILL_TO_PORT
-	/* DASD limit check                                     F226941 */
-	if (OVER_LIMIT(ip, nblocks))
-		return -ENOSPC;
-#endif				/* _STILL_TO_PORT */
-
 	/* get the log2 number of blocks to be allocated.
 	 * if the number of blocks is not a log2 multiple,
 	 * it will be rounded up to the next log2 multiple.
@@ -720,7 +714,6 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
 
 	bmp = JFS_SBI(ip->i_sb)->bmap;
 
-//retry:        /* serialize w.r.t.extendfs() */
 	mapSize = bmp->db_mapsize;
 
 	/* the hint should be within the map */
@@ -879,17 +872,17 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
 /*
  * NAME:	dbAllocExact()
  *
- * FUNCTION:    try to allocate the requested extent;
+ * FUNCTION:	try to allocate the requested extent;
  *
  * PARAMETERS:
- *      ip	- pointer to in-core inode;
- *      blkno	- extent address;
- *      nblocks	- extent length;
+ *	ip	- pointer to in-core inode;
+ *	blkno	- extent address;
+ *	nblocks	- extent length;
  *
  * RETURN VALUES:
- *      0	- success
- *      -ENOSPC	- insufficient disk resources
- *      -EIO	- i/o error
+ *	0	- success
+ *	-ENOSPC	- insufficient disk resources
+ *	-EIO	- i/o error
  */
 int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
 {
@@ -946,7 +939,7 @@ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
 /*
  * NAME:	dbReAlloc()
  *
- * FUNCTION:    attempt to extend a current allocation by a specified
+ * FUNCTION:	attempt to extend a current allocation by a specified
  *		number of blocks.
  *
  *		this routine attempts to satisfy the allocation request
@@ -959,21 +952,21 @@ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
  *		number of blocks required.
  *
  * PARAMETERS:
- *      ip	    -  pointer to in-core inode requiring allocation.
- *      blkno	    -  starting block of the current allocation.
- *      nblocks	    -  number of contiguous blocks within the current
+ *	ip	    -  pointer to in-core inode requiring allocation.
+ *	blkno	    -  starting block of the current allocation.
+ *	nblocks	    -  number of contiguous blocks within the current
  *		       allocation.
- *      addnblocks  -  number of blocks to add to the allocation.
- *      results	-      on successful return, set to the starting block number
+ *	addnblocks  -  number of blocks to add to the allocation.
+ *	results	-      on successful return, set to the starting block number
  *		       of the existing allocation if the existing allocation
  *		       was extended in place or to a newly allocated contiguous
  *		       range if the existing allocation could not be extended
  *		       in place.
  *
  * RETURN VALUES:
- *      0	- success
- *      -ENOSPC	- insufficient disk resources
- *      -EIO	- i/o error
+ *	0	- success
+ *	-ENOSPC	- insufficient disk resources
+ *	-EIO	- i/o error
  */
 int
 dbReAlloc(struct inode *ip,
@@ -1004,7 +997,7 @@ dbReAlloc(struct inode *ip,
 /*
  * NAME:	dbExtend()
  *
- * FUNCTION:    attempt to extend a current allocation by a specified
+ * FUNCTION:	attempt to extend a current allocation by a specified
  *		number of blocks.
  *
  *		this routine attempts to satisfy the allocation request
@@ -1013,16 +1006,16 @@ dbReAlloc(struct inode *ip,
  *		immediately following the current allocation.
  *
  * PARAMETERS:
- *      ip	    -  pointer to in-core inode requiring allocation.
- *      blkno	    -  starting block of the current allocation.
- *      nblocks	    -  number of contiguous blocks within the current
+ *	ip	    -  pointer to in-core inode requiring allocation.
+ *	blkno	    -  starting block of the current allocation.
+ *	nblocks	    -  number of contiguous blocks within the current
  *		       allocation.
- *      addnblocks  -  number of blocks to add to the allocation.
+ *	addnblocks  -  number of blocks to add to the allocation.
  *
  * RETURN VALUES:
- *      0	- success
- *      -ENOSPC	- insufficient disk resources
- *      -EIO	- i/o error
+ *	0	- success
+ *	-ENOSPC	- insufficient disk resources
+ *	-EIO	- i/o error
  */
 static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
 {
@@ -1109,19 +1102,19 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
 /*
  * NAME:	dbAllocNext()
  *
- * FUNCTION:    attempt to allocate the blocks of the specified block
+ * FUNCTION:	attempt to allocate the blocks of the specified block
  *		range within a dmap.
  *
  * PARAMETERS:
- *      bmp	-  pointer to bmap descriptor
- *      dp	-  pointer to dmap.
- *      blkno	-  starting block number of the range.
- *      nblocks	-  number of contiguous free blocks of the range.
+ *	bmp	-  pointer to bmap descriptor
+ *	dp	-  pointer to dmap.
+ *	blkno	-  starting block number of the range.
+ *	nblocks	-  number of contiguous free blocks of the range.
  *
  * RETURN VALUES:
- *      0	- success
- *      -ENOSPC	- insufficient disk resources
- *      -EIO	- i/o error
+ *	0	- success
+ *	-ENOSPC	- insufficient disk resources
+ *	-EIO	- i/o error
  *
  * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
  */
@@ -1233,7 +1226,7 @@ static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno,
 /*
  * NAME:	dbAllocNear()
  *
- * FUNCTION:    attempt to allocate a number of contiguous free blocks near
+ * FUNCTION:	attempt to allocate a number of contiguous free blocks near
  *		a specified block (hint) within a dmap.
  *
  *		starting with the dmap leaf that covers the hint, we'll
@@ -1242,18 +1235,18 @@ static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno,
  *		the desired free space.
  *
  * PARAMETERS:
- *      bmp	-  pointer to bmap descriptor
- *      dp	-  pointer to dmap.
- *      blkno	-  block number to allocate near.
- *      nblocks	-  actual number of contiguous free blocks desired.
- *      l2nb	-  log2 number of contiguous free blocks desired.
- *      results	-  on successful return, set to the starting block number
+ *	bmp	-  pointer to bmap descriptor
+ *	dp	-  pointer to dmap.
+ *	blkno	-  block number to allocate near.
+ *	nblocks	-  actual number of contiguous free blocks desired.
+ *	l2nb	-  log2 number of contiguous free blocks desired.
+ *	results	-  on successful return, set to the starting block number
  *		   of the newly allocated range.
  *
  * RETURN VALUES:
- *      0	- success
- *      -ENOSPC	- insufficient disk resources
- *      -EIO	- i/o error
+ *	0	- success
+ *	-ENOSPC	- insufficient disk resources
+ *	-EIO	- i/o error
  *
  * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
  */
@@ -1316,7 +1309,7 @@ dbAllocNear(struct bmap * bmp,
 /*
  * NAME:	dbAllocAG()
  *
- * FUNCTION:    attempt to allocate the specified number of contiguous
+ * FUNCTION:	attempt to allocate the specified number of contiguous
  *		free blocks within the specified allocation group.
  *
  *		unless the allocation group size is equal to the number
@@ -1353,17 +1346,17 @@ dbAllocNear(struct bmap * bmp,
  *		the allocation group.
  *
  * PARAMETERS:
- *      bmp	-  pointer to bmap descriptor
+ *	bmp	-  pointer to bmap descriptor
  *	agno	- allocation group number.
- *      nblocks	-  actual number of contiguous free blocks desired.
- *      l2nb	-  log2 number of contiguous free blocks desired.
- *      results	-  on successful return, set to the starting block number
+ *	nblocks	-  actual number of contiguous free blocks desired.
+ *	l2nb	-  log2 number of contiguous free blocks desired.
+ *	results	-  on successful return, set to the starting block number
  *		   of the newly allocated range.
  *
  * RETURN VALUES:
- *      0	- success
- *      -ENOSPC	- insufficient disk resources
- *      -EIO	- i/o error
+ *	0	- success
+ *	-ENOSPC	- insufficient disk resources
+ *	-EIO	- i/o error
  *
  * note: IWRITE_LOCK(ipmap) held on entry/exit;
  */
@@ -1546,7 +1539,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
 /*
  * NAME:	dbAllocAny()
  *
- * FUNCTION:    attempt to allocate the specified number of contiguous
+ * FUNCTION:	attempt to allocate the specified number of contiguous
  *		free blocks anywhere in the file system.
  *
  *		dbAllocAny() attempts to find the sufficient free space by
@@ -1556,16 +1549,16 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
  *		desired free space is allocated.
  *
  * PARAMETERS:
- *      bmp	-  pointer to bmap descriptor
- *      nblocks	 -  actual number of contiguous free blocks desired.
- *      l2nb	 -  log2 number of contiguous free blocks desired.
- *      results	-  on successful return, set to the starting block number
+ *	bmp	-  pointer to bmap descriptor
+ *	nblocks	 -  actual number of contiguous free blocks desired.
+ *	l2nb	 -  log2 number of contiguous free blocks desired.
+ *	results	-  on successful return, set to the starting block number
  *		   of the newly allocated range.
  *
  * RETURN VALUES:
- *      0	- success
- *      -ENOSPC	- insufficient disk resources
- *      -EIO	- i/o error
+ *	0	- success
+ *	-ENOSPC	- insufficient disk resources
+ *	-EIO	- i/o error
  *
  * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
  */
@@ -1598,9 +1591,9 @@ static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results)
 /*
  * NAME:	dbFindCtl()
  *
- * FUNCTION:    starting at a specified dmap control page level and block
+ * FUNCTION:	starting at a specified dmap control page level and block
  *		number, search down the dmap control levels for a range of
- *	        contiguous free blocks large enough to satisfy an allocation
+ *		contiguous free blocks large enough to satisfy an allocation
  *		request for the specified number of free blocks.
  *
  *		if sufficient contiguous free blocks are found, this routine
@@ -1609,17 +1602,17 @@ static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results)
  *		is sufficient in size.
  *
  * PARAMETERS:
- *      bmp	-  pointer to bmap descriptor
- *      level	-  starting dmap control page level.
- *      l2nb	-  log2 number of contiguous free blocks desired.
- *      *blkno	-  on entry, starting block number for conducting the search.
+ *	bmp	-  pointer to bmap descriptor
+ *	level	-  starting dmap control page level.
+ *	l2nb	-  log2 number of contiguous free blocks desired.
+ *	*blkno	-  on entry, starting block number for conducting the search.
  *		   on successful return, the first block within a dmap page
  *		   that contains or starts a range of contiguous free blocks.
  *
  * RETURN VALUES:
- *      0	- success
- *      -ENOSPC	- insufficient disk resources
- *      -EIO	- i/o error
+ *	0	- success
+ *	-ENOSPC	- insufficient disk resources
+ *	-EIO	- i/o error
  *
  * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
  */
@@ -1699,7 +1692,7 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno)
 /*
  * NAME:	dbAllocCtl()
  *
- * FUNCTION:    attempt to allocate a specified number of contiguous
+ * FUNCTION:	attempt to allocate a specified number of contiguous
  *		blocks starting within a specific dmap.
  *
  *		this routine is called by higher level routines that search
@@ -1726,18 +1719,18 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno)
  *		first dmap (i.e. blkno).
  *
  * PARAMETERS:
- *      bmp	-  pointer to bmap descriptor
- *      nblocks	 -  actual number of contiguous free blocks to allocate.
- *      l2nb	 -  log2 number of contiguous free blocks to allocate.
- *      blkno	 -  starting block number of the dmap to start the allocation
+ *	bmp	-  pointer to bmap descriptor
+ *	nblocks	 -  actual number of contiguous free blocks to allocate.
+ *	l2nb	 -  log2 number of contiguous free blocks to allocate.
+ *	blkno	 -  starting block number of the dmap to start the allocation
  *		    from.
- *      results	-  on successful return, set to the starting block number
+ *	results	-  on successful return, set to the starting block number
  *		   of the newly allocated range.
  *
  * RETURN VALUES:
- *      0	- success
- *      -ENOSPC	- insufficient disk resources
- *      -EIO	- i/o error
+ *	0	- success
+ *	-ENOSPC	- insufficient disk resources
+ *	-EIO	- i/o error
  *
  * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
  */
@@ -1870,7 +1863,7 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results)
 /*
  * NAME:	dbAllocDmapLev()
  *
- * FUNCTION:    attempt to allocate a specified number of contiguous blocks
+ * FUNCTION:	attempt to allocate a specified number of contiguous blocks
  *		from a specified dmap.
  *
  *		this routine checks if the contiguous blocks are available.
@@ -1878,17 +1871,17 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results)
  *		returned.
  *
  * PARAMETERS:
- *      mp	-  pointer to bmap descriptor
- *      dp	-  pointer to dmap to attempt to allocate blocks from.
- *      l2nb	-  log2 number of contiguous block desired.
- *      nblocks	-  actual number of contiguous block desired.
- *      results	-  on successful return, set to the starting block number
+ *	mp	-  pointer to bmap descriptor
+ *	dp	-  pointer to dmap to attempt to allocate blocks from.
+ *	l2nb	-  log2 number of contiguous block desired.
+ *	nblocks	-  actual number of contiguous block desired.
+ *	results	-  on successful return, set to the starting block number
  *		   of the newly allocated range.
  *
  * RETURN VALUES:
- *      0	- success
- *      -ENOSPC	- insufficient disk resources
- *      -EIO	- i/o error
+ *	0	- success
+ *	-ENOSPC	- insufficient disk resources
+ *	-EIO	- i/o error
  *
  * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or
  *	IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit;
@@ -1933,7 +1926,7 @@ dbAllocDmapLev(struct bmap * bmp,
 /*
  * NAME:	dbAllocDmap()
  *
- * FUNCTION:    adjust the disk allocation map to reflect the allocation
+ * FUNCTION:	adjust the disk allocation map to reflect the allocation
  *		of a specified block range within a dmap.
  *
  *		this routine allocates the specified blocks from the dmap
@@ -1946,14 +1939,14 @@ dbAllocDmapLev(struct bmap * bmp,
  *		covers this dmap.
  *
  * PARAMETERS:
- *      bmp	-  pointer to bmap descriptor
- *      dp	-  pointer to dmap to allocate the block range from.
- *      blkno	-  starting block number of the block to be allocated.
- *      nblocks	-  number of blocks to be allocated.
+ *	bmp	-  pointer to bmap descriptor
+ *	dp	-  pointer to dmap to allocate the block range from.
+ *	blkno	-  starting block number of the block to be allocated.
+ *	nblocks	-  number of blocks to be allocated.
  *
  * RETURN VALUES:
- *      0	- success
- *      -EIO	- i/o error
+ *	0	- success
+ *	-EIO	- i/o error
  *
  * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
  */
@@ -1989,7 +1982,7 @@ static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
 /*
  * NAME:	dbFreeDmap()
  *
- * FUNCTION:    adjust the disk allocation map to reflect the allocation
+ * FUNCTION:	adjust the disk allocation map to reflect the allocation
  *		of a specified block range within a dmap.
  *
  *		this routine frees the specified blocks from the dmap through
@@ -1997,18 +1990,18 @@ static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
  *		causes the maximum string of free blocks within the dmap to
  *		change (i.e. the value of the root of the dmap's dmtree), this
  *		routine will cause this change to be reflected up through the
- *	        appropriate levels of the dmap control pages by a call to
+ *		appropriate levels of the dmap control pages by a call to
  *		dbAdjCtl() for the L0 dmap control page that covers this dmap.
  *
  * PARAMETERS:
- *      bmp	-  pointer to bmap descriptor
- *      dp	-  pointer to dmap to free the block range from.
- *      blkno	-  starting block number of the block to be freed.
- *      nblocks	-  number of blocks to be freed.
+ *	bmp	-  pointer to bmap descriptor
+ *	dp	-  pointer to dmap to free the block range from.
+ *	blkno	-  starting block number of the block to be freed.
+ *	nblocks	-  number of blocks to be freed.
  *
  * RETURN VALUES:
- *      0	- success
- *      -EIO	- i/o error
+ *	0	- success
+ *	-EIO	- i/o error
  *
  * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
  */
@@ -2055,7 +2048,7 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
 /*
  * NAME:	dbAllocBits()
  *
- * FUNCTION:    allocate a specified block range from a dmap.
+ * FUNCTION:	allocate a specified block range from a dmap.
  *
  *		this routine updates the dmap to reflect the working
  *		state allocation of the specified block range. it directly
@@ -2065,10 +2058,10 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
  *		dmap's dmtree, as a whole, to reflect the allocated range.
  *
  * PARAMETERS:
- *      bmp	-  pointer to bmap descriptor
- *      dp	-  pointer to dmap to allocate bits from.
- *      blkno	-  starting block number of the bits to be allocated.
- *      nblocks	-  number of bits to be allocated.
+ *	bmp	-  pointer to bmap descriptor
+ *	dp	-  pointer to dmap to allocate bits from.
+ *	blkno	-  starting block number of the bits to be allocated.
+ *	nblocks	-  number of bits to be allocated.
  *
  * RETURN VALUES: none
  *
@@ -2149,7 +2142,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
 			 * the allocated words.
 			 */
 			for (; nwords > 0; nwords -= nw) {
-			        if (leaf[word] < BUDMIN) {
+				if (leaf[word] < BUDMIN) {
 					jfs_error(bmp->db_ipbmap->i_sb,
 						  "dbAllocBits: leaf page "
 						  "corrupt");
@@ -2202,7 +2195,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
 /*
  * NAME:	dbFreeBits()
  *
- * FUNCTION:    free a specified block range from a dmap.
+ * FUNCTION:	free a specified block range from a dmap.
  *
  *		this routine updates the dmap to reflect the working
  *		state allocation of the specified block range. it directly
@@ -2212,10 +2205,10 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
  *		dmtree, as a whole, to reflect the deallocated range.
  *
  * PARAMETERS:
- *      bmp	-  pointer to bmap descriptor
- *      dp	-  pointer to dmap to free bits from.
- *      blkno	-  starting block number of the bits to be freed.
- *      nblocks	-  number of bits to be freed.
+ *	bmp	-  pointer to bmap descriptor
+ *	dp	-  pointer to dmap to free bits from.
+ *	blkno	-  starting block number of the bits to be freed.
+ *	nblocks	-  number of bits to be freed.
  *
  * RETURN VALUES: 0 for success
  *
@@ -2388,19 +2381,19 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
  *		the new root value and the next dmap control page level to
  *		be adjusted.
  * PARAMETERS:
- *      bmp	-  pointer to bmap descriptor
- *      blkno	-  the first block of a block range within a dmap.  it is
+ *	bmp	-  pointer to bmap descriptor
+ *	blkno	-  the first block of a block range within a dmap.  it is
  *		   the allocation or deallocation of this block range that
  *		   requires the dmap control page to be adjusted.
- *      newval	-  the new value of the lower level dmap or dmap control
+ *	newval	-  the new value of the lower level dmap or dmap control
  *		   page root.
- *      alloc	-  'true' if adjustment is due to an allocation.
- *      level	-  current level of dmap control page (i.e. L0, L1, L2) to
+ *	alloc	-  'true' if adjustment is due to an allocation.
+ *	level	-  current level of dmap control page (i.e. L0, L1, L2) to
  *		   be adjusted.
  *
  * RETURN VALUES:
- *      0	- success
- *      -EIO	- i/o error
+ *	0	- success
+ *	-EIO	- i/o error
  *
  * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
  */
@@ -2544,16 +2537,16 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level)
 /*
  * NAME:	dbSplit()
  *
- * FUNCTION:    update the leaf of a dmtree with a new value, splitting
+ * FUNCTION:	update the leaf of a dmtree with a new value, splitting
  *		the leaf from the binary buddy system of the dmtree's
  *		leaves, as required.
  *
  * PARAMETERS:
- *      tp	- pointer to the tree containing the leaf.
- *      leafno	- the number of the leaf to be updated.
- *      splitsz	- the size the binary buddy system starting at the leaf
+ *	tp	- pointer to the tree containing the leaf.
+ *	leafno	- the number of the leaf to be updated.
+ *	splitsz	- the size the binary buddy system starting at the leaf
  *		  must be split to, specified as the log2 number of blocks.
- *      newval	- the new value for the leaf.
+ *	newval	- the new value for the leaf.
  *
  * RETURN VALUES: none
  *
@@ -2600,7 +2593,7 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval)
 /*
  * NAME:	dbBackSplit()
  *
- * FUNCTION:    back split the binary buddy system of dmtree leaves
+ * FUNCTION:	back split the binary buddy system of dmtree leaves
  *		that hold a specified leaf until the specified leaf
  *		starts its own binary buddy system.
  *
@@ -2617,8 +2610,8 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval)
  *		in which a previous join operation must be backed out.
  *
  * PARAMETERS:
- *      tp	- pointer to the tree containing the leaf.
- *      leafno	- the number of the leaf to be updated.
+ *	tp	- pointer to the tree containing the leaf.
+ *	leafno	- the number of the leaf to be updated.
  *
  * RETURN VALUES: none
  *
@@ -2692,14 +2685,14 @@ static int dbBackSplit(dmtree_t * tp, int leafno)
 /*
  * NAME:	dbJoin()
  *
- * FUNCTION:    update the leaf of a dmtree with a new value, joining
+ * FUNCTION:	update the leaf of a dmtree with a new value, joining
  *		the leaf with other leaves of the dmtree into a multi-leaf
  *		binary buddy system, as required.
  *
  * PARAMETERS:
- *      tp	- pointer to the tree containing the leaf.
- *      leafno	- the number of the leaf to be updated.
- *      newval	- the new value for the leaf.
+ *	tp	- pointer to the tree containing the leaf.
+ *	leafno	- the number of the leaf to be updated.
+ *	newval	- the new value for the leaf.
  *
  * RETURN VALUES: none
  */
@@ -2785,15 +2778,15 @@ static int dbJoin(dmtree_t * tp, int leafno, int newval)
 /*
  * NAME:	dbAdjTree()
  *
- * FUNCTION:    update a leaf of a dmtree with a new value, adjusting
+ * FUNCTION:	update a leaf of a dmtree with a new value, adjusting
  *		the dmtree, as required, to reflect the new leaf value.
  *		the combination of any buddies must already be done before
  *		this is called.
  *
  * PARAMETERS:
- *      tp	- pointer to the tree to be adjusted.
- *      leafno	- the number of the leaf to be updated.
- *      newval	- the new value for the leaf.
+ *	tp	- pointer to the tree to be adjusted.
+ *	leafno	- the number of the leaf to be updated.
+ *	newval	- the new value for the leaf.
  *
  * RETURN VALUES: none
  */
@@ -2852,7 +2845,7 @@ static void dbAdjTree(dmtree_t * tp, int leafno, int newval)
 /*
  * NAME:	dbFindLeaf()
  *
- * FUNCTION:    search a dmtree_t for sufficient free blocks, returning
+ * FUNCTION:	search a dmtree_t for sufficient free blocks, returning
  *		the index of a leaf describing the free blocks if
  *		sufficient free blocks are found.
  *
@@ -2861,15 +2854,15 @@ static void dbAdjTree(dmtree_t * tp, int leafno, int newval)
  *		free space.
  *
  * PARAMETERS:
- *      tp	- pointer to the tree to be searched.
- *      l2nb	- log2 number of free blocks to search for.
+ *	tp	- pointer to the tree to be searched.
+ *	l2nb	- log2 number of free blocks to search for.
  *	leafidx	- return pointer to be set to the index of the leaf
  *		  describing at least l2nb free blocks if sufficient
  *		  free blocks are found.
  *
  * RETURN VALUES:
- *      0	- success
- *      -ENOSPC	- insufficient free blocks.
+ *	0	- success
+ *	-ENOSPC	- insufficient free blocks.
  */
 static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx)
 {
@@ -2916,18 +2909,18 @@ static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx)
 /*
  * NAME:	dbFindBits()
  *
- * FUNCTION:    find a specified number of binary buddy free bits within a
+ * FUNCTION:	find a specified number of binary buddy free bits within a
  *		dmap bitmap word value.
  *
  *		this routine searches the bitmap value for (1 << l2nb) free
  *		bits at (1 << l2nb) alignments within the value.
  *
  * PARAMETERS:
- *      word	-  dmap bitmap word value.
- *      l2nb	-  number of free bits specified as a log2 number.
+ *	word	-  dmap bitmap word value.
+ *	l2nb	-  number of free bits specified as a log2 number.
  *
  * RETURN VALUES:
- *      starting bit number of free bits.
+ *	starting bit number of free bits.
  */
 static int dbFindBits(u32 word, int l2nb)
 {
@@ -2963,14 +2956,14 @@ static int dbFindBits(u32 word, int l2nb)
 /*
  * NAME:	dbMaxBud(u8 *cp)
  *
- * FUNCTION:    determine the largest binary buddy string of free
+ * FUNCTION:	determine the largest binary buddy string of free
  *		bits within 32-bits of the map.
  *
  * PARAMETERS:
- *      cp	-  pointer to the 32-bit value.
+ *	cp	-  pointer to the 32-bit value.
  *
  * RETURN VALUES:
- *      largest binary buddy of free bits within a dmap word.
+ *	largest binary buddy of free bits within a dmap word.
  */
 static int dbMaxBud(u8 * cp)
 {
@@ -3000,14 +2993,14 @@ static int dbMaxBud(u8 * cp)
 /*
  * NAME:	cnttz(uint word)
  *
- * FUNCTION:    determine the number of trailing zeros within a 32-bit
+ * FUNCTION:	determine the number of trailing zeros within a 32-bit
  *		value.
  *
  * PARAMETERS:
- *      value	-  32-bit value to be examined.
+ *	value	-  32-bit value to be examined.
  *
  * RETURN VALUES:
- *      count of trailing zeros
+ *	count of trailing zeros
  */
 static int cnttz(u32 word)
 {
@@ -3025,14 +3018,14 @@ static int cnttz(u32 word)
 /*
  * NAME:	cntlz(u32 value)
  *
- * FUNCTION:    determine the number of leading zeros within a 32-bit
+ * FUNCTION:	determine the number of leading zeros within a 32-bit
  *		value.
  *
  * PARAMETERS:
- *      value	-  32-bit value to be examined.
+ *	value	-  32-bit value to be examined.
  *
  * RETURN VALUES:
- *      count of leading zeros
+ *	count of leading zeros
  */
 static int cntlz(u32 value)
 {
@@ -3050,14 +3043,14 @@ static int cntlz(u32 value)
  * NAME:	blkstol2(s64 nb)
  *
  * FUNCTION:	convert a block count to its log2 value. if the block
- *	        count is not a l2 multiple, it is rounded up to the next
+ *		count is not a l2 multiple, it is rounded up to the next
  *		larger l2 multiple.
  *
  * PARAMETERS:
- *      nb	-  number of blocks
+ *	nb	-  number of blocks
  *
  * RETURN VALUES:
- *      log2 number of blocks
+ *	log2 number of blocks
  */
 static int blkstol2(s64 nb)
 {
@@ -3099,13 +3092,13 @@ static int blkstol2(s64 nb)
  *		at a time.
  *
  * PARAMETERS:
- *      ip	-  pointer to in-core inode;
- *      blkno	-  starting block number to be freed.
- *      nblocks	-  number of blocks to be freed.
+ *	ip	-  pointer to in-core inode;
+ *	blkno	-  starting block number to be freed.
+ *	nblocks	-  number of blocks to be freed.
  *
  * RETURN VALUES:
- *      0	- success
- *      -EIO	- i/o error
+ *	0	- success
+ *	-EIO	- i/o error
  */
 int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks)
 {
@@ -3278,10 +3271,10 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno,
  * L2
  *  |
  *   L1---------------------------------L1
- *    |                                  |
- *     L0---------L0---------L0           L0---------L0---------L0
- *      |          |          |            |          |          |
- *       d0,...,dn  d0,...,dn  d0,...,dn    d0,...,dn  d0,...,dn  d0,.,dm;
+ *    |					 |
+ *     L0---------L0---------L0		  L0---------L0---------L0
+ *      |	   |	      |		   |	      |		 |
+ *	 d0,...,dn  d0,...,dn  d0,...,dn    d0,...,dn  d0,...,dn  d0,.,dm;
  * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm
  *
  * <---old---><----------------------------extend----------------------->
@@ -3307,7 +3300,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno,	s64 nblocks)
 		 (long long) blkno, (long long) nblocks, (long long) newsize);
 
 	/*
-	 *      initialize bmap control page.
+	 *	initialize bmap control page.
 	 *
 	 * all the data in bmap control page should exclude
 	 * the mkfs hidden dmap page.
@@ -3330,7 +3323,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno,	s64 nblocks)
 	bmp->db_numag += ((u32) newsize % (u32) bmp->db_agsize) ? 1 : 0;
 
 	/*
-	 *      reconfigure db_agfree[]
+	 *	reconfigure db_agfree[]
 	 * from old AG configuration to new AG configuration;
 	 *
 	 * coalesce contiguous k (newAGSize/oldAGSize) AGs;
@@ -3362,7 +3355,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno,	s64 nblocks)
 	bmp->db_maxag = bmp->db_maxag / k;
 
 	/*
-	 *      extend bmap
+	 *	extend bmap
 	 *
 	 * update bit maps and corresponding level control pages;
 	 * global control page db_nfree, db_agfree[agno], db_maxfreebud;
@@ -3410,7 +3403,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno,	s64 nblocks)
 			/* compute start L0 */
 			j = 0;
 			l1leaf = l1dcp->stree + CTLLEAFIND;
-			p += nbperpage;	/* 1st L0 of L1.k  */
+			p += nbperpage;	/* 1st L0 of L1.k */
 		}
 
 		/*
@@ -3548,7 +3541,7 @@ errout:
 	return -EIO;
 
 	/*
-	 *      finalize bmap control page
+	 *	finalize bmap control page
 	 */
 finalize:
 
@@ -3567,7 +3560,7 @@ void dbFinalizeBmap(struct inode *ipbmap)
 	int i, n;
 
 	/*
-	 *      finalize bmap control page
+	 *	finalize bmap control page
 	 */
 //finalize:
 	/*
@@ -3953,8 +3946,8 @@ static int dbGetL2AGSize(s64 nblocks)
  * convert number of map pages to the zero origin top dmapctl level
  */
 #define BMAPPGTOLEV(npages)	\
-	(((npages) <= 3 + MAXL0PAGES) ? 0 \
-       : ((npages) <= 2 + MAXL1PAGES) ? 1 : 2)
+	(((npages) <= 3 + MAXL0PAGES) ? 0 : \
+	 ((npages) <= 2 + MAXL1PAGES) ? 1 : 2)
 
 s64 dbMapFileSizeToMapSize(struct inode * ipbmap)
 {
@@ -3981,8 +3974,8 @@ s64 dbMapFileSizeToMapSize(struct inode * ipbmap)
 		factor =
 		    (i == 2) ? MAXL1PAGES : ((i == 1) ? MAXL0PAGES : 1);
 		complete = (u32) npages / factor;
-		ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL
-				      : ((i == 1) ? LPERCTL : 1));
+		ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL :
+				      ((i == 1) ? LPERCTL : 1));
 
 		/* pages in last/incomplete child */
 		npages = (u32) npages % factor;
diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h
index 45ea454c74b..11e6d471b36 100644
--- a/fs/jfs/jfs_dmap.h
+++ b/fs/jfs/jfs_dmap.h
@@ -83,7 +83,7 @@ static __inline signed char TREEMAX(signed char *cp)
  *	- 1 is added to account for the control page of the map.
  */
 #define BLKTODMAP(b,s)    \
-        ((((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) << (s))
+	((((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) << (s))
 
 /*
  * convert disk block number to the logical block number of the LEVEL 0
@@ -98,7 +98,7 @@ static __inline signed char TREEMAX(signed char *cp)
  *	- 1 is added to account for the control page of the map.
  */
 #define BLKTOL0(b,s)      \
-        (((((b) >> 23) << 10) + ((b) >> 23) + ((b) >> 33) + 2 + 1) << (s))
+	(((((b) >> 23) << 10) + ((b) >> 23) + ((b) >> 33) + 2 + 1) << (s))
 
 /*
  * convert disk block number to the logical block number of the LEVEL 1
@@ -120,7 +120,7 @@ static __inline signed char TREEMAX(signed char *cp)
  * at the specified level which describes the disk block.
  */
 #define BLKTOCTL(b,s,l)   \
-        (((l) == 2) ? 1 : ((l) == 1) ? BLKTOL1((b),(s)) : BLKTOL0((b),(s)))
+	(((l) == 2) ? 1 : ((l) == 1) ? BLKTOL1((b),(s)) : BLKTOL0((b),(s)))
 
 /*
  * convert aggregate map size to the zero origin dmapctl level of the
@@ -145,27 +145,27 @@ static __inline signed char TREEMAX(signed char *cp)
  * dmaptree must be consistent with dmapctl.
  */
 struct dmaptree {
-	__le32 nleafs;		/* 4: number of tree leafs      */
-	__le32 l2nleafs;	/* 4: l2 number of tree leafs   */
-	__le32 leafidx;		/* 4: index of first tree leaf  */
-	__le32 height;		/* 4: height of the tree        */
+	__le32 nleafs;		/* 4: number of tree leafs	*/
+	__le32 l2nleafs;	/* 4: l2 number of tree leafs	*/
+	__le32 leafidx;		/* 4: index of first tree leaf	*/
+	__le32 height;		/* 4: height of the tree	*/
 	s8 budmin;		/* 1: min l2 tree leaf value to combine */
-	s8 stree[TREESIZE];	/* TREESIZE: tree               */
-	u8 pad[2];		/* 2: pad to word boundary      */
-};				/* - 360 -                      */
+	s8 stree[TREESIZE];	/* TREESIZE: tree		*/
+	u8 pad[2];		/* 2: pad to word boundary	*/
+};				/* - 360 -			*/
 
 /*
  *	dmap page per 8K blocks bitmap
  */
 struct dmap {
-	__le32 nblocks;		/* 4: num blks covered by this dmap     */
-	__le32 nfree;		/* 4: num of free blks in this dmap     */
-	__le64 start;		/* 8: starting blkno for this dmap      */
-	struct dmaptree tree;	/* 360: dmap tree                       */
-	u8 pad[1672];		/* 1672: pad to 2048 bytes              */
-	__le32 wmap[LPERDMAP];	/* 1024: bits of the working map        */
-	__le32 pmap[LPERDMAP];	/* 1024: bits of the persistent map     */
-};				/* - 4096 -                             */
+	__le32 nblocks;		/* 4: num blks covered by this dmap	*/
+	__le32 nfree;		/* 4: num of free blks in this dmap	*/
+	__le64 start;		/* 8: starting blkno for this dmap	*/
+	struct dmaptree tree;	/* 360: dmap tree			*/
+	u8 pad[1672];		/* 1672: pad to 2048 bytes		*/
+	__le32 wmap[LPERDMAP];	/* 1024: bits of the working map	*/
+	__le32 pmap[LPERDMAP];	/* 1024: bits of the persistent map	*/
+};				/* - 4096 -				*/
 
 /*
  *	disk map control page per level.
@@ -173,14 +173,14 @@ struct dmap {
  * dmapctl must be consistent with dmaptree.
  */
 struct dmapctl {
-	__le32 nleafs;		/* 4: number of tree leafs      */
-	__le32 l2nleafs;	/* 4: l2 number of tree leafs   */
-	__le32 leafidx;		/* 4: index of the first tree leaf      */
-	__le32 height;		/* 4: height of tree            */
-	s8 budmin;		/* 1: minimum l2 tree leaf value        */
-	s8 stree[CTLTREESIZE];	/* CTLTREESIZE: dmapctl tree    */
-	u8 pad[2714];		/* 2714: pad to 4096            */
-};				/* - 4096 -                     */
+	__le32 nleafs;		/* 4: number of tree leafs	*/
+	__le32 l2nleafs;	/* 4: l2 number of tree leafs	*/
+	__le32 leafidx;		/* 4: index of the first tree leaf	*/
+	__le32 height;		/* 4: height of tree		*/
+	s8 budmin;		/* 1: minimum l2 tree leaf value	*/
+	s8 stree[CTLTREESIZE];	/* CTLTREESIZE: dmapctl tree	*/
+	u8 pad[2714];		/* 2714: pad to 4096		*/
+};				/* - 4096 -			*/
 
 /*
  *	common definition for dmaptree within dmap and dmapctl
@@ -202,41 +202,41 @@ typedef union dmtree {
  *	on-disk aggregate disk allocation map descriptor.
  */
 struct dbmap_disk {
-	__le64 dn_mapsize;	/* 8: number of blocks in aggregate     */
-	__le64 dn_nfree;	/* 8: num free blks in aggregate map    */
-	__le32 dn_l2nbperpage;	/* 4: number of blks per page           */
-	__le32 dn_numag;	/* 4: total number of ags               */
-	__le32 dn_maxlevel;	/* 4: number of active ags              */
-	__le32 dn_maxag;	/* 4: max active alloc group number     */
-	__le32 dn_agpref;	/* 4: preferred alloc group (hint)      */
-	__le32 dn_aglevel;	/* 4: dmapctl level holding the AG      */
-	__le32 dn_agheigth;	/* 4: height in dmapctl of the AG       */
-	__le32 dn_agwidth;	/* 4: width in dmapctl of the AG        */
-	__le32 dn_agstart;	/* 4: start tree index at AG height     */
-	__le32 dn_agl2size;	/* 4: l2 num of blks per alloc group    */
-	__le64 dn_agfree[MAXAG];/* 8*MAXAG: per AG free count           */
-	__le64 dn_agsize;	/* 8: num of blks per alloc group       */
-	s8 dn_maxfreebud;	/* 1: max free buddy system             */
-	u8 pad[3007];		/* 3007: pad to 4096                    */
-};				/* - 4096 -                             */
+	__le64 dn_mapsize;	/* 8: number of blocks in aggregate	*/
+	__le64 dn_nfree;	/* 8: num free blks in aggregate map	*/
+	__le32 dn_l2nbperpage;	/* 4: number of blks per page		*/
+	__le32 dn_numag;	/* 4: total number of ags		*/
+	__le32 dn_maxlevel;	/* 4: number of active ags		*/
+	__le32 dn_maxag;	/* 4: max active alloc group number	*/
+	__le32 dn_agpref;	/* 4: preferred alloc group (hint)	*/
+	__le32 dn_aglevel;	/* 4: dmapctl level holding the AG	*/
+	__le32 dn_agheigth;	/* 4: height in dmapctl of the AG	*/
+	__le32 dn_agwidth;	/* 4: width in dmapctl of the AG	*/
+	__le32 dn_agstart;	/* 4: start tree index at AG height	*/
+	__le32 dn_agl2size;	/* 4: l2 num of blks per alloc group	*/
+	__le64 dn_agfree[MAXAG];/* 8*MAXAG: per AG free count		*/
+	__le64 dn_agsize;	/* 8: num of blks per alloc group	*/
+	s8 dn_maxfreebud;	/* 1: max free buddy system		*/
+	u8 pad[3007];		/* 3007: pad to 4096			*/
+};				/* - 4096 -				*/
 
 struct dbmap {
-	s64 dn_mapsize;		/* number of blocks in aggregate     */
-	s64 dn_nfree;		/* num free blks in aggregate map    */
-	int dn_l2nbperpage;	/* number of blks per page           */
-	int dn_numag;		/* total number of ags               */
-	int dn_maxlevel;	/* number of active ags              */
-	int dn_maxag;		/* max active alloc group number     */
-	int dn_agpref;		/* preferred alloc group (hint)      */
-	int dn_aglevel;		/* dmapctl level holding the AG      */
-	int dn_agheigth;	/* height in dmapctl of the AG       */
-	int dn_agwidth;		/* width in dmapctl of the AG        */
-	int dn_agstart;		/* start tree index at AG height     */
-	int dn_agl2size;	/* l2 num of blks per alloc group    */
-	s64 dn_agfree[MAXAG];	/* per AG free count           */
-	s64 dn_agsize;		/* num of blks per alloc group       */
-	signed char dn_maxfreebud;	/* max free buddy system             */
-};				/* - 4096 -                             */
+	s64 dn_mapsize;		/* number of blocks in aggregate	*/
+	s64 dn_nfree;		/* num free blks in aggregate map	*/
+	int dn_l2nbperpage;	/* number of blks per page		*/
+	int dn_numag;		/* total number of ags			*/
+	int dn_maxlevel;	/* number of active ags			*/
+	int dn_maxag;		/* max active alloc group number	*/
+	int dn_agpref;		/* preferred alloc group (hint)		*/
+	int dn_aglevel;		/* dmapctl level holding the AG		*/
+	int dn_agheigth;	/* height in dmapctl of the AG		*/
+	int dn_agwidth;		/* width in dmapctl of the AG		*/
+	int dn_agstart;		/* start tree index at AG height	*/
+	int dn_agl2size;	/* l2 num of blks per alloc group	*/
+	s64 dn_agfree[MAXAG];	/* per AG free count			*/
+	s64 dn_agsize;		/* num of blks per alloc group		*/
+	signed char dn_maxfreebud;	/* max free buddy system	*/
+};				/* - 4096 -				*/
 /*
  *	in-memory aggregate disk allocation map descriptor.
  */
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 6d62f322289..c14ba3cfa81 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -315,8 +315,8 @@ static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp,
 	lv = &llck->lv[llck->index];
 
 	/*
-	 *      Linelock slot size is twice the size of directory table
-	 *      slot size.  512 entries per page.
+	 *	Linelock slot size is twice the size of directory table
+	 *	slot size.  512 entries per page.
 	 */
 	lv->offset = ((index - 2) & 511) >> 1;
 	lv->length = 1;
@@ -615,7 +615,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
 	btstack->nsplit = 1;
 
 	/*
-	 *      search down tree from root:
+	 *	search down tree from root:
 	 *
 	 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
 	 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
@@ -659,7 +659,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
 			}
 			if (cmp == 0) {
 				/*
-				 *      search hit
+				 *	search hit
 				 */
 				/* search hit - leaf page:
 				 * return the entry found
@@ -723,7 +723,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
 		}
 
 		/*
-		 *      search miss
+		 *	search miss
 		 *
 		 * base is the smallest index with key (Kj) greater than
 		 * search key (K) and may be zero or (maxindex + 1) index.
@@ -834,7 +834,7 @@ int dtInsert(tid_t tid, struct inode *ip,
 	struct lv *lv;
 
 	/*
-	 *      retrieve search result
+	 *	retrieve search result
 	 *
 	 * dtSearch() returns (leaf page pinned, index at which to insert).
 	 * n.b. dtSearch() may return index of (maxindex + 1) of
@@ -843,7 +843,7 @@ int dtInsert(tid_t tid, struct inode *ip,
 	DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
 
 	/*
-	 *      insert entry for new key
+	 *	insert entry for new key
 	 */
 	if (DO_INDEX(ip)) {
 		if (JFS_IP(ip)->next_index == DIREND) {
@@ -860,9 +860,9 @@ int dtInsert(tid_t tid, struct inode *ip,
 	data.leaf.ino = *fsn;
 
 	/*
-	 *      leaf page does not have enough room for new entry:
+	 *	leaf page does not have enough room for new entry:
 	 *
-	 *      extend/split the leaf page;
+	 *	extend/split the leaf page;
 	 *
 	 * dtSplitUp() will insert the entry and unpin the leaf page.
 	 */
@@ -877,9 +877,9 @@ int dtInsert(tid_t tid, struct inode *ip,
 	}
 
 	/*
-	 *      leaf page does have enough room for new entry:
+	 *	leaf page does have enough room for new entry:
 	 *
-	 *      insert the new data entry into the leaf page;
+	 *	insert the new data entry into the leaf page;
 	 */
 	BT_MARK_DIRTY(mp, ip);
 	/*
@@ -967,13 +967,13 @@ static int dtSplitUp(tid_t tid,
 	}
 
 	/*
-	 *      split leaf page
+	 *	split leaf page
 	 *
 	 * The split routines insert the new entry, and
 	 * acquire txLock as appropriate.
 	 */
 	/*
-	 *      split root leaf page:
+	 *	split root leaf page:
 	 */
 	if (sp->header.flag & BT_ROOT) {
 		/*
@@ -1012,7 +1012,7 @@ static int dtSplitUp(tid_t tid,
 	}
 
 	/*
-	 *      extend first leaf page
+	 *	extend first leaf page
 	 *
 	 * extend the 1st extent if less than buffer page size
 	 * (dtExtendPage() reurns leaf page unpinned)
@@ -1068,7 +1068,7 @@ static int dtSplitUp(tid_t tid,
 	}
 
 	/*
-	 *      split leaf page <sp> into <sp> and a new right page <rp>.
+	 *	split leaf page <sp> into <sp> and a new right page <rp>.
 	 *
 	 * return <rp> pinned and its extent descriptor <rpxd>
 	 */
@@ -1433,7 +1433,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
 	rp->header.freecnt = rp->header.maxslot - fsi;
 
 	/*
-	 *      sequential append at tail: append without split
+	 *	sequential append at tail: append without split
 	 *
 	 * If splitting the last page on a level because of appending
 	 * a entry to it (skip is maxentry), it's likely that the access is
@@ -1467,7 +1467,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
 	}
 
 	/*
-	 *      non-sequential insert (at possibly middle page)
+	 *	non-sequential insert (at possibly middle page)
 	 */
 
 	/*
@@ -1508,7 +1508,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
 	left = 0;
 
 	/*
-	 *      compute fill factor for split pages
+	 *	compute fill factor for split pages
 	 *
 	 * <nxt> traces the next entry to move to rp
 	 * <off> traces the next entry to stay in sp
@@ -1551,7 +1551,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
 	/* <nxt> poins to the 1st entry to move */
 
 	/*
-	 *      move entries to right page
+	 *	move entries to right page
 	 *
 	 * dtMoveEntry() initializes rp and reserves entry for insertion
 	 *
@@ -1677,7 +1677,7 @@ static int dtExtendPage(tid_t tid,
 		return (rc);
 
 	/*
-	 *      extend the extent
+	 *	extend the extent
 	 */
 	pxdlist = split->pxdlist;
 	pxd = &pxdlist->pxd[pxdlist->npxd];
@@ -1722,7 +1722,7 @@ static int dtExtendPage(tid_t tid,
 	}
 
 	/*
-	 *      extend the page
+	 *	extend the page
 	 */
 	sp->header.self = *pxd;
 
@@ -1739,9 +1739,6 @@ static int dtExtendPage(tid_t tid,
 	/* update buffer extent descriptor of extended page */
 	xlen = lengthPXD(pxd);
 	xsize = xlen << JFS_SBI(sb)->l2bsize;
-#ifdef _STILL_TO_PORT
-	bmSetXD(smp, xaddr, xsize);
-#endif				/*  _STILL_TO_PORT */
 
 	/*
 	 * copy old stbl to new stbl at start of extended area
@@ -1836,7 +1833,7 @@ static int dtExtendPage(tid_t tid,
 	}
 
 	/*
-	 *      update parent entry on the parent/root page
+	 *	update parent entry on the parent/root page
 	 */
 	/*
 	 * acquire a transaction lock on the parent/root page
@@ -1904,7 +1901,7 @@ static int dtSplitRoot(tid_t tid,
 	sp = &JFS_IP(ip)->i_dtroot;
 
 	/*
-	 *      allocate/initialize a single (right) child page
+	 *	allocate/initialize a single (right) child page
 	 *
 	 * N.B. at first split, a one (or two) block to fit new entry
 	 * is allocated; at subsequent split, a full page is allocated;
@@ -1943,7 +1940,7 @@ static int dtSplitRoot(tid_t tid,
 	rp->header.prev = 0;
 
 	/*
-	 *      move in-line root page into new right page extent
+	 *	move in-line root page into new right page extent
 	 */
 	/* linelock header + copied entries + new stbl (1st slot) in new page */
 	ASSERT(dtlck->index == 0);
@@ -2016,7 +2013,7 @@ static int dtSplitRoot(tid_t tid,
 	dtInsertEntry(rp, split->index, split->key, split->data, &dtlck);
 
 	/*
-	 *      reset parent/root page
+	 *	reset parent/root page
 	 *
 	 * set the 1st entry offset to 0, which force the left-most key
 	 * at any level of the tree to be less than any search key.
@@ -2102,7 +2099,7 @@ int dtDelete(tid_t tid,
 	dtpage_t *np;
 
 	/*
-	 *      search for the entry to delete:
+	 *	search for the entry to delete:
 	 *
 	 * dtSearch() returns (leaf page pinned, index at which to delete).
 	 */
@@ -2253,7 +2250,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
 	int i;
 
 	/*
-	 *      keep the root leaf page which has become empty
+	 *	keep the root leaf page which has become empty
 	 */
 	if (BT_IS_ROOT(fmp)) {
 		/*
@@ -2269,7 +2266,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
 	}
 
 	/*
-	 *      free the non-root leaf page
+	 *	free the non-root leaf page
 	 */
 	/*
 	 * acquire a transaction lock on the page
@@ -2299,7 +2296,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
 	discard_metapage(fmp);
 
 	/*
-	 *      propagate page deletion up the directory tree
+	 *	propagate page deletion up the directory tree
 	 *
 	 * If the delete from the parent page makes it empty,
 	 * continue all the way up the tree.
@@ -2440,10 +2437,10 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
 
 #ifdef _NOTYET
 /*
- * NAME:        dtRelocate()
+ * NAME:	dtRelocate()
  *
- * FUNCTION:    relocate dtpage (internal or leaf) of directory;
- *              This function is mainly used by defragfs utility.
+ * FUNCTION:	relocate dtpage (internal or leaf) of directory;
+ *		This function is mainly used by defragfs utility.
  */
 int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
 	       s64 nxaddr)
@@ -2471,8 +2468,8 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
 		   xlen);
 
 	/*
-	 *      1. get the internal parent dtpage covering
-	 *      router entry for the tartget page to be relocated;
+	 *	1. get the internal parent dtpage covering
+	 *	router entry for the tartget page to be relocated;
 	 */
 	rc = dtSearchNode(ip, lmxaddr, opxd, &btstack);
 	if (rc)
@@ -2483,7 +2480,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
 	jfs_info("dtRelocate: parent router entry validated.");
 
 	/*
-	 *      2. relocate the target dtpage
+	 *	2. relocate the target dtpage
 	 */
 	/* read in the target page from src extent */
 	DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
@@ -2581,9 +2578,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
 
 	/* update the buffer extent descriptor of the dtpage */
 	xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;
-#ifdef _STILL_TO_PORT
-	bmSetXD(mp, nxaddr, xsize);
-#endif /* _STILL_TO_PORT */
+
 	/* unpin the relocated page */
 	DT_PUTPAGE(mp);
 	jfs_info("dtRelocate: target dtpage relocated.");
@@ -2594,7 +2589,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
 	 */
 
 	/*
-	 *      3. acquire maplock for the source extent to be freed;
+	 *	3. acquire maplock for the source extent to be freed;
 	 */
 	/* for dtpage relocation, write a LOG_NOREDOPAGE record
 	 * for the source dtpage (logredo() will init NoRedoPage
@@ -2609,7 +2604,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
 	pxdlock->index = 1;
 
 	/*
-	 *      4. update the parent router entry for relocation;
+	 *	4. update the parent router entry for relocation;
 	 *
 	 * acquire tlck for the parent entry covering the target dtpage;
 	 * write LOG_REDOPAGE to apply after image only;
@@ -2637,7 +2632,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
  * NAME:	dtSearchNode()
  *
  * FUNCTION:	Search for an dtpage containing a specified address
- *              This function is mainly used by defragfs utility.
+ *		This function is mainly used by defragfs utility.
  *
  * NOTE:	Search result on stack, the found page is pinned at exit.
  *		The result page must be an internal dtpage.
@@ -2660,7 +2655,7 @@ static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd,
 	BT_CLR(btstack);	/* reset stack */
 
 	/*
-	 *      descend tree to the level with specified leftmost page
+	 *	descend tree to the level with specified leftmost page
 	 *
 	 *  by convention, root bn = 0.
 	 */
@@ -2699,7 +2694,7 @@ static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd,
 	}
 
 	/*
-	 *      search each page at the current levevl
+	 *	search each page at the current levevl
 	 */
       loop:
 	stbl = DT_GETSTBL(p);
@@ -3044,9 +3039,9 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	if (DO_INDEX(ip)) {
 		/*
 		 * persistent index is stored in directory entries.
-		 * Special cases:        0 = .
-		 *                       1 = ..
-		 *                      -1 = End of directory
+		 * Special cases:	 0 = .
+		 *			 1 = ..
+		 *			-1 = End of directory
 		 */
 		do_index = 1;
 
@@ -3128,10 +3123,10 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		/*
 		 * Legacy filesystem - OS/2 & Linux JFS < 0.3.6
 		 *
-		 * pn = index = 0:      First entry "."
-		 * pn = 0; index = 1:   Second entry ".."
-		 * pn > 0:              Real entries, pn=1 -> leftmost page
-		 * pn = index = -1:     No more entries
+		 * pn = index = 0:	First entry "."
+		 * pn = 0; index = 1:	Second entry ".."
+		 * pn > 0:		Real entries, pn=1 -> leftmost page
+		 * pn = index = -1:	No more entries
 		 */
 		dtpos = filp->f_pos;
 		if (dtpos == 0) {
@@ -3351,7 +3346,7 @@ static int dtReadFirst(struct inode *ip, struct btstack * btstack)
 	BT_CLR(btstack);	/* reset stack */
 
 	/*
-	 *      descend leftmost path of the tree
+	 *	descend leftmost path of the tree
 	 *
 	 * by convention, root bn = 0.
 	 */
@@ -4531,7 +4526,7 @@ int dtModify(tid_t tid, struct inode *ip,
 	struct ldtentry *entry;
 
 	/*
-	 *      search for the entry to modify:
+	 *	search for the entry to modify:
 	 *
 	 * dtSearch() returns (leaf page pinned, index at which to modify).
 	 */
diff --git a/fs/jfs/jfs_dtree.h b/fs/jfs/jfs_dtree.h
index af8513f7864..8561c6ecece 100644
--- a/fs/jfs/jfs_dtree.h
+++ b/fs/jfs/jfs_dtree.h
@@ -35,7 +35,7 @@ typedef union {
 
 
 /*
- *      entry segment/slot
+ *	entry segment/slot
  *
  * an entry consists of type dependent head/only segment/slot and
  * additional segments/slots linked vi next field;
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
index a35bdca6a80..7ae1e3281de 100644
--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -34,8 +34,8 @@ static int extBrealloc(struct inode *, s64, s64, s64 *, s64 *);
 #endif
 static s64 extRoundDown(s64 nb);
 
-#define DPD(a)          (printk("(a): %d\n",(a)))
-#define DPC(a)          (printk("(a): %c\n",(a)))
+#define DPD(a)		(printk("(a): %d\n",(a)))
+#define DPC(a)		(printk("(a): %c\n",(a)))
 #define DPL1(a)					\
 {						\
 	if ((a) >> 32)				\
@@ -51,19 +51,19 @@ static s64 extRoundDown(s64 nb);
 		printk("(a): %x\n",(a) << 32);	\
 }
 
-#define DPD1(a)         (printk("(a): %d  ",(a)))
-#define DPX(a)          (printk("(a): %08x\n",(a)))
-#define DPX1(a)         (printk("(a): %08x  ",(a)))
-#define DPS(a)          (printk("%s\n",(a)))
-#define DPE(a)          (printk("\nENTERING: %s\n",(a)))
-#define DPE1(a)          (printk("\nENTERING: %s",(a)))
-#define DPS1(a)         (printk("  %s  ",(a)))
+#define DPD1(a)		(printk("(a): %d  ",(a)))
+#define DPX(a)		(printk("(a): %08x\n",(a)))
+#define DPX1(a)		(printk("(a): %08x  ",(a)))
+#define DPS(a)		(printk("%s\n",(a)))
+#define DPE(a)		(printk("\nENTERING: %s\n",(a)))
+#define DPE1(a)		(printk("\nENTERING: %s",(a)))
+#define DPS1(a)		(printk("  %s  ",(a)))
 
 
 /*
  * NAME:	extAlloc()
  *
- * FUNCTION:    allocate an extent for a specified page range within a
+ * FUNCTION:	allocate an extent for a specified page range within a
  *		file.
  *
  * PARAMETERS:
@@ -78,9 +78,9 @@ static s64 extRoundDown(s64 nb);
  *		  should be marked as allocated but not recorded.
  *
  * RETURN VALUES:
- *      0       - success
- *      -EIO	- i/o error.
- *      -ENOSPC	- insufficient disk resources.
+ *	0	- success
+ *	-EIO	- i/o error.
+ *	-ENOSPC	- insufficient disk resources.
  */
 int
 extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
@@ -192,9 +192,9 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
 
 #ifdef _NOTYET
 /*
- * NAME:        extRealloc()
+ * NAME:	extRealloc()
  *
- * FUNCTION:    extend the allocation of a file extent containing a
+ * FUNCTION:	extend the allocation of a file extent containing a
  *		partial back last page.
  *
  * PARAMETERS:
@@ -207,9 +207,9 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
  *		  should be marked as allocated but not recorded.
  *
  * RETURN VALUES:
- *      0       - success
- *      -EIO	- i/o error.
- *      -ENOSPC	- insufficient disk resources.
+ *	0	- success
+ *	-EIO	- i/o error.
+ *	-ENOSPC	- insufficient disk resources.
  */
 int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr)
 {
@@ -345,9 +345,9 @@ exit:
 
 
 /*
- * NAME:        extHint()
+ * NAME:	extHint()
  *
- * FUNCTION:    produce an extent allocation hint for a file offset.
+ * FUNCTION:	produce an extent allocation hint for a file offset.
  *
  * PARAMETERS:
  *	ip	- the inode of the file.
@@ -356,8 +356,8 @@ exit:
  *		  the hint.
  *
  * RETURN VALUES:
- *      0       - success
- *      -EIO	- i/o error.
+ *	0	- success
+ *	-EIO	- i/o error.
  */
 int extHint(struct inode *ip, s64 offset, xad_t * xp)
 {
@@ -387,7 +387,7 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp)
 	lxdl.nlxd = 1;
 	lxdl.lxd = &lxd;
 	LXDoffset(&lxd, prev)
-	    LXDlength(&lxd, nbperpage);
+	LXDlength(&lxd, nbperpage);
 
 	xadl.maxnxad = 1;
 	xadl.nxad = 0;
@@ -397,11 +397,11 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp)
 	if ((rc = xtLookupList(ip, &lxdl, &xadl, 0)))
 		return (rc);
 
-	/* check if not extent exists for the previous page.
+	/* check if no extent exists for the previous page.
 	 * this is possible for sparse files.
 	 */
 	if (xadl.nxad == 0) {
-//              assert(ISSPARSE(ip));
+//		assert(ISSPARSE(ip));
 		return (0);
 	}
 
@@ -410,28 +410,28 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp)
 	 */
 	xp->flag &= XAD_NOTRECORDED;
 
-        if(xadl.nxad != 1 || lengthXAD(xp) != nbperpage) {
+	if(xadl.nxad != 1 || lengthXAD(xp) != nbperpage) {
 		jfs_error(ip->i_sb, "extHint: corrupt xtree");
 		return -EIO;
-        }
+	}
 
 	return (0);
 }
 
 
 /*
- * NAME:        extRecord()
+ * NAME:	extRecord()
  *
- * FUNCTION:    change a page with a file from not recorded to recorded.
+ * FUNCTION:	change a page with a file from not recorded to recorded.
  *
  * PARAMETERS:
  *	ip	- inode of the file.
  *	cp	- cbuf of the file page.
  *
  * RETURN VALUES:
- *      0       - success
- *      -EIO	- i/o error.
- *      -ENOSPC	- insufficient disk resources.
+ *	0	- success
+ *	-EIO	- i/o error.
+ *	-ENOSPC	- insufficient disk resources.
  */
 int extRecord(struct inode *ip, xad_t * xp)
 {
@@ -451,9 +451,9 @@ int extRecord(struct inode *ip, xad_t * xp)
 
 #ifdef _NOTYET
 /*
- * NAME:        extFill()
+ * NAME:	extFill()
  *
- * FUNCTION:    allocate disk space for a file page that represents
+ * FUNCTION:	allocate disk space for a file page that represents
  *		a file hole.
  *
  * PARAMETERS:
@@ -461,16 +461,16 @@ int extRecord(struct inode *ip, xad_t * xp)
  *	cp	- cbuf of the file page represent the hole.
  *
  * RETURN VALUES:
- *      0       - success
- *      -EIO	- i/o error.
- *      -ENOSPC	- insufficient disk resources.
+ *	0	- success
+ *	-EIO	- i/o error.
+ *	-ENOSPC	- insufficient disk resources.
  */
 int extFill(struct inode *ip, xad_t * xp)
 {
 	int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage;
 	s64 blkno = offsetXAD(xp) >> ip->i_blkbits;
 
-//      assert(ISSPARSE(ip));
+//	assert(ISSPARSE(ip));
 
 	/* initialize the extent allocation hint */
 	XADaddress(xp, 0);
@@ -489,7 +489,7 @@ int extFill(struct inode *ip, xad_t * xp)
 /*
  * NAME:	extBalloc()
  *
- * FUNCTION:    allocate disk blocks to form an extent.
+ * FUNCTION:	allocate disk blocks to form an extent.
  *
  *		initially, we will try to allocate disk blocks for the
  *		requested size (nblocks).  if this fails (nblocks
@@ -513,9 +513,9 @@ int extFill(struct inode *ip, xad_t * xp)
  *		   allocated block range.
  *
  * RETURN VALUES:
- *      0       - success
- *      -EIO	- i/o error.
- *      -ENOSPC	- insufficient disk resources.
+ *	0	- success
+ *	-EIO	- i/o error.
+ *	-ENOSPC	- insufficient disk resources.
  */
 static int
 extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
@@ -580,7 +580,7 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
 /*
  * NAME:	extBrealloc()
  *
- * FUNCTION:    attempt to extend an extent's allocation.
+ * FUNCTION:	attempt to extend an extent's allocation.
  *
  *		Initially, we will try to extend the extent's allocation
  *		in place.  If this fails, we'll try to move the extent
@@ -597,8 +597,8 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
  *
  * PARAMETERS:
  *	ip	 - the inode of the file.
- *	blkno    - starting block number of the extents current allocation.
- *	nblks    - number of blocks within the extents current allocation.
+ *	blkno	 - starting block number of the extents current allocation.
+ *	nblks	 - number of blocks within the extents current allocation.
  *	newnblks - pointer to a s64 value.  on entry, this value is the
  *		   the new desired extent size (number of blocks).  on
  *		   successful exit, this value is set to the extent's actual
@@ -606,9 +606,9 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
  *	newblkno - the starting block number of the extents new allocation.
  *
  * RETURN VALUES:
- *      0       - success
- *      -EIO	- i/o error.
- *      -ENOSPC	- insufficient disk resources.
+ *	0	- success
+ *	-EIO	- i/o error.
+ *	-ENOSPC	- insufficient disk resources.
  */
 static int
 extBrealloc(struct inode *ip,
@@ -634,16 +634,16 @@ extBrealloc(struct inode *ip,
 
 
 /*
- * NAME:        extRoundDown()
+ * NAME:	extRoundDown()
  *
- * FUNCTION:    round down a specified number of blocks to the next
+ * FUNCTION:	round down a specified number of blocks to the next
  *		smallest power of 2 number.
  *
  * PARAMETERS:
  *	nb	- the inode of the file.
  *
  * RETURN VALUES:
- *      next smallest power of 2 number.
+ *	next smallest power of 2 number.
  */
 static s64 extRoundDown(s64 nb)
 {
diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h
index 38f70ac03be..b3f5463fbe5 100644
--- a/fs/jfs/jfs_filsys.h
+++ b/fs/jfs/jfs_filsys.h
@@ -34,9 +34,9 @@
 #define JFS_UNICODE	0x00000001	/* unicode name */
 
 /* mount time flags for error handling */
-#define JFS_ERR_REMOUNT_RO 0x00000002   /* remount read-only */
-#define JFS_ERR_CONTINUE   0x00000004   /* continue */
-#define JFS_ERR_PANIC      0x00000008   /* panic */
+#define JFS_ERR_REMOUNT_RO 0x00000002	/* remount read-only */
+#define JFS_ERR_CONTINUE   0x00000004	/* continue */
+#define JFS_ERR_PANIC      0x00000008	/* panic */
 
 /* Quota support */
 #define	JFS_USRQUOTA	0x00000010
@@ -83,7 +83,6 @@
 /*	case-insensitive name/directory support */
 
 #define JFS_AIX		0x80000000	/* AIX support */
-/*	POSIX name/directory  support - Never implemented*/
 
 /*
  *	buffer cache configuration
@@ -113,10 +112,10 @@
 #define IDATASIZE	256	/* inode inline data size */
 #define	IXATTRSIZE	128	/* inode inline extended attribute size */
 
-#define XTPAGE_SIZE     4096
-#define log2_PAGESIZE     12
+#define XTPAGE_SIZE	4096
+#define log2_PAGESIZE	12
 
-#define IAG_SIZE        4096
+#define IAG_SIZE	4096
 #define IAG_EXTENT_SIZE 4096
 #define	INOSPERIAG	4096	/* number of disk inodes per iag */
 #define	L2INOSPERIAG	12	/* l2 number of disk inodes per iag */
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index c6530227cda..3870ba8b908 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -93,21 +93,21 @@ static int copy_from_dinode(struct dinode *, struct inode *);
 static void copy_to_dinode(struct dinode *, struct inode *);
 
 /*
- * NAME:        diMount()
+ * NAME:	diMount()
  *
- * FUNCTION:    initialize the incore inode map control structures for
+ * FUNCTION:	initialize the incore inode map control structures for
  *		a fileset or aggregate init time.
  *
- *              the inode map's control structure (dinomap) is
- *              brought in from disk and placed in virtual memory.
+ *		the inode map's control structure (dinomap) is
+ *		brought in from disk and placed in virtual memory.
  *
  * PARAMETERS:
- *      ipimap  - pointer to inode map inode for the aggregate or fileset.
+ *	ipimap	- pointer to inode map inode for the aggregate or fileset.
  *
  * RETURN VALUES:
- *      0       - success
- *      -ENOMEM  - insufficient free virtual memory.
- *      -EIO	- i/o error.
+ *	0	- success
+ *	-ENOMEM	- insufficient free virtual memory.
+ *	-EIO	- i/o error.
  */
 int diMount(struct inode *ipimap)
 {
@@ -180,18 +180,18 @@ int diMount(struct inode *ipimap)
 
 
 /*
- * NAME:        diUnmount()
+ * NAME:	diUnmount()
  *
- * FUNCTION:    write to disk the incore inode map control structures for
+ * FUNCTION:	write to disk the incore inode map control structures for
  *		a fileset or aggregate at unmount time.
  *
  * PARAMETERS:
- *      ipimap  - pointer to inode map inode for the aggregate or fileset.
+ *	ipimap	- pointer to inode map inode for the aggregate or fileset.
  *
  * RETURN VALUES:
- *      0       - success
- *      -ENOMEM  - insufficient free virtual memory.
- *      -EIO	- i/o error.
+ *	0	- success
+ *	-ENOMEM	- insufficient free virtual memory.
+ *	-EIO	- i/o error.
  */
 int diUnmount(struct inode *ipimap, int mounterror)
 {
@@ -274,9 +274,9 @@ int diSync(struct inode *ipimap)
 
 
 /*
- * NAME:        diRead()
+ * NAME:	diRead()
  *
- * FUNCTION:    initialize an incore inode from disk.
+ * FUNCTION:	initialize an incore inode from disk.
  *
  *		on entry, the specifed incore inode should itself
  *		specify the disk inode number corresponding to the
@@ -285,7 +285,7 @@ int diSync(struct inode *ipimap)
  *		this routine handles incore inode initialization for
  *		both "special" and "regular" inodes.  special inodes
  *		are those required early in the mount process and
- *	        require special handling since much of the file system
+ *		require special handling since much of the file system
  *		is not yet initialized.  these "special" inodes are
  *		identified by a NULL inode map inode pointer and are
  *		actually initialized by a call to diReadSpecial().
@@ -298,12 +298,12 @@ int diSync(struct inode *ipimap)
  *		incore inode.
  *
  * PARAMETERS:
- *      ip  -  pointer to incore inode to be initialized from disk.
+ *	ip	-  pointer to incore inode to be initialized from disk.
  *
  * RETURN VALUES:
- *      0       - success
- *      -EIO	- i/o error.
- *      -ENOMEM	- insufficient memory
+ *	0	- success
+ *	-EIO	- i/o error.
+ *	-ENOMEM	- insufficient memory
  *
  */
 int diRead(struct inode *ip)
@@ -410,26 +410,26 @@ int diRead(struct inode *ip)
 
 
 /*
- * NAME:        diReadSpecial()
+ * NAME:	diReadSpecial()
  *
- * FUNCTION:    initialize a 'special' inode from disk.
+ * FUNCTION:	initialize a 'special' inode from disk.
  *
  *		this routines handles aggregate level inodes.  The
  *		inode cache cannot differentiate between the
  *		aggregate inodes and the filesystem inodes, so we
  *		handle these here.  We don't actually use the aggregate
- *	        inode map, since these inodes are at a fixed location
+ *		inode map, since these inodes are at a fixed location
  *		and in some cases the aggregate inode map isn't initialized
  *		yet.
  *
  * PARAMETERS:
- *      sb - filesystem superblock
+ *	sb - filesystem superblock
  *	inum - aggregate inode number
  *	secondary - 1 if secondary aggregate inode table
  *
  * RETURN VALUES:
- *      new inode	- success
- *      NULL		- i/o error.
+ *	new inode	- success
+ *	NULL		- i/o error.
  */
 struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
 {
@@ -502,12 +502,12 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
 }
 
 /*
- * NAME:        diWriteSpecial()
+ * NAME:	diWriteSpecial()
  *
- * FUNCTION:    Write the special inode to disk
+ * FUNCTION:	Write the special inode to disk
  *
  * PARAMETERS:
- *      ip - special inode
+ *	ip - special inode
  *	secondary - 1 if secondary aggregate inode table
  *
  * RETURN VALUES: none
@@ -554,9 +554,9 @@ void diWriteSpecial(struct inode *ip, int secondary)
 }
 
 /*
- * NAME:        diFreeSpecial()
+ * NAME:	diFreeSpecial()
  *
- * FUNCTION:    Free allocated space for special inode
+ * FUNCTION:	Free allocated space for special inode
  */
 void diFreeSpecial(struct inode *ip)
 {
@@ -572,9 +572,9 @@ void diFreeSpecial(struct inode *ip)
 
 
 /*
- * NAME:        diWrite()
+ * NAME:	diWrite()
  *
- * FUNCTION:    write the on-disk inode portion of the in-memory inode
+ * FUNCTION:	write the on-disk inode portion of the in-memory inode
  *		to its corresponding on-disk inode.
  *
  *		on entry, the specifed incore inode should itself
@@ -589,11 +589,11 @@ void diFreeSpecial(struct inode *ip)
  *
  * PARAMETERS:
  *	tid -  transacation id
- *      ip  -  pointer to incore inode to be written to the inode extent.
+ *	ip  -  pointer to incore inode to be written to the inode extent.
  *
  * RETURN VALUES:
- *      0       - success
- *      -EIO	- i/o error.
+ *	0	- success
+ *	-EIO	- i/o error.
  */
 int diWrite(tid_t tid, struct inode *ip)
 {
@@ -730,7 +730,7 @@ int diWrite(tid_t tid, struct inode *ip)
 	ilinelock = (struct linelock *) & tlck->lock;
 
 	/*
-	 *      regular file: 16 byte (XAD slot) granularity
+	 *	regular file: 16 byte (XAD slot) granularity
 	 */
 	if (type & tlckXTREE) {
 		xtpage_t *p, *xp;
@@ -755,7 +755,7 @@ int diWrite(tid_t tid, struct inode *ip)
 				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
 	}
 	/*
-	 *      directory: 32 byte (directory entry slot) granularity
+	 *	directory: 32 byte (directory entry slot) granularity
 	 */
 	else if (type & tlckDTREE) {
 		dtpage_t *p, *xp;
@@ -800,9 +800,8 @@ int diWrite(tid_t tid, struct inode *ip)
 	}
 
 	/*
-	 *      lock/copy inode base: 128 byte slot granularity
+	 *	lock/copy inode base: 128 byte slot granularity
 	 */
-// baseDinode:
 	lv = & dilinelock->lv[dilinelock->index];
 	lv->offset = dioffset >> L2INODESLOTSIZE;
 	copy_to_dinode(dp, ip);
@@ -813,17 +812,6 @@ int diWrite(tid_t tid, struct inode *ip)
 		lv->length = 1;
 	dilinelock->index++;
 
-#ifdef _JFS_FASTDASD
-	/*
-	 * We aren't logging changes to the DASD used in directory inodes,
-	 * but we need to write them to disk.  If we don't unmount cleanly,
-	 * mount will recalculate the DASD used.
-	 */
-	if (S_ISDIR(ip->i_mode)
-	    && (ip->i_ipmnt->i_mntflag & JFS_DASD_ENABLED))
-		memcpy(&dp->di_DASD, &ip->i_DASD, sizeof(struct dasd));
-#endif				/*  _JFS_FASTDASD */
-
 	/* release the buffer holding the updated on-disk inode.
 	 * the buffer will be later written by commit processing.
 	 */
@@ -834,9 +822,9 @@ int diWrite(tid_t tid, struct inode *ip)
 
 
 /*
- * NAME:        diFree(ip)
+ * NAME:	diFree(ip)
  *
- * FUNCTION:    free a specified inode from the inode working map
+ * FUNCTION:	free a specified inode from the inode working map
  *		for a fileset or aggregate.
  *
  *		if the inode to be freed represents the first (only)
@@ -865,11 +853,11 @@ int diWrite(tid_t tid, struct inode *ip)
  *		any updates and are held until all updates are complete.
  *
  * PARAMETERS:
- *      ip	- inode to be freed.
+ *	ip	- inode to be freed.
  *
  * RETURN VALUES:
- *      0       - success
- *      -EIO	- i/o error.
+ *	0	- success
+ *	-EIO	- i/o error.
  */
 int diFree(struct inode *ip)
 {
@@ -902,7 +890,8 @@ int diFree(struct inode *ip)
 	 * the map.
 	 */
 	if (iagno >= imap->im_nextiag) {
-		dump_mem("imap", imap, 32);
+		print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4,
+			       imap, 32, 0);
 		jfs_error(ip->i_sb,
 			  "diFree: inum = %d, iagno = %d, nextiag = %d",
 			  (uint) inum, iagno, imap->im_nextiag);
@@ -964,8 +953,8 @@ int diFree(struct inode *ip)
 		return -EIO;
 	}
 	/*
-	 *      inode extent still has some inodes or below low water mark:
-	 *      keep the inode extent;
+	 *	inode extent still has some inodes or below low water mark:
+	 *	keep the inode extent;
 	 */
 	if (bitmap ||
 	    imap->im_agctl[agno].numfree < 96 ||
@@ -1047,12 +1036,12 @@ int diFree(struct inode *ip)
 
 
 	/*
-	 *      inode extent has become free and above low water mark:
-	 *      free the inode extent;
+	 *	inode extent has become free and above low water mark:
+	 *	free the inode extent;
 	 */
 
 	/*
-	 *      prepare to update iag list(s) (careful update step 1)
+	 *	prepare to update iag list(s) (careful update step 1)
 	 */
 	amp = bmp = cmp = dmp = NULL;
 	fwd = back = -1;
@@ -1152,7 +1141,7 @@ int diFree(struct inode *ip)
 	invalidate_pxd_metapages(ip, freepxd);
 
 	/*
-	 *      update iag list(s) (careful update step 2)
+	 *	update iag list(s) (careful update step 2)
 	 */
 	/* add the iag to the ag extent free list if this is the
 	 * first free extent for the iag.
@@ -1338,20 +1327,20 @@ diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp)
 
 
 /*
- * NAME:        diAlloc(pip,dir,ip)
+ * NAME:	diAlloc(pip,dir,ip)
  *
- * FUNCTION:    allocate a disk inode from the inode working map
+ * FUNCTION:	allocate a disk inode from the inode working map
  *		for a fileset or aggregate.
  *
  * PARAMETERS:
- *      pip	- pointer to incore inode for the parent inode.
- *      dir	- 'true' if the new disk inode is for a directory.
- *      ip	- pointer to a new inode
+ *	pip	- pointer to incore inode for the parent inode.
+ *	dir	- 'true' if the new disk inode is for a directory.
+ *	ip	- pointer to a new inode
  *
  * RETURN VALUES:
- *      0       - success.
- *      -ENOSPC	- insufficient disk resources.
- *      -EIO	- i/o error.
+ *	0	- success.
+ *	-ENOSPC	- insufficient disk resources.
+ *	-EIO	- i/o error.
  */
 int diAlloc(struct inode *pip, bool dir, struct inode *ip)
 {
@@ -1433,7 +1422,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
 	addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts);
 
 	/*
-	 *      try to allocate from the IAG
+	 *	try to allocate from the IAG
 	 */
 	/* check if the inode may be allocated from the iag
 	 * (i.e. the inode has free inodes or new extent can be added).
@@ -1633,9 +1622,9 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
 
 
 /*
- * NAME:        diAllocAG(imap,agno,dir,ip)
+ * NAME:	diAllocAG(imap,agno,dir,ip)
  *
- * FUNCTION:    allocate a disk inode from the allocation group.
+ * FUNCTION:	allocate a disk inode from the allocation group.
  *
  *		this routine first determines if a new extent of free
  *		inodes should be added for the allocation group, with
@@ -1649,17 +1638,17 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
  * PRE CONDITION: Already have the AG lock for this AG.
  *
  * PARAMETERS:
- *      imap	- pointer to inode map control structure.
- *      agno	- allocation group to allocate from.
- *      dir	- 'true' if the new disk inode is for a directory.
- *      ip	- pointer to the new inode to be filled in on successful return
+ *	imap	- pointer to inode map control structure.
+ *	agno	- allocation group to allocate from.
+ *	dir	- 'true' if the new disk inode is for a directory.
+ *	ip	- pointer to the new inode to be filled in on successful return
  *		  with the disk inode number allocated, its extent address
  *		  and the start of the ag.
  *
  * RETURN VALUES:
- *      0       - success.
- *      -ENOSPC	- insufficient disk resources.
- *      -EIO	- i/o error.
+ *	0	- success.
+ *	-ENOSPC	- insufficient disk resources.
+ *	-EIO	- i/o error.
  */
 static int
 diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
@@ -1709,9 +1698,9 @@ diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
 
 
 /*
- * NAME:        diAllocAny(imap,agno,dir,iap)
+ * NAME:	diAllocAny(imap,agno,dir,iap)
  *
- * FUNCTION:    allocate a disk inode from any other allocation group.
+ * FUNCTION:	allocate a disk inode from any other allocation group.
  *
  *		this routine is called when an allocation attempt within
  *		the primary allocation group has failed. if attempts to
@@ -1719,17 +1708,17 @@ diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
  *		specified primary group.
  *
  * PARAMETERS:
- *      imap	- pointer to inode map control structure.
- *      agno	- primary allocation group (to avoid).
- *      dir	- 'true' if the new disk inode is for a directory.
- *      ip	- pointer to a new inode to be filled in on successful return
+ *	imap	- pointer to inode map control structure.
+ *	agno	- primary allocation group (to avoid).
+ *	dir	- 'true' if the new disk inode is for a directory.
+ *	ip	- pointer to a new inode to be filled in on successful return
  *		  with the disk inode number allocated, its extent address
  *		  and the start of the ag.
  *
  * RETURN VALUES:
- *      0       - success.
- *      -ENOSPC	- insufficient disk resources.
- *      -EIO	- i/o error.
+ *	0	- success.
+ *	-ENOSPC	- insufficient disk resources.
+ *	-EIO	- i/o error.
  */
 static int
 diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
@@ -1772,9 +1761,9 @@ diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
 
 
 /*
- * NAME:        diAllocIno(imap,agno,ip)
+ * NAME:	diAllocIno(imap,agno,ip)
  *
- * FUNCTION:    allocate a disk inode from the allocation group's free
+ * FUNCTION:	allocate a disk inode from the allocation group's free
  *		inode list, returning an error if this free list is
  *		empty (i.e. no iags on the list).
  *
@@ -1785,16 +1774,16 @@ diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
  * PRE CONDITION: Already have AG lock for this AG.
  *
  * PARAMETERS:
- *      imap	- pointer to inode map control structure.
- *      agno	- allocation group.
- *      ip	- pointer to new inode to be filled in on successful return
+ *	imap	- pointer to inode map control structure.
+ *	agno	- allocation group.
+ *	ip	- pointer to new inode to be filled in on successful return
  *		  with the disk inode number allocated, its extent address
  *		  and the start of the ag.
  *
  * RETURN VALUES:
- *      0       - success.
- *      -ENOSPC	- insufficient disk resources.
- *      -EIO	- i/o error.
+ *	0	- success.
+ *	-ENOSPC	- insufficient disk resources.
+ *	-EIO	- i/o error.
  */
 static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
 {
@@ -1890,7 +1879,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
 
 
 /*
- * NAME:        diAllocExt(imap,agno,ip)
+ * NAME:	diAllocExt(imap,agno,ip)
  *
  * FUNCTION:	add a new extent of free inodes to an iag, allocating
  *		an inode from this extent to satisfy the current allocation
@@ -1910,16 +1899,16 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
  *		for the purpose of satisfying this request.
  *
  * PARAMETERS:
- *      imap	- pointer to inode map control structure.
- *      agno	- allocation group number.
- *      ip	- pointer to new inode to be filled in on successful return
+ *	imap	- pointer to inode map control structure.
+ *	agno	- allocation group number.
+ *	ip	- pointer to new inode to be filled in on successful return
  *		  with the disk inode number allocated, its extent address
  *		  and the start of the ag.
  *
  * RETURN VALUES:
- *      0       - success.
- *      -ENOSPC	- insufficient disk resources.
- *      -EIO	- i/o error.
+ *	0	- success.
+ *	-ENOSPC	- insufficient disk resources.
+ *	-EIO	- i/o error.
  */
 static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
 {
@@ -2010,7 +1999,7 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
 
 
 /*
- * NAME:        diAllocBit(imap,iagp,ino)
+ * NAME:	diAllocBit(imap,iagp,ino)
  *
  * FUNCTION:	allocate a backed inode from an iag.
  *
@@ -2030,14 +2019,14 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
  *	this AG.  Must have read lock on imap inode.
  *
  * PARAMETERS:
- *      imap	- pointer to inode map control structure.
- *      iagp	- pointer to iag.
- *      ino	- inode number to be allocated within the iag.
+ *	imap	- pointer to inode map control structure.
+ *	iagp	- pointer to iag.
+ *	ino	- inode number to be allocated within the iag.
  *
  * RETURN VALUES:
- *      0       - success.
- *      -ENOSPC	- insufficient disk resources.
- *      -EIO	- i/o error.
+ *	0	- success.
+ *	-ENOSPC	- insufficient disk resources.
+ *	-EIO	- i/o error.
  */
 static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
 {
@@ -2144,11 +2133,11 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
 
 
 /*
- * NAME:        diNewExt(imap,iagp,extno)
+ * NAME:	diNewExt(imap,iagp,extno)
  *
- * FUNCTION:    initialize a new extent of inodes for an iag, allocating
- *	        the first inode of the extent for use for the current
- *	        allocation request.
+ * FUNCTION:	initialize a new extent of inodes for an iag, allocating
+ *		the first inode of the extent for use for the current
+ *		allocation request.
  *
  *		disk resources are allocated for the new extent of inodes
  *		and the inodes themselves are initialized to reflect their
@@ -2177,14 +2166,14 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
  *	this AG.  Must have read lock on imap inode.
  *
  * PARAMETERS:
- *      imap	- pointer to inode map control structure.
- *      iagp	- pointer to iag.
- *      extno	- extent number.
+ *	imap	- pointer to inode map control structure.
+ *	iagp	- pointer to iag.
+ *	extno	- extent number.
  *
  * RETURN VALUES:
- *      0       - success.
- *      -ENOSPC	- insufficient disk resources.
- *      -EIO	- i/o error.
+ *	0	- success.
+ *	-ENOSPC	- insufficient disk resources.
+ *	-EIO	- i/o error.
  */
 static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
 {
@@ -2430,7 +2419,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
 
 
 /*
- * NAME:        diNewIAG(imap,iagnop,agno)
+ * NAME:	diNewIAG(imap,iagnop,agno)
  *
  * FUNCTION:	allocate a new iag for an allocation group.
  *
@@ -2443,16 +2432,16 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
  *		and returned to satisfy the request.
  *
  * PARAMETERS:
- *      imap	- pointer to inode map control structure.
- *      iagnop	- pointer to an iag number set with the number of the
+ *	imap	- pointer to inode map control structure.
+ *	iagnop	- pointer to an iag number set with the number of the
  *		  newly allocated iag upon successful return.
- *      agno	- allocation group number.
+ *	agno	- allocation group number.
  *	bpp	- Buffer pointer to be filled in with new IAG's buffer
  *
  * RETURN VALUES:
- *      0       - success.
- *      -ENOSPC	- insufficient disk resources.
- *      -EIO	- i/o error.
+ *	0	- success.
+ *	-ENOSPC	- insufficient disk resources.
+ *	-EIO	- i/o error.
  *
  * serialization:
  *	AG lock held on entry/exit;
@@ -2461,7 +2450,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
  *
  * note: new iag transaction:
  * . synchronously write iag;
- * . write log of xtree and inode  of imap;
+ * . write log of xtree and inode of imap;
  * . commit;
  * . synchronous write of xtree (right to left, bottom to top);
  * . at start of logredo(): init in-memory imap with one additional iag page;
@@ -2481,9 +2470,6 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
 	s64 xaddr = 0;
 	s64 blkno;
 	tid_t tid;
-#ifdef _STILL_TO_PORT
-	xad_t xad;
-#endif				/*  _STILL_TO_PORT */
 	struct inode *iplist[1];
 
 	/* pick up pointers to the inode map and mount inodes */
@@ -2674,15 +2660,15 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
 }
 
 /*
- * NAME:        diIAGRead()
+ * NAME:	diIAGRead()
  *
- * FUNCTION:    get the buffer for the specified iag within a fileset
+ * FUNCTION:	get the buffer for the specified iag within a fileset
  *		or aggregate inode map.
  *
  * PARAMETERS:
- *      imap	- pointer to inode map control structure.
- *      iagno	- iag number.
- *      bpp	- point to buffer pointer to be filled in on successful
+ *	imap	- pointer to inode map control structure.
+ *	iagno	- iag number.
+ *	bpp	- point to buffer pointer to be filled in on successful
  *		  exit.
  *
  * SERIALIZATION:
@@ -2691,8 +2677,8 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
  *	 the read lock is unnecessary.)
  *
  * RETURN VALUES:
- *      0       - success.
- *      -EIO	- i/o error.
+ *	0	- success.
+ *	-EIO	- i/o error.
  */
 static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp)
 {
@@ -2712,17 +2698,17 @@ static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp)
 }
 
 /*
- * NAME:        diFindFree()
+ * NAME:	diFindFree()
  *
- * FUNCTION:    find the first free bit in a word starting at
+ * FUNCTION:	find the first free bit in a word starting at
  *		the specified bit position.
  *
  * PARAMETERS:
- *      word	- word to be examined.
- *      start	- starting bit position.
+ *	word	- word to be examined.
+ *	start	- starting bit position.
  *
  * RETURN VALUES:
- *      bit position of first free bit in the word or 32 if
+ *	bit position of first free bit in the word or 32 if
  *	no free bits were found.
  */
 static int diFindFree(u32 word, int start)
@@ -2897,7 +2883,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
 		   atomic_read(&imap->im_numfree));
 
 	/*
-	 *      reconstruct imap
+	 *	reconstruct imap
 	 *
 	 * coalesce contiguous k (newAGSize/oldAGSize) AGs;
 	 * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn;
@@ -2913,7 +2899,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
 	}
 
 	/*
-	 *      process each iag page of the map.
+	 *	process each iag page of the map.
 	 *
 	 * rebuild AG Free Inode List, AG Free Inode Extent List;
 	 */
@@ -2932,7 +2918,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
 
 		/* leave free iag in the free iag list */
 		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
-		        release_metapage(bp);
+			release_metapage(bp);
 			continue;
 		}
 
@@ -3063,13 +3049,13 @@ static void duplicateIXtree(struct super_block *sb, s64 blkno,
 }
 
 /*
- * NAME:        copy_from_dinode()
+ * NAME:	copy_from_dinode()
  *
- * FUNCTION:    Copies inode info from disk inode to in-memory inode
+ * FUNCTION:	Copies inode info from disk inode to in-memory inode
  *
  * RETURN VALUES:
- *      0       - success
- *      -ENOMEM	- insufficient memory
+ *	0	- success
+ *	-ENOMEM	- insufficient memory
  */
 static int copy_from_dinode(struct dinode * dip, struct inode *ip)
 {
@@ -3151,9 +3137,9 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip)
 }
 
 /*
- * NAME:        copy_to_dinode()
+ * NAME:	copy_to_dinode()
  *
- * FUNCTION:    Copies inode info from in-memory inode to disk inode
+ * FUNCTION:	Copies inode info from in-memory inode to disk inode
  */
 static void copy_to_dinode(struct dinode * dip, struct inode *ip)
 {
diff --git a/fs/jfs/jfs_imap.h b/fs/jfs/jfs_imap.h
index 4f9c346ed49..610a0e9d894 100644
--- a/fs/jfs/jfs_imap.h
+++ b/fs/jfs/jfs_imap.h
@@ -24,17 +24,17 @@
  *	jfs_imap.h: disk inode manager
  */
 
-#define	EXTSPERIAG	128	/* number of disk inode extent per iag  */
-#define IMAPBLKNO	0	/* lblkno of dinomap within inode map   */
-#define SMAPSZ		4	/* number of words per summary map      */
+#define	EXTSPERIAG	128	/* number of disk inode extent per iag	*/
+#define IMAPBLKNO	0	/* lblkno of dinomap within inode map	*/
+#define SMAPSZ		4	/* number of words per summary map	*/
 #define	EXTSPERSUM	32	/* number of extents per summary map entry */
 #define	L2EXTSPERSUM	5	/* l2 number of extents per summary map */
 #define	PGSPERIEXT	4	/* number of 4K pages per dinode extent */
-#define	MAXIAGS		((1<<20)-1)	/* maximum number of iags       */
-#define	MAXAG		128	/* maximum number of allocation groups  */
+#define	MAXIAGS		((1<<20)-1)	/* maximum number of iags	*/
+#define	MAXAG		128	/* maximum number of allocation groups	*/
 
-#define AMAPSIZE      512	/* bytes in the IAG allocation maps */
-#define SMAPSIZE      16	/* bytes in the IAG summary maps */
+#define AMAPSIZE	512	/* bytes in the IAG allocation maps */
+#define SMAPSIZE	16	/* bytes in the IAG summary maps */
 
 /* convert inode number to iag number */
 #define	INOTOIAG(ino)	((ino) >> L2INOSPERIAG)
@@ -60,31 +60,31 @@
  *	inode allocation group page (per 4096 inodes of an AG)
  */
 struct iag {
-	__le64 agstart;		/* 8: starting block of ag              */
-	__le32 iagnum;		/* 4: inode allocation group number     */
-	__le32 inofreefwd;	/* 4: ag inode free list forward        */
-	__le32 inofreeback;	/* 4: ag inode free list back           */
-	__le32 extfreefwd;	/* 4: ag inode extent free list forward */
-	__le32 extfreeback;	/* 4: ag inode extent free list back    */
-	__le32 iagfree;		/* 4: iag free list                     */
+	__le64 agstart;		/* 8: starting block of ag		*/
+	__le32 iagnum;		/* 4: inode allocation group number	*/
+	__le32 inofreefwd;	/* 4: ag inode free list forward	*/
+	__le32 inofreeback;	/* 4: ag inode free list back		*/
+	__le32 extfreefwd;	/* 4: ag inode extent free list forward	*/
+	__le32 extfreeback;	/* 4: ag inode extent free list back	*/
+	__le32 iagfree;		/* 4: iag free list			*/
 
 	/* summary map: 1 bit per inode extent */
 	__le32 inosmap[SMAPSZ];	/* 16: sum map of mapwords w/ free inodes;
-				 *      note: this indicates free and backed
-				 *      inodes, if the extent is not backed the
-				 *      value will be 1.  if the extent is
-				 *      backed but all inodes are being used the
-				 *      value will be 1.  if the extent is
-				 *      backed but at least one of the inodes is
-				 *      free the value will be 0.
+				 *	note: this indicates free and backed
+				 *	inodes, if the extent is not backed the
+				 *	value will be 1.  if the extent is
+				 *	backed but all inodes are being used the
+				 *	value will be 1.  if the extent is
+				 *	backed but at least one of the inodes is
+				 *	free the value will be 0.
 				 */
 	__le32 extsmap[SMAPSZ];	/* 16: sum map of mapwords w/ free extents */
-	__le32 nfreeinos;		/* 4: number of free inodes             */
-	__le32 nfreeexts;		/* 4: number of free extents            */
+	__le32 nfreeinos;	/* 4: number of free inodes		*/
+	__le32 nfreeexts;	/* 4: number of free extents		*/
 	/* (72) */
 	u8 pad[1976];		/* 1976: pad to 2048 bytes */
 	/* allocation bit map: 1 bit per inode (0 - free, 1 - allocated) */
-	__le32 wmap[EXTSPERIAG];	/* 512: working allocation map  */
+	__le32 wmap[EXTSPERIAG];	/* 512: working allocation map */
 	__le32 pmap[EXTSPERIAG];	/* 512: persistent allocation map */
 	pxd_t inoext[EXTSPERIAG];	/* 1024: inode extent addresses */
 };				/* (4096) */
@@ -93,44 +93,44 @@ struct iag {
  *	per AG control information (in inode map control page)
  */
 struct iagctl_disk {
-	__le32 inofree;		/* 4: free inode list anchor            */
-	__le32 extfree;		/* 4: free extent list anchor           */
-	__le32 numinos;		/* 4: number of backed inodes           */
-	__le32 numfree;		/* 4: number of free inodes             */
+	__le32 inofree;		/* 4: free inode list anchor		*/
+	__le32 extfree;		/* 4: free extent list anchor		*/
+	__le32 numinos;		/* 4: number of backed inodes		*/
+	__le32 numfree;		/* 4: number of free inodes		*/
 };				/* (16) */
 
 struct iagctl {
-	int inofree;		/* free inode list anchor            */
-	int extfree;		/* free extent list anchor           */
-	int numinos;		/* number of backed inodes           */
-	int numfree;		/* number of free inodes             */
+	int inofree;		/* free inode list anchor		*/
+	int extfree;		/* free extent list anchor		*/
+	int numinos;		/* number of backed inodes		*/
+	int numfree;		/* number of free inodes		*/
 };
 
 /*
  *	per fileset/aggregate inode map control page
  */
 struct dinomap_disk {
-	__le32 in_freeiag;	/* 4: free iag list anchor     */
-	__le32 in_nextiag;	/* 4: next free iag number     */
-	__le32 in_numinos;	/* 4: num of backed inodes */
+	__le32 in_freeiag;	/* 4: free iag list anchor	*/
+	__le32 in_nextiag;	/* 4: next free iag number	*/
+	__le32 in_numinos;	/* 4: num of backed inodes	*/
 	__le32 in_numfree;	/* 4: num of free backed inodes */
 	__le32 in_nbperiext;	/* 4: num of blocks per inode extent */
-	__le32 in_l2nbperiext;	/* 4: l2 of in_nbperiext */
-	__le32 in_diskblock;	/* 4: for standalone test driver  */
-	__le32 in_maxag;	/* 4: for standalone test driver  */
-	u8 pad[2016];		/* 2016: pad to 2048 */
+	__le32 in_l2nbperiext;	/* 4: l2 of in_nbperiext	*/
+	__le32 in_diskblock;	/* 4: for standalone test driver */
+	__le32 in_maxag;	/* 4: for standalone test driver */
+	u8 pad[2016];		/* 2016: pad to 2048		*/
 	struct iagctl_disk in_agctl[MAXAG]; /* 2048: AG control information */
 };				/* (4096) */
 
 struct dinomap {
-	int in_freeiag;		/* free iag list anchor     */
-	int in_nextiag;		/* next free iag number     */
-	int in_numinos;		/* num of backed inodes */
-	int in_numfree;		/* num of free backed inodes */
+	int in_freeiag;		/* free iag list anchor		*/
+	int in_nextiag;		/* next free iag number		*/
+	int in_numinos;		/* num of backed inodes		*/
+	int in_numfree;		/* num of free backed inodes	*/
 	int in_nbperiext;	/* num of blocks per inode extent */
-	int in_l2nbperiext;	/* l2 of in_nbperiext */
-	int in_diskblock;	/* for standalone test driver  */
-	int in_maxag;		/* for standalone test driver  */
+	int in_l2nbperiext;	/* l2 of in_nbperiext		*/
+	int in_diskblock;	/* for standalone test driver	*/
+	int in_maxag;		/* for standalone test driver	*/
 	struct iagctl in_agctl[MAXAG];	/* AG control information */
 };
 
@@ -139,9 +139,9 @@ struct dinomap {
  */
 struct inomap {
 	struct dinomap im_imap;		/* 4096: inode allocation control */
-	struct inode *im_ipimap;	/* 4: ptr to inode for imap   */
-	struct mutex im_freelock;	/* 4: iag free list lock      */
-	struct mutex im_aglock[MAXAG];	/* 512: per AG locks          */
+	struct inode *im_ipimap;	/* 4: ptr to inode for imap	*/
+	struct mutex im_freelock;	/* 4: iag free list lock	*/
+	struct mutex im_aglock[MAXAG];	/* 512: per AG locks		*/
 	u32 *im_DBGdimap;
 	atomic_t im_numinos;	/* num of backed inodes */
 	atomic_t im_numfree;	/* num of free backed inodes */
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index 8f453eff3c8..cb8f30985ad 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -40,7 +40,7 @@ struct jfs_inode_info {
 	uint	mode2;		/* jfs-specific mode		*/
 	uint	saved_uid;	/* saved for uid mount option */
 	uint	saved_gid;	/* saved for gid mount option */
-	pxd_t   ixpxd;		/* inode extent descriptor	*/
+	pxd_t	ixpxd;		/* inode extent descriptor	*/
 	dxd_t	acl;		/* dxd describing acl	*/
 	dxd_t	ea;		/* dxd describing ea	*/
 	time_t	otime;		/* time created	*/
@@ -190,7 +190,7 @@ struct jfs_sb_info {
 	uint		gengen;		/* inode generation generator*/
 	uint		inostamp;	/* shows inode belongs to fileset*/
 
-        /* Formerly in ipbmap */
+	/* Formerly in ipbmap */
 	struct bmap	*bmap;		/* incore bmap descriptor	*/
 	struct nls_table *nls_tab;	/* current codepage		*/
 	struct inode *direct_inode;	/* metadata inode */
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 44a2f33cb98..de3e4a506db 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -244,7 +244,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 		goto writeRecord;
 
 	/*
-	 *      initialize/update page/transaction recovery lsn
+	 *	initialize/update page/transaction recovery lsn
 	 */
 	lsn = log->lsn;
 
@@ -263,7 +263,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	}
 
 	/*
-	 *      initialize/update lsn of tblock of the page
+	 *	initialize/update lsn of tblock of the page
 	 *
 	 * transaction inherits oldest lsn of pages associated
 	 * with allocation/deallocation of resources (their
@@ -307,7 +307,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	LOGSYNC_UNLOCK(log, flags);
 
 	/*
-	 *      write the log record
+	 *	write the log record
 	 */
       writeRecord:
 	lsn = lmWriteRecord(log, tblk, lrd, tlck);
@@ -372,7 +372,7 @@ lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 		goto moveLrd;
 
 	/*
-	 *      move log record data
+	 *	move log record data
 	 */
 	/* retrieve source meta-data page to log */
 	if (tlck->flag & tlckPAGELOCK) {
@@ -465,7 +465,7 @@ lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	}
 
 	/*
-	 *      move log record descriptor
+	 *	move log record descriptor
 	 */
       moveLrd:
 	lrd->length = cpu_to_le16(len);
@@ -574,7 +574,7 @@ static int lmNextPage(struct jfs_log * log)
 	LOGGC_LOCK(log);
 
 	/*
-	 *      write or queue the full page at the tail of write queue
+	 *	write or queue the full page at the tail of write queue
 	 */
 	/* get the tail tblk on commit queue */
 	if (list_empty(&log->cqueue))
@@ -625,7 +625,7 @@ static int lmNextPage(struct jfs_log * log)
 	LOGGC_UNLOCK(log);
 
 	/*
-	 *      allocate/initialize next page
+	 *	allocate/initialize next page
 	 */
 	/* if log wraps, the first data page of log is 2
 	 * (0 never used, 1 is superblock).
@@ -953,7 +953,7 @@ static int lmLogSync(struct jfs_log * log, int hard_sync)
 		}
 
 	/*
-	 *      forward syncpt
+	 *	forward syncpt
 	 */
 	/* if last sync is same as last syncpt,
 	 * invoke sync point forward processing to update sync.
@@ -989,7 +989,7 @@ static int lmLogSync(struct jfs_log * log, int hard_sync)
 		lsn = log->lsn;
 
 	/*
-	 *      setup next syncpt trigger (SWAG)
+	 *	setup next syncpt trigger (SWAG)
 	 */
 	logsize = log->logsize;
 
@@ -1000,11 +1000,11 @@ static int lmLogSync(struct jfs_log * log, int hard_sync)
 	if (more < 2 * LOGPSIZE) {
 		jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
 		/*
-		 *      log wrapping
+		 *	log wrapping
 		 *
 		 * option 1 - panic ? No.!
 		 * option 2 - shutdown file systems
-		 *            associated with log ?
+		 *	      associated with log ?
 		 * option 3 - extend log ?
 		 */
 		/*
@@ -1062,7 +1062,7 @@ void jfs_syncpt(struct jfs_log *log, int hard_sync)
 /*
  * NAME:	lmLogOpen()
  *
- * FUNCTION:    open the log on first open;
+ * FUNCTION:	open the log on first open;
  *	insert filesystem in the active list of the log.
  *
  * PARAMETER:	ipmnt	- file system mount inode
@@ -1113,7 +1113,7 @@ int lmLogOpen(struct super_block *sb)
 	init_waitqueue_head(&log->syncwait);
 
 	/*
-	 *      external log as separate logical volume
+	 *	external log as separate logical volume
 	 *
 	 * file systems to log may have n-to-1 relationship;
 	 */
@@ -1155,7 +1155,7 @@ journal_found:
 	return 0;
 
 	/*
-	 *      unwind on error
+	 *	unwind on error
 	 */
       shutdown:		/* unwind lbmLogInit() */
 	list_del(&log->journal_list);
@@ -1427,7 +1427,7 @@ int lmLogInit(struct jfs_log * log)
 	return 0;
 
 	/*
-	 *      unwind on error
+	 *	unwind on error
 	 */
       errout30:		/* release log page */
 	log->wqueue = NULL;
@@ -1480,7 +1480,7 @@ int lmLogClose(struct super_block *sb)
 
 	if (test_bit(log_INLINELOG, &log->flag)) {
 		/*
-		 *      in-line log in host file system
+		 *	in-line log in host file system
 		 */
 		rc = lmLogShutdown(log);
 		kfree(log);
@@ -1504,7 +1504,7 @@ int lmLogClose(struct super_block *sb)
 		goto out;
 
 	/*
-	 *      external log as separate logical volume
+	 *	external log as separate logical volume
 	 */
 	list_del(&log->journal_list);
 	bdev = log->bdev;
@@ -1622,20 +1622,26 @@ void jfs_flush_journal(struct jfs_log *log, int wait)
 	if (!list_empty(&log->synclist)) {
 		struct logsyncblk *lp;
 
+		printk(KERN_ERR "jfs_flush_journal: synclist not empty\n");
 		list_for_each_entry(lp, &log->synclist, synclist) {
 			if (lp->xflag & COMMIT_PAGE) {
 				struct metapage *mp = (struct metapage *)lp;
-				dump_mem("orphan metapage", lp,
-					 sizeof(struct metapage));
-				dump_mem("page", mp->page, sizeof(struct page));
-			}
-			else
-				dump_mem("orphan tblock", lp,
-					 sizeof(struct tblock));
+				print_hex_dump(KERN_ERR, "metapage: ",
+					       DUMP_PREFIX_ADDRESS, 16, 4,
+					       mp, sizeof(struct metapage), 0);
+				print_hex_dump(KERN_ERR, "page: ",
+					       DUMP_PREFIX_ADDRESS, 16,
+					       sizeof(long), mp->page,
+					       sizeof(struct page), 0);
+			} else
+				print_hex_dump(KERN_ERR, "tblock:",
+					       DUMP_PREFIX_ADDRESS, 16, 4,
+					       lp, sizeof(struct tblock), 0);
 		}
 	}
+#else
+	WARN_ON(!list_empty(&log->synclist));
 #endif
-	//assert(list_empty(&log->synclist));
 	clear_bit(log_FLUSH, &log->flag);
 }
 
@@ -1723,7 +1729,7 @@ int lmLogShutdown(struct jfs_log * log)
  *
  * PARAMETE:	log	- pointer to logs inode.
  *		fsdev	- kdev_t of filesystem.
- *		serial  - pointer to returned log serial number
+ *		serial	- pointer to returned log serial number
  *		activate - insert/remove device from active list.
  *
  * RETURN:	0	- success
@@ -1963,7 +1969,7 @@ static void lbmfree(struct lbuf * bp)
  * FUNCTION:	add a log buffer to the log redrive list
  *
  * PARAMETER:
- *     bp	- log buffer
+ *	bp	- log buffer
  *
  * NOTES:
  *	Takes log_redrive_lock.
@@ -2054,7 +2060,7 @@ static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
 	bp->l_flag = flag;
 
 	/*
-	 *      insert bp at tail of write queue associated with log
+	 *	insert bp at tail of write queue associated with log
 	 *
 	 * (request is either for bp already/currently at head of queue
 	 * or new bp to be inserted at tail)
@@ -2117,7 +2123,7 @@ static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
 	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
 
 	/*
-	 *      initiate pageout of the page
+	 *	initiate pageout of the page
 	 */
 	lbmStartIO(bp);
 }
@@ -2128,7 +2134,7 @@ static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
  *
  * FUNCTION:	Interface to DD strategy routine
  *
- * RETURN:      none
+ * RETURN:	none
  *
  * serialization: LCACHE_LOCK() is NOT held during log i/o;
  */
@@ -2222,7 +2228,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
 	bio_put(bio);
 
 	/*
-	 *      pagein completion
+	 *	pagein completion
 	 */
 	if (bp->l_flag & lbmREAD) {
 		bp->l_flag &= ~lbmREAD;
@@ -2236,7 +2242,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
 	}
 
 	/*
-	 *      pageout completion
+	 *	pageout completion
 	 *
 	 * the bp at the head of write queue has completed pageout.
 	 *
@@ -2302,7 +2308,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
 	}
 
 	/*
-	 *      synchronous pageout:
+	 *	synchronous pageout:
 	 *
 	 * buffer has not necessarily been removed from write queue
 	 * (e.g., synchronous write of partial-page with COMMIT):
@@ -2316,7 +2322,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
 	}
 
 	/*
-	 *      Group Commit pageout:
+	 *	Group Commit pageout:
 	 */
 	else if (bp->l_flag & lbmGC) {
 		LCACHE_UNLOCK(flags);
@@ -2324,7 +2330,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
 	}
 
 	/*
-	 *      asynchronous pageout:
+	 *	asynchronous pageout:
 	 *
 	 * buffer must have been removed from write queue:
 	 * insert buffer at head of freelist where it can be recycled
@@ -2375,7 +2381,7 @@ int jfsIOWait(void *arg)
  * FUNCTION:	format file system log
  *
  * PARAMETERS:
- *      log	- volume log
+ *	log	- volume log
  *	logAddress - start address of log space in FS block
  *	logSize	- length of log space in FS block;
  *
@@ -2407,16 +2413,16 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
 	npages = logSize >> sbi->l2nbperpage;
 
 	/*
-	 *      log space:
+	 *	log space:
 	 *
 	 * page 0 - reserved;
 	 * page 1 - log superblock;
 	 * page 2 - log data page: A SYNC log record is written
-	 *          into this page at logform time;
+	 *	    into this page at logform time;
 	 * pages 3-N - log data page: set to empty log data pages;
 	 */
 	/*
-	 *      init log superblock: log page 1
+	 *	init log superblock: log page 1
 	 */
 	logsuper = (struct logsuper *) bp->l_ldata;
 
@@ -2436,7 +2442,7 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
 		goto exit;
 
 	/*
-	 *      init pages 2 to npages-1 as log data pages:
+	 *	init pages 2 to npages-1 as log data pages:
 	 *
 	 * log page sequence number (lpsn) initialization:
 	 *
@@ -2479,7 +2485,7 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
 		goto exit;
 
 	/*
-	 *      initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
+	 *	initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
 	 */
 	for (lspn = 0; lspn < npages - 3; lspn++) {
 		lp->h.page = lp->t.page = cpu_to_le32(lspn);
@@ -2495,7 +2501,7 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
 	rc = 0;
 exit:
 	/*
-	 *      finalize log
+	 *	finalize log
 	 */
 	/* release the buffer */
 	lbmFree(bp);
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
index a53fb17ea21..1f85ef0ec04 100644
--- a/fs/jfs/jfs_logmgr.h
+++ b/fs/jfs/jfs_logmgr.h
@@ -144,7 +144,7 @@ struct logpage {
  *
  * (this comment should be rewritten !)
  * jfs uses only "after" log records (only a single writer is allowed
- * in a  page, pages are written to temporary paging space if
+ * in a page, pages are written to temporary paging space if
  * if they must be written to disk before commit, and i/o is
  * scheduled for modified pages to their home location after
  * the log records containing the after values and the commit
@@ -153,7 +153,7 @@ struct logpage {
  *
  * a log record consists of a data area of variable length followed by
  * a descriptor of fixed size LOGRDSIZE bytes.
- * the  data area is rounded up to an integral number of 4-bytes and
+ * the data area is rounded up to an integral number of 4-bytes and
  * must be no longer than LOGPSIZE.
  * the descriptor is of size of multiple of 4-bytes and aligned on a
  * 4-byte boundary.
@@ -215,13 +215,13 @@ struct lrd {
 	union {
 
 		/*
-		 *      COMMIT: commit
+		 *	COMMIT: commit
 		 *
 		 * transaction commit: no type-dependent information;
 		 */
 
 		/*
-		 *      REDOPAGE: after-image
+		 *	REDOPAGE: after-image
 		 *
 		 * apply after-image;
 		 *
@@ -236,7 +236,7 @@ struct lrd {
 		} redopage;	/* (20) */
 
 		/*
-		 *      NOREDOPAGE: the page is freed
+		 *	NOREDOPAGE: the page is freed
 		 *
 		 * do not apply after-image records which precede this record
 		 * in the log with the same page block number to this page.
@@ -252,7 +252,7 @@ struct lrd {
 		} noredopage;	/* (20) */
 
 		/*
-		 *      UPDATEMAP: update block allocation map
+		 *	UPDATEMAP: update block allocation map
 		 *
 		 * either in-line PXD,
 		 * or     out-of-line  XADLIST;
@@ -268,7 +268,7 @@ struct lrd {
 		} updatemap;	/* (20) */
 
 		/*
-		 *      NOREDOINOEXT: the inode extent is freed
+		 *	NOREDOINOEXT: the inode extent is freed
 		 *
 		 * do not apply after-image records which precede this
 		 * record in the log with the any of the 4 page block
@@ -286,7 +286,7 @@ struct lrd {
 		} noredoinoext;	/* (20) */
 
 		/*
-		 *      SYNCPT: log sync point
+		 *	SYNCPT: log sync point
 		 *
 		 * replay log upto syncpt address specified;
 		 */
@@ -295,13 +295,13 @@ struct lrd {
 		} syncpt;
 
 		/*
-		 *      MOUNT: file system mount
+		 *	MOUNT: file system mount
 		 *
 		 * file system mount: no type-dependent information;
 		 */
 
 		/*
-		 *      ? FREEXTENT: free specified extent(s)
+		 *	? FREEXTENT: free specified extent(s)
 		 *
 		 * free specified extent(s) from block allocation map
 		 * N.B.: nextents should be length of data/sizeof(xad_t)
@@ -314,7 +314,7 @@ struct lrd {
 		} freextent;
 
 		/*
-		 *      ? NOREDOFILE: this file is freed
+		 *	? NOREDOFILE: this file is freed
 		 *
 		 * do not apply records which precede this record in the log
 		 * with the same inode number.
@@ -330,7 +330,7 @@ struct lrd {
 		} noredofile;
 
 		/*
-		 *      ? NEWPAGE:
+		 *	? NEWPAGE:
 		 *
 		 * metadata type dependent
 		 */
@@ -342,7 +342,7 @@ struct lrd {
 		} newpage;
 
 		/*
-		 *      ? DUMMY: filler
+		 *	? DUMMY: filler
 		 *
 		 * no type-dependent information
 		 */
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 43d4f69afbe..77c7f1129dd 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -472,7 +472,8 @@ add_failed:
 	printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
 	goto skip;
 dump_bio:
-	dump_mem("bio", bio, sizeof(*bio));
+	print_hex_dump(KERN_ERR, "JFS: dump of bio: ", DUMP_PREFIX_ADDRESS, 16,
+		       4, bio, sizeof(*bio), 0);
 skip:
 	bio_put(bio);
 	unlock_page(page);
diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c
index 4dd47983489..644429acb8c 100644
--- a/fs/jfs/jfs_mount.c
+++ b/fs/jfs/jfs_mount.c
@@ -80,7 +80,7 @@ static int logMOUNT(struct super_block *sb);
  */
 int jfs_mount(struct super_block *sb)
 {
-	int rc = 0;		/* Return code          */
+	int rc = 0;		/* Return code */
 	struct jfs_sb_info *sbi = JFS_SBI(sb);
 	struct inode *ipaimap = NULL;
 	struct inode *ipaimap2 = NULL;
@@ -169,7 +169,7 @@ int jfs_mount(struct super_block *sb)
 		sbi->ipaimap2 = NULL;
 
 	/*
-	 *      mount (the only/single) fileset
+	 *	mount (the only/single) fileset
 	 */
 	/*
 	 * open fileset inode allocation map (aka fileset inode)
@@ -195,7 +195,7 @@ int jfs_mount(struct super_block *sb)
 	goto out;
 
 	/*
-	 *      unwind on error
+	 *	unwind on error
 	 */
       errout41:		/* close fileset inode allocation map inode */
 	diFreeSpecial(ipimap);
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 25430d0b0d5..7aa1f7004ea 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -18,7 +18,7 @@
  */
 
 /*
- *      jfs_txnmgr.c: transaction manager
+ *	jfs_txnmgr.c: transaction manager
  *
  * notes:
  * transaction starts with txBegin() and ends with txCommit()
@@ -60,7 +60,7 @@
 #include "jfs_debug.h"
 
 /*
- *      transaction management structures
+ *	transaction management structures
  */
 static struct {
 	int freetid;		/* index of a free tid structure */
@@ -103,19 +103,19 @@ module_param(nTxLock, int, 0);
 MODULE_PARM_DESC(nTxLock,
 		 "Number of transaction locks (max:65536)");
 
-struct tblock *TxBlock;	        /* transaction block table */
-static int TxLockLWM;		/* Low water mark for number of txLocks used */
-static int TxLockHWM;		/* High water mark for number of txLocks used */
-static int TxLockVHWM;		/* Very High water mark */
-struct tlock *TxLock;           /* transaction lock table */
+struct tblock *TxBlock;	/* transaction block table */
+static int TxLockLWM;	/* Low water mark for number of txLocks used */
+static int TxLockHWM;	/* High water mark for number of txLocks used */
+static int TxLockVHWM;	/* Very High water mark */
+struct tlock *TxLock;	/* transaction lock table */
 
 /*
- *      transaction management lock
+ *	transaction management lock
  */
 static DEFINE_SPINLOCK(jfsTxnLock);
 
-#define TXN_LOCK()              spin_lock(&jfsTxnLock)
-#define TXN_UNLOCK()            spin_unlock(&jfsTxnLock)
+#define TXN_LOCK()		spin_lock(&jfsTxnLock)
+#define TXN_UNLOCK()		spin_unlock(&jfsTxnLock)
 
 #define LAZY_LOCK_INIT()	spin_lock_init(&TxAnchor.LazyLock);
 #define LAZY_LOCK(flags)	spin_lock_irqsave(&TxAnchor.LazyLock, flags)
@@ -148,7 +148,7 @@ static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event)
 #define TXN_WAKEUP(event) wake_up_all(event)
 
 /*
- *      statistics
+ *	statistics
  */
 static struct {
 	tid_t maxtid;		/* 4: biggest tid ever used */
@@ -181,8 +181,8 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 static void LogSyncRelease(struct metapage * mp);
 
 /*
- *              transaction block/lock management
- *              ---------------------------------
+ *		transaction block/lock management
+ *		---------------------------------
  */
 
 /*
@@ -227,9 +227,9 @@ static void txLockFree(lid_t lid)
 }
 
 /*
- * NAME:        txInit()
+ * NAME:	txInit()
  *
- * FUNCTION:    initialize transaction management structures
+ * FUNCTION:	initialize transaction management structures
  *
  * RETURN:
  *
@@ -333,9 +333,9 @@ int txInit(void)
 }
 
 /*
- * NAME:        txExit()
+ * NAME:	txExit()
  *
- * FUNCTION:    clean up when module is unloaded
+ * FUNCTION:	clean up when module is unloaded
  */
 void txExit(void)
 {
@@ -346,12 +346,12 @@ void txExit(void)
 }
 
 /*
- * NAME:        txBegin()
+ * NAME:	txBegin()
  *
- * FUNCTION:    start a transaction.
+ * FUNCTION:	start a transaction.
  *
- * PARAMETER:   sb	- superblock
- *              flag	- force for nested tx;
+ * PARAMETER:	sb	- superblock
+ *		flag	- force for nested tx;
  *
  * RETURN:	tid	- transaction id
  *
@@ -447,13 +447,13 @@ tid_t txBegin(struct super_block *sb, int flag)
 }
 
 /*
- * NAME:        txBeginAnon()
+ * NAME:	txBeginAnon()
  *
- * FUNCTION:    start an anonymous transaction.
+ * FUNCTION:	start an anonymous transaction.
  *		Blocks if logsync or available tlocks are low to prevent
  *		anonymous tlocks from depleting supply.
  *
- * PARAMETER:   sb	- superblock
+ * PARAMETER:	sb	- superblock
  *
  * RETURN:	none
  */
@@ -489,11 +489,11 @@ void txBeginAnon(struct super_block *sb)
 }
 
 /*
- *      txEnd()
+ *	txEnd()
  *
  * function: free specified transaction block.
  *
- *      logsync barrier processing:
+ *	logsync barrier processing:
  *
  * serialization:
  */
@@ -577,13 +577,13 @@ wakeup:
 }
 
 /*
- *      txLock()
+ *	txLock()
  *
  * function: acquire a transaction lock on the specified <mp>
  *
  * parameter:
  *
- * return:      transaction lock id
+ * return:	transaction lock id
  *
  * serialization:
  */
@@ -829,12 +829,16 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
 	/* Only locks on ipimap or ipaimap should reach here */
 	/* assert(jfs_ip->fileset == AGGREGATE_I); */
 	if (jfs_ip->fileset != AGGREGATE_I) {
-		jfs_err("txLock: trying to lock locked page!");
-		dump_mem("ip", ip, sizeof(struct inode));
-		dump_mem("mp", mp, sizeof(struct metapage));
-		dump_mem("Locker's tblk", tid_to_tblock(tid),
-			 sizeof(struct tblock));
-		dump_mem("Tlock", tlck, sizeof(struct tlock));
+		printk(KERN_ERR "txLock: trying to lock locked page!");
+		print_hex_dump(KERN_ERR, "ip: ", DUMP_PREFIX_ADDRESS, 16, 4,
+			       ip, sizeof(*ip), 0);
+		print_hex_dump(KERN_ERR, "mp: ", DUMP_PREFIX_ADDRESS, 16, 4,
+			       mp, sizeof(*mp), 0);
+		print_hex_dump(KERN_ERR, "Locker's tblock: ",
+			       DUMP_PREFIX_ADDRESS, 16, 4, tid_to_tblock(tid),
+			       sizeof(struct tblock), 0);
+		print_hex_dump(KERN_ERR, "Tlock: ", DUMP_PREFIX_ADDRESS, 16, 4,
+			       tlck, sizeof(*tlck), 0);
 		BUG();
 	}
 	INCREMENT(stattx.waitlock);	/* statistics */
@@ -857,17 +861,17 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
 }
 
 /*
- * NAME:        txRelease()
+ * NAME:	txRelease()
  *
- * FUNCTION:    Release buffers associated with transaction locks, but don't
+ * FUNCTION:	Release buffers associated with transaction locks, but don't
  *		mark homeok yet.  The allows other transactions to modify
  *		buffers, but won't let them go to disk until commit record
  *		actually gets written.
  *
  * PARAMETER:
- *              tblk    -
+ *		tblk	-
  *
- * RETURN:      Errors from subroutines.
+ * RETURN:	Errors from subroutines.
  */
 static void txRelease(struct tblock * tblk)
 {
@@ -896,10 +900,10 @@ static void txRelease(struct tblock * tblk)
 }
 
 /*
- * NAME:        txUnlock()
+ * NAME:	txUnlock()
  *
- * FUNCTION:    Initiates pageout of pages modified by tid in journalled
- *              objects and frees their lockwords.
+ * FUNCTION:	Initiates pageout of pages modified by tid in journalled
+ *		objects and frees their lockwords.
  */
 static void txUnlock(struct tblock * tblk)
 {
@@ -983,10 +987,10 @@ static void txUnlock(struct tblock * tblk)
 }
 
 /*
- *      txMaplock()
+ *	txMaplock()
  *
  * function: allocate a transaction lock for freed page/entry;
- *      for freed page, maplock is used as xtlock/dtlock type;
+ *	for freed page, maplock is used as xtlock/dtlock type;
  */
 struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
 {
@@ -1057,7 +1061,7 @@ struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
 }
 
 /*
- *      txLinelock()
+ *	txLinelock()
  *
  * function: allocate a transaction lock for log vector list
  */
@@ -1092,39 +1096,39 @@ struct linelock *txLinelock(struct linelock * tlock)
 }
 
 /*
- *              transaction commit management
- *              -----------------------------
+ *		transaction commit management
+ *		-----------------------------
  */
 
 /*
- * NAME:        txCommit()
- *
- * FUNCTION:    commit the changes to the objects specified in
- *              clist.  For journalled segments only the
- *              changes of the caller are committed, ie by tid.
- *              for non-journalled segments the data are flushed to
- *              disk and then the change to the disk inode and indirect
- *              blocks committed (so blocks newly allocated to the
- *              segment will be made a part of the segment atomically).
- *
- *              all of the segments specified in clist must be in
- *              one file system. no more than 6 segments are needed
- *              to handle all unix svcs.
- *
- *              if the i_nlink field (i.e. disk inode link count)
- *              is zero, and the type of inode is a regular file or
- *              directory, or symbolic link , the inode is truncated
- *              to zero length. the truncation is committed but the
- *              VM resources are unaffected until it is closed (see
- *              iput and iclose).
+ * NAME:	txCommit()
+ *
+ * FUNCTION:	commit the changes to the objects specified in
+ *		clist.  For journalled segments only the
+ *		changes of the caller are committed, ie by tid.
+ *		for non-journalled segments the data are flushed to
+ *		disk and then the change to the disk inode and indirect
+ *		blocks committed (so blocks newly allocated to the
+ *		segment will be made a part of the segment atomically).
+ *
+ *		all of the segments specified in clist must be in
+ *		one file system. no more than 6 segments are needed
+ *		to handle all unix svcs.
+ *
+ *		if the i_nlink field (i.e. disk inode link count)
+ *		is zero, and the type of inode is a regular file or
+ *		directory, or symbolic link , the inode is truncated
+ *		to zero length. the truncation is committed but the
+ *		VM resources are unaffected until it is closed (see
+ *		iput and iclose).
  *
  * PARAMETER:
  *
  * RETURN:
  *
  * serialization:
- *              on entry the inode lock on each segment is assumed
- *              to be held.
+ *		on entry the inode lock on each segment is assumed
+ *		to be held.
  *
  * i/o error:
  */
@@ -1175,7 +1179,7 @@ int txCommit(tid_t tid,		/* transaction identifier */
 	if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0)
 		tblk->xflag |= COMMIT_LAZY;
 	/*
-	 *      prepare non-journaled objects for commit
+	 *	prepare non-journaled objects for commit
 	 *
 	 * flush data pages of non-journaled file
 	 * to prevent the file getting non-initialized disk blocks
@@ -1186,7 +1190,7 @@ int txCommit(tid_t tid,		/* transaction identifier */
 	cd.nip = nip;
 
 	/*
-	 *      acquire transaction lock on (on-disk) inodes
+	 *	acquire transaction lock on (on-disk) inodes
 	 *
 	 * update on-disk inode from in-memory inode
 	 * acquiring transaction locks for AFTER records
@@ -1262,7 +1266,7 @@ int txCommit(tid_t tid,		/* transaction identifier */
 	}
 
 	/*
-	 *      write log records from transaction locks
+	 *	write log records from transaction locks
 	 *
 	 * txUpdateMap() resets XAD_NEW in XAD.
 	 */
@@ -1294,7 +1298,7 @@ int txCommit(tid_t tid,		/* transaction identifier */
 		!test_cflag(COMMIT_Nolink, tblk->u.ip)));
 
 	/*
-	 *      write COMMIT log record
+	 *	write COMMIT log record
 	 */
 	lrd->type = cpu_to_le16(LOG_COMMIT);
 	lrd->length = 0;
@@ -1303,7 +1307,7 @@ int txCommit(tid_t tid,		/* transaction identifier */
 	lmGroupCommit(log, tblk);
 
 	/*
-	 *      - transaction is now committed -
+	 *	- transaction is now committed -
 	 */
 
 	/*
@@ -1314,11 +1318,11 @@ int txCommit(tid_t tid,		/* transaction identifier */
 		txForce(tblk);
 
 	/*
-	 *      update allocation map.
+	 *	update allocation map.
 	 *
 	 * update inode allocation map and inode:
 	 * free pager lock on memory object of inode if any.
-	 * update  block allocation map.
+	 * update block allocation map.
 	 *
 	 * txUpdateMap() resets XAD_NEW in XAD.
 	 */
@@ -1326,7 +1330,7 @@ int txCommit(tid_t tid,		/* transaction identifier */
 		txUpdateMap(tblk);
 
 	/*
-	 *      free transaction locks and pageout/free pages
+	 *	free transaction locks and pageout/free pages
 	 */
 	txRelease(tblk);
 
@@ -1335,7 +1339,7 @@ int txCommit(tid_t tid,		/* transaction identifier */
 
 
 	/*
-	 *      reset in-memory object state
+	 *	reset in-memory object state
 	 */
 	for (k = 0; k < cd.nip; k++) {
 		ip = cd.iplist[k];
@@ -1358,11 +1362,11 @@ int txCommit(tid_t tid,		/* transaction identifier */
 }
 
 /*
- * NAME:        txLog()
+ * NAME:	txLog()
  *
- * FUNCTION:    Writes AFTER log records for all lines modified
- *              by tid for segments specified by inodes in comdata.
- *              Code assumes only WRITELOCKS are recorded in lockwords.
+ * FUNCTION:	Writes AFTER log records for all lines modified
+ *		by tid for segments specified by inodes in comdata.
+ *		Code assumes only WRITELOCKS are recorded in lockwords.
  *
  * PARAMETERS:
  *
@@ -1421,12 +1425,12 @@ static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd)
 }
 
 /*
- *      diLog()
+ *	diLog()
  *
- * function:    log inode tlock and format maplock to update bmap;
+ * function:	log inode tlock and format maplock to update bmap;
  */
 static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
-	  struct tlock * tlck, struct commit * cd)
+		 struct tlock * tlck, struct commit * cd)
 {
 	int rc = 0;
 	struct metapage *mp;
@@ -1442,7 +1446,7 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	pxd = &lrd->log.redopage.pxd;
 
 	/*
-	 *      inode after image
+	 *	inode after image
 	 */
 	if (tlck->type & tlckENTRY) {
 		/* log after-image for logredo(): */
@@ -1456,7 +1460,7 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 		tlck->flag |= tlckWRITEPAGE;
 	} else if (tlck->type & tlckFREE) {
 		/*
-		 *      free inode extent
+		 *	free inode extent
 		 *
 		 * (pages of the freed inode extent have been invalidated and
 		 * a maplock for free of the extent has been formatted at
@@ -1498,7 +1502,7 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 		jfs_err("diLog: UFO type tlck:0x%p", tlck);
 #ifdef  _JFS_WIP
 	/*
-	 *      alloc/free external EA extent
+	 *	alloc/free external EA extent
 	 *
 	 * a maplock for txUpdateMap() to update bPWMAP for alloc/free
 	 * of the extent has been formatted at txLock() time;
@@ -1534,9 +1538,9 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 }
 
 /*
- *      dataLog()
+ *	dataLog()
  *
- * function:    log data tlock
+ * function:	log data tlock
  */
 static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	    struct tlock * tlck)
@@ -1580,9 +1584,9 @@ static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 }
 
 /*
- *      dtLog()
+ *	dtLog()
  *
- * function:    log dtree tlock and format maplock to update bmap;
+ * function:	log dtree tlock and format maplock to update bmap;
  */
 static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	   struct tlock * tlck)
@@ -1603,10 +1607,10 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 		lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
 
 	/*
-	 *      page extension via relocation: entry insertion;
-	 *      page extension in-place: entry insertion;
-	 *      new right page from page split, reinitialized in-line
-	 *      root from root page split: entry insertion;
+	 *	page extension via relocation: entry insertion;
+	 *	page extension in-place: entry insertion;
+	 *	new right page from page split, reinitialized in-line
+	 *	root from root page split: entry insertion;
 	 */
 	if (tlck->type & (tlckNEW | tlckEXTEND)) {
 		/* log after-image of the new page for logredo():
@@ -1641,8 +1645,8 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	}
 
 	/*
-	 *      entry insertion/deletion,
-	 *      sibling page link update (old right page before split);
+	 *	entry insertion/deletion,
+	 *	sibling page link update (old right page before split);
 	 */
 	if (tlck->type & (tlckENTRY | tlckRELINK)) {
 		/* log after-image for logredo(): */
@@ -1658,11 +1662,11 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	}
 
 	/*
-	 *      page deletion: page has been invalidated
-	 *      page relocation: source extent
+	 *	page deletion: page has been invalidated
+	 *	page relocation: source extent
 	 *
-	 *      a maplock for free of the page has been formatted
-	 *      at txLock() time);
+	 *	a maplock for free of the page has been formatted
+	 *	at txLock() time);
 	 */
 	if (tlck->type & (tlckFREE | tlckRELOCATE)) {
 		/* log LOG_NOREDOPAGE of the deleted page for logredo()
@@ -1683,9 +1687,9 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 }
 
 /*
- *      xtLog()
+ *	xtLog()
  *
- * function:    log xtree tlock and format maplock to update bmap;
+ * function:	log xtree tlock and format maplock to update bmap;
  */
 static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	   struct tlock * tlck)
@@ -1725,8 +1729,8 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	xadlock = (struct xdlistlock *) maplock;
 
 	/*
-	 *      entry insertion/extension;
-	 *      sibling page link update (old right page before split);
+	 *	entry insertion/extension;
+	 *	sibling page link update (old right page before split);
 	 */
 	if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) {
 		/* log after-image for logredo():
@@ -1801,7 +1805,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	}
 
 	/*
-	 *      page deletion: file deletion/truncation (ref. xtTruncate())
+	 *	page deletion: file deletion/truncation (ref. xtTruncate())
 	 *
 	 * (page will be invalidated after log is written and bmap
 	 * is updated from the page);
@@ -1908,13 +1912,13 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	}
 
 	/*
-	 *      page/entry truncation: file truncation (ref. xtTruncate())
+	 *	page/entry truncation: file truncation (ref. xtTruncate())
 	 *
-	 *     |----------+------+------+---------------|
-	 *                |      |      |
-	 *                |      |     hwm - hwm before truncation
-	 *                |     next - truncation point
-	 *               lwm - lwm before truncation
+	 *	|----------+------+------+---------------|
+	 *		   |      |      |
+	 *		   |      |     hwm - hwm before truncation
+	 *		   |     next - truncation point
+	 *		  lwm - lwm before truncation
 	 * header ?
 	 */
 	if (tlck->type & tlckTRUNCATE) {
@@ -1937,7 +1941,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 		twm = xtlck->twm.offset;
 
 		/*
-		 *      write log records
+		 *	write log records
 		 */
 		/* log after-image for logredo():
 		 *
@@ -1997,7 +2001,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 		}
 
 		/*
-		 *      format maplock(s) for txUpdateMap() to update bmap
+		 *	format maplock(s) for txUpdateMap() to update bmap
 		 */
 		maplock->index = 0;
 
@@ -2069,9 +2073,9 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 }
 
 /*
- *      mapLog()
+ *	mapLog()
  *
- * function:    log from maplock of freed data extents;
+ * function:	log from maplock of freed data extents;
  */
 static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 		   struct tlock * tlck)
@@ -2081,7 +2085,7 @@ static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	pxd_t *pxd;
 
 	/*
-	 *      page relocation: free the source page extent
+	 *	page relocation: free the source page extent
 	 *
 	 * a maplock for txUpdateMap() for free of the page
 	 * has been formatted at txLock() time saving the src
@@ -2155,10 +2159,10 @@ static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 }
 
 /*
- *      txEA()
+ *	txEA()
  *
- * function:    acquire maplock for EA/ACL extents or
- *              set COMMIT_INLINE flag;
+ * function:	acquire maplock for EA/ACL extents or
+ *		set COMMIT_INLINE flag;
  */
 void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
 {
@@ -2207,10 +2211,10 @@ void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
 }
 
 /*
- *      txForce()
+ *	txForce()
  *
  * function: synchronously write pages locked by transaction
- *              after txLog() but before txUpdateMap();
+ *	     after txLog() but before txUpdateMap();
  */
 static void txForce(struct tblock * tblk)
 {
@@ -2273,10 +2277,10 @@ static void txForce(struct tblock * tblk)
 }
 
 /*
- *      txUpdateMap()
+ *	txUpdateMap()
  *
- * function:    update persistent allocation map (and working map
- *              if appropriate);
+ * function:	update persistent allocation map (and working map
+ *		if appropriate);
  *
  * parameter:
  */
@@ -2298,7 +2302,7 @@ static void txUpdateMap(struct tblock * tblk)
 
 
 	/*
-	 *      update block allocation map
+	 *	update block allocation map
 	 *
 	 * update allocation state in pmap (and wmap) and
 	 * update lsn of the pmap page;
@@ -2382,7 +2386,7 @@ static void txUpdateMap(struct tblock * tblk)
 		}
 	}
 	/*
-	 *      update inode allocation map
+	 *	update inode allocation map
 	 *
 	 * update allocation state in pmap and
 	 * update lsn of the pmap page;
@@ -2407,24 +2411,24 @@ static void txUpdateMap(struct tblock * tblk)
 }
 
 /*
- *      txAllocPMap()
+ *	txAllocPMap()
  *
  * function: allocate from persistent map;
  *
  * parameter:
- *      ipbmap  -
- *      malock -
- *              xad list:
- *              pxd:
- *
- *      maptype -
- *              allocate from persistent map;
- *              free from persistent map;
- *              (e.g., tmp file - free from working map at releae
- *               of last reference);
- *              free from persistent and working map;
- *
- *      lsn     - log sequence number;
+ *	ipbmap	-
+ *	malock	-
+ *		xad list:
+ *		pxd:
+ *
+ *	maptype -
+ *		allocate from persistent map;
+ *		free from persistent map;
+ *		(e.g., tmp file - free from working map at releae
+ *		 of last reference);
+ *		free from persistent and working map;
+ *
+ *	lsn	- log sequence number;
  */
 static void txAllocPMap(struct inode *ip, struct maplock * maplock,
 			struct tblock * tblk)
@@ -2478,9 +2482,9 @@ static void txAllocPMap(struct inode *ip, struct maplock * maplock,
 }
 
 /*
- *      txFreeMap()
+ *	txFreeMap()
  *
- * function:    free from persistent and/or working map;
+ * function:	free from persistent and/or working map;
  *
  * todo: optimization
  */
@@ -2579,9 +2583,9 @@ void txFreeMap(struct inode *ip,
 }
 
 /*
- *      txFreelock()
+ *	txFreelock()
  *
- * function:    remove tlock from inode anonymous locklist
+ * function:	remove tlock from inode anonymous locklist
  */
 void txFreelock(struct inode *ip)
 {
@@ -2619,7 +2623,7 @@ void txFreelock(struct inode *ip)
 }
 
 /*
- *      txAbort()
+ *	txAbort()
  *
  * function: abort tx before commit;
  *
@@ -2679,7 +2683,7 @@ void txAbort(tid_t tid, int dirty)
 }
 
 /*
- *      txLazyCommit(void)
+ *	txLazyCommit(void)
  *
  *	All transactions except those changing ipimap (COMMIT_FORCE) are
  *	processed by this routine.  This insures that the inode and block
@@ -2728,7 +2732,7 @@ static void txLazyCommit(struct tblock * tblk)
 }
 
 /*
- *      jfs_lazycommit(void)
+ *	jfs_lazycommit(void)
  *
  *	To be run as a kernel daemon.  If lbmIODone is called in an interrupt
  *	context, or where blocking is not wanted, this routine will process
@@ -2913,7 +2917,7 @@ void txResume(struct super_block *sb)
 }
 
 /*
- *      jfs_sync(void)
+ *	jfs_sync(void)
  *
  *	To be run as a kernel daemon.  This is awakened when tlocks run low.
  *	We write any inodes that have anonymous tlocks so they will become
diff --git a/fs/jfs/jfs_txnmgr.h b/fs/jfs/jfs_txnmgr.h
index 7863cf21afc..ab728893701 100644
--- a/fs/jfs/jfs_txnmgr.h
+++ b/fs/jfs/jfs_txnmgr.h
@@ -94,7 +94,7 @@ extern struct tblock *TxBlock;	/* transaction block table */
  */
 struct tlock {
 	lid_t next;		/* 2: index next lockword on tid locklist
-				 *          next lockword on freelist
+				 *	    next lockword on freelist
 				 */
 	tid_t tid;		/* 2: transaction id holding lock */
 
diff --git a/fs/jfs/jfs_types.h b/fs/jfs/jfs_types.h
index 09b25295868..649f9817acc 100644
--- a/fs/jfs/jfs_types.h
+++ b/fs/jfs/jfs_types.h
@@ -21,7 +21,7 @@
 /*
  *	jfs_types.h:
  *
- * basic type/utility  definitions
+ * basic type/utility definitions
  *
  * note: this header file must be the 1st include file
  * of JFS include list in all JFS .c file.
@@ -54,8 +54,8 @@ struct timestruc_t {
  */
 
 #define LEFTMOSTONE	0x80000000
-#define	HIGHORDER	0x80000000u	/* high order bit on            */
-#define	ONES		0xffffffffu	/* all bit on                   */
+#define	HIGHORDER	0x80000000u	/* high order bit on	*/
+#define	ONES		0xffffffffu	/* all bit on		*/
 
 /*
  *	logical xd (lxd)
@@ -148,7 +148,7 @@ typedef struct {
 #define sizeDXD(dxd)	le32_to_cpu((dxd)->size)
 
 /*
- *      directory entry argument
+ *	directory entry argument
  */
 struct component_name {
 	int namlen;
@@ -160,14 +160,14 @@ struct component_name {
  *	DASD limit information - stored in directory inode
  */
 struct dasd {
-	u8 thresh;		/* Alert Threshold (in percent) */
-	u8 delta;		/* Alert Threshold delta (in percent)   */
+	u8 thresh;		/* Alert Threshold (in percent)		*/
+	u8 delta;		/* Alert Threshold delta (in percent)	*/
 	u8 rsrvd1;
-	u8 limit_hi;		/* DASD limit (in logical blocks)       */
-	__le32 limit_lo;	/* DASD limit (in logical blocks)       */
+	u8 limit_hi;		/* DASD limit (in logical blocks)	*/
+	__le32 limit_lo;	/* DASD limit (in logical blocks)	*/
 	u8 rsrvd2[3];
-	u8 used_hi;		/* DASD usage (in logical blocks)       */
-	__le32 used_lo;		/* DASD usage (in logical blocks)       */
+	u8 used_hi;		/* DASD usage (in logical blocks)	*/
+	__le32 used_lo;		/* DASD usage (in logical blocks)	*/
 };
 
 #define DASDLIMIT(dasdp) \
diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c
index a386f48c73f..7971f37534a 100644
--- a/fs/jfs/jfs_umount.c
+++ b/fs/jfs/jfs_umount.c
@@ -60,7 +60,7 @@ int jfs_umount(struct super_block *sb)
 	jfs_info("UnMount JFS: sb:0x%p", sb);
 
 	/*
-	 *      update superblock and close log
+	 *	update superblock and close log
 	 *
 	 * if mounted read-write and log based recovery was enabled
 	 */
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index acc97c46d8a..1543906a2e0 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -16,7 +16,7 @@
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 /*
- *      jfs_xtree.c: extent allocation descriptor B+-tree manager
+ *	jfs_xtree.c: extent allocation descriptor B+-tree manager
  */
 
 #include <linux/fs.h>
@@ -32,30 +32,30 @@
 /*
  * xtree local flag
  */
-#define XT_INSERT       0x00000001
+#define XT_INSERT	0x00000001
 
 /*
- *       xtree key/entry comparison: extent offset
+ *	xtree key/entry comparison: extent offset
  *
  * return:
- *      -1: k < start of extent
- *       0: start_of_extent <= k <= end_of_extent
- *       1: k > end_of_extent
+ *	-1: k < start of extent
+ *	 0: start_of_extent <= k <= end_of_extent
+ *	 1: k > end_of_extent
  */
 #define XT_CMP(CMP, K, X, OFFSET64)\
 {\
-        OFFSET64 = offsetXAD(X);\
-        (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
-              ((K) < OFFSET64) ? -1 : 0;\
+	OFFSET64 = offsetXAD(X);\
+	(CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
+		((K) < OFFSET64) ? -1 : 0;\
 }
 
 /* write a xad entry */
 #define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
 {\
-        (XAD)->flag = (FLAG);\
-        XADoffset((XAD), (OFF));\
-        XADlength((XAD), (LEN));\
-        XADaddress((XAD), (ADDR));\
+	(XAD)->flag = (FLAG);\
+	XADoffset((XAD), (OFF));\
+	XADlength((XAD), (LEN));\
+	XADaddress((XAD), (ADDR));\
 }
 
 #define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)
@@ -76,13 +76,13 @@
 			MP = NULL;\
 			RC = -EIO;\
 		}\
-        }\
+	}\
 }
 
 /* for consistency */
 #define XT_PUTPAGE(MP) BT_PUTPAGE(MP)
 
-#define XT_GETSEARCH(IP, LEAF, BN, MP,  P, INDEX) \
+#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
 	BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
 /* xtree entry parameter descriptor */
 struct xtsplit {
@@ -97,7 +97,7 @@ struct xtsplit {
 
 
 /*
- *      statistics
+ *	statistics
  */
 #ifdef CONFIG_JFS_STATISTICS
 static struct {
@@ -136,7 +136,7 @@ static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp);
 #endif				/*  _STILL_TO_PORT */
 
 /*
- *      xtLookup()
+ *	xtLookup()
  *
  * function: map a single page into a physical extent;
  */
@@ -179,7 +179,7 @@ int xtLookup(struct inode *ip, s64 lstart,
 	}
 
 	/*
-	 *      compute the physical extent covering logical extent
+	 *	compute the physical extent covering logical extent
 	 *
 	 * N.B. search may have failed (e.g., hole in sparse file),
 	 * and returned the index of the next entry.
@@ -220,27 +220,27 @@ int xtLookup(struct inode *ip, s64 lstart,
 
 
 /*
- *      xtLookupList()
+ *	xtLookupList()
  *
  * function: map a single logical extent into a list of physical extent;
  *
  * parameter:
- *      struct inode    *ip,
- *      struct lxdlist  *lxdlist,       lxd list (in)
- *      struct xadlist  *xadlist,       xad list (in/out)
- *      int		flag)
+ *	struct inode	*ip,
+ *	struct lxdlist	*lxdlist,	lxd list (in)
+ *	struct xadlist	*xadlist,	xad list (in/out)
+ *	int		flag)
  *
  * coverage of lxd by xad under assumption of
  * . lxd's are ordered and disjoint.
  * . xad's are ordered and disjoint.
  *
  * return:
- *      0:      success
+ *	0:	success
  *
  * note: a page being written (even a single byte) is backed fully,
- *      except the last page which is only backed with blocks
- *      required to cover the last byte;
- *      the extent backing a page is fully contained within an xad;
+ *	except the last page which is only backed with blocks
+ *	required to cover the last byte;
+ *	the extent backing a page is fully contained within an xad;
  */
 int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
 		 struct xadlist * xadlist, int flag)
@@ -284,7 +284,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
 		return rc;
 
 	/*
-	 *      compute the physical extent covering logical extent
+	 *	compute the physical extent covering logical extent
 	 *
 	 * N.B. search may have failed (e.g., hole in sparse file),
 	 * and returned the index of the next entry.
@@ -343,7 +343,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
 		if (lstart >= size)
 			goto mapend;
 
-		/* compare with the current xad  */
+		/* compare with the current xad */
 		goto compare1;
 	}
 	/* lxd is covered by xad */
@@ -430,7 +430,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
 	/*
 	 * lxd is partially covered by xad
 	 */
-	else {			/* (xend < lend)  */
+	else {			/* (xend < lend) */
 
 		/*
 		 * get next xad
@@ -477,22 +477,22 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
 
 
 /*
- *      xtSearch()
+ *	xtSearch()
  *
- * function:    search for the xad entry covering specified offset.
+ * function:	search for the xad entry covering specified offset.
  *
  * parameters:
- *      ip      - file object;
- *      xoff    - extent offset;
- *      nextp	- address of next extent (if any) for search miss
- *      cmpp    - comparison result:
- *      btstack - traverse stack;
- *      flag    - search process flag (XT_INSERT);
+ *	ip	- file object;
+ *	xoff	- extent offset;
+ *	nextp	- address of next extent (if any) for search miss
+ *	cmpp	- comparison result:
+ *	btstack - traverse stack;
+ *	flag	- search process flag (XT_INSERT);
  *
  * returns:
- *      btstack contains (bn, index) of search path traversed to the entry.
- *      *cmpp is set to result of comparison with the entry returned.
- *      the page containing the entry is pinned at exit.
+ *	btstack contains (bn, index) of search path traversed to the entry.
+ *	*cmpp is set to result of comparison with the entry returned.
+ *	the page containing the entry is pinned at exit.
  */
 static int xtSearch(struct inode *ip, s64 xoff,	s64 *nextp,
 		    int *cmpp, struct btstack * btstack, int flag)
@@ -517,7 +517,7 @@ static int xtSearch(struct inode *ip, s64 xoff,	s64 *nextp,
 	btstack->nsplit = 0;
 
 	/*
-	 *      search down tree from root:
+	 *	search down tree from root:
 	 *
 	 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
 	 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
@@ -642,7 +642,7 @@ static int xtSearch(struct inode *ip, s64 xoff,	s64 *nextp,
 			XT_CMP(cmp, xoff, &p->xad[index], t64);
 			if (cmp == 0) {
 				/*
-				 *      search hit
+				 *	search hit
 				 */
 				/* search hit - leaf page:
 				 * return the entry found
@@ -692,7 +692,7 @@ static int xtSearch(struct inode *ip, s64 xoff,	s64 *nextp,
 		}
 
 		/*
-		 *      search miss
+		 *	search miss
 		 *
 		 * base is the smallest index with key (Kj) greater than
 		 * search key (K) and may be zero or maxentry index.
@@ -773,22 +773,22 @@ static int xtSearch(struct inode *ip, s64 xoff,	s64 *nextp,
 }
 
 /*
- *      xtInsert()
+ *	xtInsert()
  *
  * function:
  *
  * parameter:
- *      tid     - transaction id;
- *      ip      - file object;
- *      xflag   - extent flag (XAD_NOTRECORDED):
- *      xoff    - extent offset;
- *      xlen    - extent length;
- *      xaddrp  - extent address pointer (in/out):
- *              if (*xaddrp)
- *                      caller allocated data extent at *xaddrp;
- *              else
- *                      allocate data extent and return its xaddr;
- *      flag    -
+ *	tid	- transaction id;
+ *	ip	- file object;
+ *	xflag	- extent flag (XAD_NOTRECORDED):
+ *	xoff	- extent offset;
+ *	xlen	- extent length;
+ *	xaddrp	- extent address pointer (in/out):
+ *		if (*xaddrp)
+ *			caller allocated data extent at *xaddrp;
+ *		else
+ *			allocate data extent and return its xaddr;
+ *	flag	-
  *
  * return:
  */
@@ -813,7 +813,7 @@ int xtInsert(tid_t tid,		/* transaction id */
 	jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
 
 	/*
-	 *      search for the entry location at which to insert:
+	 *	search for the entry location at which to insert:
 	 *
 	 * xtFastSearch() and xtSearch() both returns (leaf page
 	 * pinned, index at which to insert).
@@ -853,13 +853,13 @@ int xtInsert(tid_t tid,		/* transaction id */
 	}
 
 	/*
-	 *      insert entry for new extent
+	 *	insert entry for new extent
 	 */
 	xflag |= XAD_NEW;
 
 	/*
-	 *      if the leaf page is full, split the page and
-	 *      propagate up the router entry for the new page from split
+	 *	if the leaf page is full, split the page and
+	 *	propagate up the router entry for the new page from split
 	 *
 	 * The xtSplitUp() will insert the entry and unpin the leaf page.
 	 */
@@ -886,7 +886,7 @@ int xtInsert(tid_t tid,		/* transaction id */
 	}
 
 	/*
-	 *      insert the new entry into the leaf page
+	 *	insert the new entry into the leaf page
 	 */
 	/*
 	 * acquire a transaction lock on the leaf page;
@@ -930,16 +930,16 @@ int xtInsert(tid_t tid,		/* transaction id */
 
 
 /*
- *      xtSplitUp()
+ *	xtSplitUp()
  *
  * function:
- *      split full pages as propagating insertion up the tree
+ *	split full pages as propagating insertion up the tree
  *
  * parameter:
- *      tid     - transaction id;
- *      ip      - file object;
- *      split   - entry parameter descriptor;
- *      btstack - traverse stack from xtSearch()
+ *	tid	- transaction id;
+ *	ip	- file object;
+ *	split	- entry parameter descriptor;
+ *	btstack - traverse stack from xtSearch()
  *
  * return:
  */
@@ -1199,22 +1199,22 @@ xtSplitUp(tid_t tid,
 
 
 /*
- *      xtSplitPage()
+ *	xtSplitPage()
  *
  * function:
- *      split a full non-root page into
- *      original/split/left page and new right page
- *      i.e., the original/split page remains as left page.
+ *	split a full non-root page into
+ *	original/split/left page and new right page
+ *	i.e., the original/split page remains as left page.
  *
  * parameter:
- *      int		tid,
- *      struct inode    *ip,
- *      struct xtsplit  *split,
- *      struct metapage	**rmpp,
- *      u64		*rbnp,
+ *	int		tid,
+ *	struct inode	*ip,
+ *	struct xtsplit	*split,
+ *	struct metapage	**rmpp,
+ *	u64		*rbnp,
  *
  * return:
- *      Pointer to page in which to insert or NULL on error.
+ *	Pointer to page in which to insert or NULL on error.
  */
 static int
 xtSplitPage(tid_t tid, struct inode *ip,
@@ -1248,9 +1248,9 @@ xtSplitPage(tid_t tid, struct inode *ip,
 	rbn = addressPXD(pxd);
 
 	/* Allocate blocks to quota. */
-       if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
-	       rc = -EDQUOT;
-	       goto clean_up;
+	if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
+		rc = -EDQUOT;
+		goto clean_up;
 	}
 
 	quota_allocation += lengthPXD(pxd);
@@ -1304,7 +1304,7 @@ xtSplitPage(tid_t tid, struct inode *ip,
 	skip = split->index;
 
 	/*
-	 *      sequential append at tail (after last entry of last page)
+	 *	sequential append at tail (after last entry of last page)
 	 *
 	 * if splitting the last page on a level because of appending
 	 * a entry to it (skip is maxentry), it's likely that the access is
@@ -1342,7 +1342,7 @@ xtSplitPage(tid_t tid, struct inode *ip,
 	}
 
 	/*
-	 *      non-sequential insert (at possibly middle page)
+	 *	non-sequential insert (at possibly middle page)
 	 */
 
 	/*
@@ -1465,25 +1465,24 @@ xtSplitPage(tid_t tid, struct inode *ip,
 
 
 /*
- *      xtSplitRoot()
+ *	xtSplitRoot()
  *
  * function:
- *      split the full root page into
- *      original/root/split page and new right page
- *      i.e., root remains fixed in tree anchor (inode) and
- *      the root is copied to a single new right child page
- *      since root page << non-root page, and
- *      the split root page contains a single entry for the
- *      new right child page.
+ *	split the full root page into original/root/split page and new
+ *	right page
+ *	i.e., root remains fixed in tree anchor (inode) and the root is
+ *	copied to a single new right child page since root page <<
+ *	non-root page, and the split root page contains a single entry
+ *	for the new right child page.
  *
  * parameter:
- *      int		tid,
- *      struct inode    *ip,
- *      struct xtsplit  *split,
- *      struct metapage	**rmpp)
+ *	int		tid,
+ *	struct inode	*ip,
+ *	struct xtsplit	*split,
+ *	struct metapage	**rmpp)
  *
  * return:
- *      Pointer to page in which to insert or NULL on error.
+ *	Pointer to page in which to insert or NULL on error.
  */
 static int
 xtSplitRoot(tid_t tid,
@@ -1505,7 +1504,7 @@ xtSplitRoot(tid_t tid,
 	INCREMENT(xtStat.split);
 
 	/*
-	 *      allocate a single (right) child page
+	 *	allocate a single (right) child page
 	 */
 	pxdlist = split->pxdlist;
 	pxd = &pxdlist->pxd[pxdlist->npxd];
@@ -1573,7 +1572,7 @@ xtSplitRoot(tid_t tid,
 	}
 
 	/*
-	 *      reset the root
+	 *	reset the root
 	 *
 	 * init root with the single entry for the new right page
 	 * set the 1st entry offset to 0, which force the left-most key
@@ -1610,7 +1609,7 @@ xtSplitRoot(tid_t tid,
 
 
 /*
- *      xtExtend()
+ *	xtExtend()
  *
  * function: extend in-place;
  *
@@ -1677,7 +1676,7 @@ int xtExtend(tid_t tid,		/* transaction id */
 		goto extendOld;
 
 	/*
-	 *      extent overflow: insert entry for new extent
+	 *	extent overflow: insert entry for new extent
 	 */
 //insertNew:
 	xoff = offsetXAD(xad) + MAXXLEN;
@@ -1685,8 +1684,8 @@ int xtExtend(tid_t tid,		/* transaction id */
 	nextindex = le16_to_cpu(p->header.nextindex);
 
 	/*
-	 *      if the leaf page is full, insert the new entry and
-	 *      propagate up the router entry for the new page from split
+	 *	if the leaf page is full, insert the new entry and
+	 *	propagate up the router entry for the new page from split
 	 *
 	 * The xtSplitUp() will insert the entry and unpin the leaf page.
 	 */
@@ -1731,7 +1730,7 @@ int xtExtend(tid_t tid,		/* transaction id */
 		}
 	}
 	/*
-	 *      insert the new entry into the leaf page
+	 *	insert the new entry into the leaf page
 	 */
 	else {
 		/* insert the new entry: mark the entry NEW */
@@ -1771,11 +1770,11 @@ int xtExtend(tid_t tid,		/* transaction id */
 
 #ifdef _NOTYET
 /*
- *      xtTailgate()
+ *	xtTailgate()
  *
  * function: split existing 'tail' extent
- *      (split offset >= start offset of tail extent), and
- *      relocate and extend the split tail half;
+ *	(split offset >= start offset of tail extent), and
+ *	relocate and extend the split tail half;
  *
  * note: existing extent may or may not have been committed.
  * caller is responsible for pager buffer cache update, and
@@ -1804,7 +1803,7 @@ int xtTailgate(tid_t tid,		/* transaction id */
 
 /*
 printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
-        (ulong)xoff, xlen, (ulong)xaddr);
+	(ulong)xoff, xlen, (ulong)xaddr);
 */
 
 	/* there must exist extent to be tailgated */
@@ -1842,18 +1841,18 @@ printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
 	xad = &p->xad[index];
 /*
 printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
-        (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad));
+	(ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad));
 */
 	if ((llen = xoff - offsetXAD(xad)) == 0)
 		goto updateOld;
 
 	/*
-	 *      partially replace extent: insert entry for new extent
+	 *	partially replace extent: insert entry for new extent
 	 */
 //insertNew:
 	/*
-	 *      if the leaf page is full, insert the new entry and
-	 *      propagate up the router entry for the new page from split
+	 *	if the leaf page is full, insert the new entry and
+	 *	propagate up the router entry for the new page from split
 	 *
 	 * The xtSplitUp() will insert the entry and unpin the leaf page.
 	 */
@@ -1898,7 +1897,7 @@ printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
 		}
 	}
 	/*
-	 *      insert the new entry into the leaf page
+	 *	insert the new entry into the leaf page
 	 */
 	else {
 		/* insert the new entry: mark the entry NEW */
@@ -1955,17 +1954,17 @@ printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
 #endif /* _NOTYET */
 
 /*
- *      xtUpdate()
+ *	xtUpdate()
  *
  * function: update XAD;
  *
- *      update extent for allocated_but_not_recorded or
- *      compressed extent;
+ *	update extent for allocated_but_not_recorded or
+ *	compressed extent;
  *
  * parameter:
- *      nxad    - new XAD;
- *                logical extent of the specified XAD must be completely
- *                contained by an existing XAD;
+ *	nxad	- new XAD;
+ *		logical extent of the specified XAD must be completely
+ *		contained by an existing XAD;
  */
 int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
 {				/* new XAD */
@@ -2416,19 +2415,19 @@ printf("xtUpdate.updateLeft.split p:0x%p\n", p);
 
 
 /*
- *      xtAppend()
+ *	xtAppend()
  *
  * function: grow in append mode from contiguous region specified ;
  *
  * parameter:
- *      tid             - transaction id;
- *      ip              - file object;
- *      xflag           - extent flag:
- *      xoff            - extent offset;
- *      maxblocks       - max extent length;
- *      xlen            - extent length (in/out);
- *      xaddrp          - extent address pointer (in/out):
- *      flag            -
+ *	tid		- transaction id;
+ *	ip		- file object;
+ *	xflag		- extent flag:
+ *	xoff		- extent offset;
+ *	maxblocks	- max extent length;
+ *	xlen		- extent length (in/out);
+ *	xaddrp		- extent address pointer (in/out):
+ *	flag		-
  *
  * return:
  */
@@ -2460,7 +2459,7 @@ int xtAppend(tid_t tid,		/* transaction id */
 		 (ulong) xoff, maxblocks, xlen, (ulong) xaddr);
 
 	/*
-	 *      search for the entry location at which to insert:
+	 *	search for the entry location at which to insert:
 	 *
 	 * xtFastSearch() and xtSearch() both returns (leaf page
 	 * pinned, index at which to insert).
@@ -2482,13 +2481,13 @@ int xtAppend(tid_t tid,		/* transaction id */
 		xlen = min(xlen, (int)(next - xoff));
 //insert:
 	/*
-	 *      insert entry for new extent
+	 *	insert entry for new extent
 	 */
 	xflag |= XAD_NEW;
 
 	/*
-	 *      if the leaf page is full, split the page and
-	 *      propagate up the router entry for the new page from split
+	 *	if the leaf page is full, split the page and
+	 *	propagate up the router entry for the new page from split
 	 *
 	 * The xtSplitUp() will insert the entry and unpin the leaf page.
 	 */
@@ -2545,7 +2544,7 @@ int xtAppend(tid_t tid,		/* transaction id */
 	return 0;
 
 	/*
-	 *      insert the new entry into the leaf page
+	 *	insert the new entry into the leaf page
 	 */
       insertLeaf:
 	/*
@@ -2589,17 +2588,17 @@ int xtAppend(tid_t tid,		/* transaction id */
 
 /* - TBD for defragmentaion/reorganization -
  *
- *      xtDelete()
+ *	xtDelete()
  *
  * function:
- *      delete the entry with the specified key.
+ *	delete the entry with the specified key.
  *
- *      N.B.: whole extent of the entry is assumed to be deleted.
+ *	N.B.: whole extent of the entry is assumed to be deleted.
  *
  * parameter:
  *
  * return:
- *       ENOENT: if the entry is not found.
+ *	ENOENT: if the entry is not found.
  *
  * exception:
  */
@@ -2665,10 +2664,10 @@ int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag)
 
 /* - TBD for defragmentaion/reorganization -
  *
- *      xtDeleteUp()
+ *	xtDeleteUp()
  *
  * function:
- *      free empty pages as propagating deletion up the tree
+ *	free empty pages as propagating deletion up the tree
  *
  * parameter:
  *
@@ -2815,15 +2814,15 @@ xtDeleteUp(tid_t tid, struct inode *ip,
 
 
 /*
- * NAME:        xtRelocate()
+ * NAME:	xtRelocate()
  *
- * FUNCTION:    relocate xtpage or data extent of regular file;
- *              This function is mainly used by defragfs utility.
+ * FUNCTION:	relocate xtpage or data extent of regular file;
+ *		This function is mainly used by defragfs utility.
  *
- * NOTE:        This routine does not have the logic to handle
- *              uncommitted allocated extent. The caller should call
- *              txCommit() to commit all the allocation before call
- *              this routine.
+ * NOTE:	This routine does not have the logic to handle
+ *		uncommitted allocated extent. The caller should call
+ *		txCommit() to commit all the allocation before call
+ *		this routine.
  */
 int
 xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
@@ -2865,8 +2864,8 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
 		 xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr);
 
 	/*
-	 *      1. get and validate the parent xtpage/xad entry
-	 *      covering the source extent to be relocated;
+	 *	1. get and validate the parent xtpage/xad entry
+	 *	covering the source extent to be relocated;
 	 */
 	if (xtype == DATAEXT) {
 		/* search in leaf entry */
@@ -2910,7 +2909,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
 	jfs_info("xtRelocate: parent xad entry validated.");
 
 	/*
-	 *      2. relocate the extent
+	 *	2. relocate the extent
 	 */
 	if (xtype == DATAEXT) {
 		/* if the extent is allocated-but-not-recorded
@@ -2923,7 +2922,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
 			XT_PUTPAGE(pmp);
 
 		/*
-		 *      cmRelocate()
+		 *	cmRelocate()
 		 *
 		 * copy target data pages to be relocated;
 		 *
@@ -2945,8 +2944,8 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
 		pno = offset >> CM_L2BSIZE;
 		npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE;
 /*
-                npages = ((offset + nbytes - 1) >> CM_L2BSIZE) -
-                         (offset >> CM_L2BSIZE) + 1;
+		npages = ((offset + nbytes - 1) >> CM_L2BSIZE) -
+			  (offset >> CM_L2BSIZE) + 1;
 */
 		sxaddr = oxaddr;
 		dxaddr = nxaddr;
@@ -2981,7 +2980,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
 
 		XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
 		jfs_info("xtRelocate: target data extent relocated.");
-	} else {		/* (xtype  == XTPAGE) */
+	} else {		/* (xtype == XTPAGE) */
 
 		/*
 		 * read in the target xtpage from the source extent;
@@ -3026,16 +3025,14 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
 		 */
 		if (lmp) {
 			BT_MARK_DIRTY(lmp, ip);
-			tlck =
-			    txLock(tid, ip, lmp, tlckXTREE | tlckRELINK);
+			tlck = txLock(tid, ip, lmp, tlckXTREE | tlckRELINK);
 			lp->header.next = cpu_to_le64(nxaddr);
 			XT_PUTPAGE(lmp);
 		}
 
 		if (rmp) {
 			BT_MARK_DIRTY(rmp, ip);
-			tlck =
-			    txLock(tid, ip, rmp, tlckXTREE | tlckRELINK);
+			tlck = txLock(tid, ip, rmp, tlckXTREE | tlckRELINK);
 			rp->header.prev = cpu_to_le64(nxaddr);
 			XT_PUTPAGE(rmp);
 		}
@@ -3062,7 +3059,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
 		 * scan may be skipped by commit() and logredo();
 		 */
 		BT_MARK_DIRTY(mp, ip);
-		/* tlckNEW init  xtlck->lwm.offset = XTENTRYSTART; */
+		/* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */
 		tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW);
 		xtlck = (struct xtlock *) & tlck->lock;
 
@@ -3084,7 +3081,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
 	}
 
 	/*
-	 *      3. acquire maplock for the source extent to be freed;
+	 *	3. acquire maplock for the source extent to be freed;
 	 *
 	 * acquire a maplock saving the src relocated extent address;
 	 * to free of the extent at commit time;
@@ -3105,7 +3102,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
 	 *      is no buffer associated with this lock since the buffer
 	 *      has been redirected to the target location.
 	 */
-	else			/* (xtype  == XTPAGE) */
+	else			/* (xtype == XTPAGE) */
 		tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE);
 
 	pxdlock = (struct pxd_lock *) & tlck->lock;
@@ -3115,7 +3112,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
 	pxdlock->index = 1;
 
 	/*
-	 *      4. update the parent xad entry for relocation;
+	 *	4. update the parent xad entry for relocation;
 	 *
 	 * acquire tlck for the parent entry with XAD_NEW as entry
 	 * update which will write LOG_REDOPAGE and update bmap for
@@ -3143,22 +3140,22 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
 
 
 /*
- *      xtSearchNode()
+ *	xtSearchNode()
  *
- * function:    search for the internal xad entry covering specified extent.
- *              This function is mainly used by defragfs utility.
+ * function:	search for the internal xad entry covering specified extent.
+ *		This function is mainly used by defragfs utility.
  *
  * parameters:
- *      ip      - file object;
- *      xad     - extent to find;
- *      cmpp    - comparison result:
- *      btstack - traverse stack;
- *      flag    - search process flag;
+ *	ip	- file object;
+ *	xad	- extent to find;
+ *	cmpp	- comparison result:
+ *	btstack - traverse stack;
+ *	flag	- search process flag;
  *
  * returns:
- *      btstack contains (bn, index) of search path traversed to the entry.
- *      *cmpp is set to result of comparison with the entry returned.
- *      the page containing the entry is pinned at exit.
+ *	btstack contains (bn, index) of search path traversed to the entry.
+ *	*cmpp is set to result of comparison with the entry returned.
+ *	the page containing the entry is pinned at exit.
  */
 static int xtSearchNode(struct inode *ip, xad_t * xad,	/* required XAD entry */
 			int *cmpp, struct btstack * btstack, int flag)
@@ -3181,7 +3178,7 @@ static int xtSearchNode(struct inode *ip, xad_t * xad,	/* required XAD entry */
 	xaddr = addressXAD(xad);
 
 	/*
-	 *      search down tree from root:
+	 *	search down tree from root:
 	 *
 	 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
 	 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
@@ -3217,7 +3214,7 @@ static int xtSearchNode(struct inode *ip, xad_t * xad,	/* required XAD entry */
 			XT_CMP(cmp, xoff, &p->xad[index], t64);
 			if (cmp == 0) {
 				/*
-				 *      search hit
+				 *	search hit
 				 *
 				 * verify for exact match;
 				 */
@@ -3245,7 +3242,7 @@ static int xtSearchNode(struct inode *ip, xad_t * xad,	/* required XAD entry */
 		}
 
 		/*
-		 *      search miss - non-leaf page:
+		 *	search miss - non-leaf page:
 		 *
 		 * base is the smallest index with key (Kj) greater than
 		 * search key (K) and may be zero or maxentry index.
@@ -3268,15 +3265,15 @@ static int xtSearchNode(struct inode *ip, xad_t * xad,	/* required XAD entry */
 
 
 /*
- *      xtRelink()
+ *	xtRelink()
  *
  * function:
- *      link around a freed page.
+ *	link around a freed page.
  *
  * Parameter:
- *      int           tid,
- *      struct inode    *ip,
- *      xtpage_t        *p)
+ *	int		tid,
+ *	struct inode	*ip,
+ *	xtpage_t	*p)
  *
  * returns:
  */
@@ -3338,7 +3335,7 @@ static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p)
 
 
 /*
- *      xtInitRoot()
+ *	xtInitRoot()
  *
  * initialize file root (inline in inode)
  */
@@ -3385,42 +3382,42 @@ void xtInitRoot(tid_t tid, struct inode *ip)
 #define MAX_TRUNCATE_LEAVES 50
 
 /*
- *      xtTruncate()
+ *	xtTruncate()
  *
  * function:
- *      traverse for truncation logging backward bottom up;
- *      terminate at the last extent entry at the current subtree
- *      root page covering new down size.
- *      truncation may occur within the last extent entry.
+ *	traverse for truncation logging backward bottom up;
+ *	terminate at the last extent entry at the current subtree
+ *	root page covering new down size.
+ *	truncation may occur within the last extent entry.
  *
  * parameter:
- *      int           tid,
- *      struct inode    *ip,
- *      s64           newsize,
- *      int           type)   {PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
+ *	int		tid,
+ *	struct inode	*ip,
+ *	s64		newsize,
+ *	int		type)	{PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
  *
  * return:
  *
  * note:
- *      PWMAP:
- *       1. truncate (non-COMMIT_NOLINK file)
- *          by jfs_truncate() or jfs_open(O_TRUNC):
- *          xtree is updated;
+ *	PWMAP:
+ *	 1. truncate (non-COMMIT_NOLINK file)
+ *	    by jfs_truncate() or jfs_open(O_TRUNC):
+ *	    xtree is updated;
  *	 2. truncate index table of directory when last entry removed
- *       map update via tlock at commit time;
- *      PMAP:
+ *	map update via tlock at commit time;
+ *	PMAP:
  *	 Call xtTruncate_pmap instead
- *      WMAP:
- *       1. remove (free zero link count) on last reference release
- *          (pmap has been freed at commit zero link count);
- *       2. truncate (COMMIT_NOLINK file, i.e., tmp file):
- *          xtree is updated;
- *       map update directly at truncation time;
+ *	WMAP:
+ *	 1. remove (free zero link count) on last reference release
+ *	    (pmap has been freed at commit zero link count);
+ *	 2. truncate (COMMIT_NOLINK file, i.e., tmp file):
+ *	    xtree is updated;
+ *	 map update directly at truncation time;
  *
- *      if (DELETE)
- *              no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
- *      else if (TRUNCATE)
- *              must write LOG_NOREDOPAGE for deleted index page;
+ *	if (DELETE)
+ *		no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
+ *	else if (TRUNCATE)
+ *		must write LOG_NOREDOPAGE for deleted index page;
  *
  * pages may already have been tlocked by anonymous transactions
  * during file growth (i.e., write) before truncation;
@@ -3493,7 +3490,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
 	 * retained in the new sized file.
 	 * if type is PMAP, the data and index pages are NOT
 	 * freed, and the data and index blocks are NOT freed
-	 * from  working map.
+	 * from working map.
 	 * (this will allow continued access of data/index of
 	 * temporary file (zerolink count file truncated to zero-length)).
 	 */
@@ -3542,7 +3539,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
 		goto getChild;
 
 	/*
-	 *      leaf page
+	 *	leaf page
 	 */
 	freed = 0;
 
@@ -3916,7 +3913,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
 	}
 
 	/*
-	 *      internal page: go down to child page of current entry
+	 *	internal page: go down to child page of current entry
 	 */
       getChild:
 	/* save current parent entry for the child page */
@@ -3965,7 +3962,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
 
 
 /*
- *      xtTruncate_pmap()
+ *	xtTruncate_pmap()
  *
  * function:
  *	Perform truncate to zero lenghth for deleted file, leaving the
@@ -3974,9 +3971,9 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
  *	is committed to disk.
  *
  * parameter:
- *      tid_t		tid,
- *      struct inode	*ip,
- *      s64		committed_size)
+ *	tid_t		tid,
+ *	struct inode	*ip,
+ *	s64		committed_size)
  *
  * return: new committed size
  *
@@ -4050,7 +4047,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
 	}
 
 	/*
-	 *      leaf page
+	 *	leaf page
 	 */
 
 	if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
@@ -4062,7 +4059,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
 		xoff = offsetXAD(xad);
 		xlen = lengthXAD(xad);
 		XT_PUTPAGE(mp);
-		return  (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
+		return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
 	}
 	tlck = txLock(tid, ip, mp, tlckXTREE);
 	tlck->type = tlckXTREE | tlckFREE;
@@ -4099,8 +4096,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
 		 */
 		tlck = txLock(tid, ip, mp, tlckXTREE);
 		xtlck = (struct xtlock *) & tlck->lock;
-		xtlck->hwm.offset =
-		    le16_to_cpu(p->header.nextindex) - 1;
+		xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
 		tlck->type = tlckXTREE | tlckFREE;
 
 		XT_PUTPAGE(mp);
@@ -4118,7 +4114,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
 	else
 		index--;
 	/*
-	 *      internal page: go down to child page of current entry
+	 *	internal page: go down to child page of current entry
 	 */
       getChild:
 	/* save current parent entry for the child page */
diff --git a/fs/jfs/jfs_xtree.h b/fs/jfs/jfs_xtree.h
index 164f6f2b101..70815c8a3d6 100644
--- a/fs/jfs/jfs_xtree.h
+++ b/fs/jfs/jfs_xtree.h
@@ -19,14 +19,14 @@
 #define _H_JFS_XTREE
 
 /*
- *      jfs_xtree.h: extent allocation descriptor B+-tree manager
+ *	jfs_xtree.h: extent allocation descriptor B+-tree manager
  */
 
 #include "jfs_btree.h"
 
 
 /*
- *      extent allocation descriptor (xad)
+ *	extent allocation descriptor (xad)
  */
 typedef struct xad {
 	unsigned flag:8;	/* 1: flag */
@@ -38,30 +38,30 @@ typedef struct xad {
 	__le32 addr2;		/* 4: address in unit of fsblksize */
 } xad_t;			/* (16) */
 
-#define MAXXLEN         ((1 << 24) - 1)
+#define MAXXLEN		((1 << 24) - 1)
 
-#define XTSLOTSIZE      16
-#define L2XTSLOTSIZE    4
+#define XTSLOTSIZE	16
+#define L2XTSLOTSIZE	4
 
 /* xad_t field construction */
 #define XADoffset(xad, offset64)\
 {\
-        (xad)->off1 = ((u64)offset64) >> 32;\
-        (xad)->off2 = __cpu_to_le32((offset64) & 0xffffffff);\
+	(xad)->off1 = ((u64)offset64) >> 32;\
+	(xad)->off2 = __cpu_to_le32((offset64) & 0xffffffff);\
 }
 #define XADaddress(xad, address64)\
 {\
-        (xad)->addr1 = ((u64)address64) >> 32;\
-        (xad)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\
+	(xad)->addr1 = ((u64)address64) >> 32;\
+	(xad)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\
 }
-#define XADlength(xad, length32)        (xad)->len = __cpu_to_le24(length32)
+#define XADlength(xad, length32)	(xad)->len = __cpu_to_le24(length32)
 
 /* xad_t field extraction */
 #define offsetXAD(xad)\
-        ( ((s64)((xad)->off1)) << 32 | __le32_to_cpu((xad)->off2))
+	( ((s64)((xad)->off1)) << 32 | __le32_to_cpu((xad)->off2))
 #define addressXAD(xad)\
-        ( ((s64)((xad)->addr1)) << 32 | __le32_to_cpu((xad)->addr2))
-#define lengthXAD(xad)  __le24_to_cpu((xad)->len)
+	( ((s64)((xad)->addr1)) << 32 | __le32_to_cpu((xad)->addr2))
+#define lengthXAD(xad)	__le24_to_cpu((xad)->len)
 
 /* xad list */
 struct xadlist {
@@ -71,22 +71,22 @@ struct xadlist {
 };
 
 /* xad_t flags */
-#define XAD_NEW         0x01	/* new */
-#define XAD_EXTENDED    0x02	/* extended */
-#define XAD_COMPRESSED  0x04	/* compressed with recorded length */
+#define XAD_NEW		0x01	/* new */
+#define XAD_EXTENDED	0x02	/* extended */
+#define XAD_COMPRESSED	0x04	/* compressed with recorded length */
 #define XAD_NOTRECORDED 0x08	/* allocated but not recorded */
-#define XAD_COW         0x10	/* copy-on-write */
+#define XAD_COW		0x10	/* copy-on-write */
 
 
 /* possible values for maxentry */
-#define XTROOTINITSLOT_DIR  6
-#define XTROOTINITSLOT  10
-#define XTROOTMAXSLOT   18
-#define XTPAGEMAXSLOT   256
-#define XTENTRYSTART    2
+#define XTROOTINITSLOT_DIR 6
+#define XTROOTINITSLOT	10
+#define XTROOTMAXSLOT	18
+#define XTPAGEMAXSLOT	256
+#define XTENTRYSTART	2
 
 /*
- *      xtree page:
+ *	xtree page:
  */
 typedef union {
 	struct xtheader {
@@ -106,7 +106,7 @@ typedef union {
 } xtpage_t;
 
 /*
- *      external declaration
+ *	external declaration
  */
 extern int xtLookup(struct inode *ip, s64 lstart, s64 llen,
 		    int *pflag, s64 * paddr, int *plen, int flag);
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 41c20477126..25161c4121e 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -328,7 +328,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
  *		dentry	- child directory dentry
  *
  * RETURN:	-EINVAL	- if name is . or ..
- *		-EINVAL  - if . or .. exist but are invalid.
+ *		-EINVAL - if . or .. exist but are invalid.
  *		errors from subroutines
  *
  * note:
@@ -517,7 +517,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
 	inode_dec_link_count(ip);
 
 	/*
-	 *      commit zero link count object
+	 *	commit zero link count object
 	 */
 	if (ip->i_nlink == 0) {
 		assert(!test_cflag(COMMIT_Nolink, ip));
@@ -596,7 +596,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
 /*
  * NAME:	commitZeroLink()
  *
- * FUNCTION:    for non-directory, called by jfs_remove(),
+ * FUNCTION:	for non-directory, called by jfs_remove(),
  *		truncate a regular file, directory or symbolic
  *		link to zero length. return 0 if type is not
  *		one of these.
@@ -676,7 +676,7 @@ static s64 commitZeroLink(tid_t tid, struct inode *ip)
 /*
  * NAME:	jfs_free_zero_link()
  *
- * FUNCTION:    for non-directory, called by iClose(),
+ * FUNCTION:	for non-directory, called by iClose(),
  *		free resources of a file from cache and WORKING map
  *		for a file previously committed with zero link count
  *		while associated with a pager object,
@@ -855,12 +855,12 @@ static int jfs_link(struct dentry *old_dentry,
  * NAME:	jfs_symlink(dip, dentry, name)
  *
  * FUNCTION:	creates a symbolic link to <symlink> by name <name>
- *		        in directory <dip>
+ *			in directory <dip>
  *
- * PARAMETER:	dip	    - parent directory vnode
- *		        dentry	- dentry of symbolic link
- *		        name    - the path name of the existing object
- *			              that will be the source of the link
+ * PARAMETER:	dip	- parent directory vnode
+ *		dentry	- dentry of symbolic link
+ *		name	- the path name of the existing object
+ *			  that will be the source of the link
  *
  * RETURN:	errors from subroutines
  *
@@ -1052,9 +1052,9 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
 
 
 /*
- * NAME:        jfs_rename
+ * NAME:	jfs_rename
  *
- * FUNCTION:    rename a file or directory
+ * FUNCTION:	rename a file or directory
  */
 static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	       struct inode *new_dir, struct dentry *new_dentry)
@@ -1331,9 +1331,9 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 
 /*
- * NAME:        jfs_mknod
+ * NAME:	jfs_mknod
  *
- * FUNCTION:    Create a special file (device)
+ * FUNCTION:	Create a special file (device)
  */
 static int jfs_mknod(struct inode *dir, struct dentry *dentry,
 		int mode, dev_t rdev)
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index 79d625f3f73..71984ee9534 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -29,17 +29,17 @@
 #include "jfs_txnmgr.h"
 #include "jfs_debug.h"
 
-#define BITSPERPAGE     (PSIZE << 3)
-#define L2MEGABYTE      20
-#define MEGABYTE        (1 << L2MEGABYTE)
-#define MEGABYTE32     (MEGABYTE << 5)
+#define BITSPERPAGE	(PSIZE << 3)
+#define L2MEGABYTE	20
+#define MEGABYTE	(1 << L2MEGABYTE)
+#define MEGABYTE32	(MEGABYTE << 5)
 
 /* convert block number to bmap file page number */
 #define BLKTODMAPN(b)\
-        (((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1)
+	(((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1)
 
 /*
- *      jfs_extendfs()
+ *	jfs_extendfs()
  *
  * function: extend file system;
  *
@@ -48,9 +48,9 @@
  *                                   workspace  space
  *
  * input:
- *      new LVSize: in LV blocks (required)
- *      new LogSize: in LV blocks (optional)
- *      new FSSize: in LV blocks (optional)
+ *	new LVSize: in LV blocks (required)
+ *	new LogSize: in LV blocks (optional)
+ *	new FSSize: in LV blocks (optional)
  *
  * new configuration:
  * 1. set new LogSize as specified or default from new LVSize;
@@ -125,8 +125,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
 	}
 
 	/*
-	 *      reconfigure LV spaces
-	 *      ---------------------
+	 *	reconfigure LV spaces
+	 *	---------------------
 	 *
 	 * validate new size, or, if not specified, determine new size
 	 */
@@ -198,7 +198,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
 		log_formatted = 1;
 	}
 	/*
-	 *      quiesce file system
+	 *	quiesce file system
 	 *
 	 * (prepare to move the inline log and to prevent map update)
 	 *
@@ -270,8 +270,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
 	}
 
 	/*
-	 *      extend block allocation map
-	 *      ---------------------------
+	 *	extend block allocation map
+	 *	---------------------------
 	 *
 	 * extendfs() for new extension, retry after crash recovery;
 	 *
@@ -283,7 +283,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
 	 *  s_size: aggregate size in physical blocks;
 	 */
 	/*
-	 *      compute the new block allocation map configuration
+	 *	compute the new block allocation map configuration
 	 *
 	 * map dinode:
 	 *  di_size: map file size in byte;
@@ -301,7 +301,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
 	newNpages = BLKTODMAPN(t64) + 1;
 
 	/*
-	 *      extend map from current map (WITHOUT growing mapfile)
+	 *	extend map from current map (WITHOUT growing mapfile)
 	 *
 	 * map new extension with unmapped part of the last partial
 	 * dmap page, if applicable, and extra page(s) allocated
@@ -341,8 +341,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
 	XSize -= nblocks;
 
 	/*
-	 *      grow map file to cover remaining extension
-	 *      and/or one extra dmap page for next extendfs();
+	 *	grow map file to cover remaining extension
+	 *	and/or one extra dmap page for next extendfs();
 	 *
 	 * allocate new map pages and its backing blocks, and
 	 * update map file xtree
@@ -422,8 +422,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
 	dbFinalizeBmap(ipbmap);
 
 	/*
-	 *      update inode allocation map
-	 *      ---------------------------
+	 *	update inode allocation map
+	 *	---------------------------
 	 *
 	 * move iag lists from old to new iag;
 	 * agstart field is not updated for logredo() to reconstruct
@@ -442,8 +442,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
 	}
 
 	/*
-	 *      finalize
-	 *      --------
+	 *	finalize
+	 *	--------
 	 *
 	 * extension is committed when on-disk super block is
 	 * updated with new descriptors: logredo will recover
@@ -480,7 +480,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
 	diFreeSpecial(ipbmap2);
 
 	/*
-	 *      update superblock
+	 *	update superblock
 	 */
 	if ((rc = readSuper(sb, &bh)))
 		goto error_out;
@@ -530,7 +530,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
 
       resume:
 	/*
-	 *      resume file system transactions
+	 *	resume file system transactions
 	 */
 	txResume(sb);
 
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index b753ba21645..b2375f0774b 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -63,9 +63,9 @@
  *
  *   On-disk:
  *
- *     FEALISTs are stored on disk using blocks allocated by dbAlloc() and
- *     written directly. An EA list may be in-lined in the inode if there is
- *     sufficient room available.
+ *	FEALISTs are stored on disk using blocks allocated by dbAlloc() and
+ *	written directly. An EA list may be in-lined in the inode if there is
+ *	sufficient room available.
  */
 
 struct ea_buffer {
@@ -590,7 +590,8 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size)
       size_check:
 	if (EALIST_SIZE(ea_buf->xattr) != ea_size) {
 		printk(KERN_ERR "ea_get: invalid extended attribute\n");
-		dump_mem("xattr", ea_buf->xattr, ea_size);
+		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1,
+				     ea_buf->xattr, ea_size, 1);
 		ea_release(inode, ea_buf);
 		rc = -EIO;
 		goto clean_up;
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 96070bff93f..572601e98dc 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -44,9 +44,8 @@ static struct nsm_handle *	nsm_find(const struct sockaddr_in *sin,
  */
 static struct nlm_host *
 nlm_lookup_host(int server, const struct sockaddr_in *sin,
-					int proto, int version,
-					const char *hostname,
-					int hostname_len)
+		int proto, int version, const char *hostname,
+		int hostname_len, const struct sockaddr_in *ssin)
 {
 	struct hlist_head *chain;
 	struct hlist_node *pos;
@@ -54,7 +53,9 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
 	struct nsm_handle *nsm = NULL;
 	int		hash;
 
-	dprintk("lockd: nlm_lookup_host(%u.%u.%u.%u, p=%d, v=%d, my role=%s, name=%.*s)\n",
+	dprintk("lockd: nlm_lookup_host("NIPQUAD_FMT"->"NIPQUAD_FMT
+			", p=%d, v=%d, my role=%s, name=%.*s)\n",
+			NIPQUAD(ssin->sin_addr.s_addr),
 			NIPQUAD(sin->sin_addr.s_addr), proto, version,
 			server? "server" : "client",
 			hostname_len,
@@ -91,6 +92,8 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
 			continue;
 		if (host->h_server != server)
 			continue;
+		if (!nlm_cmp_addr(&host->h_saddr, ssin))
+			continue;
 
 		/* Move to head of hash chain. */
 		hlist_del(&host->h_hash);
@@ -118,6 +121,7 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
 	host->h_name	   = nsm->sm_name;
 	host->h_addr       = *sin;
 	host->h_addr.sin_port = 0;	/* ouch! */
+	host->h_saddr	   = *ssin;
 	host->h_version    = version;
 	host->h_proto      = proto;
 	host->h_rpcclnt    = NULL;
@@ -161,15 +165,9 @@ nlm_destroy_host(struct nlm_host *host)
 	 */
 	nsm_unmonitor(host);
 
-	if ((clnt = host->h_rpcclnt) != NULL) {
-		if (atomic_read(&clnt->cl_users)) {
-			printk(KERN_WARNING
-				"lockd: active RPC handle\n");
-			clnt->cl_dead = 1;
-		} else {
-			rpc_destroy_client(host->h_rpcclnt);
-		}
-	}
+	clnt = host->h_rpcclnt;
+	if (clnt != NULL)
+		rpc_shutdown_client(clnt);
 	kfree(host);
 }
 
@@ -180,8 +178,10 @@ struct nlm_host *
 nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version,
 			const char *hostname, int hostname_len)
 {
+	struct sockaddr_in ssin = {0};
+
 	return nlm_lookup_host(0, sin, proto, version,
-			       hostname, hostname_len);
+			       hostname, hostname_len, &ssin);
 }
 
 /*
@@ -191,9 +191,12 @@ struct nlm_host *
 nlmsvc_lookup_host(struct svc_rqst *rqstp,
 			const char *hostname, int hostname_len)
 {
+	struct sockaddr_in ssin = {0};
+
+	ssin.sin_addr = rqstp->rq_daddr.addr;
 	return nlm_lookup_host(1, svc_addr_in(rqstp),
 			       rqstp->rq_prot, rqstp->rq_vers,
-			       hostname, hostname_len);
+			       hostname, hostname_len, &ssin);
 }
 
 /*
@@ -204,8 +207,9 @@ nlm_bind_host(struct nlm_host *host)
 {
 	struct rpc_clnt	*clnt;
 
-	dprintk("lockd: nlm_bind_host(%08x)\n",
-			(unsigned)ntohl(host->h_addr.sin_addr.s_addr));
+	dprintk("lockd: nlm_bind_host("NIPQUAD_FMT"->"NIPQUAD_FMT")\n",
+			NIPQUAD(host->h_saddr.sin_addr),
+			NIPQUAD(host->h_addr.sin_addr));
 
 	/* Lock host handle */
 	mutex_lock(&host->h_mutex);
@@ -232,6 +236,7 @@ nlm_bind_host(struct nlm_host *host)
 			.protocol	= host->h_proto,
 			.address	= (struct sockaddr *)&host->h_addr,
 			.addrsize	= sizeof(host->h_addr),
+			.saddress	= (struct sockaddr *)&host->h_saddr,
 			.timeout	= &timeparms,
 			.servername	= host->h_name,
 			.program	= &nlm_program,
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 2102e2d0134..3353ed8421a 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -61,6 +61,7 @@ nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res)
 			status);
 	else
 		status = 0;
+	rpc_shutdown_client(clnt);
  out:
 	return status;
 }
@@ -138,7 +139,6 @@ nsm_create(void)
 		.program	= &nsm_program,
 		.version	= SM_VERSION,
 		.authflavor	= RPC_AUTH_NULL,
-		.flags		= (RPC_CLNT_CREATE_ONESHOT),
 	};
 
 	return rpc_create(&args);
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 126b1bf02c0..26809325469 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -123,9 +123,6 @@ lockd(struct svc_rqst *rqstp)
 	/* Process request with signals blocked, but allow SIGKILL.  */
 	allow_signal(SIGKILL);
 
-	/* kick rpciod */
-	rpciod_up();
-
 	dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n");
 
 	if (!nlm_timeout)
@@ -202,9 +199,6 @@ lockd(struct svc_rqst *rqstp)
 	/* Exit the RPC thread */
 	svc_exit_thread(rqstp);
 
-	/* release rpciod */
-	rpciod_down();
-
 	/* Release module */
 	unlock_kernel();
 	module_put_and_exit(0);
diff --git a/fs/minix/file.c b/fs/minix/file.c
index f92baa1d757..17765f697e5 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -23,7 +23,7 @@ const struct file_operations minix_file_operations = {
 	.aio_write	= generic_file_aio_write,
 	.mmap		= generic_file_mmap,
 	.fsync		= minix_sync_file,
-	.sendfile	= generic_file_sendfile,
+	.splice_read	= generic_file_splice_read,
 };
 
 const struct inode_operations minix_file_inode_operations = {
diff --git a/fs/namespace.c b/fs/namespace.c
index b696e3a0d18..4198003d7e1 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -28,6 +28,7 @@
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include "pnode.h"
+#include "internal.h"
 
 /* spinlock for vfsmount related operations, inplace of dcache_lock */
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
@@ -320,22 +321,16 @@ EXPORT_SYMBOL(mnt_unpin);
 static void *m_start(struct seq_file *m, loff_t *pos)
 {
 	struct mnt_namespace *n = m->private;
-	struct list_head *p;
-	loff_t l = *pos;
 
 	down_read(&namespace_sem);
-	list_for_each(p, &n->list)
-		if (!l--)
-			return list_entry(p, struct vfsmount, mnt_list);
-	return NULL;
+	return seq_list_start(&n->list, *pos);
 }
 
 static void *m_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	struct mnt_namespace *n = m->private;
-	struct list_head *p = ((struct vfsmount *)v)->mnt_list.next;
-	(*pos)++;
-	return p == &n->list ? NULL : list_entry(p, struct vfsmount, mnt_list);
+
+	return seq_list_next(v, &n->list, pos);
 }
 
 static void m_stop(struct seq_file *m, void *v)
@@ -350,7 +345,7 @@ static inline void mangle(struct seq_file *m, const char *s)
 
 static int show_vfsmnt(struct seq_file *m, void *v)
 {
-	struct vfsmount *mnt = v;
+	struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
 	int err = 0;
 	static struct proc_fs_info {
 		int flag;
@@ -405,7 +400,7 @@ struct seq_operations mounts_op = {
 
 static int show_vfsstat(struct seq_file *m, void *v)
 {
-	struct vfsmount *mnt = v;
+	struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
 	int err = 0;
 
 	/* device */
@@ -1457,7 +1452,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 
 	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
 	if (!new_ns)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	atomic_set(&new_ns->count, 1);
 	INIT_LIST_HEAD(&new_ns->list);
@@ -1471,7 +1466,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 	if (!new_ns->root) {
 		up_write(&namespace_sem);
 		kfree(new_ns);
-		return NULL;
+		return ERR_PTR(-ENOMEM);;
 	}
 	spin_lock(&vfsmount_lock);
 	list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
@@ -1515,7 +1510,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 	return new_ns;
 }
 
-struct mnt_namespace *copy_mnt_ns(int flags, struct mnt_namespace *ns,
+struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
 		struct fs_struct *new_fs)
 {
 	struct mnt_namespace *new_ns;
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index d3152f8d95c..2b145de45b3 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -203,7 +203,6 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
 
 	if (pos + count > MAX_NON_LFS && !(file->f_flags&O_LARGEFILE)) {
 		if (pos >= MAX_NON_LFS) {
-			send_sig(SIGXFSZ, current, 0);
 			return -EFBIG;
 		}
 		if (count > MAX_NON_LFS - (u32)pos) {
@@ -212,7 +211,6 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
 	}
 	if (pos >= inode->i_sb->s_maxbytes) {
 		if (count || pos > inode->i_sb->s_maxbytes) {
-			send_sig(SIGXFSZ, current, 0);
 			return -EFBIG;
 		}
 	}
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index f4580b44eef..b55cb236cf7 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -6,8 +6,8 @@ obj-$(CONFIG_NFS_FS) += nfs.o
 
 nfs-y 			:= client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \
 			   pagelist.o proc.o read.o symlink.o unlink.o \
-			   write.o namespace.o
-nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o mount_clnt.o      
+			   write.o namespace.o mount_clnt.o
+nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o
 nfs-$(CONFIG_NFS_V3)	+= nfs3proc.o nfs3xdr.o
 nfs-$(CONFIG_NFS_V3_ACL)	+= nfs3acl.o
 nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 881fa490092..a49f9feff77 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -102,19 +102,10 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
 					   int nfsversion)
 {
 	struct nfs_client *clp;
-	int error;
 
 	if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
 		goto error_0;
 
-	error = rpciod_up();
-	if (error < 0) {
-		dprintk("%s: couldn't start rpciod! Error = %d\n",
-				__FUNCTION__, error);
-		goto error_1;
-	}
-	__set_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
-
 	if (nfsversion == 4) {
 		if (nfs_callback_up() < 0)
 			goto error_2;
@@ -139,8 +130,6 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
 #ifdef CONFIG_NFS_V4
 	init_rwsem(&clp->cl_sem);
 	INIT_LIST_HEAD(&clp->cl_delegations);
-	INIT_LIST_HEAD(&clp->cl_state_owners);
-	INIT_LIST_HEAD(&clp->cl_unused);
 	spin_lock_init(&clp->cl_lock);
 	INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
 	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
@@ -154,9 +143,6 @@ error_3:
 	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
 		nfs_callback_down();
 error_2:
-	rpciod_down();
-	__clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
-error_1:
 	kfree(clp);
 error_0:
 	return NULL;
@@ -167,16 +153,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
 #ifdef CONFIG_NFS_V4
 	if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
 		nfs4_kill_renewd(clp);
-	while (!list_empty(&clp->cl_unused)) {
-		struct nfs4_state_owner *sp;
-
-		sp = list_entry(clp->cl_unused.next,
-				struct nfs4_state_owner,
-				so_list);
-		list_del(&sp->so_list);
-		kfree(sp);
-	}
-	BUG_ON(!list_empty(&clp->cl_state_owners));
+	BUG_ON(!RB_EMPTY_ROOT(&clp->cl_state_owners));
 	if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
 		nfs_idmap_delete(clp);
 #endif
@@ -198,9 +175,6 @@ static void nfs_free_client(struct nfs_client *clp)
 	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
 		nfs_callback_down();
 
-	if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state))
-		rpciod_down();
-
 	kfree(clp->cl_hostname);
 	kfree(clp);
 
@@ -1232,23 +1206,9 @@ static int nfs_server_list_open(struct inode *inode, struct file *file)
  */
 static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
 {
-	struct list_head *_p;
-	loff_t pos = *_pos;
-
 	/* lock the list against modification */
 	spin_lock(&nfs_client_lock);
-
-	/* allow for the header line */
-	if (!pos)
-		return SEQ_START_TOKEN;
-	pos--;
-
-	/* find the n'th element in the list */
-	list_for_each(_p, &nfs_client_list)
-		if (!pos--)
-			break;
-
-	return _p != &nfs_client_list ? _p : NULL;
+	return seq_list_start_head(&nfs_client_list, *_pos);
 }
 
 /*
@@ -1256,14 +1216,7 @@ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
  */
 static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos)
 {
-	struct list_head *_p;
-
-	(*pos)++;
-
-	_p = v;
-	_p = (v == SEQ_START_TOKEN) ? nfs_client_list.next : _p->next;
-
-	return _p != &nfs_client_list ? _p : NULL;
+	return seq_list_next(v, &nfs_client_list, pos);
 }
 
 /*
@@ -1282,7 +1235,7 @@ static int nfs_server_list_show(struct seq_file *m, void *v)
 	struct nfs_client *clp;
 
 	/* display header on line 1 */
-	if (v == SEQ_START_TOKEN) {
+	if (v == &nfs_client_list) {
 		seq_puts(m, "NV SERVER   PORT USE HOSTNAME\n");
 		return 0;
 	}
@@ -1323,23 +1276,9 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file)
  */
 static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
 {
-	struct list_head *_p;
-	loff_t pos = *_pos;
-
 	/* lock the list against modification */
 	spin_lock(&nfs_client_lock);
-
-	/* allow for the header line */
-	if (!pos)
-		return SEQ_START_TOKEN;
-	pos--;
-
-	/* find the n'th element in the list */
-	list_for_each(_p, &nfs_volume_list)
-		if (!pos--)
-			break;
-
-	return _p != &nfs_volume_list ? _p : NULL;
+	return seq_list_start_head(&nfs_volume_list, *_pos);
 }
 
 /*
@@ -1347,14 +1286,7 @@ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
  */
 static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos)
 {
-	struct list_head *_p;
-
-	(*pos)++;
-
-	_p = v;
-	_p = (v == SEQ_START_TOKEN) ? nfs_volume_list.next : _p->next;
-
-	return _p != &nfs_volume_list ? _p : NULL;
+	return seq_list_next(v, &nfs_volume_list, pos);
 }
 
 /*
@@ -1375,7 +1307,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
 	char dev[8], fsid[17];
 
 	/* display header on line 1 */
-	if (v == SEQ_START_TOKEN) {
+	if (v == &nfs_volume_list) {
 		seq_puts(m, "NV SERVER   PORT DEV     FSID\n");
 		return 0;
 	}
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 7f37d1bea83..20ac403469a 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -27,6 +27,13 @@ static void nfs_free_delegation(struct nfs_delegation *delegation)
 	kfree(delegation);
 }
 
+static void nfs_free_delegation_callback(struct rcu_head *head)
+{
+	struct nfs_delegation *delegation = container_of(head, struct nfs_delegation, rcu);
+
+	nfs_free_delegation(delegation);
+}
+
 static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state)
 {
 	struct inode *inode = state->inode;
@@ -57,7 +64,7 @@ out_err:
 	return status;
 }
 
-static void nfs_delegation_claim_opens(struct inode *inode)
+static void nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *stateid)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_open_context *ctx;
@@ -72,9 +79,11 @@ again:
 			continue;
 		if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
 			continue;
+		if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0)
+			continue;
 		get_nfs_open_context(ctx);
 		spin_unlock(&inode->i_lock);
-		err = nfs4_open_delegation_recall(ctx->dentry, state);
+		err = nfs4_open_delegation_recall(ctx, state, stateid);
 		if (err >= 0)
 			err = nfs_delegation_claim_locks(ctx, state);
 		put_nfs_open_context(ctx);
@@ -115,10 +124,6 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 	struct nfs_delegation *delegation;
 	int status = 0;
 
-	/* Ensure we first revalidate the attributes and page cache! */
-	if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR)))
-		__nfs_revalidate_inode(NFS_SERVER(inode), inode);
-
 	delegation = kmalloc(sizeof(*delegation), GFP_KERNEL);
 	if (delegation == NULL)
 		return -ENOMEM;
@@ -131,10 +136,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 	delegation->inode = inode;
 
 	spin_lock(&clp->cl_lock);
-	if (nfsi->delegation == NULL) {
-		list_add(&delegation->super_list, &clp->cl_delegations);
-		nfsi->delegation = delegation;
+	if (rcu_dereference(nfsi->delegation) == NULL) {
+		list_add_rcu(&delegation->super_list, &clp->cl_delegations);
 		nfsi->delegation_state = delegation->type;
+		rcu_assign_pointer(nfsi->delegation, delegation);
 		delegation = NULL;
 	} else {
 		if (memcmp(&delegation->stateid, &nfsi->delegation->stateid,
@@ -145,6 +150,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 			status = -EIO;
 		}
 	}
+
+	/* Ensure we revalidate the attributes and page cache! */
+	spin_lock(&inode->i_lock);
+	nfsi->cache_validity |= NFS_INO_REVAL_FORCED;
+	spin_unlock(&inode->i_lock);
+
 	spin_unlock(&clp->cl_lock);
 	kfree(delegation);
 	return status;
@@ -155,7 +166,7 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *
 	int res = 0;
 
 	res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid);
-	nfs_free_delegation(delegation);
+	call_rcu(&delegation->rcu, nfs_free_delegation_callback);
 	return res;
 }
 
@@ -170,33 +181,55 @@ static void nfs_msync_inode(struct inode *inode)
 /*
  * Basic procedure for returning a delegation to the server
  */
-int __nfs_inode_return_delegation(struct inode *inode)
+static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation)
 {
 	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
-	struct nfs_delegation *delegation;
-	int res = 0;
 
 	nfs_msync_inode(inode);
 	down_read(&clp->cl_sem);
 	/* Guard against new delegated open calls */
 	down_write(&nfsi->rwsem);
-	spin_lock(&clp->cl_lock);
-	delegation = nfsi->delegation;
-	if (delegation != NULL) {
-		list_del_init(&delegation->super_list);
-		nfsi->delegation = NULL;
-		nfsi->delegation_state = 0;
-	}
-	spin_unlock(&clp->cl_lock);
-	nfs_delegation_claim_opens(inode);
+	nfs_delegation_claim_opens(inode, &delegation->stateid);
 	up_write(&nfsi->rwsem);
 	up_read(&clp->cl_sem);
 	nfs_msync_inode(inode);
 
-	if (delegation != NULL)
-		res = nfs_do_return_delegation(inode, delegation);
-	return res;
+	return nfs_do_return_delegation(inode, delegation);
+}
+
+static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid)
+{
+	struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation);
+
+	if (delegation == NULL)
+		goto nomatch;
+	if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data,
+				sizeof(delegation->stateid.data)) != 0)
+		goto nomatch;
+	list_del_rcu(&delegation->super_list);
+	nfsi->delegation_state = 0;
+	rcu_assign_pointer(nfsi->delegation, NULL);
+	return delegation;
+nomatch:
+	return NULL;
+}
+
+int nfs_inode_return_delegation(struct inode *inode)
+{
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_delegation *delegation;
+	int err = 0;
+
+	if (rcu_dereference(nfsi->delegation) != NULL) {
+		spin_lock(&clp->cl_lock);
+		delegation = nfs_detach_delegation_locked(nfsi, NULL);
+		spin_unlock(&clp->cl_lock);
+		if (delegation != NULL)
+			err = __nfs_inode_return_delegation(inode, delegation);
+	}
+	return err;
 }
 
 /*
@@ -211,19 +244,23 @@ void nfs_return_all_delegations(struct super_block *sb)
 	if (clp == NULL)
 		return;
 restart:
-	spin_lock(&clp->cl_lock);
-	list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
 		if (delegation->inode->i_sb != sb)
 			continue;
 		inode = igrab(delegation->inode);
 		if (inode == NULL)
 			continue;
+		spin_lock(&clp->cl_lock);
+		delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
 		spin_unlock(&clp->cl_lock);
-		nfs_inode_return_delegation(inode);
+		rcu_read_unlock();
+		if (delegation != NULL)
+			__nfs_inode_return_delegation(inode, delegation);
 		iput(inode);
 		goto restart;
 	}
-	spin_unlock(&clp->cl_lock);
+	rcu_read_unlock();
 }
 
 static int nfs_do_expire_all_delegations(void *ptr)
@@ -234,22 +271,26 @@ static int nfs_do_expire_all_delegations(void *ptr)
 
 	allow_signal(SIGKILL);
 restart:
-	spin_lock(&clp->cl_lock);
 	if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) != 0)
 		goto out;
 	if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0)
 		goto out;
-	list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
 		inode = igrab(delegation->inode);
 		if (inode == NULL)
 			continue;
+		spin_lock(&clp->cl_lock);
+		delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
 		spin_unlock(&clp->cl_lock);
-		nfs_inode_return_delegation(inode);
+		rcu_read_unlock();
+		if (delegation)
+			__nfs_inode_return_delegation(inode, delegation);
 		iput(inode);
 		goto restart;
 	}
+	rcu_read_unlock();
 out:
-	spin_unlock(&clp->cl_lock);
 	nfs_put_client(clp);
 	module_put_and_exit(0);
 }
@@ -280,17 +321,21 @@ void nfs_handle_cb_pathdown(struct nfs_client *clp)
 	if (clp == NULL)
 		return;
 restart:
-	spin_lock(&clp->cl_lock);
-	list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
 		inode = igrab(delegation->inode);
 		if (inode == NULL)
 			continue;
+		spin_lock(&clp->cl_lock);
+		delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
 		spin_unlock(&clp->cl_lock);
-		nfs_inode_return_delegation(inode);
+		rcu_read_unlock();
+		if (delegation != NULL)
+			__nfs_inode_return_delegation(inode, delegation);
 		iput(inode);
 		goto restart;
 	}
-	spin_unlock(&clp->cl_lock);
+	rcu_read_unlock();
 }
 
 struct recall_threadargs {
@@ -316,21 +361,14 @@ static int recall_thread(void *data)
 	down_read(&clp->cl_sem);
 	down_write(&nfsi->rwsem);
 	spin_lock(&clp->cl_lock);
-	delegation = nfsi->delegation;
-	if (delegation != NULL && memcmp(delegation->stateid.data,
-				args->stateid->data,
-				sizeof(delegation->stateid.data)) == 0) {
-		list_del_init(&delegation->super_list);
-		nfsi->delegation = NULL;
-		nfsi->delegation_state = 0;
+	delegation = nfs_detach_delegation_locked(nfsi, args->stateid);
+	if (delegation != NULL)
 		args->result = 0;
-	} else {
-		delegation = NULL;
+	else
 		args->result = -ENOENT;
-	}
 	spin_unlock(&clp->cl_lock);
 	complete(&args->started);
-	nfs_delegation_claim_opens(inode);
+	nfs_delegation_claim_opens(inode, args->stateid);
 	up_write(&nfsi->rwsem);
 	up_read(&clp->cl_sem);
 	nfs_msync_inode(inode);
@@ -371,14 +409,14 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs
 {
 	struct nfs_delegation *delegation;
 	struct inode *res = NULL;
-	spin_lock(&clp->cl_lock);
-	list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
 		if (nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
 			res = igrab(delegation->inode);
 			break;
 		}
 	}
-	spin_unlock(&clp->cl_lock);
+	rcu_read_unlock();
 	return res;
 }
 
@@ -388,10 +426,10 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs
 void nfs_delegation_mark_reclaim(struct nfs_client *clp)
 {
 	struct nfs_delegation *delegation;
-	spin_lock(&clp->cl_lock);
-	list_for_each_entry(delegation, &clp->cl_delegations, super_list)
+	rcu_read_lock();
+	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list)
 		delegation->flags |= NFS_DELEGATION_NEED_RECLAIM;
-	spin_unlock(&clp->cl_lock);
+	rcu_read_unlock();
 }
 
 /*
@@ -399,39 +437,35 @@ void nfs_delegation_mark_reclaim(struct nfs_client *clp)
  */
 void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
 {
-	struct nfs_delegation *delegation, *n;
-	LIST_HEAD(head);
-	spin_lock(&clp->cl_lock);
-	list_for_each_entry_safe(delegation, n, &clp->cl_delegations, super_list) {
+	struct nfs_delegation *delegation;
+restart:
+	rcu_read_lock();
+	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
 		if ((delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0)
 			continue;
-		list_move(&delegation->super_list, &head);
-		NFS_I(delegation->inode)->delegation = NULL;
-		NFS_I(delegation->inode)->delegation_state = 0;
-	}
-	spin_unlock(&clp->cl_lock);
-	while(!list_empty(&head)) {
-		delegation = list_entry(head.next, struct nfs_delegation, super_list);
-		list_del(&delegation->super_list);
-		nfs_free_delegation(delegation);
+		spin_lock(&clp->cl_lock);
+		delegation = nfs_detach_delegation_locked(NFS_I(delegation->inode), NULL);
+		spin_unlock(&clp->cl_lock);
+		rcu_read_unlock();
+		if (delegation != NULL)
+			call_rcu(&delegation->rcu, nfs_free_delegation_callback);
+		goto restart;
 	}
+	rcu_read_unlock();
 }
 
 int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
 {
-	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
-	int res = 0;
+	int ret = 0;
 
-	if (nfsi->delegation_state == 0)
-		return 0;
-	spin_lock(&clp->cl_lock);
-	delegation = nfsi->delegation;
+	rcu_read_lock();
+	delegation = rcu_dereference(nfsi->delegation);
 	if (delegation != NULL) {
 		memcpy(dst->data, delegation->stateid.data, sizeof(dst->data));
-		res = 1;
+		ret = 1;
 	}
-	spin_unlock(&clp->cl_lock);
-	return res;
+	rcu_read_unlock();
+	return ret;
 }
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 2cfd4b24c7f..5874ce7fdba 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -22,11 +22,12 @@ struct nfs_delegation {
 	long flags;
 	loff_t maxsize;
 	__u64 change_attr;
+	struct rcu_head rcu;
 };
 
 int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
 void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
-int __nfs_inode_return_delegation(struct inode *inode);
+int nfs_inode_return_delegation(struct inode *inode);
 int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
 
 struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
@@ -39,27 +40,24 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
 
 /* NFSv4 delegation-related procedures */
 int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
-int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state);
+int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid);
 int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
 int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
 
 static inline int nfs_have_delegation(struct inode *inode, int flags)
 {
+	struct nfs_delegation *delegation;
+	int ret = 0;
+
 	flags &= FMODE_READ|FMODE_WRITE;
-	smp_rmb();
-	if ((NFS_I(inode)->delegation_state & flags) == flags)
-		return 1;
-	return 0;
+	rcu_read_lock();
+	delegation = rcu_dereference(NFS_I(inode)->delegation);
+	if (delegation != NULL && (delegation->type & flags) == flags)
+		ret = 1;
+	rcu_read_unlock();
+	return ret;
 }
 
-static inline int nfs_inode_return_delegation(struct inode *inode)
-{
-	int err = 0;
-
-	if (NFS_I(inode)->delegation != NULL)
-		err = __nfs_inode_return_delegation(inode);
-	return err;
-}
 #else
 static inline int nfs_have_delegation(struct inode *inode, int flags)
 {
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index c27258b5d3e..322141f4ab4 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -897,14 +897,13 @@ int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
 	return (nd->intent.open.flags & O_EXCL) != 0;
 }
 
-static inline int nfs_reval_fsid(struct vfsmount *mnt, struct inode *dir,
-				 struct nfs_fh *fh, struct nfs_fattr *fattr)
+static inline int nfs_reval_fsid(struct inode *dir, const struct nfs_fattr *fattr)
 {
 	struct nfs_server *server = NFS_SERVER(dir);
 
 	if (!nfs_fsid_equal(&server->fsid, &fattr->fsid))
-		/* Revalidate fsid on root dir */
-		return __nfs_revalidate_inode(server, mnt->mnt_root->d_inode);
+		/* Revalidate fsid using the parent directory */
+		return __nfs_revalidate_inode(server, dir);
 	return 0;
 }
 
@@ -946,7 +945,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 		res = ERR_PTR(error);
 		goto out_unlock;
 	}
-	error = nfs_reval_fsid(nd->mnt, dir, &fhandle, &fattr);
+	error = nfs_reval_fsid(dir, &fattr);
 	if (error < 0) {
 		res = ERR_PTR(error);
 		goto out_unlock;
@@ -1244,7 +1243,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
 	attr.ia_mode = mode;
 	attr.ia_valid = ATTR_MODE;
 
-	if (nd && (nd->flags & LOOKUP_CREATE))
+	if ((nd->flags & LOOKUP_CREATE) != 0)
 		open_flags = nd->intent.open.flags;
 
 	lock_kernel();
@@ -1535,7 +1534,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
 
 	lock_kernel();
 
-	page = alloc_page(GFP_KERNEL);
+	page = alloc_page(GFP_HIGHUSER);
 	if (!page) {
 		unlock_kernel();
 		return -ENOMEM;
@@ -1744,8 +1743,8 @@ int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
 	struct nfs_inode *nfsi;
 	struct nfs_access_entry *cache;
 
-	spin_lock(&nfs_access_lru_lock);
 restart:
+	spin_lock(&nfs_access_lru_lock);
 	list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
 		struct inode *inode;
 
@@ -1770,6 +1769,7 @@ remove_lru_entry:
 			clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
 		}
 		spin_unlock(&inode->i_lock);
+		spin_unlock(&nfs_access_lru_lock);
 		iput(inode);
 		goto restart;
 	}
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 00eee87510f..a5c82b6f3b4 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -266,7 +266,7 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
 static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos)
 {
 	struct nfs_open_context *ctx = dreq->ctx;
-	struct inode *inode = ctx->dentry->d_inode;
+	struct inode *inode = ctx->path.dentry->d_inode;
 	size_t rsize = NFS_SERVER(inode)->rsize;
 	unsigned int pgbase;
 	int result;
@@ -295,9 +295,14 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
 			break;
 		}
 		if ((unsigned)result < data->npages) {
-			nfs_direct_release_pages(data->pagevec, result);
-			nfs_readdata_release(data);
-			break;
+			bytes = result * PAGE_SIZE;
+			if (bytes <= pgbase) {
+				nfs_direct_release_pages(data->pagevec, result);
+				nfs_readdata_release(data);
+				break;
+			}
+			bytes -= pgbase;
+			data->npages = result;
 		}
 
 		get_dreq(dreq);
@@ -601,7 +606,7 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
 static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync)
 {
 	struct nfs_open_context *ctx = dreq->ctx;
-	struct inode *inode = ctx->dentry->d_inode;
+	struct inode *inode = ctx->path.dentry->d_inode;
 	size_t wsize = NFS_SERVER(inode)->wsize;
 	unsigned int pgbase;
 	int result;
@@ -630,9 +635,14 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
 			break;
 		}
 		if ((unsigned)result < data->npages) {
-			nfs_direct_release_pages(data->pagevec, result);
-			nfs_writedata_release(data);
-			break;
+			bytes = result * PAGE_SIZE;
+			if (bytes <= pgbase) {
+				nfs_direct_release_pages(data->pagevec, result);
+				nfs_writedata_release(data);
+				break;
+			}
+			bytes -= pgbase;
+			data->npages = result;
 		}
 
 		get_dreq(dreq);
@@ -763,10 +773,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
 		(unsigned long) count, (long long) pos);
 
 	if (nr_segs != 1)
-		return -EINVAL;
-
-	if (count < 0)
 		goto out;
+
 	retval = -EFAULT;
 	if (!access_ok(VERIFY_WRITE, buf, count))
 		goto out;
@@ -814,7 +822,7 @@ out:
 ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 				unsigned long nr_segs, loff_t pos)
 {
-	ssize_t retval;
+	ssize_t retval = -EINVAL;
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	/* XXX: temporary */
@@ -827,7 +835,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 		(unsigned long) count, (long long) pos);
 
 	if (nr_segs != 1)
-		return -EINVAL;
+		goto out;
 
 	retval = generic_write_checks(file, &pos, &count, 0);
 	if (retval)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 9eb8eb4e4a0..8689b736fdd 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -41,7 +41,9 @@ static int nfs_file_open(struct inode *, struct file *);
 static int nfs_file_release(struct inode *, struct file *);
 static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin);
 static int  nfs_file_mmap(struct file *, struct vm_area_struct *);
-static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
+static ssize_t nfs_file_splice_read(struct file *filp, loff_t *ppos,
+					struct pipe_inode_info *pipe,
+					size_t count, unsigned int flags);
 static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov,
 				unsigned long nr_segs, loff_t pos);
 static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov,
@@ -65,7 +67,7 @@ const struct file_operations nfs_file_operations = {
 	.fsync		= nfs_fsync,
 	.lock		= nfs_lock,
 	.flock		= nfs_flock,
-	.sendfile	= nfs_file_sendfile,
+	.splice_read	= nfs_file_splice_read,
 	.check_flags	= nfs_check_flags,
 };
 
@@ -224,20 +226,21 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
 }
 
 static ssize_t
-nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count,
-		read_actor_t actor, void *target)
+nfs_file_splice_read(struct file *filp, loff_t *ppos,
+		     struct pipe_inode_info *pipe, size_t count,
+		     unsigned int flags)
 {
 	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
 	ssize_t res;
 
-	dfprintk(VFS, "nfs: sendfile(%s/%s, %lu@%Lu)\n",
+	dfprintk(VFS, "nfs: splice_read(%s/%s, %lu@%Lu)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name,
 		(unsigned long) count, (unsigned long long) *ppos);
 
 	res = nfs_revalidate_mapping(inode, filp->f_mapping);
 	if (!res)
-		res = generic_file_sendfile(filp, ppos, count, actor, target);
+		res = generic_file_splice_read(filp, ppos, pipe, count, flags);
 	return res;
 }
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index bd9f5a83659..3d9fccf4ef9 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -461,14 +461,14 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str
 
 	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
 	if (ctx != NULL) {
-		atomic_set(&ctx->count, 1);
-		ctx->dentry = dget(dentry);
-		ctx->vfsmnt = mntget(mnt);
+		ctx->path.dentry = dget(dentry);
+		ctx->path.mnt = mntget(mnt);
 		ctx->cred = get_rpccred(cred);
 		ctx->state = NULL;
 		ctx->lockowner = current->files;
 		ctx->error = 0;
 		ctx->dir_cookie = 0;
+		kref_init(&ctx->kref);
 	}
 	return ctx;
 }
@@ -476,27 +476,33 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str
 struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
 {
 	if (ctx != NULL)
-		atomic_inc(&ctx->count);
+		kref_get(&ctx->kref);
 	return ctx;
 }
 
-void put_nfs_open_context(struct nfs_open_context *ctx)
+static void nfs_free_open_context(struct kref *kref)
 {
-	if (atomic_dec_and_test(&ctx->count)) {
-		if (!list_empty(&ctx->list)) {
-			struct inode *inode = ctx->dentry->d_inode;
-			spin_lock(&inode->i_lock);
-			list_del(&ctx->list);
-			spin_unlock(&inode->i_lock);
-		}
-		if (ctx->state != NULL)
-			nfs4_close_state(ctx->state, ctx->mode);
-		if (ctx->cred != NULL)
-			put_rpccred(ctx->cred);
-		dput(ctx->dentry);
-		mntput(ctx->vfsmnt);
-		kfree(ctx);
+	struct nfs_open_context *ctx = container_of(kref,
+			struct nfs_open_context, kref);
+
+	if (!list_empty(&ctx->list)) {
+		struct inode *inode = ctx->path.dentry->d_inode;
+		spin_lock(&inode->i_lock);
+		list_del(&ctx->list);
+		spin_unlock(&inode->i_lock);
 	}
+	if (ctx->state != NULL)
+		nfs4_close_state(&ctx->path, ctx->state, ctx->mode);
+	if (ctx->cred != NULL)
+		put_rpccred(ctx->cred);
+	dput(ctx->path.dentry);
+	mntput(ctx->path.mnt);
+	kfree(ctx);
+}
+
+void put_nfs_open_context(struct nfs_open_context *ctx)
+{
+	kref_put(&ctx->kref, nfs_free_open_context);
 }
 
 /*
@@ -961,8 +967,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		goto out_changed;
 
 	server = NFS_SERVER(inode);
-	/* Update the fsid if and only if this is the root directory */
-	if (inode == inode->i_sb->s_root->d_inode
+	/* Update the fsid? */
+	if (S_ISDIR(inode->i_mode)
 			&& !nfs_fsid_equal(&server->fsid, &fattr->fsid))
 		server->fsid = fattr->fsid;
 
@@ -1066,8 +1072,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		invalid &= ~NFS_INO_INVALID_DATA;
 	if (data_stable)
 		invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE);
-	if (!nfs_have_delegation(inode, FMODE_READ))
+	if (!nfs_have_delegation(inode, FMODE_READ) ||
+			(nfsi->cache_validity & NFS_INO_REVAL_FORCED))
 		nfsi->cache_validity |= invalid;
+	nfsi->cache_validity &= ~NFS_INO_REVAL_FORCED;
 
 	return 0;
  out_changed:
@@ -1103,27 +1111,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
  */
 void nfs4_clear_inode(struct inode *inode)
 {
-	struct nfs_inode *nfsi = NFS_I(inode);
-
 	/* If we are holding a delegation, return it! */
 	nfs_inode_return_delegation(inode);
 	/* First call standard NFS clear_inode() code */
 	nfs_clear_inode(inode);
-	/* Now clear out any remaining state */
-	while (!list_empty(&nfsi->open_states)) {
-		struct nfs4_state *state;
-		
-		state = list_entry(nfsi->open_states.next,
-				struct nfs4_state,
-				inode_states);
-		dprintk("%s(%s/%Ld): found unclaimed NFSv4 state %p\n",
-				__FUNCTION__,
-				inode->i_sb->s_id,
-				(long long)NFS_FILEID(inode),
-				state);
-		BUG_ON(atomic_read(&state->count) != 1);
-		nfs4_close_state(state, state->state);
-	}
 }
 #endif
 
@@ -1165,15 +1156,11 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
 	struct nfs_inode *nfsi = (struct nfs_inode *) foo;
 
 	inode_init_once(&nfsi->vfs_inode);
-	spin_lock_init(&nfsi->req_lock);
-	INIT_LIST_HEAD(&nfsi->dirty);
-	INIT_LIST_HEAD(&nfsi->commit);
 	INIT_LIST_HEAD(&nfsi->open_files);
 	INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
 	INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
 	INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
 	atomic_set(&nfsi->data_updates, 0);
-	nfsi->ndirty = 0;
 	nfsi->ncommit = 0;
 	nfsi->npages = 0;
 	nfs4_init_once(nfsi);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ad2b40db1e6..76cf55d5710 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -183,9 +183,9 @@ unsigned long nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp)
 /*
  * Calculate the number of 512byte blocks used.
  */
-static inline unsigned long nfs_calc_block_size(u64 tsize)
+static inline blkcnt_t nfs_calc_block_size(u64 tsize)
 {
-	loff_t used = (tsize + 511) >> 9;
+	blkcnt_t used = (tsize + 511) >> 9;
 	return (used > ULONG_MAX) ? ULONG_MAX : used;
 }
 
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index ca5a266a314..8afd9f7e7a9 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -1,7 +1,5 @@
 /*
- * linux/fs/nfs/mount_clnt.c
- *
- * MOUNT client to support NFSroot.
+ * In-kernel MOUNT protocol client
  *
  * Copyright (C) 1997, Olaf Kirch <okir@monad.swb.de>
  */
@@ -18,33 +16,31 @@
 #include <linux/nfs_fs.h>
 
 #ifdef RPC_DEBUG
-# define NFSDBG_FACILITY	NFSDBG_ROOT
+# define NFSDBG_FACILITY	NFSDBG_MOUNT
 #endif
 
-/*
-#define MOUNT_PROGRAM		100005
-#define MOUNT_VERSION		1
-#define MOUNT_MNT		1
-#define MOUNT_UMNT		3
- */
-
-static struct rpc_clnt *	mnt_create(char *, struct sockaddr_in *,
-								int, int);
 static struct rpc_program	mnt_program;
 
 struct mnt_fhstatus {
-	unsigned int		status;
-	struct nfs_fh *		fh;
+	u32 status;
+	struct nfs_fh *fh;
 };
 
-/*
- * Obtain an NFS file handle for the given host and path
+/**
+ * nfs_mount - Obtain an NFS file handle for the given host and path
+ * @addr: pointer to server's address
+ * @len: size of server's address
+ * @hostname: name of server host, or NULL
+ * @path: pointer to string containing export path to mount
+ * @version: mount version to use for this request
+ * @protocol: transport protocol to use for thie request
+ * @fh: pointer to location to place returned file handle
+ *
+ * Uses default timeout parameters specified by underlying transport.
  */
-int
-nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
-		int version, int protocol)
+int nfs_mount(struct sockaddr *addr, size_t len, char *hostname, char *path,
+	      int version, int protocol, struct nfs_fh *fh)
 {
-	struct rpc_clnt		*mnt_clnt;
 	struct mnt_fhstatus	result = {
 		.fh		= fh
 	};
@@ -52,16 +48,25 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
 		.rpc_argp	= path,
 		.rpc_resp	= &result,
 	};
-	char			hostname[32];
+	struct rpc_create_args args = {
+		.protocol	= protocol,
+		.address	= addr,
+		.addrsize	= len,
+		.servername	= hostname,
+		.program	= &mnt_program,
+		.version	= version,
+		.authflavor	= RPC_AUTH_UNIX,
+		.flags		= RPC_CLNT_CREATE_INTR,
+	};
+	struct rpc_clnt		*mnt_clnt;
 	int			status;
 
-	dprintk("NFS:      nfs_mount(%08x:%s)\n",
-			(unsigned)ntohl(addr->sin_addr.s_addr), path);
+	dprintk("NFS: sending MNT request for %s:%s\n",
+		(hostname ? hostname : "server"), path);
 
-	sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr->sin_addr.s_addr));
-	mnt_clnt = mnt_create(hostname, addr, version, protocol);
+	mnt_clnt = rpc_create(&args);
 	if (IS_ERR(mnt_clnt))
-		return PTR_ERR(mnt_clnt);
+		goto out_clnt_err;
 
 	if (version == NFS_MNT3_VERSION)
 		msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT];
@@ -69,33 +74,39 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
 		msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT];
 
 	status = rpc_call_sync(mnt_clnt, &msg, 0);
-	return status < 0? status : (result.status? -EACCES : 0);
-}
+	rpc_shutdown_client(mnt_clnt);
 
-static struct rpc_clnt *
-mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version,
-		int protocol)
-{
-	struct rpc_create_args args = {
-		.protocol	= protocol,
-		.address	= (struct sockaddr *)srvaddr,
-		.addrsize	= sizeof(*srvaddr),
-		.servername	= hostname,
-		.program	= &mnt_program,
-		.version	= version,
-		.authflavor	= RPC_AUTH_UNIX,
-		.flags		= (RPC_CLNT_CREATE_ONESHOT |
-				   RPC_CLNT_CREATE_INTR),
-	};
+	if (status < 0)
+		goto out_call_err;
+	if (result.status != 0)
+		goto out_mnt_err;
+
+	dprintk("NFS: MNT request succeeded\n");
+	status = 0;
+
+out:
+	return status;
+
+out_clnt_err:
+	status = PTR_ERR(mnt_clnt);
+	dprintk("NFS: failed to create RPC client, status=%d\n", status);
+	goto out;
+
+out_call_err:
+	dprintk("NFS: failed to start MNT request, status=%d\n", status);
+	goto out;
 
-	return rpc_create(&args);
+out_mnt_err:
+	dprintk("NFS: MNT server returned result %d\n", result.status);
+	status = -EACCES;
+	goto out;
 }
 
 /*
  * XDR encode/decode functions for MOUNT
  */
-static int
-xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, const char *path)
+static int xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p,
+			      const char *path)
 {
 	p = xdr_encode_string(p, path);
 
@@ -103,8 +114,8 @@ xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, const char *path)
 	return 0;
 }
 
-static int
-xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
+static int xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p,
+			       struct mnt_fhstatus *res)
 {
 	struct nfs_fh *fh = res->fh;
 
@@ -115,8 +126,8 @@ xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
 	return 0;
 }
 
-static int
-xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
+static int xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p,
+				struct mnt_fhstatus *res)
 {
 	struct nfs_fh *fh = res->fh;
 
@@ -135,53 +146,53 @@ xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
 #define MNT_fhstatus_sz		(1 + 8)
 #define MNT_fhstatus3_sz	(1 + 16)
 
-static struct rpc_procinfo	mnt_procedures[] = {
-[MNTPROC_MNT] = {
-	  .p_proc		= MNTPROC_MNT,
-	  .p_encode		= (kxdrproc_t) xdr_encode_dirpath,	
-	  .p_decode		= (kxdrproc_t) xdr_decode_fhstatus,
-	  .p_arglen		= MNT_dirpath_sz,
-	  .p_replen		= MNT_fhstatus_sz,
-	  .p_statidx		= MNTPROC_MNT,
-	  .p_name		= "MOUNT",
+static struct rpc_procinfo mnt_procedures[] = {
+	[MNTPROC_MNT] = {
+		.p_proc		= MNTPROC_MNT,
+		.p_encode	= (kxdrproc_t) xdr_encode_dirpath,
+		.p_decode	= (kxdrproc_t) xdr_decode_fhstatus,
+		.p_arglen	= MNT_dirpath_sz,
+		.p_replen	= MNT_fhstatus_sz,
+		.p_statidx	= MNTPROC_MNT,
+		.p_name		= "MOUNT",
 	},
 };
 
 static struct rpc_procinfo mnt3_procedures[] = {
-[MOUNTPROC3_MNT] = {
-	  .p_proc		= MOUNTPROC3_MNT,
-	  .p_encode		= (kxdrproc_t) xdr_encode_dirpath,
-	  .p_decode		= (kxdrproc_t) xdr_decode_fhstatus3,
-	  .p_arglen		= MNT_dirpath_sz,
-	  .p_replen		= MNT_fhstatus3_sz,
-	  .p_statidx		= MOUNTPROC3_MNT,
-	  .p_name		= "MOUNT",
+	[MOUNTPROC3_MNT] = {
+		.p_proc		= MOUNTPROC3_MNT,
+		.p_encode	= (kxdrproc_t) xdr_encode_dirpath,
+		.p_decode	= (kxdrproc_t) xdr_decode_fhstatus3,
+		.p_arglen	= MNT_dirpath_sz,
+		.p_replen	= MNT_fhstatus3_sz,
+		.p_statidx	= MOUNTPROC3_MNT,
+		.p_name		= "MOUNT",
 	},
 };
 
 
-static struct rpc_version	mnt_version1 = {
-		.number		= 1,
-		.nrprocs 	= 2,
-		.procs 		= mnt_procedures
+static struct rpc_version mnt_version1 = {
+	.number		= 1,
+	.nrprocs	= 2,
+	.procs		= mnt_procedures,
 };
 
-static struct rpc_version       mnt_version3 = {
-		.number		= 3,
-		.nrprocs	= 2,
-		.procs		= mnt3_procedures
+static struct rpc_version mnt_version3 = {
+	.number		= 3,
+	.nrprocs	= 2,
+	.procs		= mnt3_procedures,
 };
 
-static struct rpc_version *	mnt_version[] = {
+static struct rpc_version *mnt_version[] = {
 	NULL,
 	&mnt_version1,
 	NULL,
 	&mnt_version3,
 };
 
-static struct rpc_stat		mnt_stats;
+static struct rpc_stat mnt_stats;
 
-static struct rpc_program	mnt_program = {
+static struct rpc_program mnt_program = {
 	.name		= "mount",
 	.number		= NFS_MNT_PROGRAM,
 	.nrvers		= ARRAY_SIZE(mnt_version),
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index cd3ca7b5d3d..7fcc78f2aa7 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -223,7 +223,7 @@ nfs_xdr_diropargs(struct rpc_rqst *req, __be32 *p, struct nfs_diropargs *args)
 static int
 nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
 {
-	struct rpc_auth	*auth = req->rq_task->tk_auth;
+	struct rpc_auth	*auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	unsigned int replen;
 	u32 offset = (u32)args->offset;
 	u32 count = args->count;
@@ -380,7 +380,7 @@ static int
 nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args)
 {
 	struct rpc_task	*task = req->rq_task;
-	struct rpc_auth	*auth = task->tk_auth;
+	struct rpc_auth	*auth = task->tk_msg.rpc_cred->cr_auth;
 	unsigned int replen;
 	u32 count = args->count;
 
@@ -541,7 +541,7 @@ nfs_xdr_diropres(struct rpc_rqst *req, __be32 *p, struct nfs_diropok *res)
 static int
 nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args)
 {
-	struct rpc_auth *auth = req->rq_task->tk_auth;
+	struct rpc_auth	*auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	unsigned int replen;
 
 	p = xdr_encode_fhandle(p, args->fh);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 45268d6def2..814d886b6aa 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -335,9 +335,7 @@ again:
 		 * not sure this buys us anything (and I'd have
 		 * to revamp the NFSv3 XDR code) */
 		status = nfs3_proc_setattr(dentry, &fattr, sattr);
-		if (status == 0)
-			nfs_setattr_update_inode(dentry->d_inode, sattr);
-		nfs_refresh_inode(dentry->d_inode, &fattr);
+		nfs_post_op_update_inode(dentry->d_inode, &fattr);
 		dprintk("NFS reply setattr (post-create): %d\n", status);
 	}
 	if (status != 0)
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index b51df8eb9f0..b4647a22f34 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -319,7 +319,7 @@ nfs3_xdr_accessargs(struct rpc_rqst *req, __be32 *p, struct nfs3_accessargs *arg
 static int
 nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
 {
-	struct rpc_auth	*auth = req->rq_task->tk_auth;
+	struct rpc_auth	*auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	unsigned int replen;
 	u32 count = args->count;
 
@@ -458,7 +458,7 @@ nfs3_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_linkargs *args)
 static int
 nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args)
 {
-	struct rpc_auth	*auth = req->rq_task->tk_auth;
+	struct rpc_auth	*auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	unsigned int replen;
 	u32 count = args->count;
 
@@ -643,7 +643,7 @@ static int
 nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p,
 		    struct nfs3_getaclargs *args)
 {
-	struct rpc_auth *auth = req->rq_task->tk_auth;
+	struct rpc_auth	*auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	unsigned int replen;
 
 	p = xdr_encode_fhandle(p, args->fh);
@@ -773,7 +773,7 @@ nfs3_xdr_accessres(struct rpc_rqst *req, __be32 *p, struct nfs3_accessres *res)
 static int
 nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args)
 {
-	struct rpc_auth *auth = req->rq_task->tk_auth;
+	struct rpc_auth	*auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	unsigned int replen;
 
 	p = xdr_encode_fhandle(p, args->fh);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index cf3a17eb5c0..6c028e734fe 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -70,19 +70,26 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status
 		seqid->flags |= NFS_SEQID_CONFIRMED;
 }
 
+struct nfs_unique_id {
+	struct rb_node rb_node;
+	__u64 id;
+};
+
 /*
  * NFS4 state_owners and lock_owners are simply labels for ordered
  * sequences of RPC calls. Their sole purpose is to provide once-only
  * semantics by allowing the server to identify replayed requests.
  */
 struct nfs4_state_owner {
-	spinlock_t	     so_lock;
-	struct list_head     so_list;	 /* per-clientid list of state_owners */
+	struct nfs_unique_id so_owner_id;
 	struct nfs_client    *so_client;
-	u32                  so_id;      /* 32-bit identifier, unique */
-	atomic_t	     so_count;
+	struct nfs_server    *so_server;
+	struct rb_node	     so_client_node;
 
 	struct rpc_cred	     *so_cred;	 /* Associated cred */
+
+	spinlock_t	     so_lock;
+	atomic_t	     so_count;
 	struct list_head     so_states;
 	struct list_head     so_delegations;
 	struct nfs_seqid_counter so_seqid;
@@ -108,7 +115,7 @@ struct nfs4_lock_state {
 #define NFS_LOCK_INITIALIZED 1
 	int			ls_flags;
 	struct nfs_seqid_counter	ls_seqid;
-	u32			ls_id;
+	struct nfs_unique_id	ls_id;
 	nfs4_stateid		ls_stateid;
 	atomic_t		ls_count;
 };
@@ -116,7 +123,10 @@ struct nfs4_lock_state {
 /* bits for nfs4_state->flags */
 enum {
 	LK_STATE_IN_USE,
-	NFS_DELEGATED_STATE,
+	NFS_DELEGATED_STATE,		/* Current stateid is delegation */
+	NFS_O_RDONLY_STATE,		/* OPEN stateid has read-only state */
+	NFS_O_WRONLY_STATE,		/* OPEN stateid has write-only state */
+	NFS_O_RDWR_STATE,		/* OPEN stateid has read/write state */
 };
 
 struct nfs4_state {
@@ -130,11 +140,14 @@ struct nfs4_state {
 	unsigned long flags;		/* Do we hold any locks? */
 	spinlock_t state_lock;		/* Protects the lock_states list */
 
-	nfs4_stateid stateid;
+	seqlock_t seqlock;		/* Protects the stateid/open_stateid */
+	nfs4_stateid stateid;		/* Current stateid: may be delegation */
+	nfs4_stateid open_stateid;	/* OPEN stateid */
 
-	unsigned int n_rdonly;
-	unsigned int n_wronly;
-	unsigned int n_rdwr;
+	/* The following 3 fields are protected by owner->so_lock */
+	unsigned int n_rdonly;		/* Number of read-only references */
+	unsigned int n_wronly;		/* Number of write-only references */
+	unsigned int n_rdwr;		/* Number of read/write references */
 	int state;			/* State on the server (R,W, or RW) */
 	atomic_t count;
 };
@@ -165,7 +178,7 @@ extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struc
 extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
-extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state);
+extern int nfs4_do_close(struct path *path, struct nfs4_state *state);
 extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
 extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
 extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
@@ -189,14 +202,13 @@ extern void nfs4_renew_state(struct work_struct *);
 
 /* nfs4state.c */
 struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp);
-extern u32 nfs4_alloc_lockowner_id(struct nfs_client *);
 
 extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
 extern void nfs4_put_state_owner(struct nfs4_state_owner *);
 extern void nfs4_drop_state_owner(struct nfs4_state_owner *);
 extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
 extern void nfs4_put_open_state(struct nfs4_state *);
-extern void nfs4_close_state(struct nfs4_state *, mode_t);
+extern void nfs4_close_state(struct path *, struct nfs4_state *, mode_t);
 extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t);
 extern void nfs4_schedule_state_recovery(struct nfs_client *);
 extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
@@ -222,7 +234,7 @@ extern struct svc_version nfs4_callback_version1;
 
 #else
 
-#define nfs4_close_state(a, b) do { } while (0)
+#define nfs4_close_state(a, b, c) do { } while (0)
 
 #endif /* CONFIG_NFS_V4 */
 #endif /* __LINUX_FS_NFS_NFS4_FS.H */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 648e0ac0f90..fee2da856c9 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -65,6 +65,7 @@ static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *)
 static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
 static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
 static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp);
+static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags);
 
 /* Prevent leaks of NFSv4 errors into userland */
 int nfs4_map_errors(int err)
@@ -214,27 +215,39 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
 }
 
 struct nfs4_opendata {
-	atomic_t count;
+	struct kref kref;
 	struct nfs_openargs o_arg;
 	struct nfs_openres o_res;
 	struct nfs_open_confirmargs c_arg;
 	struct nfs_open_confirmres c_res;
 	struct nfs_fattr f_attr;
 	struct nfs_fattr dir_attr;
-	struct dentry *dentry;
+	struct path path;
 	struct dentry *dir;
 	struct nfs4_state_owner *owner;
+	struct nfs4_state *state;
 	struct iattr attrs;
 	unsigned long timestamp;
+	unsigned int rpc_done : 1;
 	int rpc_status;
 	int cancelled;
 };
 
-static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
+
+static void nfs4_init_opendata_res(struct nfs4_opendata *p)
+{
+	p->o_res.f_attr = &p->f_attr;
+	p->o_res.dir_attr = &p->dir_attr;
+	p->o_res.server = p->o_arg.server;
+	nfs_fattr_init(&p->f_attr);
+	nfs_fattr_init(&p->dir_attr);
+}
+
+static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
 		struct nfs4_state_owner *sp, int flags,
 		const struct iattr *attrs)
 {
-	struct dentry *parent = dget_parent(dentry);
+	struct dentry *parent = dget_parent(path->dentry);
 	struct inode *dir = parent->d_inode;
 	struct nfs_server *server = NFS_SERVER(dir);
 	struct nfs4_opendata *p;
@@ -245,24 +258,19 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
 	p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
 	if (p->o_arg.seqid == NULL)
 		goto err_free;
-	atomic_set(&p->count, 1);
-	p->dentry = dget(dentry);
+	p->path.mnt = mntget(path->mnt);
+	p->path.dentry = dget(path->dentry);
 	p->dir = parent;
 	p->owner = sp;
 	atomic_inc(&sp->so_count);
 	p->o_arg.fh = NFS_FH(dir);
 	p->o_arg.open_flags = flags,
 	p->o_arg.clientid = server->nfs_client->cl_clientid;
-	p->o_arg.id = sp->so_id;
-	p->o_arg.name = &dentry->d_name;
+	p->o_arg.id = sp->so_owner_id.id;
+	p->o_arg.name = &p->path.dentry->d_name;
 	p->o_arg.server = server;
 	p->o_arg.bitmask = server->attr_bitmask;
 	p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
-	p->o_res.f_attr = &p->f_attr;
-	p->o_res.dir_attr = &p->dir_attr;
-	p->o_res.server = server;
-	nfs_fattr_init(&p->f_attr);
-	nfs_fattr_init(&p->dir_attr);
 	if (flags & O_EXCL) {
 		u32 *s = (u32 *) p->o_arg.u.verifier.data;
 		s[0] = jiffies;
@@ -274,6 +282,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
 	p->c_arg.fh = &p->o_res.fh;
 	p->c_arg.stateid = &p->o_res.stateid;
 	p->c_arg.seqid = p->o_arg.seqid;
+	nfs4_init_opendata_res(p);
+	kref_init(&p->kref);
 	return p;
 err_free:
 	kfree(p);
@@ -282,27 +292,25 @@ err:
 	return NULL;
 }
 
-static void nfs4_opendata_free(struct nfs4_opendata *p)
+static void nfs4_opendata_free(struct kref *kref)
 {
-	if (p != NULL && atomic_dec_and_test(&p->count)) {
-		nfs_free_seqid(p->o_arg.seqid);
-		nfs4_put_state_owner(p->owner);
-		dput(p->dir);
-		dput(p->dentry);
-		kfree(p);
-	}
+	struct nfs4_opendata *p = container_of(kref,
+			struct nfs4_opendata, kref);
+
+	nfs_free_seqid(p->o_arg.seqid);
+	if (p->state != NULL)
+		nfs4_put_open_state(p->state);
+	nfs4_put_state_owner(p->owner);
+	dput(p->dir);
+	dput(p->path.dentry);
+	mntput(p->path.mnt);
+	kfree(p);
 }
 
-/* Helper for asynchronous RPC calls */
-static int nfs4_call_async(struct rpc_clnt *clnt,
-		const struct rpc_call_ops *tk_ops, void *calldata)
+static void nfs4_opendata_put(struct nfs4_opendata *p)
 {
-	struct rpc_task *task;
-
-	if (!(task = rpc_new_task(clnt, RPC_TASK_ASYNC, tk_ops, calldata)))
-		return -ENOMEM;
-	rpc_execute(task);
-	return 0;
+	if (p != NULL)
+		kref_put(&p->kref, nfs4_opendata_free);
 }
 
 static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
@@ -316,7 +324,34 @@ static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
 	return ret;
 }
 
-static inline void update_open_stateflags(struct nfs4_state *state, mode_t open_flags)
+static int can_open_cached(struct nfs4_state *state, int mode)
+{
+	int ret = 0;
+	switch (mode & (FMODE_READ|FMODE_WRITE|O_EXCL)) {
+		case FMODE_READ:
+			ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0;
+			ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
+			break;
+		case FMODE_WRITE:
+			ret |= test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0;
+			ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
+			break;
+		case FMODE_READ|FMODE_WRITE:
+			ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
+	}
+	return ret;
+}
+
+static int can_open_delegated(struct nfs_delegation *delegation, mode_t open_flags)
+{
+	if ((delegation->type & open_flags) != open_flags)
+		return 0;
+	if (delegation->flags & NFS_DELEGATION_NEED_RECLAIM)
+		return 0;
+	return 1;
+}
+
+static void update_open_stateflags(struct nfs4_state *state, mode_t open_flags)
 {
 	switch (open_flags) {
 		case FMODE_WRITE:
@@ -328,41 +363,176 @@ static inline void update_open_stateflags(struct nfs4_state *state, mode_t open_
 		case FMODE_READ|FMODE_WRITE:
 			state->n_rdwr++;
 	}
+	nfs4_state_set_mode_locked(state, state->state | open_flags);
 }
 
-static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
+static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
 {
-	struct inode *inode = state->inode;
+	if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
+		memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data));
+	memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data));
+	switch (open_flags) {
+		case FMODE_READ:
+			set_bit(NFS_O_RDONLY_STATE, &state->flags);
+			break;
+		case FMODE_WRITE:
+			set_bit(NFS_O_WRONLY_STATE, &state->flags);
+			break;
+		case FMODE_READ|FMODE_WRITE:
+			set_bit(NFS_O_RDWR_STATE, &state->flags);
+	}
+}
+
+static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
+{
+	write_seqlock(&state->seqlock);
+	nfs_set_open_stateid_locked(state, stateid, open_flags);
+	write_sequnlock(&state->seqlock);
+}
 
+static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *deleg_stateid, int open_flags)
+{
 	open_flags &= (FMODE_READ|FMODE_WRITE);
-	/* Protect against nfs4_find_state_byowner() */
+	/*
+	 * Protect the call to nfs4_state_set_mode_locked and
+	 * serialise the stateid update
+	 */
+	write_seqlock(&state->seqlock);
+	if (deleg_stateid != NULL) {
+		memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data));
+		set_bit(NFS_DELEGATED_STATE, &state->flags);
+	}
+	if (open_stateid != NULL)
+		nfs_set_open_stateid_locked(state, open_stateid, open_flags);
+	write_sequnlock(&state->seqlock);
 	spin_lock(&state->owner->so_lock);
-	spin_lock(&inode->i_lock);
-	memcpy(&state->stateid, stateid, sizeof(state->stateid));
 	update_open_stateflags(state, open_flags);
-	nfs4_state_set_mode_locked(state, state->state | open_flags);
-	spin_unlock(&inode->i_lock);
 	spin_unlock(&state->owner->so_lock);
 }
 
+static void nfs4_return_incompatible_delegation(struct inode *inode, mode_t open_flags)
+{
+	struct nfs_delegation *delegation;
+
+	rcu_read_lock();
+	delegation = rcu_dereference(NFS_I(inode)->delegation);
+	if (delegation == NULL || (delegation->type & open_flags) == open_flags) {
+		rcu_read_unlock();
+		return;
+	}
+	rcu_read_unlock();
+	nfs_inode_return_delegation(inode);
+}
+
+static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
+{
+	struct nfs4_state *state = opendata->state;
+	struct nfs_inode *nfsi = NFS_I(state->inode);
+	struct nfs_delegation *delegation;
+	int open_mode = opendata->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL);
+	nfs4_stateid stateid;
+	int ret = -EAGAIN;
+
+	rcu_read_lock();
+	delegation = rcu_dereference(nfsi->delegation);
+	for (;;) {
+		if (can_open_cached(state, open_mode)) {
+			spin_lock(&state->owner->so_lock);
+			if (can_open_cached(state, open_mode)) {
+				update_open_stateflags(state, open_mode);
+				spin_unlock(&state->owner->so_lock);
+				rcu_read_unlock();
+				goto out_return_state;
+			}
+			spin_unlock(&state->owner->so_lock);
+		}
+		if (delegation == NULL)
+			break;
+		if (!can_open_delegated(delegation, open_mode))
+			break;
+		/* Save the delegation */
+		memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data));
+		rcu_read_unlock();
+		lock_kernel();
+		ret = _nfs4_do_access(state->inode, state->owner->so_cred, open_mode);
+		unlock_kernel();
+		if (ret != 0)
+			goto out;
+		ret = -EAGAIN;
+		rcu_read_lock();
+		delegation = rcu_dereference(nfsi->delegation);
+		/* If no delegation, try a cached open */
+		if (delegation == NULL)
+			continue;
+		/* Is the delegation still valid? */
+		if (memcmp(stateid.data, delegation->stateid.data, sizeof(stateid.data)) != 0)
+			continue;
+		rcu_read_unlock();
+		update_open_stateid(state, NULL, &stateid, open_mode);
+		goto out_return_state;
+	}
+	rcu_read_unlock();
+out:
+	return ERR_PTR(ret);
+out_return_state:
+	atomic_inc(&state->count);
+	return state;
+}
+
 static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
 {
 	struct inode *inode;
 	struct nfs4_state *state = NULL;
+	struct nfs_delegation *delegation;
+	nfs4_stateid *deleg_stateid = NULL;
+	int ret;
 
-	if (!(data->f_attr.valid & NFS_ATTR_FATTR))
+	if (!data->rpc_done) {
+		state = nfs4_try_open_cached(data);
 		goto out;
+	}
+
+	ret = -EAGAIN;
+	if (!(data->f_attr.valid & NFS_ATTR_FATTR))
+		goto err;
 	inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr);
+	ret = PTR_ERR(inode);
 	if (IS_ERR(inode))
-		goto out;
+		goto err;
+	ret = -ENOMEM;
 	state = nfs4_get_open_state(inode, data->owner);
 	if (state == NULL)
-		goto put_inode;
-	update_open_stateid(state, &data->o_res.stateid, data->o_arg.open_flags);
-put_inode:
+		goto err_put_inode;
+	if (data->o_res.delegation_type != 0) {
+		int delegation_flags = 0;
+
+		rcu_read_lock();
+		delegation = rcu_dereference(NFS_I(inode)->delegation);
+		if (delegation)
+			delegation_flags = delegation->flags;
+		rcu_read_unlock();
+		if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM))
+			nfs_inode_set_delegation(state->inode,
+					data->owner->so_cred,
+					&data->o_res);
+		else
+			nfs_inode_reclaim_delegation(state->inode,
+					data->owner->so_cred,
+					&data->o_res);
+	}
+	rcu_read_lock();
+	delegation = rcu_dereference(NFS_I(inode)->delegation);
+	if (delegation != NULL)
+		deleg_stateid = &delegation->stateid;
+	update_open_stateid(state, &data->o_res.stateid, deleg_stateid, data->o_arg.open_flags);
+	rcu_read_unlock();
 	iput(inode);
 out:
 	return state;
+err_put_inode:
+	iput(inode);
+err:
+	return ERR_PTR(ret);
 }
 
 static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state)
@@ -382,79 +552,66 @@ static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *
 	return ERR_PTR(-ENOENT);
 }
 
-static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, nfs4_stateid *stateid)
+static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, struct nfs4_state **res)
 {
+	struct nfs4_state *newstate;
 	int ret;
 
 	opendata->o_arg.open_flags = openflags;
+	memset(&opendata->o_res, 0, sizeof(opendata->o_res));
+	memset(&opendata->c_res, 0, sizeof(opendata->c_res));
+	nfs4_init_opendata_res(opendata);
 	ret = _nfs4_proc_open(opendata);
 	if (ret != 0)
 		return ret; 
-	memcpy(stateid->data, opendata->o_res.stateid.data,
-			sizeof(stateid->data));
+	newstate = nfs4_opendata_to_nfs4_state(opendata);
+	if (IS_ERR(newstate))
+		return PTR_ERR(newstate);
+	nfs4_close_state(&opendata->path, newstate, openflags);
+	*res = newstate;
 	return 0;
 }
 
 static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *state)
 {
-	nfs4_stateid stateid;
 	struct nfs4_state *newstate;
-	int mode = 0;
-	int delegation = 0;
 	int ret;
 
 	/* memory barrier prior to reading state->n_* */
+	clear_bit(NFS_DELEGATED_STATE, &state->flags);
 	smp_rmb();
 	if (state->n_rdwr != 0) {
-		ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &stateid);
+		ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate);
 		if (ret != 0)
 			return ret;
-		mode |= FMODE_READ|FMODE_WRITE;
-		if (opendata->o_res.delegation_type != 0)
-			delegation = opendata->o_res.delegation_type;
-		smp_rmb();
+		if (newstate != state)
+			return -ESTALE;
 	}
 	if (state->n_wronly != 0) {
-		ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &stateid);
+		ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate);
 		if (ret != 0)
 			return ret;
-		mode |= FMODE_WRITE;
-		if (opendata->o_res.delegation_type != 0)
-			delegation = opendata->o_res.delegation_type;
-		smp_rmb();
+		if (newstate != state)
+			return -ESTALE;
 	}
 	if (state->n_rdonly != 0) {
-		ret = nfs4_open_recover_helper(opendata, FMODE_READ, &stateid);
+		ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate);
 		if (ret != 0)
 			return ret;
-		mode |= FMODE_READ;
+		if (newstate != state)
+			return -ESTALE;
 	}
-	clear_bit(NFS_DELEGATED_STATE, &state->flags);
-	if (mode == 0)
-		return 0;
-	if (opendata->o_res.delegation_type == 0)
-		opendata->o_res.delegation_type = delegation;
-	opendata->o_arg.open_flags |= mode;
-	newstate = nfs4_opendata_to_nfs4_state(opendata);
-	if (newstate != NULL) {
-		if (opendata->o_res.delegation_type != 0) {
-			struct nfs_inode *nfsi = NFS_I(newstate->inode);
-			int delegation_flags = 0;
-			if (nfsi->delegation)
-				delegation_flags = nfsi->delegation->flags;
-			if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM))
-				nfs_inode_set_delegation(newstate->inode,
-						opendata->owner->so_cred,
-						&opendata->o_res);
-			else
-				nfs_inode_reclaim_delegation(newstate->inode,
-						opendata->owner->so_cred,
-						&opendata->o_res);
-		}
-		nfs4_close_state(newstate, opendata->o_arg.open_flags);
+	/*
+	 * We may have performed cached opens for all three recoveries.
+	 * Check if we need to update the current stateid.
+	 */
+	if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 &&
+	    memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) {
+		write_seqlock(&state->seqlock);
+		if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
+			memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data));
+		write_sequnlock(&state->seqlock);
 	}
-	if (newstate != state)
-		return -ESTALE;
 	return 0;
 }
 
@@ -462,41 +619,37 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
  * OPEN_RECLAIM:
  * 	reclaim state on the server after a reboot.
  */
-static int _nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
+static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state)
 {
-	struct nfs_delegation *delegation = NFS_I(state->inode)->delegation;
+	struct nfs_delegation *delegation;
 	struct nfs4_opendata *opendata;
 	int delegation_type = 0;
 	int status;
 
-	if (delegation != NULL) {
-		if (!(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) {
-			memcpy(&state->stateid, &delegation->stateid,
-					sizeof(state->stateid));
-			set_bit(NFS_DELEGATED_STATE, &state->flags);
-			return 0;
-		}
-		delegation_type = delegation->type;
-	}
-	opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL);
+	opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL);
 	if (opendata == NULL)
 		return -ENOMEM;
 	opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS;
 	opendata->o_arg.fh = NFS_FH(state->inode);
 	nfs_copy_fh(&opendata->o_res.fh, opendata->o_arg.fh);
+	rcu_read_lock();
+	delegation = rcu_dereference(NFS_I(state->inode)->delegation);
+	if (delegation != NULL && (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) != 0)
+		delegation_type = delegation->flags;
+	rcu_read_unlock();
 	opendata->o_arg.u.delegation_type = delegation_type;
 	status = nfs4_open_recover(opendata, state);
-	nfs4_opendata_free(opendata);
+	nfs4_opendata_put(opendata);
 	return status;
 }
 
-static int nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
+static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state)
 {
 	struct nfs_server *server = NFS_SERVER(state->inode);
 	struct nfs4_exception exception = { };
 	int err;
 	do {
-		err = _nfs4_do_open_reclaim(sp, state, dentry);
+		err = _nfs4_do_open_reclaim(ctx, state);
 		if (err != -NFS4ERR_DELAY)
 			break;
 		nfs4_handle_exception(server, err, &exception);
@@ -512,37 +665,35 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta
 	ctx = nfs4_state_find_open_context(state);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
-	ret = nfs4_do_open_reclaim(sp, state, ctx->dentry);
+	ret = nfs4_do_open_reclaim(ctx, state);
 	put_nfs_open_context(ctx);
 	return ret;
 }
 
-static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
+static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
 {
 	struct nfs4_state_owner  *sp  = state->owner;
 	struct nfs4_opendata *opendata;
 	int ret;
 
-	if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
-		return 0;
-	opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL);
+	opendata = nfs4_opendata_alloc(&ctx->path, sp, 0, NULL);
 	if (opendata == NULL)
 		return -ENOMEM;
 	opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR;
-	memcpy(opendata->o_arg.u.delegation.data, state->stateid.data,
+	memcpy(opendata->o_arg.u.delegation.data, stateid->data,
 			sizeof(opendata->o_arg.u.delegation.data));
 	ret = nfs4_open_recover(opendata, state);
-	nfs4_opendata_free(opendata);
+	nfs4_opendata_put(opendata);
 	return ret;
 }
 
-int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
+int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
 {
 	struct nfs4_exception exception = { };
-	struct nfs_server *server = NFS_SERVER(dentry->d_inode);
+	struct nfs_server *server = NFS_SERVER(state->inode);
 	int err;
 	do {
-		err = _nfs4_open_delegation_recall(dentry, state);
+		err = _nfs4_open_delegation_recall(ctx, state, stateid);
 		switch (err) {
 			case 0:
 				return err;
@@ -582,9 +733,10 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
 		memcpy(data->o_res.stateid.data, data->c_res.stateid.data,
 				sizeof(data->o_res.stateid.data));
 		renew_lease(data->o_res.server, data->timestamp);
+		data->rpc_done = 1;
 	}
-	nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid);
 	nfs_confirm_seqid(&data->owner->so_seqid, data->rpc_status);
+	nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid);
 }
 
 static void nfs4_open_confirm_release(void *calldata)
@@ -596,14 +748,14 @@ static void nfs4_open_confirm_release(void *calldata)
 	if (data->cancelled == 0)
 		goto out_free;
 	/* In case of error, no cleanup! */
-	if (data->rpc_status != 0)
+	if (!data->rpc_done)
 		goto out_free;
 	nfs_confirm_seqid(&data->owner->so_seqid, 0);
 	state = nfs4_opendata_to_nfs4_state(data);
-	if (state != NULL)
-		nfs4_close_state(state, data->o_arg.open_flags);
+	if (!IS_ERR(state))
+		nfs4_close_state(&data->path, state, data->o_arg.open_flags);
 out_free:
-	nfs4_opendata_free(data);
+	nfs4_opendata_put(data);
 }
 
 static const struct rpc_call_ops nfs4_open_confirm_ops = {
@@ -621,12 +773,9 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
 	struct rpc_task *task;
 	int status;
 
-	atomic_inc(&data->count);
-	/*
-	 * If rpc_run_task() ends up calling ->rpc_release(), we
-	 * want to ensure that it takes the 'error' code path.
-	 */
-	data->rpc_status = -ENOMEM;
+	kref_get(&data->kref);
+	data->rpc_done = 0;
+	data->rpc_status = 0;
 	task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
@@ -653,13 +802,35 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
 	
 	if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0)
 		return;
+	/*
+	 * Check if we still need to send an OPEN call, or if we can use
+	 * a delegation instead.
+	 */
+	if (data->state != NULL) {
+		struct nfs_delegation *delegation;
+
+		if (can_open_cached(data->state, data->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL)))
+			goto out_no_action;
+		rcu_read_lock();
+		delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
+		if (delegation != NULL &&
+		   (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0) {
+			rcu_read_unlock();
+			goto out_no_action;
+		}
+		rcu_read_unlock();
+	}
 	/* Update sequence id. */
-	data->o_arg.id = sp->so_id;
+	data->o_arg.id = sp->so_owner_id.id;
 	data->o_arg.clientid = sp->so_client->cl_clientid;
 	if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS)
 		msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
 	data->timestamp = jiffies;
 	rpc_call_setup(task, &msg, 0);
+	return;
+out_no_action:
+	task->tk_action = NULL;
+
 }
 
 static void nfs4_open_done(struct rpc_task *task, void *calldata)
@@ -683,8 +854,11 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata)
 				data->rpc_status = -ENOTDIR;
 		}
 		renew_lease(data->o_res.server, data->timestamp);
+		if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM))
+			nfs_confirm_seqid(&data->owner->so_seqid, 0);
 	}
 	nfs_increment_open_seqid(data->rpc_status, data->o_arg.seqid);
+	data->rpc_done = 1;
 }
 
 static void nfs4_open_release(void *calldata)
@@ -696,17 +870,17 @@ static void nfs4_open_release(void *calldata)
 	if (data->cancelled == 0)
 		goto out_free;
 	/* In case of error, no cleanup! */
-	if (data->rpc_status != 0)
+	if (data->rpc_status != 0 || !data->rpc_done)
 		goto out_free;
 	/* In case we need an open_confirm, no cleanup! */
 	if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)
 		goto out_free;
 	nfs_confirm_seqid(&data->owner->so_seqid, 0);
 	state = nfs4_opendata_to_nfs4_state(data);
-	if (state != NULL)
-		nfs4_close_state(state, data->o_arg.open_flags);
+	if (!IS_ERR(state))
+		nfs4_close_state(&data->path, state, data->o_arg.open_flags);
 out_free:
-	nfs4_opendata_free(data);
+	nfs4_opendata_put(data);
 }
 
 static const struct rpc_call_ops nfs4_open_ops = {
@@ -727,12 +901,10 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 	struct rpc_task *task;
 	int status;
 
-	atomic_inc(&data->count);
-	/*
-	 * If rpc_run_task() ends up calling ->rpc_release(), we
-	 * want to ensure that it takes the 'error' code path.
-	 */
-	data->rpc_status = -ENOMEM;
+	kref_get(&data->kref);
+	data->rpc_done = 0;
+	data->rpc_status = 0;
+	data->cancelled = 0;
 	task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
@@ -743,7 +915,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 	} else
 		status = data->rpc_status;
 	rpc_put_task(task);
-	if (status != 0)
+	if (status != 0 || !data->rpc_done)
 		return status;
 
 	if (o_arg->open_flags & O_CREAT) {
@@ -756,7 +928,6 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 		if (status != 0)
 			return status;
 	}
-	nfs_confirm_seqid(&data->owner->so_seqid, 0);
 	if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
 		return server->nfs_client->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
 	return 0;
@@ -772,6 +943,8 @@ static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openf
 		mask |= MAY_READ;
 	if (openflags & FMODE_WRITE)
 		mask |= MAY_WRITE;
+	if (openflags & FMODE_EXEC)
+		mask |= MAY_EXEC;
 	status = nfs_access_get_cached(inode, cred, &cache);
 	if (status == 0)
 		goto out;
@@ -811,43 +984,32 @@ static int nfs4_recover_expired_lease(struct nfs_server *server)
  * 	reclaim state on the server after a network partition.
  * 	Assumes caller holds the appropriate lock
  */
-static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
+static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state)
 {
-	struct inode *inode = state->inode;
-	struct nfs_delegation *delegation = NFS_I(inode)->delegation;
 	struct nfs4_opendata *opendata;
-	int openflags = state->state & (FMODE_READ|FMODE_WRITE);
 	int ret;
 
-	if (delegation != NULL && !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) {
-		ret = _nfs4_do_access(inode, sp->so_cred, openflags);
-		if (ret < 0)
-			return ret;
-		memcpy(&state->stateid, &delegation->stateid, sizeof(state->stateid));
-		set_bit(NFS_DELEGATED_STATE, &state->flags);
-		return 0;
-	}
-	opendata = nfs4_opendata_alloc(dentry, sp, openflags, NULL);
+	opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL);
 	if (opendata == NULL)
 		return -ENOMEM;
 	ret = nfs4_open_recover(opendata, state);
 	if (ret == -ESTALE) {
 		/* Invalidate the state owner so we don't ever use it again */
-		nfs4_drop_state_owner(sp);
-		d_drop(dentry);
+		nfs4_drop_state_owner(state->owner);
+		d_drop(ctx->path.dentry);
 	}
-	nfs4_opendata_free(opendata);
+	nfs4_opendata_put(opendata);
 	return ret;
 }
 
-static inline int nfs4_do_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
+static inline int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state)
 {
-	struct nfs_server *server = NFS_SERVER(dentry->d_inode);
+	struct nfs_server *server = NFS_SERVER(state->inode);
 	struct nfs4_exception exception = { };
 	int err;
 
 	do {
-		err = _nfs4_open_expired(sp, state, dentry);
+		err = _nfs4_open_expired(ctx, state);
 		if (err == -NFS4ERR_DELAY)
 			nfs4_handle_exception(server, err, &exception);
 	} while (exception.retry);
@@ -862,107 +1024,38 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
 	ctx = nfs4_state_find_open_context(state);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
-	ret = nfs4_do_open_expired(sp, state, ctx->dentry);
+	ret = nfs4_do_open_expired(ctx, state);
 	put_nfs_open_context(ctx);
 	return ret;
 }
 
 /*
- * Returns a referenced nfs4_state if there is an open delegation on the file
+ * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-*
+ * fields corresponding to attributes that were used to store the verifier.
+ * Make sure we clobber those fields in the later setattr call
  */
-static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred, struct nfs4_state **res)
-{
-	struct nfs_delegation *delegation;
-	struct nfs_server *server = NFS_SERVER(inode);
-	struct nfs_client *clp = server->nfs_client;
-	struct nfs_inode *nfsi = NFS_I(inode);
-	struct nfs4_state_owner *sp = NULL;
-	struct nfs4_state *state = NULL;
-	int open_flags = flags & (FMODE_READ|FMODE_WRITE);
-	int err;
-
-	err = -ENOMEM;
-	if (!(sp = nfs4_get_state_owner(server, cred))) {
-		dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__);
-		return err;
-	}
-	err = nfs4_recover_expired_lease(server);
-	if (err != 0)
-		goto out_put_state_owner;
-	/* Protect against reboot recovery - NOTE ORDER! */
-	down_read(&clp->cl_sem);
-	/* Protect against delegation recall */
-	down_read(&nfsi->rwsem);
-	delegation = NFS_I(inode)->delegation;
-	err = -ENOENT;
-	if (delegation == NULL || (delegation->type & open_flags) != open_flags)
-		goto out_err;
-	err = -ENOMEM;
-	state = nfs4_get_open_state(inode, sp);
-	if (state == NULL)
-		goto out_err;
-
-	err = -ENOENT;
-	if ((state->state & open_flags) == open_flags) {
-		spin_lock(&inode->i_lock);
-		update_open_stateflags(state, open_flags);
-		spin_unlock(&inode->i_lock);
-		goto out_ok;
-	} else if (state->state != 0)
-		goto out_put_open_state;
-
-	lock_kernel();
-	err = _nfs4_do_access(inode, cred, open_flags);
-	unlock_kernel();
-	if (err != 0)
-		goto out_put_open_state;
-	set_bit(NFS_DELEGATED_STATE, &state->flags);
-	update_open_stateid(state, &delegation->stateid, open_flags);
-out_ok:
-	nfs4_put_state_owner(sp);
-	up_read(&nfsi->rwsem);
-	up_read(&clp->cl_sem);
-	*res = state;
-	return 0;
-out_put_open_state:
-	nfs4_put_open_state(state);
-out_err:
-	up_read(&nfsi->rwsem);
-	up_read(&clp->cl_sem);
-	if (err != -EACCES)
-		nfs_inode_return_delegation(inode);
-out_put_state_owner:
-	nfs4_put_state_owner(sp);
-	return err;
-}
-
-static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred)
+static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct iattr *sattr)
 {
-	struct nfs4_exception exception = { };
-	struct nfs4_state *res = ERR_PTR(-EIO);
-	int err;
+	if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_ACCESS) &&
+	    !(sattr->ia_valid & ATTR_ATIME_SET))
+		sattr->ia_valid |= ATTR_ATIME;
 
-	do {
-		err = _nfs4_open_delegated(inode, flags, cred, &res);
-		if (err == 0)
-			break;
-		res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(inode),
-					err, &exception));
-	} while (exception.retry);
-	return res;
+	if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_MODIFY) &&
+	    !(sattr->ia_valid & ATTR_MTIME_SET))
+		sattr->ia_valid |= ATTR_MTIME;
 }
 
 /*
  * Returns a referenced nfs4_state
  */
-static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
+static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
 {
 	struct nfs4_state_owner  *sp;
 	struct nfs4_state     *state = NULL;
 	struct nfs_server       *server = NFS_SERVER(dir);
 	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_opendata *opendata;
-	int                     status;
+	int status;
 
 	/* Protect against reboot recovery conflicts */
 	status = -ENOMEM;
@@ -973,29 +1066,35 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
 	status = nfs4_recover_expired_lease(server);
 	if (status != 0)
 		goto err_put_state_owner;
+	if (path->dentry->d_inode != NULL)
+		nfs4_return_incompatible_delegation(path->dentry->d_inode, flags & (FMODE_READ|FMODE_WRITE));
 	down_read(&clp->cl_sem);
 	status = -ENOMEM;
-	opendata = nfs4_opendata_alloc(dentry, sp, flags, sattr);
+	opendata = nfs4_opendata_alloc(path, sp, flags, sattr);
 	if (opendata == NULL)
 		goto err_release_rwsem;
 
+	if (path->dentry->d_inode != NULL)
+		opendata->state = nfs4_get_open_state(path->dentry->d_inode, sp);
+
 	status = _nfs4_proc_open(opendata);
 	if (status != 0)
-		goto err_opendata_free;
+		goto err_opendata_put;
+
+	if (opendata->o_arg.open_flags & O_EXCL)
+		nfs4_exclusive_attrset(opendata, sattr);
 
-	status = -ENOMEM;
 	state = nfs4_opendata_to_nfs4_state(opendata);
-	if (state == NULL)
-		goto err_opendata_free;
-	if (opendata->o_res.delegation_type != 0)
-		nfs_inode_set_delegation(state->inode, cred, &opendata->o_res);
-	nfs4_opendata_free(opendata);
+	status = PTR_ERR(state);
+	if (IS_ERR(state))
+		goto err_opendata_put;
+	nfs4_opendata_put(opendata);
 	nfs4_put_state_owner(sp);
 	up_read(&clp->cl_sem);
 	*res = state;
 	return 0;
-err_opendata_free:
-	nfs4_opendata_free(opendata);
+err_opendata_put:
+	nfs4_opendata_put(opendata);
 err_release_rwsem:
 	up_read(&clp->cl_sem);
 err_put_state_owner:
@@ -1006,14 +1105,14 @@ out_err:
 }
 
 
-static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred)
+static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred)
 {
 	struct nfs4_exception exception = { };
 	struct nfs4_state *res;
 	int status;
 
 	do {
-		status = _nfs4_do_open(dir, dentry, flags, sattr, cred, &res);
+		status = _nfs4_do_open(dir, path, flags, sattr, cred, &res);
 		if (status == 0)
 			break;
 		/* NOTE: BAD_SEQID means the server and client disagree about the
@@ -1028,7 +1127,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
 		 * the user though...
 		 */
 		if (status == -NFS4ERR_BAD_SEQID) {
-			printk(KERN_WARNING "NFS: v4 server returned a bad sequence-id error!\n");
+			printk(KERN_WARNING "NFS: v4 server %s "
+					" returned a bad sequence-id error!\n",
+					NFS_SERVER(dir)->nfs_client->cl_hostname);
 			exception.retry = 1;
 			continue;
 		}
@@ -1042,6 +1143,11 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
 			exception.retry = 1;
 			continue;
 		}
+		if (status == -EAGAIN) {
+			/* We must have found a delegation */
+			exception.retry = 1;
+			continue;
+		}
 		res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir),
 					status, &exception));
 	} while (exception.retry);
@@ -1101,6 +1207,7 @@ static int nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
 }
 
 struct nfs4_closedata {
+	struct path path;
 	struct inode *inode;
 	struct nfs4_state *state;
 	struct nfs_closeargs arg;
@@ -1117,6 +1224,8 @@ static void nfs4_free_closedata(void *data)
 	nfs4_put_open_state(calldata->state);
 	nfs_free_seqid(calldata->arg.seqid);
 	nfs4_put_state_owner(sp);
+	dput(calldata->path.dentry);
+	mntput(calldata->path.mnt);
 	kfree(calldata);
 }
 
@@ -1134,8 +1243,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
 	nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid);
 	switch (task->tk_status) {
 		case 0:
-			memcpy(&state->stateid, &calldata->res.stateid,
-					sizeof(state->stateid));
+			nfs_set_open_stateid(state, &calldata->res.stateid, calldata->arg.open_flags);
 			renew_lease(server, calldata->timestamp);
 			break;
 		case -NFS4ERR_STALE_STATEID:
@@ -1160,26 +1268,30 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 		.rpc_resp = &calldata->res,
 		.rpc_cred = state->owner->so_cred,
 	};
-	int mode = 0, old_mode;
+	int clear_rd, clear_wr, clear_rdwr;
+	int mode;
 
 	if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
 		return;
-	/* Recalculate the new open mode in case someone reopened the file
-	 * while we were waiting in line to be scheduled.
-	 */
+
+	mode = FMODE_READ|FMODE_WRITE;
+	clear_rd = clear_wr = clear_rdwr = 0;
 	spin_lock(&state->owner->so_lock);
-	spin_lock(&calldata->inode->i_lock);
-	mode = old_mode = state->state;
+	/* Calculate the change in open mode */
 	if (state->n_rdwr == 0) {
-		if (state->n_rdonly == 0)
+		if (state->n_rdonly == 0) {
 			mode &= ~FMODE_READ;
-		if (state->n_wronly == 0)
+			clear_rd |= test_and_clear_bit(NFS_O_RDONLY_STATE, &state->flags);
+			clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags);
+		}
+		if (state->n_wronly == 0) {
 			mode &= ~FMODE_WRITE;
+			clear_wr |= test_and_clear_bit(NFS_O_WRONLY_STATE, &state->flags);
+			clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags);
+		}
 	}
-	nfs4_state_set_mode_locked(state, mode);
-	spin_unlock(&calldata->inode->i_lock);
 	spin_unlock(&state->owner->so_lock);
-	if (mode == old_mode || test_bit(NFS_DELEGATED_STATE, &state->flags)) {
+	if (!clear_rd && !clear_wr && !clear_rdwr) {
 		/* Note: exit _without_ calling nfs4_close_done */
 		task->tk_action = NULL;
 		return;
@@ -1209,19 +1321,21 @@ static const struct rpc_call_ops nfs4_close_ops = {
  *
  * NOTE: Caller must be holding the sp->so_owner semaphore!
  */
-int nfs4_do_close(struct inode *inode, struct nfs4_state *state) 
+int nfs4_do_close(struct path *path, struct nfs4_state *state)
 {
-	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_server *server = NFS_SERVER(state->inode);
 	struct nfs4_closedata *calldata;
+	struct nfs4_state_owner *sp = state->owner;
+	struct rpc_task *task;
 	int status = -ENOMEM;
 
 	calldata = kmalloc(sizeof(*calldata), GFP_KERNEL);
 	if (calldata == NULL)
 		goto out;
-	calldata->inode = inode;
+	calldata->inode = state->inode;
 	calldata->state = state;
-	calldata->arg.fh = NFS_FH(inode);
-	calldata->arg.stateid = &state->stateid;
+	calldata->arg.fh = NFS_FH(state->inode);
+	calldata->arg.stateid = &state->open_stateid;
 	/* Serialization for the sequence id */
 	calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
 	if (calldata->arg.seqid == NULL)
@@ -1229,36 +1343,55 @@ int nfs4_do_close(struct inode *inode, struct nfs4_state *state)
 	calldata->arg.bitmask = server->attr_bitmask;
 	calldata->res.fattr = &calldata->fattr;
 	calldata->res.server = server;
+	calldata->path.mnt = mntget(path->mnt);
+	calldata->path.dentry = dget(path->dentry);
 
-	status = nfs4_call_async(server->client, &nfs4_close_ops, calldata);
-	if (status == 0)
-		goto out;
-
-	nfs_free_seqid(calldata->arg.seqid);
+	task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_close_ops, calldata);
+	if (IS_ERR(task))
+		return PTR_ERR(task);
+	rpc_put_task(task);
+	return 0;
 out_free_calldata:
 	kfree(calldata);
 out:
+	nfs4_put_open_state(state);
+	nfs4_put_state_owner(sp);
 	return status;
 }
 
-static int nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state)
+static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state)
 {
 	struct file *filp;
+	int ret;
 
-	filp = lookup_instantiate_filp(nd, dentry, NULL);
+	/* If the open_intent is for execute, we have an extra check to make */
+	if (nd->intent.open.flags & FMODE_EXEC) {
+		ret = _nfs4_do_access(state->inode,
+				state->owner->so_cred,
+				nd->intent.open.flags);
+		if (ret < 0)
+			goto out_close;
+	}
+	filp = lookup_instantiate_filp(nd, path->dentry, NULL);
 	if (!IS_ERR(filp)) {
 		struct nfs_open_context *ctx;
 		ctx = (struct nfs_open_context *)filp->private_data;
 		ctx->state = state;
 		return 0;
 	}
-	nfs4_close_state(state, nd->intent.open.flags);
-	return PTR_ERR(filp);
+	ret = PTR_ERR(filp);
+out_close:
+	nfs4_close_state(path, state, nd->intent.open.flags);
+	return ret;
 }
 
 struct dentry *
 nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
+	struct path path = {
+		.mnt = nd->mnt,
+		.dentry = dentry,
+	};
 	struct iattr attr;
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
@@ -1277,7 +1410,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 	cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
 	if (IS_ERR(cred))
 		return (struct dentry *)cred;
-	state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
+	state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred);
 	put_rpccred(cred);
 	if (IS_ERR(state)) {
 		if (PTR_ERR(state) == -ENOENT)
@@ -1287,22 +1420,24 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 	res = d_add_unique(dentry, igrab(state->inode));
 	if (res != NULL)
 		dentry = res;
-	nfs4_intent_set_file(nd, dentry, state);
+	nfs4_intent_set_file(nd, &path, state);
 	return res;
 }
 
 int
 nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd)
 {
+	struct path path = {
+		.mnt = nd->mnt,
+		.dentry = dentry,
+	};
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
 
 	cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
 	if (IS_ERR(cred))
 		return PTR_ERR(cred);
-	state = nfs4_open_delegated(dentry->d_inode, openflags, cred);
-	if (IS_ERR(state))
-		state = nfs4_do_open(dir, dentry, openflags, NULL, cred);
+	state = nfs4_do_open(dir, &path, openflags, NULL, cred);
 	put_rpccred(cred);
 	if (IS_ERR(state)) {
 		switch (PTR_ERR(state)) {
@@ -1318,10 +1453,10 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
 		}
 	}
 	if (state->inode == dentry->d_inode) {
-		nfs4_intent_set_file(nd, dentry, state);
+		nfs4_intent_set_file(nd, &path, state);
 		return 1;
 	}
-	nfs4_close_state(state, openflags);
+	nfs4_close_state(&path, state, openflags);
 out_drop:
 	d_drop(dentry);
 	return 0;
@@ -1559,8 +1694,6 @@ static int _nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
 	dprintk("NFS call  lookupfh %s\n", name->name);
 	status = rpc_call_sync(server->client, &msg, 0);
 	dprintk("NFS reply lookupfh: %d\n", status);
-	if (status == -NFS4ERR_MOVED)
-		status = -EREMOTE;
 	return status;
 }
 
@@ -1571,10 +1704,13 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
 	struct nfs4_exception exception = { };
 	int err;
 	do {
-		err = nfs4_handle_exception(server,
-				_nfs4_proc_lookupfh(server, dirfh, name,
-						    fhandle, fattr),
-				&exception);
+		err = _nfs4_proc_lookupfh(server, dirfh, name, fhandle, fattr);
+		/* FIXME: !!!! */
+		if (err == -NFS4ERR_MOVED) {
+			err = -EREMOTE;
+			break;
+		}
+		err = nfs4_handle_exception(server, err, &exception);
 	} while (exception.retry);
 	return err;
 }
@@ -1582,28 +1718,10 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
 static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name,
 		struct nfs_fh *fhandle, struct nfs_fattr *fattr)
 {
-	int		       status;
-	struct nfs_server *server = NFS_SERVER(dir);
-	struct nfs4_lookup_arg args = {
-		.bitmask = server->attr_bitmask,
-		.dir_fh = NFS_FH(dir),
-		.name = name,
-	};
-	struct nfs4_lookup_res res = {
-		.server = server,
-		.fattr = fattr,
-		.fh = fhandle,
-	};
-	struct rpc_message msg = {
-		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
-		.rpc_argp = &args,
-		.rpc_resp = &res,
-	};
-	
-	nfs_fattr_init(fattr);
+	int status;
 	
 	dprintk("NFS call  lookup %s\n", name->name);
-	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	status = _nfs4_proc_lookupfh(NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr);
 	if (status == -NFS4ERR_MOVED)
 		status = nfs4_get_referral(dir, name, fattr, fhandle);
 	dprintk("NFS reply lookup: %d\n", status);
@@ -1752,6 +1870,10 @@ static int
 nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                  int flags, struct nameidata *nd)
 {
+	struct path path = {
+		.mnt = nd->mnt,
+		.dentry = dentry,
+	};
 	struct nfs4_state *state;
 	struct rpc_cred *cred;
 	int status = 0;
@@ -1761,7 +1883,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		status = PTR_ERR(cred);
 		goto out;
 	}
-	state = nfs4_do_open(dir, dentry, flags, sattr, cred);
+	state = nfs4_do_open(dir, &path, flags, sattr, cred);
 	put_rpccred(cred);
 	if (IS_ERR(state)) {
 		status = PTR_ERR(state);
@@ -1773,11 +1895,12 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		status = nfs4_do_setattr(state->inode, &fattr, sattr, state);
 		if (status == 0)
 			nfs_setattr_update_inode(state->inode, sattr);
+		nfs_post_op_update_inode(state->inode, &fattr);
 	}
-	if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN))
-		status = nfs4_intent_set_file(nd, dentry, state);
+	if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0)
+		status = nfs4_intent_set_file(nd, &path, state);
 	else
-		nfs4_close_state(state, flags);
+		nfs4_close_state(&path, state, flags);
 out:
 	return status;
 }
@@ -3008,7 +3131,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 	if (status != 0)
 		goto out;
 	lsp = request->fl_u.nfs4_fl.owner;
-	arg.lock_owner.id = lsp->ls_id; 
+	arg.lock_owner.id = lsp->ls_id.id;
 	status = rpc_call_sync(server->client, &msg, 0);
 	switch (status) {
 		case 0:
@@ -3152,6 +3275,11 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
 {
 	struct nfs4_unlockdata *data;
 
+	/* Ensure this is an unlock - when canceling a lock, the
+	 * canceled lock is passed in, and it won't be an unlock.
+	 */
+	fl->fl_type = F_UNLCK;
+
 	data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid);
 	if (data == NULL) {
 		nfs_free_seqid(seqid);
@@ -3222,7 +3350,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
 		goto out_free;
 	p->arg.lock_stateid = &lsp->ls_stateid;
 	p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
-	p->arg.lock_owner.id = lsp->ls_id;
+	p->arg.lock_owner.id = lsp->ls_id.id;
 	p->lsp = lsp;
 	atomic_inc(&lsp->ls_count);
 	p->ctx = get_nfs_open_context(ctx);
@@ -3285,7 +3413,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
 		memcpy(data->lsp->ls_stateid.data, data->res.stateid.data,
 					sizeof(data->lsp->ls_stateid.data));
 		data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
-		renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp);
+		renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp);
 	}
 	nfs_increment_lock_seqid(data->rpc_status, data->arg.lock_seqid);
 out:
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 8ed79d5c54f..e9662ba81d8 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -38,12 +38,14 @@
  * subsequent patch.
  */
 
+#include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/smp_lock.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_idmap.h>
 #include <linux/kthread.h>
 #include <linux/module.h>
+#include <linux/random.h>
 #include <linux/workqueue.h>
 #include <linux/bitops.h>
 
@@ -69,33 +71,14 @@ static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
 	return status;
 }
 
-u32
-nfs4_alloc_lockowner_id(struct nfs_client *clp)
-{
-	return clp->cl_lockowner_id ++;
-}
-
-static struct nfs4_state_owner *
-nfs4_client_grab_unused(struct nfs_client *clp, struct rpc_cred *cred)
-{
-	struct nfs4_state_owner *sp = NULL;
-
-	if (!list_empty(&clp->cl_unused)) {
-		sp = list_entry(clp->cl_unused.next, struct nfs4_state_owner, so_list);
-		atomic_inc(&sp->so_count);
-		sp->so_cred = cred;
-		list_move(&sp->so_list, &clp->cl_state_owners);
-		clp->cl_nunused--;
-	}
-	return sp;
-}
-
 struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
 {
 	struct nfs4_state_owner *sp;
+	struct rb_node *pos;
 	struct rpc_cred *cred = NULL;
 
-	list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
+	for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
+		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
 		if (list_empty(&sp->so_states))
 			continue;
 		cred = get_rpccred(sp->so_cred);
@@ -107,32 +90,146 @@ struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
 static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
 {
 	struct nfs4_state_owner *sp;
+	struct rb_node *pos;
 
-	if (!list_empty(&clp->cl_state_owners)) {
-		sp = list_entry(clp->cl_state_owners.next,
-				struct nfs4_state_owner, so_list);
+	pos = rb_first(&clp->cl_state_owners);
+	if (pos != NULL) {
+		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
 		return get_rpccred(sp->so_cred);
 	}
 	return NULL;
 }
 
+static void nfs_alloc_unique_id(struct rb_root *root, struct nfs_unique_id *new,
+		__u64 minval, int maxbits)
+{
+	struct rb_node **p, *parent;
+	struct nfs_unique_id *pos;
+	__u64 mask = ~0ULL;
+
+	if (maxbits < 64)
+		mask = (1ULL << maxbits) - 1ULL;
+
+	/* Ensure distribution is more or less flat */
+	get_random_bytes(&new->id, sizeof(new->id));
+	new->id &= mask;
+	if (new->id < minval)
+		new->id += minval;
+retry:
+	p = &root->rb_node;
+	parent = NULL;
+
+	while (*p != NULL) {
+		parent = *p;
+		pos = rb_entry(parent, struct nfs_unique_id, rb_node);
+
+		if (new->id < pos->id)
+			p = &(*p)->rb_left;
+		else if (new->id > pos->id)
+			p = &(*p)->rb_right;
+		else
+			goto id_exists;
+	}
+	rb_link_node(&new->rb_node, parent, p);
+	rb_insert_color(&new->rb_node, root);
+	return;
+id_exists:
+	for (;;) {
+		new->id++;
+		if (new->id < minval || (new->id & mask) != new->id) {
+			new->id = minval;
+			break;
+		}
+		parent = rb_next(parent);
+		if (parent == NULL)
+			break;
+		pos = rb_entry(parent, struct nfs_unique_id, rb_node);
+		if (new->id < pos->id)
+			break;
+	}
+	goto retry;
+}
+
+static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id)
+{
+	rb_erase(&id->rb_node, root);
+}
+
 static struct nfs4_state_owner *
-nfs4_find_state_owner(struct nfs_client *clp, struct rpc_cred *cred)
+nfs4_find_state_owner(struct nfs_server *server, struct rpc_cred *cred)
 {
+	struct nfs_client *clp = server->nfs_client;
+	struct rb_node **p = &clp->cl_state_owners.rb_node,
+		       *parent = NULL;
 	struct nfs4_state_owner *sp, *res = NULL;
 
-	list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
-		if (sp->so_cred != cred)
+	while (*p != NULL) {
+		parent = *p;
+		sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
+
+		if (server < sp->so_server) {
+			p = &parent->rb_left;
 			continue;
-		atomic_inc(&sp->so_count);
-		/* Move to the head of the list */
-		list_move(&sp->so_list, &clp->cl_state_owners);
-		res = sp;
-		break;
+		}
+		if (server > sp->so_server) {
+			p = &parent->rb_right;
+			continue;
+		}
+		if (cred < sp->so_cred)
+			p = &parent->rb_left;
+		else if (cred > sp->so_cred)
+			p = &parent->rb_right;
+		else {
+			atomic_inc(&sp->so_count);
+			res = sp;
+			break;
+		}
 	}
 	return res;
 }
 
+static struct nfs4_state_owner *
+nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new)
+{
+	struct rb_node **p = &clp->cl_state_owners.rb_node,
+		       *parent = NULL;
+	struct nfs4_state_owner *sp;
+
+	while (*p != NULL) {
+		parent = *p;
+		sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
+
+		if (new->so_server < sp->so_server) {
+			p = &parent->rb_left;
+			continue;
+		}
+		if (new->so_server > sp->so_server) {
+			p = &parent->rb_right;
+			continue;
+		}
+		if (new->so_cred < sp->so_cred)
+			p = &parent->rb_left;
+		else if (new->so_cred > sp->so_cred)
+			p = &parent->rb_right;
+		else {
+			atomic_inc(&sp->so_count);
+			return sp;
+		}
+	}
+	nfs_alloc_unique_id(&clp->cl_openowner_id, &new->so_owner_id, 1, 64);
+	rb_link_node(&new->so_client_node, parent, p);
+	rb_insert_color(&new->so_client_node, &clp->cl_state_owners);
+	return new;
+}
+
+static void
+nfs4_remove_state_owner(struct nfs_client *clp, struct nfs4_state_owner *sp)
+{
+	if (!RB_EMPTY_NODE(&sp->so_client_node))
+		rb_erase(&sp->so_client_node, &clp->cl_state_owners);
+	nfs_free_unique_id(&clp->cl_openowner_id, &sp->so_owner_id);
+}
+
 /*
  * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to
  * create a new state_owner.
@@ -160,10 +257,14 @@ nfs4_alloc_state_owner(void)
 void
 nfs4_drop_state_owner(struct nfs4_state_owner *sp)
 {
-	struct nfs_client *clp = sp->so_client;
-	spin_lock(&clp->cl_lock);
-	list_del_init(&sp->so_list);
-	spin_unlock(&clp->cl_lock);
+	if (!RB_EMPTY_NODE(&sp->so_client_node)) {
+		struct nfs_client *clp = sp->so_client;
+
+		spin_lock(&clp->cl_lock);
+		rb_erase(&sp->so_client_node, &clp->cl_state_owners);
+		RB_CLEAR_NODE(&sp->so_client_node);
+		spin_unlock(&clp->cl_lock);
+	}
 }
 
 /*
@@ -175,26 +276,25 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
 	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_state_owner *sp, *new;
 
-	get_rpccred(cred);
-	new = nfs4_alloc_state_owner();
 	spin_lock(&clp->cl_lock);
-	sp = nfs4_find_state_owner(clp, cred);
-	if (sp == NULL)
-		sp = nfs4_client_grab_unused(clp, cred);
-	if (sp == NULL && new != NULL) {
-		list_add(&new->so_list, &clp->cl_state_owners);
-		new->so_client = clp;
-		new->so_id = nfs4_alloc_lockowner_id(clp);
-		new->so_cred = cred;
-		sp = new;
-		new = NULL;
-	}
+	sp = nfs4_find_state_owner(server, cred);
 	spin_unlock(&clp->cl_lock);
-	kfree(new);
 	if (sp != NULL)
 		return sp;
-	put_rpccred(cred);
-	return NULL;
+	new = nfs4_alloc_state_owner();
+	if (new == NULL)
+		return NULL;
+	new->so_client = clp;
+	new->so_server = server;
+	new->so_cred = cred;
+	spin_lock(&clp->cl_lock);
+	sp = nfs4_insert_state_owner(clp, new);
+	spin_unlock(&clp->cl_lock);
+	if (sp == new)
+		get_rpccred(cred);
+	else
+		kfree(new);
+	return sp;
 }
 
 /*
@@ -208,18 +308,7 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
 
 	if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
 		return;
-	if (clp->cl_nunused >= OPENOWNER_POOL_SIZE)
-		goto out_free;
-	if (list_empty(&sp->so_list))
-		goto out_free;
-	list_move(&sp->so_list, &clp->cl_unused);
-	clp->cl_nunused++;
-	spin_unlock(&clp->cl_lock);
-	put_rpccred(cred);
-	cred = NULL;
-	return;
-out_free:
-	list_del(&sp->so_list);
+	nfs4_remove_state_owner(clp, sp);
 	spin_unlock(&clp->cl_lock);
 	put_rpccred(cred);
 	kfree(sp);
@@ -236,6 +325,7 @@ nfs4_alloc_open_state(void)
 	atomic_set(&state->count, 1);
 	INIT_LIST_HEAD(&state->lock_states);
 	spin_lock_init(&state->state_lock);
+	seqlock_init(&state->seqlock);
 	return state;
 }
 
@@ -263,13 +353,10 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
 	struct nfs4_state *state;
 
 	list_for_each_entry(state, &nfsi->open_states, inode_states) {
-		/* Is this in the process of being freed? */
-		if (state->state == 0)
+		if (state->owner != owner)
 			continue;
-		if (state->owner == owner) {
-			atomic_inc(&state->count);
+		if (atomic_inc_not_zero(&state->count))
 			return state;
-		}
 	}
 	return NULL;
 }
@@ -341,16 +428,15 @@ void nfs4_put_open_state(struct nfs4_state *state)
 /*
  * Close the current file.
  */
-void nfs4_close_state(struct nfs4_state *state, mode_t mode)
+void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode)
 {
-	struct inode *inode = state->inode;
 	struct nfs4_state_owner *owner = state->owner;
-	int oldstate, newstate = 0;
+	int call_close = 0;
+	int newstate;
 
 	atomic_inc(&owner->so_count);
 	/* Protect against nfs4_find_state() */
 	spin_lock(&owner->so_lock);
-	spin_lock(&inode->i_lock);
 	switch (mode & (FMODE_READ | FMODE_WRITE)) {
 		case FMODE_READ:
 			state->n_rdonly--;
@@ -361,24 +447,29 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode)
 		case FMODE_READ|FMODE_WRITE:
 			state->n_rdwr--;
 	}
-	oldstate = newstate = state->state;
+	newstate = FMODE_READ|FMODE_WRITE;
 	if (state->n_rdwr == 0) {
-		if (state->n_rdonly == 0)
+		if (state->n_rdonly == 0) {
 			newstate &= ~FMODE_READ;
-		if (state->n_wronly == 0)
+			call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags);
+			call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
+		}
+		if (state->n_wronly == 0) {
 			newstate &= ~FMODE_WRITE;
+			call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags);
+			call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
+		}
+		if (newstate == 0)
+			clear_bit(NFS_DELEGATED_STATE, &state->flags);
 	}
-	if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
-		nfs4_state_set_mode_locked(state, newstate);
-		oldstate = newstate;
-	}
-	spin_unlock(&inode->i_lock);
+	nfs4_state_set_mode_locked(state, newstate);
 	spin_unlock(&owner->so_lock);
 
-	if (oldstate != newstate && nfs4_do_close(inode, state) == 0)
-		return;
-	nfs4_put_open_state(state);
-	nfs4_put_state_owner(owner);
+	if (!call_close) {
+		nfs4_put_open_state(state);
+		nfs4_put_state_owner(owner);
+	} else
+		nfs4_do_close(path, state);
 }
 
 /*
@@ -415,12 +506,22 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
 	atomic_set(&lsp->ls_count, 1);
 	lsp->ls_owner = fl_owner;
 	spin_lock(&clp->cl_lock);
-	lsp->ls_id = nfs4_alloc_lockowner_id(clp);
+	nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64);
 	spin_unlock(&clp->cl_lock);
 	INIT_LIST_HEAD(&lsp->ls_locks);
 	return lsp;
 }
 
+static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
+{
+	struct nfs_client *clp = lsp->ls_state->owner->so_client;
+
+	spin_lock(&clp->cl_lock);
+	nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id);
+	spin_unlock(&clp->cl_lock);
+	kfree(lsp);
+}
+
 /*
  * Return a compatible lock_state. If no initialized lock_state structure
  * exists, return an uninitialized one.
@@ -450,7 +551,8 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_
 			return NULL;
 	}
 	spin_unlock(&state->state_lock);
-	kfree(new);
+	if (new != NULL)
+		nfs4_free_lock_state(new);
 	return lsp;
 }
 
@@ -471,7 +573,7 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
 	if (list_empty(&state->lock_states))
 		clear_bit(LK_STATE_IN_USE, &state->flags);
 	spin_unlock(&state->state_lock);
-	kfree(lsp);
+	nfs4_free_lock_state(lsp);
 }
 
 static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
@@ -513,8 +615,12 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
 void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner)
 {
 	struct nfs4_lock_state *lsp;
+	int seq;
 
-	memcpy(dst, &state->stateid, sizeof(*dst));
+	do {
+		seq = read_seqbegin(&state->seqlock);
+		memcpy(dst, &state->stateid, sizeof(*dst));
+	} while (read_seqretry(&state->seqlock, seq));
 	if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
 		return;
 
@@ -557,12 +663,18 @@ void nfs_free_seqid(struct nfs_seqid *seqid)
  * failed with a seqid incrementing error -
  * see comments nfs_fs.h:seqid_mutating_error()
  */
-static inline void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
+static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
 {
 	switch (status) {
 		case 0:
 			break;
 		case -NFS4ERR_BAD_SEQID:
+			if (seqid->sequence->flags & NFS_SEQID_CONFIRMED)
+				return;
+			printk(KERN_WARNING "NFS: v4 server returned a bad"
+					"sequence-id error on an"
+					"unconfirmed sequence %p!\n",
+					seqid->sequence);
 		case -NFS4ERR_STALE_CLIENTID:
 		case -NFS4ERR_STALE_STATEID:
 		case -NFS4ERR_BAD_STATEID:
@@ -586,7 +698,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
 				struct nfs4_state_owner, so_seqid);
 		nfs4_drop_state_owner(sp);
 	}
-	return nfs_increment_seqid(status, seqid);
+	nfs_increment_seqid(status, seqid);
 }
 
 /*
@@ -596,7 +708,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
  */
 void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid)
 {
-	return nfs_increment_seqid(status, seqid);
+	nfs_increment_seqid(status, seqid);
 }
 
 int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
@@ -748,15 +860,21 @@ out_err:
 static void nfs4_state_mark_reclaim(struct nfs_client *clp)
 {
 	struct nfs4_state_owner *sp;
+	struct rb_node *pos;
 	struct nfs4_state *state;
 	struct nfs4_lock_state *lock;
 
 	/* Reset all sequence ids to zero */
-	list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
+	for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
+		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
 		sp->so_seqid.counter = 0;
 		sp->so_seqid.flags = 0;
 		spin_lock(&sp->so_lock);
 		list_for_each_entry(state, &sp->so_states, open_states) {
+			clear_bit(NFS_DELEGATED_STATE, &state->flags);
+			clear_bit(NFS_O_RDONLY_STATE, &state->flags);
+			clear_bit(NFS_O_WRONLY_STATE, &state->flags);
+			clear_bit(NFS_O_RDWR_STATE, &state->flags);
 			list_for_each_entry(lock, &state->lock_states, ls_locks) {
 				lock->ls_seqid.counter = 0;
 				lock->ls_seqid.flags = 0;
@@ -771,6 +889,7 @@ static int reclaimer(void *ptr)
 {
 	struct nfs_client *clp = ptr;
 	struct nfs4_state_owner *sp;
+	struct rb_node *pos;
 	struct nfs4_state_recovery_ops *ops;
 	struct rpc_cred *cred;
 	int status = 0;
@@ -816,7 +935,8 @@ restart_loop:
 	/* Mark all delegations for reclaim */
 	nfs_delegation_mark_reclaim(clp);
 	/* Note: list is protected by exclusive lock on cl->cl_sem */
-	list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
+	for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
+		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
 		status = nfs4_reclaim_open_state(ops, sp);
 		if (status < 0) {
 			if (status == -NFS4ERR_NO_GRACE) {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 8003c91ccb9..c08738441f7 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -68,9 +68,10 @@ static int nfs4_stat_to_errno(int);
 #endif
 
 /* lock,open owner id: 
- * we currently use size 1 (u32) out of (NFS4_OPAQUE_LIMIT  >> 2)
+ * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT  >> 2)
  */
-#define owner_id_maxsz          (1 + 1)
+#define open_owner_id_maxsz	(1 + 4)
+#define lock_owner_id_maxsz	(1 + 4)
 #define compound_encode_hdr_maxsz	(3 + (NFS4_MAXTAGLEN >> 2))
 #define compound_decode_hdr_maxsz	(3 + (NFS4_MAXTAGLEN >> 2))
 #define op_encode_hdr_maxsz	(1)
@@ -87,9 +88,11 @@ static int nfs4_stat_to_errno(int);
 #define encode_getattr_maxsz    (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
 #define nfs4_name_maxsz		(1 + ((3 + NFS4_MAXNAMLEN) >> 2))
 #define nfs4_path_maxsz		(1 + ((3 + NFS4_MAXPATHLEN) >> 2))
+#define nfs4_owner_maxsz	(1 + XDR_QUADLEN(IDMAP_NAMESZ))
+#define nfs4_group_maxsz	(1 + XDR_QUADLEN(IDMAP_NAMESZ))
 /* This is based on getfattr, which uses the most attributes: */
 #define nfs4_fattr_value_maxsz	(1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \
-				3 + 3 + 3 + 2 * nfs4_name_maxsz))
+				3 + 3 + 3 + nfs4_owner_maxsz + nfs4_group_maxsz))
 #define nfs4_fattr_maxsz	(nfs4_fattr_bitmap_maxsz + \
 				nfs4_fattr_value_maxsz)
 #define decode_getattr_maxsz    (op_decode_hdr_maxsz + nfs4_fattr_maxsz)
@@ -116,8 +119,27 @@ static int nfs4_stat_to_errno(int);
 				3 + (NFS4_VERIFIER_SIZE >> 2))
 #define decode_setclientid_confirm_maxsz \
 				(op_decode_hdr_maxsz)
-#define encode_lookup_maxsz	(op_encode_hdr_maxsz + \
-				1 + ((3 + NFS4_FHSIZE) >> 2))
+#define encode_lookup_maxsz	(op_encode_hdr_maxsz + nfs4_name_maxsz)
+#define decode_lookup_maxsz	(op_decode_hdr_maxsz)
+#define encode_share_access_maxsz \
+				(2)
+#define encode_createmode_maxsz	(1 + nfs4_fattr_maxsz)
+#define encode_opentype_maxsz	(1 + encode_createmode_maxsz)
+#define encode_claim_null_maxsz	(1 + nfs4_name_maxsz)
+#define encode_open_maxsz	(op_encode_hdr_maxsz + \
+				2 + encode_share_access_maxsz + 2 + \
+				open_owner_id_maxsz + \
+				encode_opentype_maxsz + \
+				encode_claim_null_maxsz)
+#define decode_ace_maxsz	(3 + nfs4_owner_maxsz)
+#define decode_delegation_maxsz	(1 + XDR_QUADLEN(NFS4_STATEID_SIZE) + 1 + \
+				decode_ace_maxsz)
+#define decode_change_info_maxsz	(5)
+#define decode_open_maxsz	(op_decode_hdr_maxsz + \
+				XDR_QUADLEN(NFS4_STATEID_SIZE) + \
+				decode_change_info_maxsz + 1 + \
+				nfs4_fattr_bitmap_maxsz + \
+				decode_delegation_maxsz)
 #define encode_remove_maxsz	(op_encode_hdr_maxsz + \
 				nfs4_name_maxsz)
 #define encode_rename_maxsz	(op_encode_hdr_maxsz + \
@@ -134,9 +156,15 @@ static int nfs4_stat_to_errno(int);
 #define encode_create_maxsz	(op_encode_hdr_maxsz + \
 				2 + nfs4_name_maxsz + \
 				nfs4_fattr_maxsz)
-#define decode_create_maxsz	(op_decode_hdr_maxsz + 8)
+#define decode_create_maxsz	(op_decode_hdr_maxsz + \
+				decode_change_info_maxsz + \
+				nfs4_fattr_bitmap_maxsz)
 #define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4)
 #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
+#define encode_fs_locations_maxsz \
+				(encode_getattr_maxsz)
+#define decode_fs_locations_maxsz \
+				(0)
 #define NFS4_enc_compound_sz	(1024)  /* XXX: large enough? */
 #define NFS4_dec_compound_sz	(1024)  /* XXX: large enough? */
 #define NFS4_enc_read_sz	(compound_encode_hdr_maxsz + \
@@ -174,16 +202,21 @@ static int nfs4_stat_to_errno(int);
 				op_decode_hdr_maxsz + 2 + \
 				decode_getattr_maxsz)
 #define NFS4_enc_open_sz        (compound_encode_hdr_maxsz + \
-                                encode_putfh_maxsz + \
-                                op_encode_hdr_maxsz + \
-                                13 + 3 + 2 + 64 + \
-                                encode_getattr_maxsz + \
-                                encode_getfh_maxsz)
+				encode_putfh_maxsz + \
+				encode_savefh_maxsz + \
+				encode_open_maxsz + \
+				encode_getfh_maxsz + \
+				encode_getattr_maxsz + \
+				encode_restorefh_maxsz + \
+				encode_getattr_maxsz)
 #define NFS4_dec_open_sz        (compound_decode_hdr_maxsz + \
-                                decode_putfh_maxsz + \
-                                op_decode_hdr_maxsz + 4 + 5 + 2 + 3 + \
-                                decode_getattr_maxsz + \
-                                decode_getfh_maxsz)
+				decode_putfh_maxsz + \
+				decode_savefh_maxsz + \
+				decode_open_maxsz + \
+				decode_getfh_maxsz + \
+				decode_getattr_maxsz + \
+				decode_restorefh_maxsz + \
+				decode_getattr_maxsz)
 #define NFS4_enc_open_confirm_sz      \
                                 (compound_encode_hdr_maxsz + \
                                 encode_putfh_maxsz + \
@@ -193,12 +226,12 @@ static int nfs4_stat_to_errno(int);
                                         op_decode_hdr_maxsz + 4)
 #define NFS4_enc_open_noattr_sz	(compound_encode_hdr_maxsz + \
 					encode_putfh_maxsz + \
-					op_encode_hdr_maxsz + \
-					11)
+					encode_open_maxsz + \
+					encode_getattr_maxsz)
 #define NFS4_dec_open_noattr_sz	(compound_decode_hdr_maxsz + \
 					decode_putfh_maxsz + \
-					op_decode_hdr_maxsz + \
-					4 + 5 + 2 + 3)
+					decode_open_maxsz + \
+					decode_getattr_maxsz)
 #define NFS4_enc_open_downgrade_sz \
 				(compound_encode_hdr_maxsz + \
                                 encode_putfh_maxsz + \
@@ -256,19 +289,19 @@ static int nfs4_stat_to_errno(int);
 				op_encode_hdr_maxsz + \
 				1 + 1 + 2 + 2 + \
 				1 + 4 + 1 + 2 + \
-				owner_id_maxsz)
+				lock_owner_id_maxsz)
 #define NFS4_dec_lock_sz        (compound_decode_hdr_maxsz + \
 				decode_putfh_maxsz + \
 				decode_getattr_maxsz + \
 				op_decode_hdr_maxsz + \
 				2 + 2 + 1 + 2 + \
-				owner_id_maxsz)
+				lock_owner_id_maxsz)
 #define NFS4_enc_lockt_sz       (compound_encode_hdr_maxsz + \
 				encode_putfh_maxsz + \
 				encode_getattr_maxsz + \
 				op_encode_hdr_maxsz + \
 				1 + 2 + 2 + 2 + \
-				owner_id_maxsz)
+				lock_owner_id_maxsz)
 #define NFS4_dec_lockt_sz       (NFS4_dec_lock_sz)
 #define NFS4_enc_locku_sz       (compound_encode_hdr_maxsz + \
 				encode_putfh_maxsz + \
@@ -298,7 +331,7 @@ static int nfs4_stat_to_errno(int);
 				encode_getfh_maxsz)
 #define NFS4_dec_lookup_sz	(compound_decode_hdr_maxsz + \
 				decode_putfh_maxsz + \
-				op_decode_hdr_maxsz + \
+				decode_lookup_maxsz + \
 				decode_getattr_maxsz + \
 				decode_getfh_maxsz)
 #define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \
@@ -417,12 +450,13 @@ static int nfs4_stat_to_errno(int);
 #define NFS4_enc_fs_locations_sz \
 				(compound_encode_hdr_maxsz + \
 				 encode_putfh_maxsz + \
-				 encode_getattr_maxsz)
+				 encode_lookup_maxsz + \
+				 encode_fs_locations_maxsz)
 #define NFS4_dec_fs_locations_sz \
 				(compound_decode_hdr_maxsz + \
 				 decode_putfh_maxsz + \
-				 op_decode_hdr_maxsz + \
-				 nfs4_fattr_bitmap_maxsz)
+				 decode_lookup_maxsz + \
+				 decode_fs_locations_maxsz)
 
 static struct {
 	unsigned int	mode;
@@ -793,13 +827,14 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args)
 	WRITE64(nfs4_lock_length(args->fl));
 	WRITE32(args->new_lock_owner);
 	if (args->new_lock_owner){
-		RESERVE_SPACE(4+NFS4_STATEID_SIZE+20);
+		RESERVE_SPACE(4+NFS4_STATEID_SIZE+32);
 		WRITE32(args->open_seqid->sequence->counter);
 		WRITEMEM(args->open_stateid->data, NFS4_STATEID_SIZE);
 		WRITE32(args->lock_seqid->sequence->counter);
 		WRITE64(args->lock_owner.clientid);
-		WRITE32(4);
-		WRITE32(args->lock_owner.id);
+		WRITE32(16);
+		WRITEMEM("lock id:", 8);
+		WRITE64(args->lock_owner.id);
 	}
 	else {
 		RESERVE_SPACE(NFS4_STATEID_SIZE+4);
@@ -814,14 +849,15 @@ static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *arg
 {
 	__be32 *p;
 
-	RESERVE_SPACE(40);
+	RESERVE_SPACE(52);
 	WRITE32(OP_LOCKT);
 	WRITE32(nfs4_lock_type(args->fl, 0));
 	WRITE64(args->fl->fl_start);
 	WRITE64(nfs4_lock_length(args->fl));
 	WRITE64(args->lock_owner.clientid);
-	WRITE32(4);
-	WRITE32(args->lock_owner.id);
+	WRITE32(16);
+	WRITEMEM("lock id:", 8);
+	WRITE64(args->lock_owner.id);
 
 	return 0;
 }
@@ -886,10 +922,11 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
 	WRITE32(OP_OPEN);
 	WRITE32(arg->seqid->sequence->counter);
 	encode_share_access(xdr, arg->open_flags);
-	RESERVE_SPACE(16);
+	RESERVE_SPACE(28);
 	WRITE64(arg->clientid);
-	WRITE32(4);
-	WRITE32(arg->id);
+	WRITE32(16);
+	WRITEMEM("open id:", 8);
+	WRITE64(arg->id);
 }
 
 static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg)
@@ -1071,7 +1108,7 @@ static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args)
 
 static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req)
 {
-	struct rpc_auth *auth = req->rq_task->tk_auth;
+	struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	uint32_t attrs[2] = {
 		FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID,
 		FATTR4_WORD1_MOUNTED_ON_FILEID,
@@ -1117,7 +1154,7 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
 
 static int encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req)
 {
-	struct rpc_auth *auth = req->rq_task->tk_auth;
+	struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	unsigned int replen;
 	__be32 *p;
 
@@ -1735,7 +1772,7 @@ out:
  */
 static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
 {
-	struct rpc_auth	*auth = req->rq_task->tk_auth;
+	struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.nops = 2,
@@ -1795,7 +1832,7 @@ nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p,
 		struct nfs_getaclargs *args)
 {
 	struct xdr_stream xdr;
-	struct rpc_auth *auth = req->rq_task->tk_auth;
+	struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	struct compound_hdr hdr = {
 		.nops   = 2,
 	};
@@ -2030,7 +2067,7 @@ static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs
 	struct compound_hdr hdr = {
 		.nops = 3,
 	};
-	struct rpc_auth *auth = req->rq_task->tk_auth;
+	struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	int replen;
 	int status;
 
@@ -3269,7 +3306,7 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
 static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
 {
         __be32 *p;
-        uint32_t bmlen;
+	uint32_t savewords, bmlen, i;
         int status;
 
         status = decode_op_hdr(xdr, OP_OPEN);
@@ -3287,7 +3324,12 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
                 goto xdr_error;
 
         READ_BUF(bmlen << 2);
-        p += bmlen;
+	savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE);
+	for (i = 0; i < savewords; ++i)
+		READ32(res->attrset[i]);
+	for (; i < NFS4_BITMAP_SIZE; i++)
+		res->attrset[i] = 0;
+
 	return decode_delegation(xdr, res);
 xdr_error:
 	dprintk("%s: Bitmap too large! Length = %u\n", __FUNCTION__, bmlen);
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 49d1008ce1d..3490322d114 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -428,7 +428,7 @@ static int __init root_nfs_getport(int program, int version, int proto)
 	printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n",
 		program, version, NIPQUAD(servaddr));
 	set_sockaddr(&sin, servaddr, 0);
-	return rpcb_getport_external(&sin, program, version, proto);
+	return rpcb_getport_sync(&sin, program, version, proto);
 }
 
 
@@ -496,7 +496,8 @@ static int __init root_nfs_get_handle(void)
 					NFS_MNT3_VERSION : NFS_MNT_VERSION;
 
 	set_sockaddr(&sin, servaddr, htons(mount_port));
-	status = nfsroot_mount(&sin, nfs_path, &fh, version, protocol);
+	status = nfs_mount((struct sockaddr *) &sin, sizeof(sin), NULL,
+			   nfs_path, version, protocol, &fh);
 	if (status < 0)
 		printk(KERN_ERR "Root-NFS: Server returned error %d "
 				"while mounting %s\n", status, nfs_path);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index c5bb51a29e8..f56dae5216f 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -85,9 +85,8 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
 	req->wb_offset  = offset;
 	req->wb_pgbase	= offset;
 	req->wb_bytes   = count;
-	atomic_set(&req->wb_count, 1);
 	req->wb_context = get_nfs_open_context(ctx);
-
+	kref_init(&req->wb_kref);
 	return req;
 }
 
@@ -109,30 +108,31 @@ void nfs_unlock_request(struct nfs_page *req)
 }
 
 /**
- * nfs_set_page_writeback_locked - Lock a request for writeback
+ * nfs_set_page_tag_locked - Tag a request as locked
  * @req:
  */
-int nfs_set_page_writeback_locked(struct nfs_page *req)
+static int nfs_set_page_tag_locked(struct nfs_page *req)
 {
-	struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
+	struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
 
 	if (!nfs_lock_request(req))
 		return 0;
-	radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
+	radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
 	return 1;
 }
 
 /**
- * nfs_clear_page_writeback - Unlock request and wake up sleepers
+ * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers
  */
-void nfs_clear_page_writeback(struct nfs_page *req)
+void nfs_clear_page_tag_locked(struct nfs_page *req)
 {
-	struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
+	struct inode *inode = req->wb_context->path.dentry->d_inode;
+	struct nfs_inode *nfsi = NFS_I(inode);
 
 	if (req->wb_page != NULL) {
-		spin_lock(&nfsi->req_lock);
-		radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
-		spin_unlock(&nfsi->req_lock);
+		spin_lock(&inode->i_lock);
+		radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
+		spin_unlock(&inode->i_lock);
 	}
 	nfs_unlock_request(req);
 }
@@ -160,11 +160,9 @@ void nfs_clear_request(struct nfs_page *req)
  *
  * Note: Should never be called with the spinlock held!
  */
-void
-nfs_release_request(struct nfs_page *req)
+static void nfs_free_request(struct kref *kref)
 {
-	if (!atomic_dec_and_test(&req->wb_count))
-		return;
+	struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
 
 	/* Release struct file or cached credential */
 	nfs_clear_request(req);
@@ -172,6 +170,11 @@ nfs_release_request(struct nfs_page *req)
 	nfs_page_free(req);
 }
 
+void nfs_release_request(struct nfs_page *req)
+{
+	kref_put(&req->wb_kref, nfs_free_request);
+}
+
 static int nfs_wait_bit_interruptible(void *word)
 {
 	int ret = 0;
@@ -193,7 +196,7 @@ static int nfs_wait_bit_interruptible(void *word)
 int
 nfs_wait_on_request(struct nfs_page *req)
 {
-        struct rpc_clnt	*clnt = NFS_CLIENT(req->wb_context->dentry->d_inode);
+	struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->path.dentry->d_inode);
 	sigset_t oldmask;
 	int ret = 0;
 
@@ -379,20 +382,20 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
 /**
  * nfs_scan_list - Scan a list for matching requests
  * @nfsi: NFS inode
- * @head: One of the NFS inode request lists
  * @dst: Destination list
  * @idx_start: lower bound of page->index to scan
  * @npages: idx_start + npages sets the upper bound to scan.
+ * @tag: tag to scan for
  *
  * Moves elements from one of the inode request lists.
  * If the number of requests is set to 0, the entire address_space
  * starting at index idx_start, is scanned.
  * The requests are *not* checked to ensure that they form a contiguous set.
- * You must be holding the inode's req_lock when calling this function
+ * You must be holding the inode's i_lock when calling this function
  */
-int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head,
+int nfs_scan_list(struct nfs_inode *nfsi,
 		struct list_head *dst, pgoff_t idx_start,
-		unsigned int npages)
+		unsigned int npages, int tag)
 {
 	struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
 	struct nfs_page *req;
@@ -407,9 +410,9 @@ int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head,
 		idx_end = idx_start + npages - 1;
 
 	for (;;) {
-		found = radix_tree_gang_lookup(&nfsi->nfs_page_tree,
+		found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
 				(void **)&pgvec[0], idx_start,
-				NFS_SCAN_MAXENTRIES);
+				NFS_SCAN_MAXENTRIES, tag);
 		if (found <= 0)
 			break;
 		for (i = 0; i < found; i++) {
@@ -417,15 +420,18 @@ int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head,
 			if (req->wb_index > idx_end)
 				goto out;
 			idx_start = req->wb_index + 1;
-			if (req->wb_list_head != head)
-				continue;
-			if (nfs_set_page_writeback_locked(req)) {
+			if (nfs_set_page_tag_locked(req)) {
 				nfs_list_remove_request(req);
+				radix_tree_tag_clear(&nfsi->nfs_page_tree,
+						req->wb_index, tag);
 				nfs_list_add_request(req, dst);
 				res++;
+				if (res == INT_MAX)
+					goto out;
 			}
 		}
-
+		/* for latency reduction */
+		cond_resched_lock(&nfsi->vfs_inode.i_lock);
 	}
 out:
 	return res;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 7bd7cb95c03..6ae2e58ed05 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -145,8 +145,8 @@ static void nfs_readpage_release(struct nfs_page *req)
 	unlock_page(req->wb_page);
 
 	dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
-			req->wb_context->dentry->d_inode->i_sb->s_id,
-			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+			req->wb_context->path.dentry->d_inode->i_sb->s_id,
+			(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
 			req->wb_bytes,
 			(long long)req_offset(req));
 	nfs_clear_request(req);
@@ -164,7 +164,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 	int flags;
 
 	data->req	  = req;
-	data->inode	  = inode = req->wb_context->dentry->d_inode;
+	data->inode	  = inode = req->wb_context->path.dentry->d_inode;
 	data->cred	  = req->wb_context->cred;
 
 	data->args.fh     = NFS_FH(inode);
@@ -483,17 +483,19 @@ int nfs_readpage(struct file *file, struct page *page)
 	 */
 	error = nfs_wb_page(inode, page);
 	if (error)
-		goto out_error;
+		goto out_unlock;
+	if (PageUptodate(page))
+		goto out_unlock;
 
 	error = -ESTALE;
 	if (NFS_STALE(inode))
-		goto out_error;
+		goto out_unlock;
 
 	if (file == NULL) {
 		error = -EBADF;
 		ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
 		if (ctx == NULL)
-			goto out_error;
+			goto out_unlock;
 	} else
 		ctx = get_nfs_open_context((struct nfs_open_context *)
 				file->private_data);
@@ -502,8 +504,7 @@ int nfs_readpage(struct file *file, struct page *page)
 
 	put_nfs_open_context(ctx);
 	return error;
-
-out_error:
+out_unlock:
 	unlock_page(page);
 	return error;
 }
@@ -520,21 +521,32 @@ readpage_async_filler(void *data, struct page *page)
 	struct inode *inode = page->mapping->host;
 	struct nfs_page *new;
 	unsigned int len;
+	int error;
+
+	error = nfs_wb_page(inode, page);
+	if (error)
+		goto out_unlock;
+	if (PageUptodate(page))
+		goto out_unlock;
 
-	nfs_wb_page(inode, page);
 	len = nfs_page_length(page);
 	if (len == 0)
 		return nfs_return_empty_page(page);
+
 	new = nfs_create_request(desc->ctx, inode, page, 0, len);
-	if (IS_ERR(new)) {
-			SetPageError(page);
-			unlock_page(page);
-			return PTR_ERR(new);
-	}
+	if (IS_ERR(new))
+		goto out_error;
+
 	if (len < PAGE_CACHE_SIZE)
 		zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0);
 	nfs_pageio_add_request(desc->pgio, new);
 	return 0;
+out_error:
+	error = PTR_ERR(new);
+	SetPageError(page);
+out_unlock:
+	unlock_page(page);
+	return error;
 }
 
 int nfs_readpages(struct file *filp, struct address_space *mapping,
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ca20d3cc260..a2b1af89ca1 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -45,6 +45,7 @@
 #include <linux/inet.h>
 #include <linux/nfs_xdr.h>
 #include <linux/magic.h>
+#include <linux/parser.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -57,6 +58,167 @@
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 
+
+struct nfs_parsed_mount_data {
+	int			flags;
+	int			rsize, wsize;
+	int			timeo, retrans;
+	int			acregmin, acregmax,
+				acdirmin, acdirmax;
+	int			namlen;
+	unsigned int		bsize;
+	unsigned int		auth_flavor_len;
+	rpc_authflavor_t	auth_flavors[1];
+	char			*client_address;
+
+	struct {
+		struct sockaddr_in	address;
+		unsigned int		program;
+		unsigned int		version;
+		unsigned short		port;
+		int			protocol;
+	} mount_server;
+
+	struct {
+		struct sockaddr_in	address;
+		char			*hostname;
+		char			*export_path;
+		unsigned int		program;
+		int			protocol;
+	} nfs_server;
+};
+
+enum {
+	/* Mount options that take no arguments */
+	Opt_soft, Opt_hard,
+	Opt_intr, Opt_nointr,
+	Opt_posix, Opt_noposix,
+	Opt_cto, Opt_nocto,
+	Opt_ac, Opt_noac,
+	Opt_lock, Opt_nolock,
+	Opt_v2, Opt_v3,
+	Opt_udp, Opt_tcp,
+	Opt_acl, Opt_noacl,
+	Opt_rdirplus, Opt_nordirplus,
+	Opt_sharecache, Opt_nosharecache,
+
+	/* Mount options that take integer arguments */
+	Opt_port,
+	Opt_rsize, Opt_wsize, Opt_bsize,
+	Opt_timeo, Opt_retrans,
+	Opt_acregmin, Opt_acregmax,
+	Opt_acdirmin, Opt_acdirmax,
+	Opt_actimeo,
+	Opt_namelen,
+	Opt_mountport,
+	Opt_mountprog, Opt_mountvers,
+	Opt_nfsprog, Opt_nfsvers,
+
+	/* Mount options that take string arguments */
+	Opt_sec, Opt_proto, Opt_mountproto,
+	Opt_addr, Opt_mounthost, Opt_clientaddr,
+
+	/* Mount options that are ignored */
+	Opt_userspace, Opt_deprecated,
+
+	Opt_err
+};
+
+static match_table_t nfs_mount_option_tokens = {
+	{ Opt_userspace, "bg" },
+	{ Opt_userspace, "fg" },
+	{ Opt_soft, "soft" },
+	{ Opt_hard, "hard" },
+	{ Opt_intr, "intr" },
+	{ Opt_nointr, "nointr" },
+	{ Opt_posix, "posix" },
+	{ Opt_noposix, "noposix" },
+	{ Opt_cto, "cto" },
+	{ Opt_nocto, "nocto" },
+	{ Opt_ac, "ac" },
+	{ Opt_noac, "noac" },
+	{ Opt_lock, "lock" },
+	{ Opt_nolock, "nolock" },
+	{ Opt_v2, "v2" },
+	{ Opt_v3, "v3" },
+	{ Opt_udp, "udp" },
+	{ Opt_tcp, "tcp" },
+	{ Opt_acl, "acl" },
+	{ Opt_noacl, "noacl" },
+	{ Opt_rdirplus, "rdirplus" },
+	{ Opt_nordirplus, "nordirplus" },
+	{ Opt_sharecache, "sharecache" },
+	{ Opt_nosharecache, "nosharecache" },
+
+	{ Opt_port, "port=%u" },
+	{ Opt_rsize, "rsize=%u" },
+	{ Opt_wsize, "wsize=%u" },
+	{ Opt_bsize, "bsize=%u" },
+	{ Opt_timeo, "timeo=%u" },
+	{ Opt_retrans, "retrans=%u" },
+	{ Opt_acregmin, "acregmin=%u" },
+	{ Opt_acregmax, "acregmax=%u" },
+	{ Opt_acdirmin, "acdirmin=%u" },
+	{ Opt_acdirmax, "acdirmax=%u" },
+	{ Opt_actimeo, "actimeo=%u" },
+	{ Opt_userspace, "retry=%u" },
+	{ Opt_namelen, "namlen=%u" },
+	{ Opt_mountport, "mountport=%u" },
+	{ Opt_mountprog, "mountprog=%u" },
+	{ Opt_mountvers, "mountvers=%u" },
+	{ Opt_nfsprog, "nfsprog=%u" },
+	{ Opt_nfsvers, "nfsvers=%u" },
+	{ Opt_nfsvers, "vers=%u" },
+
+	{ Opt_sec, "sec=%s" },
+	{ Opt_proto, "proto=%s" },
+	{ Opt_mountproto, "mountproto=%s" },
+	{ Opt_addr, "addr=%s" },
+	{ Opt_clientaddr, "clientaddr=%s" },
+	{ Opt_mounthost, "mounthost=%s" },
+
+	{ Opt_err, NULL }
+};
+
+enum {
+	Opt_xprt_udp, Opt_xprt_tcp,
+
+	Opt_xprt_err
+};
+
+static match_table_t nfs_xprt_protocol_tokens = {
+	{ Opt_xprt_udp, "udp" },
+	{ Opt_xprt_tcp, "tcp" },
+
+	{ Opt_xprt_err, NULL }
+};
+
+enum {
+	Opt_sec_none, Opt_sec_sys,
+	Opt_sec_krb5, Opt_sec_krb5i, Opt_sec_krb5p,
+	Opt_sec_lkey, Opt_sec_lkeyi, Opt_sec_lkeyp,
+	Opt_sec_spkm, Opt_sec_spkmi, Opt_sec_spkmp,
+
+	Opt_sec_err
+};
+
+static match_table_t nfs_secflavor_tokens = {
+	{ Opt_sec_none, "none" },
+	{ Opt_sec_none, "null" },
+	{ Opt_sec_sys, "sys" },
+
+	{ Opt_sec_krb5, "krb5" },
+	{ Opt_sec_krb5i, "krb5i" },
+	{ Opt_sec_krb5p, "krb5p" },
+
+	{ Opt_sec_lkey, "lkey" },
+	{ Opt_sec_lkeyi, "lkeyi" },
+	{ Opt_sec_lkeyp, "lkeyp" },
+
+	{ Opt_sec_err, NULL }
+};
+
+
 static void nfs_umount_begin(struct vfsmount *, int);
 static int  nfs_statfs(struct dentry *, struct kstatfs *);
 static int  nfs_show_options(struct seq_file *, struct vfsmount *);
@@ -263,11 +425,11 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
 		{ RPC_AUTH_GSS_SPKM, "spkm" },
 		{ RPC_AUTH_GSS_SPKMI, "spkmi" },
 		{ RPC_AUTH_GSS_SPKMP, "spkmp" },
-		{ -1, "unknown" }
+		{ UINT_MAX, "unknown" }
 	};
 	int i;
 
-	for (i=0; sec_flavours[i].flavour != -1; i++) {
+	for (i = 0; sec_flavours[i].flavour != UINT_MAX; i++) {
 		if (sec_flavours[i].flavour == flavour)
 			break;
 	}
@@ -291,6 +453,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 		{ NFS_MOUNT_NONLM, ",nolock", "" },
 		{ NFS_MOUNT_NOACL, ",noacl", "" },
 		{ NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" },
+		{ NFS_MOUNT_UNSHARED, ",nosharecache", ""},
 		{ 0, NULL, NULL }
 	};
 	const struct proc_nfs_info *nfs_infop;
@@ -430,87 +593,641 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
  */
 static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags)
 {
+	struct nfs_server *server = NFS_SB(vfsmnt->mnt_sb);
+	struct rpc_clnt *rpc;
+
 	shrink_submounts(vfsmnt, &nfs_automount_list);
+
+	if (!(flags & MNT_FORCE))
+		return;
+	/* -EIO all pending I/O */
+	rpc = server->client_acl;
+	if (!IS_ERR(rpc))
+		rpc_killall_tasks(rpc);
+	rpc = server->client;
+	if (!IS_ERR(rpc))
+		rpc_killall_tasks(rpc);
 }
 
 /*
- * Validate the NFS2/NFS3 mount data
- * - fills in the mount root filehandle
+ * Sanity-check a server address provided by the mount command
  */
-static int nfs_validate_mount_data(struct nfs_mount_data *data,
-				   struct nfs_fh *mntfh)
+static int nfs_verify_server_address(struct sockaddr *addr)
 {
-	if (data == NULL) {
-		dprintk("%s: missing data argument\n", __FUNCTION__);
-		return -EINVAL;
+	switch (addr->sa_family) {
+	case AF_INET: {
+		struct sockaddr_in *sa = (struct sockaddr_in *) addr;
+		if (sa->sin_addr.s_addr != INADDR_ANY)
+			return 1;
+		break;
+	}
 	}
 
-	if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) {
-		dprintk("%s: bad mount version\n", __FUNCTION__);
-		return -EINVAL;
+	return 0;
+}
+
+/*
+ * Error-check and convert a string of mount options from user space into
+ * a data structure
+ */
+static int nfs_parse_mount_options(char *raw,
+				   struct nfs_parsed_mount_data *mnt)
+{
+	char *p, *string;
+
+	if (!raw) {
+		dfprintk(MOUNT, "NFS: mount options string was NULL.\n");
+		return 1;
 	}
+	dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw);
 
-	switch (data->version) {
-		case 1:
-			data->namlen = 0;
-		case 2:
-			data->bsize  = 0;
-		case 3:
-			if (data->flags & NFS_MOUNT_VER3) {
-				dprintk("%s: mount structure version %d does not support NFSv3\n",
-						__FUNCTION__,
-						data->version);
-				return -EINVAL;
+	while ((p = strsep(&raw, ",")) != NULL) {
+		substring_t args[MAX_OPT_ARGS];
+		int option, token;
+
+		if (!*p)
+			continue;
+
+		dfprintk(MOUNT, "NFS:   parsing nfs mount option '%s'\n", p);
+
+		token = match_token(p, nfs_mount_option_tokens, args);
+		switch (token) {
+		case Opt_soft:
+			mnt->flags |= NFS_MOUNT_SOFT;
+			break;
+		case Opt_hard:
+			mnt->flags &= ~NFS_MOUNT_SOFT;
+			break;
+		case Opt_intr:
+			mnt->flags |= NFS_MOUNT_INTR;
+			break;
+		case Opt_nointr:
+			mnt->flags &= ~NFS_MOUNT_INTR;
+			break;
+		case Opt_posix:
+			mnt->flags |= NFS_MOUNT_POSIX;
+			break;
+		case Opt_noposix:
+			mnt->flags &= ~NFS_MOUNT_POSIX;
+			break;
+		case Opt_cto:
+			mnt->flags &= ~NFS_MOUNT_NOCTO;
+			break;
+		case Opt_nocto:
+			mnt->flags |= NFS_MOUNT_NOCTO;
+			break;
+		case Opt_ac:
+			mnt->flags &= ~NFS_MOUNT_NOAC;
+			break;
+		case Opt_noac:
+			mnt->flags |= NFS_MOUNT_NOAC;
+			break;
+		case Opt_lock:
+			mnt->flags &= ~NFS_MOUNT_NONLM;
+			break;
+		case Opt_nolock:
+			mnt->flags |= NFS_MOUNT_NONLM;
+			break;
+		case Opt_v2:
+			mnt->flags &= ~NFS_MOUNT_VER3;
+			break;
+		case Opt_v3:
+			mnt->flags |= NFS_MOUNT_VER3;
+			break;
+		case Opt_udp:
+			mnt->flags &= ~NFS_MOUNT_TCP;
+			mnt->nfs_server.protocol = IPPROTO_UDP;
+			mnt->timeo = 7;
+			mnt->retrans = 5;
+			break;
+		case Opt_tcp:
+			mnt->flags |= NFS_MOUNT_TCP;
+			mnt->nfs_server.protocol = IPPROTO_TCP;
+			mnt->timeo = 600;
+			mnt->retrans = 2;
+			break;
+		case Opt_acl:
+			mnt->flags &= ~NFS_MOUNT_NOACL;
+			break;
+		case Opt_noacl:
+			mnt->flags |= NFS_MOUNT_NOACL;
+			break;
+		case Opt_rdirplus:
+			mnt->flags &= ~NFS_MOUNT_NORDIRPLUS;
+			break;
+		case Opt_nordirplus:
+			mnt->flags |= NFS_MOUNT_NORDIRPLUS;
+			break;
+		case Opt_sharecache:
+			mnt->flags &= ~NFS_MOUNT_UNSHARED;
+			break;
+		case Opt_nosharecache:
+			mnt->flags |= NFS_MOUNT_UNSHARED;
+			break;
+
+		case Opt_port:
+			if (match_int(args, &option))
+				return 0;
+			if (option < 0 || option > 65535)
+				return 0;
+			mnt->nfs_server.address.sin_port = htonl(option);
+			break;
+		case Opt_rsize:
+			if (match_int(args, &mnt->rsize))
+				return 0;
+			break;
+		case Opt_wsize:
+			if (match_int(args, &mnt->wsize))
+				return 0;
+			break;
+		case Opt_bsize:
+			if (match_int(args, &option))
+				return 0;
+			if (option < 0)
+				return 0;
+			mnt->bsize = option;
+			break;
+		case Opt_timeo:
+			if (match_int(args, &mnt->timeo))
+				return 0;
+			break;
+		case Opt_retrans:
+			if (match_int(args, &mnt->retrans))
+				return 0;
+			break;
+		case Opt_acregmin:
+			if (match_int(args, &mnt->acregmin))
+				return 0;
+			break;
+		case Opt_acregmax:
+			if (match_int(args, &mnt->acregmax))
+				return 0;
+			break;
+		case Opt_acdirmin:
+			if (match_int(args, &mnt->acdirmin))
+				return 0;
+			break;
+		case Opt_acdirmax:
+			if (match_int(args, &mnt->acdirmax))
+				return 0;
+			break;
+		case Opt_actimeo:
+			if (match_int(args, &option))
+				return 0;
+			if (option < 0)
+				return 0;
+			mnt->acregmin =
+			mnt->acregmax =
+			mnt->acdirmin =
+			mnt->acdirmax = option;
+			break;
+		case Opt_namelen:
+			if (match_int(args, &mnt->namlen))
+				return 0;
+			break;
+		case Opt_mountport:
+			if (match_int(args, &option))
+				return 0;
+			if (option < 0 || option > 65535)
+				return 0;
+			mnt->mount_server.port = option;
+			break;
+		case Opt_mountprog:
+			if (match_int(args, &option))
+				return 0;
+			if (option < 0)
+				return 0;
+			mnt->mount_server.program = option;
+			break;
+		case Opt_mountvers:
+			if (match_int(args, &option))
+				return 0;
+			if (option < 0)
+				return 0;
+			mnt->mount_server.version = option;
+			break;
+		case Opt_nfsprog:
+			if (match_int(args, &option))
+				return 0;
+			if (option < 0)
+				return 0;
+			mnt->nfs_server.program = option;
+			break;
+		case Opt_nfsvers:
+			if (match_int(args, &option))
+				return 0;
+			switch (option) {
+			case 2:
+				mnt->flags &= ~NFS_MOUNT_VER3;
+				break;
+			case 3:
+				mnt->flags |= NFS_MOUNT_VER3;
+				break;
+			default:
+				goto out_unrec_vers;
 			}
-			data->root.size = NFS2_FHSIZE;
-			memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
-		case 4:
-			if (data->flags & NFS_MOUNT_SECFLAVOUR) {
-				dprintk("%s: mount structure version %d does not support strong security\n",
-						__FUNCTION__,
-						data->version);
-				return -EINVAL;
+			break;
+
+		case Opt_sec:
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+			token = match_token(string, nfs_secflavor_tokens, args);
+			kfree(string);
+
+			/*
+			 * The flags setting is for v2/v3.  The flavor_len
+			 * setting is for v4.  v2/v3 also need to know the
+			 * difference between NULL and UNIX.
+			 */
+			switch (token) {
+			case Opt_sec_none:
+				mnt->flags &= ~NFS_MOUNT_SECFLAVOUR;
+				mnt->auth_flavor_len = 0;
+				mnt->auth_flavors[0] = RPC_AUTH_NULL;
+				break;
+			case Opt_sec_sys:
+				mnt->flags &= ~NFS_MOUNT_SECFLAVOUR;
+				mnt->auth_flavor_len = 0;
+				mnt->auth_flavors[0] = RPC_AUTH_UNIX;
+				break;
+			case Opt_sec_krb5:
+				mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+				mnt->auth_flavor_len = 1;
+				mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5;
+				break;
+			case Opt_sec_krb5i:
+				mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+				mnt->auth_flavor_len = 1;
+				mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I;
+				break;
+			case Opt_sec_krb5p:
+				mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+				mnt->auth_flavor_len = 1;
+				mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P;
+				break;
+			case Opt_sec_lkey:
+				mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+				mnt->auth_flavor_len = 1;
+				mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY;
+				break;
+			case Opt_sec_lkeyi:
+				mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+				mnt->auth_flavor_len = 1;
+				mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI;
+				break;
+			case Opt_sec_lkeyp:
+				mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+				mnt->auth_flavor_len = 1;
+				mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP;
+				break;
+			case Opt_sec_spkm:
+				mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+				mnt->auth_flavor_len = 1;
+				mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM;
+				break;
+			case Opt_sec_spkmi:
+				mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+				mnt->auth_flavor_len = 1;
+				mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI;
+				break;
+			case Opt_sec_spkmp:
+				mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+				mnt->auth_flavor_len = 1;
+				mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP;
+				break;
+			default:
+				goto out_unrec_sec;
 			}
-		case 5:
-			memset(data->context, 0, sizeof(data->context));
-	}
+			break;
+		case Opt_proto:
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+			token = match_token(string,
+					    nfs_xprt_protocol_tokens, args);
+			kfree(string);
+
+			switch (token) {
+			case Opt_udp:
+				mnt->flags &= ~NFS_MOUNT_TCP;
+				mnt->nfs_server.protocol = IPPROTO_UDP;
+				mnt->timeo = 7;
+				mnt->retrans = 5;
+				break;
+			case Opt_tcp:
+				mnt->flags |= NFS_MOUNT_TCP;
+				mnt->nfs_server.protocol = IPPROTO_TCP;
+				mnt->timeo = 600;
+				mnt->retrans = 2;
+				break;
+			default:
+				goto out_unrec_xprt;
+			}
+			break;
+		case Opt_mountproto:
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+			token = match_token(string,
+					    nfs_xprt_protocol_tokens, args);
+			kfree(string);
+
+			switch (token) {
+			case Opt_udp:
+				mnt->mount_server.protocol = IPPROTO_UDP;
+				break;
+			case Opt_tcp:
+				mnt->mount_server.protocol = IPPROTO_TCP;
+				break;
+			default:
+				goto out_unrec_xprt;
+			}
+			break;
+		case Opt_addr:
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+			mnt->nfs_server.address.sin_family = AF_INET;
+			mnt->nfs_server.address.sin_addr.s_addr =
+							in_aton(string);
+			kfree(string);
+			break;
+		case Opt_clientaddr:
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+			mnt->client_address = string;
+			break;
+		case Opt_mounthost:
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+			mnt->mount_server.address.sin_family = AF_INET;
+			mnt->mount_server.address.sin_addr.s_addr =
+							in_aton(string);
+			kfree(string);
+			break;
 
-	/* Set the pseudoflavor */
-	if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
-		data->pseudoflavor = RPC_AUTH_UNIX;
+		case Opt_userspace:
+		case Opt_deprecated:
+			break;
 
-#ifndef CONFIG_NFS_V3
-	/* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */
-	if (data->flags & NFS_MOUNT_VER3) {
-		dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__);
-		return -EPROTONOSUPPORT;
+		default:
+			goto out_unknown;
+		}
 	}
-#endif /* CONFIG_NFS_V3 */
 
-	/* We now require that the mount process passes the remote address */
-	if (data->addr.sin_addr.s_addr == INADDR_ANY) {
-		dprintk("%s: mount program didn't pass remote address!\n",
-			__FUNCTION__);
-		return -EINVAL;
+	return 1;
+
+out_nomem:
+	printk(KERN_INFO "NFS: not enough memory to parse option\n");
+	return 0;
+
+out_unrec_vers:
+	printk(KERN_INFO "NFS: unrecognized NFS version number\n");
+	return 0;
+
+out_unrec_xprt:
+	printk(KERN_INFO "NFS: unrecognized transport protocol\n");
+	return 0;
+
+out_unrec_sec:
+	printk(KERN_INFO "NFS: unrecognized security flavor\n");
+	return 0;
+
+out_unknown:
+	printk(KERN_INFO "NFS: unknown mount option: %s\n", p);
+	return 0;
+}
+
+/*
+ * Use the remote server's MOUNT service to request the NFS file handle
+ * corresponding to the provided path.
+ */
+static int nfs_try_mount(struct nfs_parsed_mount_data *args,
+			 struct nfs_fh *root_fh)
+{
+	struct sockaddr_in sin;
+	int status;
+
+	if (args->mount_server.version == 0) {
+		if (args->flags & NFS_MOUNT_VER3)
+			args->mount_server.version = NFS_MNT3_VERSION;
+		else
+			args->mount_server.version = NFS_MNT_VERSION;
 	}
 
-	/* Prepare the root filehandle */
-	if (data->flags & NFS_MOUNT_VER3)
-		mntfh->size = data->root.size;
+	/*
+	 * Construct the mount server's address.
+	 */
+	if (args->mount_server.address.sin_addr.s_addr != INADDR_ANY)
+		sin = args->mount_server.address;
 	else
-		mntfh->size = NFS2_FHSIZE;
+		sin = args->nfs_server.address;
+	if (args->mount_server.port == 0) {
+		status = rpcb_getport_sync(&sin,
+					   args->mount_server.program,
+					   args->mount_server.version,
+					   args->mount_server.protocol);
+		if (status < 0)
+			goto out_err;
+		sin.sin_port = htons(status);
+	} else
+		sin.sin_port = htons(args->mount_server.port);
+
+	/*
+	 * Now ask the mount server to map our export path
+	 * to a file handle.
+	 */
+	status = nfs_mount((struct sockaddr *) &sin,
+			   sizeof(sin),
+			   args->nfs_server.hostname,
+			   args->nfs_server.export_path,
+			   args->mount_server.version,
+			   args->mount_server.protocol,
+			   root_fh);
+	if (status < 0)
+		goto out_err;
+
+	return status;
 
-	if (mntfh->size > sizeof(mntfh->data)) {
-		dprintk("%s: invalid root filehandle\n", __FUNCTION__);
-		return -EINVAL;
+out_err:
+	dfprintk(MOUNT, "NFS: unable to contact server on host "
+		 NIPQUAD_FMT "\n", NIPQUAD(sin.sin_addr.s_addr));
+	return status;
+}
+
+/*
+ * Validate the NFS2/NFS3 mount data
+ * - fills in the mount root filehandle
+ *
+ * For option strings, user space handles the following behaviors:
+ *
+ * + DNS: mapping server host name to IP address ("addr=" option)
+ *
+ * + failure mode: how to behave if a mount request can't be handled
+ *   immediately ("fg/bg" option)
+ *
+ * + retry: how often to retry a mount request ("retry=" option)
+ *
+ * + breaking back: trying proto=udp after proto=tcp, v2 after v3,
+ *   mountproto=tcp after mountproto=udp, and so on
+ *
+ * XXX: as far as I can tell, changing the NFS program number is not
+ *      supported in the NFS client.
+ */
+static int nfs_validate_mount_data(struct nfs_mount_data **options,
+				   struct nfs_fh *mntfh,
+				   const char *dev_name)
+{
+	struct nfs_mount_data *data = *options;
+
+	if (data == NULL)
+		goto out_no_data;
+
+	switch (data->version) {
+	case 1:
+		data->namlen = 0;
+	case 2:
+		data->bsize = 0;
+	case 3:
+		if (data->flags & NFS_MOUNT_VER3)
+			goto out_no_v3;
+		data->root.size = NFS2_FHSIZE;
+		memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
+	case 4:
+		if (data->flags & NFS_MOUNT_SECFLAVOUR)
+			goto out_no_sec;
+	case 5:
+		memset(data->context, 0, sizeof(data->context));
+	case 6:
+		if (data->flags & NFS_MOUNT_VER3)
+			mntfh->size = data->root.size;
+		else
+			mntfh->size = NFS2_FHSIZE;
+
+		if (mntfh->size > sizeof(mntfh->data))
+			goto out_invalid_fh;
+
+		memcpy(mntfh->data, data->root.data, mntfh->size);
+		if (mntfh->size < sizeof(mntfh->data))
+			memset(mntfh->data + mntfh->size, 0,
+			       sizeof(mntfh->data) - mntfh->size);
+		break;
+	default: {
+		unsigned int len;
+		char *c;
+		int status;
+		struct nfs_parsed_mount_data args = {
+			.flags		= (NFS_MOUNT_VER3 | NFS_MOUNT_TCP),
+			.rsize		= NFS_MAX_FILE_IO_SIZE,
+			.wsize		= NFS_MAX_FILE_IO_SIZE,
+			.timeo		= 600,
+			.retrans	= 2,
+			.acregmin	= 3,
+			.acregmax	= 60,
+			.acdirmin	= 30,
+			.acdirmax	= 60,
+			.mount_server.protocol = IPPROTO_UDP,
+			.mount_server.program = NFS_MNT_PROGRAM,
+			.nfs_server.protocol = IPPROTO_TCP,
+			.nfs_server.program = NFS_PROGRAM,
+		};
+
+		if (nfs_parse_mount_options((char *) *options, &args) == 0)
+			return -EINVAL;
+
+		data = kzalloc(sizeof(*data), GFP_KERNEL);
+		if (data == NULL)
+			return -ENOMEM;
+
+		/*
+		 * NB: after this point, caller will free "data"
+		 * if we return an error
+		 */
+		*options = data;
+
+		c = strchr(dev_name, ':');
+		if (c == NULL)
+			return -EINVAL;
+		len = c - dev_name - 1;
+		if (len > sizeof(data->hostname))
+			return -EINVAL;
+		strncpy(data->hostname, dev_name, len);
+		args.nfs_server.hostname = data->hostname;
+
+		c++;
+		if (strlen(c) > NFS_MAXPATHLEN)
+			return -EINVAL;
+		args.nfs_server.export_path = c;
+
+		status = nfs_try_mount(&args, mntfh);
+		if (status)
+			return -EINVAL;
+
+		/*
+		 * Translate to nfs_mount_data, which nfs_fill_super
+		 * can deal with.
+		 */
+		data->version		= 6;
+		data->flags		= args.flags;
+		data->rsize		= args.rsize;
+		data->wsize		= args.wsize;
+		data->timeo		= args.timeo;
+		data->retrans		= args.retrans;
+		data->acregmin		= args.acregmin;
+		data->acregmax		= args.acregmax;
+		data->acdirmin		= args.acdirmin;
+		data->acdirmax		= args.acdirmax;
+		data->addr		= args.nfs_server.address;
+		data->namlen		= args.namlen;
+		data->bsize		= args.bsize;
+		data->pseudoflavor	= args.auth_flavors[0];
+
+		break;
+		}
 	}
 
-	memcpy(mntfh->data, data->root.data, mntfh->size);
-	if (mntfh->size < sizeof(mntfh->data))
-		memset(mntfh->data + mntfh->size, 0,
-		       sizeof(mntfh->data) - mntfh->size);
+	if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
+		data->pseudoflavor = RPC_AUTH_UNIX;
+
+#ifndef CONFIG_NFS_V3
+	if (data->flags & NFS_MOUNT_VER3)
+		goto out_v3_not_compiled;
+#endif /* !CONFIG_NFS_V3 */
+
+	if (!nfs_verify_server_address((struct sockaddr *) &data->addr))
+		goto out_no_address;
 
 	return 0;
+
+out_no_data:
+	dfprintk(MOUNT, "NFS: mount program didn't pass any mount data\n");
+	return -EINVAL;
+
+out_no_v3:
+	dfprintk(MOUNT, "NFS: nfs_mount_data version %d does not support v3\n",
+		 data->version);
+	return -EINVAL;
+
+out_no_sec:
+	dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n");
+	return -EINVAL;
+
+#ifndef CONFIG_NFS_V3
+out_v3_not_compiled:
+	dfprintk(MOUNT, "NFS: NFSv3 is not compiled into kernel\n");
+	return -EPROTONOSUPPORT;
+#endif /* !CONFIG_NFS_V3 */
+
+out_no_address:
+	dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n");
+	return -EINVAL;
+
+out_invalid_fh:
+	dfprintk(MOUNT, "NFS: invalid root filehandle\n");
+	return -EINVAL;
 }
 
 /*
@@ -600,13 +1317,51 @@ static int nfs_compare_super(struct super_block *sb, void *data)
 {
 	struct nfs_server *server = data, *old = NFS_SB(sb);
 
-	if (old->nfs_client != server->nfs_client)
+	if (memcmp(&old->nfs_client->cl_addr,
+				&server->nfs_client->cl_addr,
+				sizeof(old->nfs_client->cl_addr)) != 0)
+		return 0;
+	/* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */
+	if (old->flags & NFS_MOUNT_UNSHARED)
 		return 0;
 	if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0)
 		return 0;
 	return 1;
 }
 
+#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
+
+static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags)
+{
+	const struct nfs_server *a = s->s_fs_info;
+	const struct rpc_clnt *clnt_a = a->client;
+	const struct rpc_clnt *clnt_b = b->client;
+
+	if ((s->s_flags & NFS_MS_MASK) != (flags & NFS_MS_MASK))
+		goto Ebusy;
+	if (a->nfs_client != b->nfs_client)
+		goto Ebusy;
+	if (a->flags != b->flags)
+		goto Ebusy;
+	if (a->wsize != b->wsize)
+		goto Ebusy;
+	if (a->rsize != b->rsize)
+		goto Ebusy;
+	if (a->acregmin != b->acregmin)
+		goto Ebusy;
+	if (a->acregmax != b->acregmax)
+		goto Ebusy;
+	if (a->acdirmin != b->acdirmin)
+		goto Ebusy;
+	if (a->acdirmax != b->acdirmax)
+		goto Ebusy;
+	if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor)
+		goto Ebusy;
+	return 0;
+Ebusy:
+	return -EBUSY;
+}
+
 static int nfs_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
 {
@@ -615,30 +1370,37 @@ static int nfs_get_sb(struct file_system_type *fs_type,
 	struct nfs_fh mntfh;
 	struct nfs_mount_data *data = raw_data;
 	struct dentry *mntroot;
+	int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
 	int error;
 
 	/* Validate the mount data */
-	error = nfs_validate_mount_data(data, &mntfh);
+	error = nfs_validate_mount_data(&data, &mntfh, dev_name);
 	if (error < 0)
-		return error;
+		goto out;
 
 	/* Get a volume representation */
 	server = nfs_create_server(data, &mntfh);
 	if (IS_ERR(server)) {
 		error = PTR_ERR(server);
-		goto out_err_noserver;
+		goto out;
 	}
 
+	if (server->flags & NFS_MOUNT_UNSHARED)
+		compare_super = NULL;
+
 	/* Get a superblock - note that we may end up sharing one that already exists */
-	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
+	s = sget(fs_type, compare_super, nfs_set_super, server);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
 		goto out_err_nosb;
 	}
 
 	if (s->s_fs_info != server) {
+		error = nfs_compare_mount_options(s, server, flags);
 		nfs_free_server(server);
 		server = NULL;
+		if (error < 0)
+			goto error_splat_super;
 	}
 
 	if (!s->s_root) {
@@ -656,17 +1418,21 @@ static int nfs_get_sb(struct file_system_type *fs_type,
 	s->s_flags |= MS_ACTIVE;
 	mnt->mnt_sb = s;
 	mnt->mnt_root = mntroot;
-	return 0;
+	error = 0;
+
+out:
+	if (data != raw_data)
+		kfree(data);
+	return error;
 
 out_err_nosb:
 	nfs_free_server(server);
-out_err_noserver:
-	return error;
+	goto out;
 
 error_splat_super:
 	up_write(&s->s_umount);
 	deactivate_super(s);
-	return error;
+	goto out;
 }
 
 /*
@@ -691,6 +1457,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
 	struct super_block *s;
 	struct nfs_server *server;
 	struct dentry *mntroot;
+	int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
 	int error;
 
 	dprintk("--> nfs_xdev_get_sb()\n");
@@ -702,16 +1469,22 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
 		goto out_err_noserver;
 	}
 
+	if (server->flags & NFS_MOUNT_UNSHARED)
+		compare_super = NULL;
+
 	/* Get a superblock - note that we may end up sharing one that already exists */
-	s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+	s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
 		goto out_err_nosb;
 	}
 
 	if (s->s_fs_info != server) {
+		error = nfs_compare_mount_options(s, server, flags);
 		nfs_free_server(server);
 		server = NULL;
+		if (error < 0)
+			goto error_splat_super;
 	}
 
 	if (!s->s_root) {
@@ -772,25 +1545,164 @@ static void nfs4_fill_super(struct super_block *sb)
 	nfs_initialise_sb(sb);
 }
 
-static void *nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
+/*
+ * Validate NFSv4 mount options
+ */
+static int nfs4_validate_mount_data(struct nfs4_mount_data **options,
+				    const char *dev_name,
+				    struct sockaddr_in *addr,
+				    rpc_authflavor_t *authflavour,
+				    char **hostname,
+				    char **mntpath,
+				    char **ip_addr)
 {
-	void *p = NULL;
-
-	if (!src->len)
-		return ERR_PTR(-EINVAL);
-	if (src->len < maxlen)
-		maxlen = src->len;
-	if (dst == NULL) {
-		p = dst = kmalloc(maxlen + 1, GFP_KERNEL);
-		if (p == NULL)
-			return ERR_PTR(-ENOMEM);
-	}
-	if (copy_from_user(dst, src->data, maxlen)) {
-		kfree(p);
-		return ERR_PTR(-EFAULT);
+	struct nfs4_mount_data *data = *options;
+	char *c;
+
+	if (data == NULL)
+		goto out_no_data;
+
+	switch (data->version) {
+	case 1:
+		if (data->host_addrlen != sizeof(*addr))
+			goto out_no_address;
+		if (copy_from_user(addr, data->host_addr, sizeof(*addr)))
+			return -EFAULT;
+		if (addr->sin_port == 0)
+			addr->sin_port = htons(NFS_PORT);
+		if (!nfs_verify_server_address((struct sockaddr *) addr))
+			goto out_no_address;
+
+		switch (data->auth_flavourlen) {
+		case 0:
+			*authflavour = RPC_AUTH_UNIX;
+			break;
+		case 1:
+			if (copy_from_user(authflavour, data->auth_flavours,
+					   sizeof(*authflavour)))
+				return -EFAULT;
+			break;
+		default:
+			goto out_inval_auth;
+		}
+
+		c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN);
+		if (IS_ERR(c))
+			return PTR_ERR(c);
+		*hostname = c;
+
+		c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN);
+		if (IS_ERR(c))
+			return PTR_ERR(c);
+		*mntpath = c;
+		dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *mntpath);
+
+		c = strndup_user(data->client_addr.data, 16);
+		if (IS_ERR(c))
+			return PTR_ERR(c);
+		*ip_addr = c;
+
+		break;
+	default: {
+		unsigned int len;
+		struct nfs_parsed_mount_data args = {
+			.rsize		= NFS_MAX_FILE_IO_SIZE,
+			.wsize		= NFS_MAX_FILE_IO_SIZE,
+			.timeo		= 600,
+			.retrans	= 2,
+			.acregmin	= 3,
+			.acregmax	= 60,
+			.acdirmin	= 30,
+			.acdirmax	= 60,
+			.nfs_server.protocol = IPPROTO_TCP,
+		};
+
+		if (nfs_parse_mount_options((char *) *options, &args) == 0)
+			return -EINVAL;
+
+		if (!nfs_verify_server_address((struct sockaddr *)
+						&args.nfs_server.address))
+			return -EINVAL;
+		*addr = args.nfs_server.address;
+
+		switch (args.auth_flavor_len) {
+		case 0:
+			*authflavour = RPC_AUTH_UNIX;
+			break;
+		case 1:
+			*authflavour = (rpc_authflavor_t) args.auth_flavors[0];
+			break;
+		default:
+			goto out_inval_auth;
+		}
+
+		/*
+		 * Translate to nfs4_mount_data, which nfs4_fill_super
+		 * can deal with.
+		 */
+		data = kzalloc(sizeof(*data), GFP_KERNEL);
+		if (data == NULL)
+			return -ENOMEM;
+		*options = data;
+
+		data->version	= 1;
+		data->flags	= args.flags & NFS4_MOUNT_FLAGMASK;
+		data->rsize	= args.rsize;
+		data->wsize	= args.wsize;
+		data->timeo	= args.timeo;
+		data->retrans	= args.retrans;
+		data->acregmin	= args.acregmin;
+		data->acregmax	= args.acregmax;
+		data->acdirmin	= args.acdirmin;
+		data->acdirmax	= args.acdirmax;
+		data->proto	= args.nfs_server.protocol;
+
+		/*
+		 * Split "dev_name" into "hostname:mntpath".
+		 */
+		c = strchr(dev_name, ':');
+		if (c == NULL)
+			return -EINVAL;
+		/* while calculating len, pretend ':' is '\0' */
+		len = c - dev_name;
+		if (len > NFS4_MAXNAMLEN)
+			return -EINVAL;
+		*hostname = kzalloc(len, GFP_KERNEL);
+		if (*hostname == NULL)
+			return -ENOMEM;
+		strncpy(*hostname, dev_name, len - 1);
+
+		c++;			/* step over the ':' */
+		len = strlen(c);
+		if (len > NFS4_MAXPATHLEN)
+			return -EINVAL;
+		*mntpath = kzalloc(len + 1, GFP_KERNEL);
+		if (*mntpath == NULL)
+			return -ENOMEM;
+		strncpy(*mntpath, c, len);
+
+		dprintk("MNTPATH: %s\n", *mntpath);
+
+		*ip_addr = args.client_address;
+
+		break;
+		}
 	}
-	dst[maxlen] = '\0';
-	return dst;
+
+	return 0;
+
+out_no_data:
+	dfprintk(MOUNT, "NFS4: mount program didn't pass any mount data\n");
+	return -EINVAL;
+
+out_inval_auth:
+	dfprintk(MOUNT, "NFS4: Invalid number of RPC auth flavours %d\n",
+		 data->auth_flavourlen);
+	return -EINVAL;
+
+out_no_address:
+	dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n");
+	return -EINVAL;
 }
 
 /*
@@ -806,81 +1718,29 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 	rpc_authflavor_t authflavour;
 	struct nfs_fh mntfh;
 	struct dentry *mntroot;
-	char *mntpath = NULL, *hostname = NULL, ip_addr[16];
-	void *p;
+	char *mntpath = NULL, *hostname = NULL, *ip_addr = NULL;
+	int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
 	int error;
 
-	if (data == NULL) {
-		dprintk("%s: missing data argument\n", __FUNCTION__);
-		return -EINVAL;
-	}
-	if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) {
-		dprintk("%s: bad mount version\n", __FUNCTION__);
-		return -EINVAL;
-	}
-
-	/* We now require that the mount process passes the remote address */
-	if (data->host_addrlen != sizeof(addr))
-		return -EINVAL;
-
-	if (copy_from_user(&addr, data->host_addr, sizeof(addr)))
-		return -EFAULT;
-
-	if (addr.sin_family != AF_INET ||
-	    addr.sin_addr.s_addr == INADDR_ANY
-	    ) {
-		dprintk("%s: mount program didn't pass remote IP address!\n",
-				__FUNCTION__);
-		return -EINVAL;
-	}
-	/* RFC3530: The default port for NFS is 2049 */
-	if (addr.sin_port == 0)
-		addr.sin_port = htons(NFS_PORT);
-
-	/* Grab the authentication type */
-	authflavour = RPC_AUTH_UNIX;
-	if (data->auth_flavourlen != 0) {
-		if (data->auth_flavourlen != 1) {
-			dprintk("%s: Invalid number of RPC auth flavours %d.\n",
-					__FUNCTION__, data->auth_flavourlen);
-			error = -EINVAL;
-			goto out_err_noserver;
-		}
-
-		if (copy_from_user(&authflavour, data->auth_flavours,
-				   sizeof(authflavour))) {
-			error = -EFAULT;
-			goto out_err_noserver;
-		}
-	}
-
-	p = nfs_copy_user_string(NULL, &data->hostname, 256);
-	if (IS_ERR(p))
-		goto out_err;
-	hostname = p;
-
-	p = nfs_copy_user_string(NULL, &data->mnt_path, 1024);
-	if (IS_ERR(p))
-		goto out_err;
-	mntpath = p;
-
-	dprintk("MNTPATH: %s\n", mntpath);
-
-	p = nfs_copy_user_string(ip_addr, &data->client_addr,
-				 sizeof(ip_addr) - 1);
-	if (IS_ERR(p))
-		goto out_err;
+	/* Validate the mount data */
+	error = nfs4_validate_mount_data(&data, dev_name, &addr, &authflavour,
+					 &hostname, &mntpath, &ip_addr);
+	if (error < 0)
+		goto out;
 
 	/* Get a volume representation */
 	server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr,
 				    authflavour, &mntfh);
 	if (IS_ERR(server)) {
 		error = PTR_ERR(server);
-		goto out_err_noserver;
+		goto out;
 	}
 
+	if (server->flags & NFS4_MOUNT_UNSHARED)
+		compare_super = NULL;
+
 	/* Get a superblock - note that we may end up sharing one that already exists */
-	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
+	s = sget(fs_type, compare_super, nfs_set_super, server);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
 		goto out_free;
@@ -906,25 +1766,22 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 	s->s_flags |= MS_ACTIVE;
 	mnt->mnt_sb = s;
 	mnt->mnt_root = mntroot;
+	error = 0;
+
+out:
+	kfree(ip_addr);
 	kfree(mntpath);
 	kfree(hostname);
-	return 0;
-
-out_err:
-	error = PTR_ERR(p);
-	goto out_err_noserver;
+	return error;
 
 out_free:
 	nfs_free_server(server);
-out_err_noserver:
-	kfree(mntpath);
-	kfree(hostname);
-	return error;
+	goto out;
 
 error_splat_super:
 	up_write(&s->s_umount);
 	deactivate_super(s);
-	goto out_err_noserver;
+	goto out;
 }
 
 static void nfs4_kill_super(struct super_block *sb)
@@ -949,6 +1806,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
 	struct super_block *s;
 	struct nfs_server *server;
 	struct dentry *mntroot;
+	int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
 	int error;
 
 	dprintk("--> nfs4_xdev_get_sb()\n");
@@ -960,8 +1818,11 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
 		goto out_err_noserver;
 	}
 
+	if (server->flags & NFS4_MOUNT_UNSHARED)
+		compare_super = NULL;
+
 	/* Get a superblock - note that we may end up sharing one that already exists */
-	s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+	s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
 		goto out_err_nosb;
@@ -1016,6 +1877,7 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
 	struct nfs_server *server;
 	struct dentry *mntroot;
 	struct nfs_fh mntfh;
+	int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
 	int error;
 
 	dprintk("--> nfs4_referral_get_sb()\n");
@@ -1027,8 +1889,11 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
 		goto out_err_noserver;
 	}
 
+	if (server->flags & NFS4_MOUNT_UNSHARED)
+		compare_super = NULL;
+
 	/* Get a superblock - note that we may end up sharing one that already exists */
-	s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+	s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
 		goto out_err_nosb;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index af344a158e0..73ac992ece8 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -117,19 +117,19 @@ static struct nfs_page *nfs_page_find_request_locked(struct page *page)
 	if (PagePrivate(page)) {
 		req = (struct nfs_page *)page_private(page);
 		if (req != NULL)
-			atomic_inc(&req->wb_count);
+			kref_get(&req->wb_kref);
 	}
 	return req;
 }
 
 static struct nfs_page *nfs_page_find_request(struct page *page)
 {
+	struct inode *inode = page->mapping->host;
 	struct nfs_page *req = NULL;
-	spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock;
 
-	spin_lock(req_lock);
+	spin_lock(&inode->i_lock);
 	req = nfs_page_find_request_locked(page);
-	spin_unlock(req_lock);
+	spin_unlock(&inode->i_lock);
 	return req;
 }
 
@@ -191,8 +191,6 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
 	}
 	/* Update file length */
 	nfs_grow_file(page, offset, count);
-	/* Set the PG_uptodate flag? */
-	nfs_mark_uptodate(page, offset, count);
 	nfs_unlock_request(req);
 	return 0;
 }
@@ -253,16 +251,16 @@ static void nfs_end_page_writeback(struct page *page)
 static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
 				struct page *page)
 {
+	struct inode *inode = page->mapping->host;
+	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_page *req;
-	struct nfs_inode *nfsi = NFS_I(page->mapping->host);
-	spinlock_t *req_lock = &nfsi->req_lock;
 	int ret;
 
-	spin_lock(req_lock);
+	spin_lock(&inode->i_lock);
 	for(;;) {
 		req = nfs_page_find_request_locked(page);
 		if (req == NULL) {
-			spin_unlock(req_lock);
+			spin_unlock(&inode->i_lock);
 			return 1;
 		}
 		if (nfs_lock_request_dontget(req))
@@ -272,28 +270,28 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
 		 *	 succeed provided that someone hasn't already marked the
 		 *	 request as dirty (in which case we don't care).
 		 */
-		spin_unlock(req_lock);
+		spin_unlock(&inode->i_lock);
 		ret = nfs_wait_on_request(req);
 		nfs_release_request(req);
 		if (ret != 0)
 			return ret;
-		spin_lock(req_lock);
+		spin_lock(&inode->i_lock);
 	}
 	if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) {
 		/* This request is marked for commit */
-		spin_unlock(req_lock);
+		spin_unlock(&inode->i_lock);
 		nfs_unlock_request(req);
 		nfs_pageio_complete(pgio);
 		return 1;
 	}
 	if (nfs_set_page_writeback(page) != 0) {
-		spin_unlock(req_lock);
+		spin_unlock(&inode->i_lock);
 		BUG();
 	}
 	radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
-			NFS_PAGE_TAG_WRITEBACK);
+			NFS_PAGE_TAG_LOCKED);
 	ret = test_bit(PG_NEED_FLUSH, &req->wb_flags);
-	spin_unlock(req_lock);
+	spin_unlock(&inode->i_lock);
 	nfs_pageio_add_request(pgio, req);
 	return ret;
 }
@@ -400,7 +398,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 	if (PageDirty(req->wb_page))
 		set_bit(PG_NEED_FLUSH, &req->wb_flags);
 	nfsi->npages++;
-	atomic_inc(&req->wb_count);
+	kref_get(&req->wb_kref);
 	return 0;
 }
 
@@ -409,12 +407,12 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
  */
 static void nfs_inode_remove_request(struct nfs_page *req)
 {
-	struct inode *inode = req->wb_context->dentry->d_inode;
+	struct inode *inode = req->wb_context->path.dentry->d_inode;
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	BUG_ON (!NFS_WBACK_BUSY(req));
 
-	spin_lock(&nfsi->req_lock);
+	spin_lock(&inode->i_lock);
 	set_page_private(req->wb_page, 0);
 	ClearPagePrivate(req->wb_page);
 	radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
@@ -422,11 +420,11 @@ static void nfs_inode_remove_request(struct nfs_page *req)
 		__set_page_dirty_nobuffers(req->wb_page);
 	nfsi->npages--;
 	if (!nfsi->npages) {
-		spin_unlock(&nfsi->req_lock);
+		spin_unlock(&inode->i_lock);
 		nfs_end_data_update(inode);
 		iput(inode);
 	} else
-		spin_unlock(&nfsi->req_lock);
+		spin_unlock(&inode->i_lock);
 	nfs_clear_request(req);
 	nfs_release_request(req);
 }
@@ -457,14 +455,16 @@ nfs_dirty_request(struct nfs_page *req)
 static void
 nfs_mark_request_commit(struct nfs_page *req)
 {
-	struct inode *inode = req->wb_context->dentry->d_inode;
+	struct inode *inode = req->wb_context->path.dentry->d_inode;
 	struct nfs_inode *nfsi = NFS_I(inode);
 
-	spin_lock(&nfsi->req_lock);
-	nfs_list_add_request(req, &nfsi->commit);
+	spin_lock(&inode->i_lock);
 	nfsi->ncommit++;
 	set_bit(PG_NEED_COMMIT, &(req)->wb_flags);
-	spin_unlock(&nfsi->req_lock);
+	radix_tree_tag_set(&nfsi->nfs_page_tree,
+			req->wb_index,
+			NFS_PAGE_TAG_COMMIT);
+	spin_unlock(&inode->i_lock);
 	inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
 	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
 }
@@ -526,18 +526,18 @@ static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, u
 		idx_end = idx_start + npages - 1;
 
 	next = idx_start;
-	while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) {
+	while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) {
 		if (req->wb_index > idx_end)
 			break;
 
 		next = req->wb_index + 1;
 		BUG_ON(!NFS_WBACK_BUSY(req));
 
-		atomic_inc(&req->wb_count);
-		spin_unlock(&nfsi->req_lock);
+		kref_get(&req->wb_kref);
+		spin_unlock(&inode->i_lock);
 		error = nfs_wait_on_request(req);
 		nfs_release_request(req);
-		spin_lock(&nfsi->req_lock);
+		spin_lock(&inode->i_lock);
 		if (error < 0)
 			return error;
 		res++;
@@ -577,10 +577,9 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, u
 	int res = 0;
 
 	if (nfsi->ncommit != 0) {
-		res = nfs_scan_list(nfsi, &nfsi->commit, dst, idx_start, npages);
+		res = nfs_scan_list(nfsi, dst, idx_start, npages,
+				NFS_PAGE_TAG_COMMIT);
 		nfsi->ncommit -= res;
-		if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit))
-			printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
 	}
 	return res;
 }
@@ -603,7 +602,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
 {
 	struct address_space *mapping = page->mapping;
 	struct inode *inode = mapping->host;
-	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_page		*req, *new = NULL;
 	pgoff_t		rqend, end;
 
@@ -613,13 +611,13 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
 		/* Loop over all inode entries and see if we find
 		 * A request for the page we wish to update
 		 */
-		spin_lock(&nfsi->req_lock);
+		spin_lock(&inode->i_lock);
 		req = nfs_page_find_request_locked(page);
 		if (req) {
 			if (!nfs_lock_request_dontget(req)) {
 				int error;
 
-				spin_unlock(&nfsi->req_lock);
+				spin_unlock(&inode->i_lock);
 				error = nfs_wait_on_request(req);
 				nfs_release_request(req);
 				if (error < 0) {
@@ -629,7 +627,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
 				}
 				continue;
 			}
-			spin_unlock(&nfsi->req_lock);
+			spin_unlock(&inode->i_lock);
 			if (new)
 				nfs_release_request(new);
 			break;
@@ -640,14 +638,14 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
 			nfs_lock_request_dontget(new);
 			error = nfs_inode_add_request(inode, new);
 			if (error) {
-				spin_unlock(&nfsi->req_lock);
+				spin_unlock(&inode->i_lock);
 				nfs_unlock_request(new);
 				return ERR_PTR(error);
 			}
-			spin_unlock(&nfsi->req_lock);
+			spin_unlock(&inode->i_lock);
 			return new;
 		}
-		spin_unlock(&nfsi->req_lock);
+		spin_unlock(&inode->i_lock);
 
 		new = nfs_create_request(ctx, inode, page, offset, bytes);
 		if (IS_ERR(new))
@@ -751,12 +749,17 @@ int nfs_updatepage(struct file *file, struct page *page,
 static void nfs_writepage_release(struct nfs_page *req)
 {
 
-	if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) {
+	if (PageError(req->wb_page)) {
+		nfs_end_page_writeback(req->wb_page);
+		nfs_inode_remove_request(req);
+	} else if (!nfs_reschedule_unstable_write(req)) {
+		/* Set the PG_uptodate flag */
+		nfs_mark_uptodate(req->wb_page, req->wb_pgbase, req->wb_bytes);
 		nfs_end_page_writeback(req->wb_page);
 		nfs_inode_remove_request(req);
 	} else
 		nfs_end_page_writeback(req->wb_page);
-	nfs_clear_page_writeback(req);
+	nfs_clear_page_tag_locked(req);
 }
 
 static inline int flush_task_priority(int how)
@@ -786,7 +789,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 	 * NB: take care not to mess about with data->commit et al. */
 
 	data->req = req;
-	data->inode = inode = req->wb_context->dentry->d_inode;
+	data->inode = inode = req->wb_context->path.dentry->d_inode;
 	data->cred = req->wb_context->cred;
 
 	data->args.fh     = NFS_FH(inode);
@@ -885,7 +888,7 @@ out_bad:
 	}
 	nfs_redirty_request(req);
 	nfs_end_page_writeback(req->wb_page);
-	nfs_clear_page_writeback(req);
+	nfs_clear_page_tag_locked(req);
 	return -ENOMEM;
 }
 
@@ -928,7 +931,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
 		nfs_list_remove_request(req);
 		nfs_redirty_request(req);
 		nfs_end_page_writeback(req->wb_page);
-		nfs_clear_page_writeback(req);
+		nfs_clear_page_tag_locked(req);
 	}
 	return -ENOMEM;
 }
@@ -954,8 +957,8 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
 	struct page		*page = req->wb_page;
 
 	dprintk("NFS: write (%s/%Ld %d@%Ld)",
-		req->wb_context->dentry->d_inode->i_sb->s_id,
-		(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+		req->wb_context->path.dentry->d_inode->i_sb->s_id,
+		(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
 		req->wb_bytes,
 		(long long)req_offset(req));
 
@@ -970,9 +973,9 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
 	}
 
 	if (nfs_write_need_commit(data)) {
-		spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock;
+		struct inode *inode = page->mapping->host;
 
-		spin_lock(req_lock);
+		spin_lock(&inode->i_lock);
 		if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) {
 			/* Do nothing we need to resend the writes */
 		} else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) {
@@ -983,7 +986,7 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
 			clear_bit(PG_NEED_COMMIT, &req->wb_flags);
 			dprintk(" server reboot detected\n");
 		}
-		spin_unlock(req_lock);
+		spin_unlock(&inode->i_lock);
 	} else
 		dprintk(" OK\n");
 
@@ -1020,8 +1023,8 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
 		page = req->wb_page;
 
 		dprintk("NFS: write (%s/%Ld %d@%Ld)",
-			req->wb_context->dentry->d_inode->i_sb->s_id,
-			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+			req->wb_context->path.dentry->d_inode->i_sb->s_id,
+			(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
 			req->wb_bytes,
 			(long long)req_offset(req));
 
@@ -1039,12 +1042,14 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
 			dprintk(" marked for commit\n");
 			goto next;
 		}
+		/* Set the PG_uptodate flag? */
+		nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
 		dprintk(" OK\n");
 remove_request:
 		nfs_end_page_writeback(page);
 		nfs_inode_remove_request(req);
 	next:
-		nfs_clear_page_writeback(req);
+		nfs_clear_page_tag_locked(req);
 	}
 }
 
@@ -1157,7 +1162,7 @@ static void nfs_commit_rpcsetup(struct list_head *head,
 
 	list_splice_init(head, &data->pages);
 	first = nfs_list_entry(data->pages.next);
-	inode = first->wb_context->dentry->d_inode;
+	inode = first->wb_context->path.dentry->d_inode;
 
 	data->inode	  = inode;
 	data->cred	  = first->wb_context->cred;
@@ -1207,7 +1212,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 		nfs_list_remove_request(req);
 		nfs_mark_request_commit(req);
 		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
-		nfs_clear_page_writeback(req);
+		nfs_clear_page_tag_locked(req);
 	}
 	return -ENOMEM;
 }
@@ -1234,8 +1239,8 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
 		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
 
 		dprintk("NFS: commit (%s/%Ld %d@%Ld)",
-			req->wb_context->dentry->d_inode->i_sb->s_id,
-			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+			req->wb_context->path.dentry->d_inode->i_sb->s_id,
+			(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
 			req->wb_bytes,
 			(long long)req_offset(req));
 		if (task->tk_status < 0) {
@@ -1249,6 +1254,9 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
 		 * returned by the server against all stored verfs. */
 		if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
 			/* We have a match */
+			/* Set the PG_uptodate flag */
+			nfs_mark_uptodate(req->wb_page, req->wb_pgbase,
+					req->wb_bytes);
 			nfs_inode_remove_request(req);
 			dprintk(" OK\n");
 			goto next;
@@ -1257,7 +1265,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
 		dprintk(" mismatch\n");
 		nfs_redirty_request(req);
 	next:
-		nfs_clear_page_writeback(req);
+		nfs_clear_page_tag_locked(req);
 	}
 }
 
@@ -1268,13 +1276,12 @@ static const struct rpc_call_ops nfs_commit_ops = {
 
 int nfs_commit_inode(struct inode *inode, int how)
 {
-	struct nfs_inode *nfsi = NFS_I(inode);
 	LIST_HEAD(head);
 	int res;
 
-	spin_lock(&nfsi->req_lock);
+	spin_lock(&inode->i_lock);
 	res = nfs_scan_commit(inode, &head, 0, 0);
-	spin_unlock(&nfsi->req_lock);
+	spin_unlock(&inode->i_lock);
 	if (res) {
 		int error = nfs_commit_list(inode, &head, how);
 		if (error < 0)
@@ -1292,7 +1299,6 @@ static inline int nfs_commit_list(struct inode *inode, struct list_head *head, i
 long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
 {
 	struct inode *inode = mapping->host;
-	struct nfs_inode *nfsi = NFS_I(inode);
 	pgoff_t idx_start, idx_end;
 	unsigned int npages = 0;
 	LIST_HEAD(head);
@@ -1314,7 +1320,7 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
 		}
 	}
 	how &= ~FLUSH_NOCOMMIT;
-	spin_lock(&nfsi->req_lock);
+	spin_lock(&inode->i_lock);
 	do {
 		ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
 		if (ret != 0)
@@ -1325,18 +1331,19 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
 		if (pages == 0)
 			break;
 		if (how & FLUSH_INVALIDATE) {
-			spin_unlock(&nfsi->req_lock);
+			spin_unlock(&inode->i_lock);
 			nfs_cancel_commit_list(&head);
 			ret = pages;
-			spin_lock(&nfsi->req_lock);
+			spin_lock(&inode->i_lock);
 			continue;
 		}
 		pages += nfs_scan_commit(inode, &head, 0, 0);
-		spin_unlock(&nfsi->req_lock);
+		spin_unlock(&inode->i_lock);
 		ret = nfs_commit_list(inode, &head, how);
-		spin_lock(&nfsi->req_lock);
+		spin_lock(&inode->i_lock);
+
 	} while (ret >= 0);
-	spin_unlock(&nfsi->req_lock);
+	spin_unlock(&inode->i_lock);
 	return ret;
 }
 
@@ -1430,7 +1437,6 @@ int nfs_set_page_dirty(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 	struct inode *inode;
-	spinlock_t *req_lock;
 	struct nfs_page *req;
 	int ret;
 
@@ -1439,18 +1445,17 @@ int nfs_set_page_dirty(struct page *page)
 	inode = mapping->host;
 	if (!inode)
 		goto out_raced;
-	req_lock = &NFS_I(inode)->req_lock;
-	spin_lock(req_lock);
+	spin_lock(&inode->i_lock);
 	req = nfs_page_find_request_locked(page);
 	if (req != NULL) {
 		/* Mark any existing write requests for flushing */
 		ret = !test_and_set_bit(PG_NEED_FLUSH, &req->wb_flags);
-		spin_unlock(req_lock);
+		spin_unlock(&inode->i_lock);
 		nfs_release_request(req);
 		return ret;
 	}
 	ret = __set_page_dirty_nobuffers(page);
-	spin_unlock(req_lock);
+	spin_unlock(&inode->i_lock);
 	return ret;
 out_raced:
 	return !TestSetPageDirty(page);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 864090edc28..5443c52b57a 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -394,7 +394,6 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 		.rpc_proc       = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
 		.rpc_argp       = clp,
 	};
-	char clientname[16];
 	int status;
 
 	if (atomic_read(&cb->cb_set))
@@ -417,11 +416,6 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 	memset(program->stats, 0, sizeof(cb->cb_stat));
 	program->stats->program = program;
 
-	/* Just here to make some printk's more useful: */
-	snprintf(clientname, sizeof(clientname),
-		"%u.%u.%u.%u", NIPQUAD(addr.sin_addr));
-	args.servername = clientname;
-
 	/* Create RPC client */
 	cb->cb_client = rpc_create(&args);
 	if (IS_ERR(cb->cb_client)) {
@@ -429,29 +423,23 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 		goto out_err;
 	}
 
-	/* Kick rpciod, put the call on the wire. */
-	if (rpciod_up() != 0)
-		goto out_clnt;
-
 	/* the task holds a reference to the nfs4_client struct */
 	atomic_inc(&clp->cl_count);
 
 	msg.rpc_cred = nfsd4_lookupcred(clp,0);
 	if (IS_ERR(msg.rpc_cred))
-		goto out_rpciod;
+		goto out_release_clp;
 	status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL);
 	put_rpccred(msg.rpc_cred);
 
 	if (status != 0) {
 		dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n");
-		goto out_rpciod;
+		goto out_release_clp;
 	}
 	return;
 
-out_rpciod:
+out_release_clp:
 	atomic_dec(&clp->cl_count);
-	rpciod_down();
-out_clnt:
 	rpc_shutdown_client(cb->cb_client);
 out_err:
 	cb->cb_client = NULL;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3cc8ce422ab..8c52913d7cb 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -378,7 +378,6 @@ shutdown_callback_client(struct nfs4_client *clp)
 	if (clnt) {
 		clp->cl_callback.cb_client = NULL;
 		rpc_shutdown_client(clnt);
-		rpciod_down();
 	}
 }
 
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 7e6aa245b5d..945b1cedde2 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -23,7 +23,7 @@
 #include <linux/file.h>
 #include <linux/mount.h>
 #include <linux/major.h>
-#include <linux/ext2_fs.h>
+#include <linux/splice.h>
 #include <linux/proc_fs.h>
 #include <linux/stat.h>
 #include <linux/fcntl.h>
@@ -801,26 +801,32 @@ found:
 }
 
 /*
- * Grab and keep cached pages assosiated with a file in the svc_rqst
- * so that they can be passed to the netowork sendmsg/sendpage routines
- * directrly. They will be released after the sending has completed.
+ * Grab and keep cached pages associated with a file in the svc_rqst
+ * so that they can be passed to the network sendmsg/sendpage routines
+ * directly. They will be released after the sending has completed.
  */
 static int
-nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset , unsigned long size)
+nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+		  struct splice_desc *sd)
 {
-	unsigned long count = desc->count;
-	struct svc_rqst *rqstp = desc->arg.data;
+	struct svc_rqst *rqstp = sd->u.data;
 	struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
+	struct page *page = buf->page;
+	size_t size;
+	int ret;
+
+	ret = buf->ops->confirm(pipe, buf);
+	if (unlikely(ret))
+		return ret;
 
-	if (size > count)
-		size = count;
+	size = sd->len;
 
 	if (rqstp->rq_res.page_len == 0) {
 		get_page(page);
 		put_page(*pp);
 		*pp = page;
 		rqstp->rq_resused++;
-		rqstp->rq_res.page_base = offset;
+		rqstp->rq_res.page_base = buf->offset;
 		rqstp->rq_res.page_len = size;
 	} else if (page != pp[-1]) {
 		get_page(page);
@@ -832,11 +838,15 @@ nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset
 	} else
 		rqstp->rq_res.page_len += size;
 
-	desc->count = count - size;
-	desc->written += size;
 	return size;
 }
 
+static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
+				    struct splice_desc *sd)
+{
+	return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
+}
+
 static __be32
 nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
               loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
@@ -861,10 +871,16 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 	if (ra && ra->p_set)
 		file->f_ra = ra->p_ra;
 
-	if (file->f_op->sendfile && rqstp->rq_sendfile_ok) {
+	if (file->f_op->splice_read && rqstp->rq_splice_ok) {
+		struct splice_desc sd = {
+			.len		= 0,
+			.total_len	= *count,
+			.pos		= offset,
+			.u.data		= rqstp,
+		};
+
 		rqstp->rq_resused = 1;
-		host_err = file->f_op->sendfile(file, &offset, *count,
-						 nfsd_read_actor, rqstp);
+		host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
 	} else {
 		oldfs = get_fs();
 		set_fs(KERNEL_DS);
diff --git a/fs/nls/Makefile b/fs/nls/Makefile
index a7ade138d68..f499dd7c390 100644
--- a/fs/nls/Makefile
+++ b/fs/nls/Makefile
@@ -36,11 +36,9 @@ obj-$(CONFIG_NLS_ISO8859_6)	+= nls_iso8859-6.o
 obj-$(CONFIG_NLS_ISO8859_7)	+= nls_iso8859-7.o
 obj-$(CONFIG_NLS_ISO8859_8)	+= nls_cp1255.o
 obj-$(CONFIG_NLS_ISO8859_9)	+= nls_iso8859-9.o
-obj-$(CONFIG_NLS_ISO8859_10)	+= nls_iso8859-10.o
 obj-$(CONFIG_NLS_ISO8859_13)	+= nls_iso8859-13.o
 obj-$(CONFIG_NLS_ISO8859_14)	+= nls_iso8859-14.o
 obj-$(CONFIG_NLS_ISO8859_15)	+= nls_iso8859-15.o
 obj-$(CONFIG_NLS_KOI8_R)	+= nls_koi8-r.o
 obj-$(CONFIG_NLS_KOI8_U)	+= nls_koi8-u.o nls_koi8-ru.o
-obj-$(CONFIG_NLS_ABC)		+= nls_abc.o
 obj-$(CONFIG_NLS_UTF8)		+= nls_utf8.o
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 7ed56390b58..ffcc504a166 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2276,7 +2276,7 @@ const struct file_operations ntfs_file_ops = {
 						    mounted filesystem. */
 	.mmap		= generic_file_mmap,	 /* Mmap file. */
 	.open		= ntfs_file_open,	 /* Open file. */
-	.sendfile	= generic_file_sendfile, /* Zero-copy data send with
+	.splice_read	= generic_file_splice_read /* Zero-copy data send with
 						    the data source being on
 						    the ntfs partition.  We do
 						    not need to care about the
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 0023b31e48a..a480b09c79b 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -798,6 +798,11 @@ int ocfs2_map_and_write_splice_data(struct inode *inode,
 	}
 	to = from + bytes;
 
+	BUG_ON(from > PAGE_CACHE_SIZE);
+	BUG_ON(to > PAGE_CACHE_SIZE);
+	BUG_ON(from < cluster_start);
+	BUG_ON(to > cluster_end);
+
 	if (wc->w_this_page_new)
 		ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode,
 					    cluster_start, cluster_end, 1);
@@ -809,11 +814,6 @@ int ocfs2_map_and_write_splice_data(struct inode *inode,
 		goto out;
 	}
 
-	BUG_ON(from > PAGE_CACHE_SIZE);
-	BUG_ON(to > PAGE_CACHE_SIZE);
-	BUG_ON(from > osb->s_clustersize);
-	BUG_ON(to > osb->s_clustersize);
-
 	src = buf->ops->map(sp->s_pipe, buf, 1);
 	dst = kmap_atomic(wc->w_this_page, KM_USER1);
 	memcpy(dst + from, src + src_from, bytes);
@@ -890,6 +890,11 @@ int ocfs2_map_and_write_user_data(struct inode *inode,
 
 	to = from + bytes;
 
+	BUG_ON(from > PAGE_CACHE_SIZE);
+	BUG_ON(to > PAGE_CACHE_SIZE);
+	BUG_ON(from < cluster_start);
+	BUG_ON(to > cluster_end);
+
 	if (wc->w_this_page_new)
 		ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode,
 					    cluster_start, cluster_end, 1);
@@ -901,11 +906,6 @@ int ocfs2_map_and_write_user_data(struct inode *inode,
 		goto out;
 	}
 
-	BUG_ON(from > PAGE_CACHE_SIZE);
-	BUG_ON(to > PAGE_CACHE_SIZE);
-	BUG_ON(from > osb->s_clustersize);
-	BUG_ON(to > osb->s_clustersize);
-
 	dst = kmap(wc->w_this_page);
 	memcpy(dst + from, bp->b_src_buf + src_from, bytes);
 	kunmap(wc->w_this_page);
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index a93620ce4ac..e9e042b93db 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -74,7 +74,6 @@ struct mlog_attribute {
 #define define_mask(_name) {			\
 	.attr = {				\
 		.name = #_name,			\
-		.owner = THIS_MODULE,		\
 		.mode = S_IRUGO | S_IWUSR,	\
 	},					\
 	.mask = ML_##_name,			\
@@ -144,8 +143,7 @@ static struct kobj_type mlog_ktype = {
 };
 
 static struct kset mlog_kset = {
-	.kobj  = {.name = "logmask"},
-	.ktype = &mlog_ktype
+	.kobj   = {.name = "logmask", .ktype = &mlog_ktype},
 };
 
 int mlog_sys_init(struct kset *o2cb_subsys)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index ac6c96431bb..4979b667571 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -31,7 +31,7 @@
 #include <linux/pagemap.h>
 #include <linux/uio.h>
 #include <linux/sched.h>
-#include <linux/pipe_fs_i.h>
+#include <linux/splice.h>
 #include <linux/mount.h>
 #include <linux/writeback.h>
 
@@ -1583,7 +1583,7 @@ static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe,
 	ssize_t copied = 0;
 	struct ocfs2_splice_write_priv sp;
 
-	ret = buf->ops->pin(pipe, buf);
+	ret = buf->ops->confirm(pipe, buf);
 	if (ret)
 		goto out;
 
@@ -1604,7 +1604,7 @@ static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe,
 		 * might enter ocfs2_buffered_write_cluster() more
 		 * than once, so keep track of our progress here.
 		 */
-		copied = ocfs2_buffered_write_cluster(sd->file,
+		copied = ocfs2_buffered_write_cluster(sd->u.file,
 						      (loff_t)sd->pos + total,
 						      count,
 						      ocfs2_map_and_write_splice_data,
@@ -1636,9 +1636,14 @@ static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe,
 	int ret, err;
 	struct address_space *mapping = out->f_mapping;
 	struct inode *inode = mapping->host;
-
-	ret = __splice_from_pipe(pipe, out, ppos, len, flags,
-				 ocfs2_splice_write_actor);
+	struct splice_desc sd = {
+		.total_len = len,
+		.flags = flags,
+		.pos = *ppos,
+		.u.file = out,
+	};
+
+	ret = __splice_from_pipe(pipe, &sd, ocfs2_splice_write_actor);
 	if (ret > 0) {
 		*ppos += ret;
 
@@ -1817,7 +1822,6 @@ const struct inode_operations ocfs2_special_file_iops = {
 const struct file_operations ocfs2_fops = {
 	.read		= do_sync_read,
 	.write		= do_sync_write,
-	.sendfile	= generic_file_sendfile,
 	.mmap		= ocfs2_mmap,
 	.fsync		= ocfs2_sync_file,
 	.release	= ocfs2_file_release,
diff --git a/fs/open.c b/fs/open.c
index 0d515d16197..be6a457f422 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -855,7 +855,7 @@ EXPORT_SYMBOL(dentry_open);
 /*
  * Find an empty file descriptor entry, and mark it busy.
  */
-int get_unused_fd(void)
+int get_unused_fd_flags(int flags)
 {
 	struct files_struct * files = current->files;
 	int fd, error;
@@ -891,7 +891,10 @@ repeat:
 	}
 
 	FD_SET(fd, fdt->open_fds);
-	FD_CLR(fd, fdt->close_on_exec);
+	if (flags & O_CLOEXEC)
+		FD_SET(fd, fdt->close_on_exec);
+	else
+		FD_CLR(fd, fdt->close_on_exec);
 	files->next_fd = fd + 1;
 #if 1
 	/* Sanity check */
@@ -907,6 +910,11 @@ out:
 	return error;
 }
 
+int get_unused_fd(void)
+{
+	return get_unused_fd_flags(0);
+}
+
 EXPORT_SYMBOL(get_unused_fd);
 
 static void __put_unused_fd(struct files_struct *files, unsigned int fd)
@@ -959,7 +967,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
 	int fd = PTR_ERR(tmp);
 
 	if (!IS_ERR(tmp)) {
-		fd = get_unused_fd();
+		fd = get_unused_fd_flags(flags);
 		if (fd >= 0) {
 			struct file *f = do_filp_open(dfd, tmp, flags, mode);
 			if (IS_ERR(f)) {
diff --git a/fs/partitions/acorn.c b/fs/partitions/acorn.c
index e3491328596..3d3e1663147 100644
--- a/fs/partitions/acorn.c
+++ b/fs/partitions/acorn.c
@@ -25,6 +25,8 @@
 #define PARTITION_RISCIX_SCSI	2
 #define PARTITION_LINUX		9
 
+#if defined(CONFIG_ACORN_PARTITION_CUMANA) || \
+	defined(CONFIG_ACORN_PARTITION_ADFS)
 static struct adfs_discrecord *
 adfs_partition(struct parsed_partitions *state, char *name, char *data,
 	       unsigned long first_sector, int slot)
@@ -48,6 +50,7 @@ adfs_partition(struct parsed_partitions *state, char *name, char *data,
 	put_partition(state, slot, first_sector, nr_sects);
 	return dr;
 }
+#endif
 
 #ifdef CONFIG_ACORN_PARTITION_RISCIX
 
@@ -65,6 +68,8 @@ struct riscix_record {
 	struct riscix_part part[8];
 };
 
+#if defined(CONFIG_ACORN_PARTITION_CUMANA) || \
+	defined(CONFIG_ACORN_PARTITION_ADFS)
 static int
 riscix_partition(struct parsed_partitions *state, struct block_device *bdev,
 		unsigned long first_sect, int slot, unsigned long nr_sects)
@@ -105,6 +110,7 @@ riscix_partition(struct parsed_partitions *state, struct block_device *bdev,
 	return slot;
 }
 #endif
+#endif
 
 #define LINUX_NATIVE_MAGIC 0xdeafa1de
 #define LINUX_SWAP_MAGIC   0xdeafab1e
@@ -115,6 +121,8 @@ struct linux_part {
 	__le32 nr_sects;
 };
 
+#if defined(CONFIG_ACORN_PARTITION_CUMANA) || \
+	defined(CONFIG_ACORN_PARTITION_ADFS)
 static int
 linux_partition(struct parsed_partitions *state, struct block_device *bdev,
 		unsigned long first_sect, int slot, unsigned long nr_sects)
@@ -146,6 +154,7 @@ linux_partition(struct parsed_partitions *state, struct block_device *bdev,
 	put_dev_sector(sect);
 	return slot;
 }
+#endif
 
 #ifdef CONFIG_ACORN_PARTITION_CUMANA
 int
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 9a3a058f355..98e0b85a9bb 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -397,7 +397,6 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len,
 		static struct attribute addpartattr = {
 			.name = "whole_disk",
 			.mode = S_IRUSR | S_IRGRP | S_IROTH,
-			.owner = THIS_MODULE,
 		};
 
 		sysfs_create_file(&p->kobj, &addpartattr);
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c
index 9f7ad4244f6..1e064c4a4f8 100644
--- a/fs/partitions/ibm.c
+++ b/fs/partitions/ibm.c
@@ -45,7 +45,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 {
 	int blocksize, offset, size,res;
 	loff_t i_size;
-	dasd_information_t *info;
+	dasd_information2_t *info;
 	struct hd_geometry *geo;
 	char type[5] = {0,};
 	char name[7] = {0,};
@@ -64,14 +64,17 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 	if (i_size == 0)
 		goto out_exit;
 
-	if ((info = kmalloc(sizeof(dasd_information_t), GFP_KERNEL)) == NULL)
+	info = kmalloc(sizeof(dasd_information2_t), GFP_KERNEL);
+	if (info == NULL)
 		goto out_exit;
-	if ((geo = kmalloc(sizeof(struct hd_geometry), GFP_KERNEL)) == NULL)
+	geo = kmalloc(sizeof(struct hd_geometry), GFP_KERNEL);
+	if (geo == NULL)
 		goto out_nogeo;
-	if ((label = kmalloc(sizeof(union label_t), GFP_KERNEL)) == NULL)
+	label = kmalloc(sizeof(union label_t), GFP_KERNEL);
+	if (label == NULL)
 		goto out_nolab;
 
-	if (ioctl_by_bdev(bdev, BIODASDINFO, (unsigned long)info) != 0 ||
+	if (ioctl_by_bdev(bdev, BIODASDINFO2, (unsigned long)info) != 0 ||
 	    ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo) != 0)
 		goto out_freeall;
 
@@ -96,84 +99,108 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 	res = 1;
 
 	/*
-	 * Three different types: CMS1, VOL1 and LNX1/unlabeled
+	 * Three different formats: LDL, CDL and unformated disk
+	 *
+	 * identified by info->format
+	 *
+	 * unformated disks we do not have to care about
 	 */
-	if (strncmp(type, "CMS1", 4) == 0) {
-		/*
-		 * VM style CMS1 labeled disk
-		 */
-		if (label->cms.disk_offset != 0) {
-			printk("CMS1/%8s(MDSK):", name);
-			/* disk is reserved minidisk */
-			blocksize = label->cms.block_size;
-			offset = label->cms.disk_offset;
-			size = (label->cms.block_count - 1) * (blocksize >> 9);
+	if (info->format == DASD_FORMAT_LDL) {
+		if (strncmp(type, "CMS1", 4) == 0) {
+			/*
+			 * VM style CMS1 labeled disk
+			 */
+			if (label->cms.disk_offset != 0) {
+				printk("CMS1/%8s(MDSK):", name);
+				/* disk is reserved minidisk */
+				blocksize = label->cms.block_size;
+				offset = label->cms.disk_offset;
+				size = (label->cms.block_count - 1)
+					* (blocksize >> 9);
+			} else {
+				printk("CMS1/%8s:", name);
+				offset = (info->label_block + 1);
+				size = i_size >> 9;
+			}
 		} else {
-			printk("CMS1/%8s:", name);
+			/*
+			 * Old style LNX1 or unlabeled disk
+			 */
+			if (strncmp(type, "LNX1", 4) == 0)
+				printk ("LNX1/%8s:", name);
+			else
+				printk("(nonl)");
 			offset = (info->label_block + 1);
 			size = i_size >> 9;
 		}
 		put_partition(state, 1, offset*(blocksize >> 9),
-				 size-offset*(blocksize >> 9));
-	} else if ((strncmp(type, "VOL1", 4) == 0) &&
-		(!info->FBA_layout) && (!strcmp(info->type, "ECKD"))) {
+				      size-offset*(blocksize >> 9));
+	} else if (info->format == DASD_FORMAT_CDL) {
 		/*
-		 * New style VOL1 labeled disk
+		 * New style CDL formatted disk
 		 */
 		unsigned int blk;
 		int counter;
 
-		printk("VOL1/%8s:", name);
-
-		/* get block number and read then go through format1 labels */
-		blk = cchhb2blk(&label->vol.vtoc, geo) + 1;
-		counter = 0;
-		while ((data = read_dev_sector(bdev, blk*(blocksize/512),
-					       &sect)) != NULL) {
-			struct vtoc_format1_label f1;
-
-			memcpy(&f1, data, sizeof(struct vtoc_format1_label));
-			put_dev_sector(sect);
-
-			/* skip FMT4 / FMT5 / FMT7 labels */
-			if (f1.DS1FMTID == _ascebc['4']
-			    || f1.DS1FMTID == _ascebc['5']
-			    || f1.DS1FMTID == _ascebc['7']) {
-			        blk++;
-				continue;
-			}
-
-			/* only FMT1 valid at this point */
-			if (f1.DS1FMTID != _ascebc['1'])
-				break;
-
-			/* OK, we got valid partition data */
-		        offset = cchh2blk(&f1.DS1EXT1.llimit, geo);
-			size  = cchh2blk(&f1.DS1EXT1.ulimit, geo) -
-				offset + geo->sectors;
-			if (counter >= state->limit)
-				break;
-			put_partition(state, counter + 1,
-				      offset * (blocksize >> 9),
-				      size * (blocksize >> 9));
-			counter++;
-			blk++;
-		}
-		if (!data)
-		/* Are we not supposed to report this ? */
-			goto out_readerr;
-	} else {
 		/*
-		 * Old style LNX1 or unlabeled disk
+		 * check if VOL1 label is available
+		 * if not, something is wrong, skipping partition detection
 		 */
-		if (strncmp(type, "LNX1", 4) == 0)
-			printk ("LNX1/%8s:", name);
-		else
-			printk("(nonl)/%8s:", name);
-		offset = (info->label_block + 1);
-		size = i_size >> 9;
-		put_partition(state, 1, offset*(blocksize >> 9),
-			      size-offset*(blocksize >> 9));
+		if (strncmp(type, "VOL1",  4) == 0) {
+			printk("VOL1/%8s:", name);
+			/*
+			 * get block number and read then go through format1
+			 * labels
+			 */
+			blk = cchhb2blk(&label->vol.vtoc, geo) + 1;
+			counter = 0;
+			data = read_dev_sector(bdev, blk * (blocksize/512),
+					       &sect);
+			while (data != NULL) {
+				struct vtoc_format1_label f1;
+
+				memcpy(&f1, data,
+				       sizeof(struct vtoc_format1_label));
+				put_dev_sector(sect);
+
+				/* skip FMT4 / FMT5 / FMT7 labels */
+				if (f1.DS1FMTID == _ascebc['4']
+				    || f1.DS1FMTID == _ascebc['5']
+				    || f1.DS1FMTID == _ascebc['7']) {
+					blk++;
+					data = read_dev_sector(bdev, blk *
+							       (blocksize/512),
+								&sect);
+					continue;
+				}
+
+				/* only FMT1 valid at this point */
+				if (f1.DS1FMTID != _ascebc['1'])
+					break;
+
+				/* OK, we got valid partition data */
+				offset = cchh2blk(&f1.DS1EXT1.llimit, geo);
+				size  = cchh2blk(&f1.DS1EXT1.ulimit, geo) -
+					offset + geo->sectors;
+				if (counter >= state->limit)
+					break;
+				put_partition(state, counter + 1,
+					      offset * (blocksize >> 9),
+					      size * (blocksize >> 9));
+				counter++;
+				blk++;
+				data = read_dev_sector(bdev,
+						       blk * (blocksize/512),
+						       &sect);
+			}
+
+			if (!data)
+				/* Are we not supposed to report this ? */
+				goto out_readerr;
+		} else
+			printk(KERN_WARNING "Warning, expected Label VOL1 not "
+			       "found, treating as CDL formated Disk");
+
 	}
 
 	printk("\n");
diff --git a/fs/pipe.c b/fs/pipe.c
index 3a89592bdf5..d007830d9c8 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -164,6 +164,20 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
 		page_cache_release(page);
 }
 
+/**
+ * generic_pipe_buf_map - virtually map a pipe buffer
+ * @pipe:	the pipe that the buffer belongs to
+ * @buf:	the buffer that should be mapped
+ * @atomic:	whether to use an atomic map
+ *
+ * Description:
+ *	This function returns a kernel virtual address mapping for the
+ *	passed in @pipe_buffer. If @atomic is set, an atomic map is provided
+ *	and the caller has to be careful not to fault before calling
+ *	the unmap function.
+ *
+ *	Note that this function occupies KM_USER0 if @atomic != 0.
+ */
 void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
 			   struct pipe_buffer *buf, int atomic)
 {
@@ -175,6 +189,15 @@ void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
 	return kmap(buf->page);
 }
 
+/**
+ * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer
+ * @pipe:	the pipe that the buffer belongs to
+ * @buf:	the buffer that should be unmapped
+ * @map_data:	the data that the mapping function returned
+ *
+ * Description:
+ *	This function undoes the mapping that ->map() provided.
+ */
 void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
 			    struct pipe_buffer *buf, void *map_data)
 {
@@ -185,11 +208,28 @@ void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
 		kunmap(buf->page);
 }
 
+/**
+ * generic_pipe_buf_steal - attempt to take ownership of a @pipe_buffer
+ * @pipe:	the pipe that the buffer belongs to
+ * @buf:	the buffer to attempt to steal
+ *
+ * Description:
+ *	This function attempts to steal the @struct page attached to
+ *	@buf. If successful, this function returns 0 and returns with
+ *	the page locked. The caller may then reuse the page for whatever
+ *	he wishes, the typical use is insertion into a different file
+ *	page cache.
+ */
 int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
 			   struct pipe_buffer *buf)
 {
 	struct page *page = buf->page;
 
+	/*
+	 * A reference of one is golden, that means that the owner of this
+	 * page is the only one holding a reference to it. lock the page
+	 * and return OK.
+	 */
 	if (page_count(page) == 1) {
 		lock_page(page);
 		return 0;
@@ -198,12 +238,32 @@ int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
 	return 1;
 }
 
-void generic_pipe_buf_get(struct pipe_inode_info *info, struct pipe_buffer *buf)
+/**
+ * generic_pipe_buf_get - get a reference to a @struct pipe_buffer
+ * @pipe:	the pipe that the buffer belongs to
+ * @buf:	the buffer to get a reference to
+ *
+ * Description:
+ *	This function grabs an extra reference to @buf. It's used in
+ *	in the tee() system call, when we duplicate the buffers in one
+ *	pipe into another.
+ */
+void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
 {
 	page_cache_get(buf->page);
 }
 
-int generic_pipe_buf_pin(struct pipe_inode_info *info, struct pipe_buffer *buf)
+/**
+ * generic_pipe_buf_confirm - verify contents of the pipe buffer
+ * @pipe:	the pipe that the buffer belongs to
+ * @buf:	the buffer to confirm
+ *
+ * Description:
+ *	This function does nothing, because the generic pipe code uses
+ *	pages that are always good when inserted into the pipe.
+ */
+int generic_pipe_buf_confirm(struct pipe_inode_info *info,
+			     struct pipe_buffer *buf)
 {
 	return 0;
 }
@@ -212,7 +272,7 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = {
 	.can_merge = 1,
 	.map = generic_pipe_buf_map,
 	.unmap = generic_pipe_buf_unmap,
-	.pin = generic_pipe_buf_pin,
+	.confirm = generic_pipe_buf_confirm,
 	.release = anon_pipe_buf_release,
 	.steal = generic_pipe_buf_steal,
 	.get = generic_pipe_buf_get,
@@ -252,7 +312,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
 			if (chars > total_len)
 				chars = total_len;
 
-			error = ops->pin(pipe, buf);
+			error = ops->confirm(pipe, buf);
 			if (error) {
 				if (!ret)
 					error = ret;
@@ -373,7 +433,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
 			int error, atomic = 1;
 			void *addr;
 
-			error = ops->pin(pipe, buf);
+			error = ops->confirm(pipe, buf);
 			if (error)
 				goto out;
 
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 74f30e0c038..9cbab7e9355 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -165,7 +165,6 @@ static inline char * task_state(struct task_struct *p, char *buffer)
 	rcu_read_lock();
 	buffer += sprintf(buffer,
 		"State:\t%s\n"
-		"SleepAVG:\t%lu%%\n"
 		"Tgid:\t%d\n"
 		"Pid:\t%d\n"
 		"PPid:\t%d\n"
@@ -173,7 +172,6 @@ static inline char * task_state(struct task_struct *p, char *buffer)
 		"Uid:\t%d\t%d\t%d\t%d\n"
 		"Gid:\t%d\t%d\t%d\t%d\n",
 		get_task_state(p),
-		(p->sleep_avg/1024)*100/(1020000000/1024),
 	       	p->tgid, p->pid,
 	       	pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0,
 		pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0,
@@ -291,6 +289,15 @@ static inline char *task_cap(struct task_struct *p, char *buffer)
 			    cap_t(p->cap_effective));
 }
 
+static inline char *task_context_switch_counts(struct task_struct *p,
+						char *buffer)
+{
+	return buffer + sprintf(buffer, "voluntary_ctxt_switches:\t%lu\n"
+			    "nonvoluntary_ctxt_switches:\t%lu\n",
+			    p->nvcsw,
+			    p->nivcsw);
+}
+
 int proc_pid_status(struct task_struct *task, char * buffer)
 {
 	char * orig = buffer;
@@ -309,9 +316,45 @@ int proc_pid_status(struct task_struct *task, char * buffer)
 #if defined(CONFIG_S390)
 	buffer = task_show_regs(task, buffer);
 #endif
+	buffer = task_context_switch_counts(task, buffer);
 	return buffer - orig;
 }
 
+static clock_t task_utime(struct task_struct *p)
+{
+	clock_t utime = cputime_to_clock_t(p->utime),
+		total = utime + cputime_to_clock_t(p->stime);
+	u64 temp;
+
+	/*
+	 * Use CFS's precise accounting:
+	 */
+	temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
+
+	if (total) {
+		temp *= utime;
+		do_div(temp, total);
+	}
+	utime = (clock_t)temp;
+
+	return utime;
+}
+
+static clock_t task_stime(struct task_struct *p)
+{
+	clock_t stime = cputime_to_clock_t(p->stime);
+
+	/*
+	 * Use CFS's precise accounting. (we subtract utime from
+	 * the total, to make sure the total observed by userspace
+	 * grows monotonically - apps rely on that):
+	 */
+	stime = nsec_to_clock_t(p->se.sum_exec_runtime) - task_utime(p);
+
+	return stime;
+}
+
+
 static int do_task_stat(struct task_struct *task, char * buffer, int whole)
 {
 	unsigned long vsize, eip, esp, wchan = ~0UL;
@@ -326,7 +369,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
 	unsigned long long start_time;
 	unsigned long cmin_flt = 0, cmaj_flt = 0;
 	unsigned long  min_flt = 0,  maj_flt = 0;
-	cputime_t cutime, cstime, utime, stime;
+	cputime_t cutime, cstime;
+	clock_t utime, stime;
 	unsigned long rsslim = 0;
 	char tcomm[sizeof(task->comm)];
 	unsigned long flags;
@@ -344,7 +388,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
 
 	sigemptyset(&sigign);
 	sigemptyset(&sigcatch);
-	cutime = cstime = utime = stime = cputime_zero;
+	cutime = cstime = cputime_zero;
+	utime = stime = 0;
 
 	rcu_read_lock();
 	if (lock_task_sighand(task, &flags)) {
@@ -370,15 +415,15 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
 			do {
 				min_flt += t->min_flt;
 				maj_flt += t->maj_flt;
-				utime = cputime_add(utime, t->utime);
-				stime = cputime_add(stime, t->stime);
+				utime += task_utime(t);
+				stime += task_stime(t);
 				t = next_thread(t);
 			} while (t != task);
 
 			min_flt += sig->min_flt;
 			maj_flt += sig->maj_flt;
-			utime = cputime_add(utime, sig->utime);
-			stime = cputime_add(stime, sig->stime);
+			utime += cputime_to_clock_t(sig->utime);
+			stime += cputime_to_clock_t(sig->stime);
 		}
 
 		sid = signal_session(sig);
@@ -394,8 +439,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
 	if (!whole) {
 		min_flt = task->min_flt;
 		maj_flt = task->maj_flt;
-		utime = task->utime;
-		stime = task->stime;
+		utime = task_utime(task);
+		stime = task_stime(task);
 	}
 
 	/* scale priority and nice values from timeslices to -20..20 */
@@ -405,8 +450,9 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
 
 	/* Temporary variable needed for gcc-2.96 */
 	/* convert timespec -> nsec*/
-	start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC
-				+ task->start_time.tv_nsec;
+	start_time =
+		(unsigned long long)task->real_start_time.tv_sec * NSEC_PER_SEC
+				+ task->real_start_time.tv_nsec;
 	/* convert nsec -> ticks */
 	start_time = nsec_to_clock_t(start_time);
 
@@ -426,8 +472,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
 		cmin_flt,
 		maj_flt,
 		cmaj_flt,
-		cputime_to_clock_t(utime),
-		cputime_to_clock_t(stime),
+		utime,
+		stime,
 		cputime_to_clock_t(cutime),
 		cputime_to_clock_t(cstime),
 		priority,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a5fa1fdafc4..ae3627337a9 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -67,7 +67,6 @@
 #include <linux/mount.h>
 #include <linux/security.h>
 #include <linux/ptrace.h>
-#include <linux/seccomp.h>
 #include <linux/cpuset.h>
 #include <linux/audit.h>
 #include <linux/poll.h>
@@ -204,12 +203,17 @@ static int proc_pid_environ(struct task_struct *task, char * buffer)
 	int res = 0;
 	struct mm_struct *mm = get_task_mm(task);
 	if (mm) {
-		unsigned int len = mm->env_end - mm->env_start;
+		unsigned int len;
+
+		res = -ESRCH;
+		if (!ptrace_may_attach(task))
+			goto out;
+
+		len  = mm->env_end - mm->env_start;
 		if (len > PAGE_SIZE)
 			len = PAGE_SIZE;
 		res = access_process_vm(task, mm->env_start, buffer, len, 0);
-		if (!ptrace_may_attach(task))
-			res = -ESRCH;
+out:
 		mmput(mm);
 	}
 	return res;
@@ -296,7 +300,7 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer)
  */
 static int proc_pid_schedstat(struct task_struct *task, char *buffer)
 {
-	return sprintf(buffer, "%lu %lu %lu\n",
+	return sprintf(buffer, "%llu %llu %lu\n",
 			task->sched_info.cpu_time,
 			task->sched_info.run_delay,
 			task->sched_info.pcnt);
@@ -812,71 +816,6 @@ static const struct file_operations proc_loginuid_operations = {
 };
 #endif
 
-#ifdef CONFIG_SECCOMP
-static ssize_t seccomp_read(struct file *file, char __user *buf,
-			    size_t count, loff_t *ppos)
-{
-	struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode);
-	char __buf[20];
-	size_t len;
-
-	if (!tsk)
-		return -ESRCH;
-	/* no need to print the trailing zero, so use only len */
-	len = sprintf(__buf, "%u\n", tsk->seccomp.mode);
-	put_task_struct(tsk);
-
-	return simple_read_from_buffer(buf, count, ppos, __buf, len);
-}
-
-static ssize_t seccomp_write(struct file *file, const char __user *buf,
-			     size_t count, loff_t *ppos)
-{
-	struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode);
-	char __buf[20], *end;
-	unsigned int seccomp_mode;
-	ssize_t result;
-
-	result = -ESRCH;
-	if (!tsk)
-		goto out_no_task;
-
-	/* can set it only once to be even more secure */
-	result = -EPERM;
-	if (unlikely(tsk->seccomp.mode))
-		goto out;
-
-	result = -EFAULT;
-	memset(__buf, 0, sizeof(__buf));
-	count = min(count, sizeof(__buf) - 1);
-	if (copy_from_user(__buf, buf, count))
-		goto out;
-
-	seccomp_mode = simple_strtoul(__buf, &end, 0);
-	if (*end == '\n')
-		end++;
-	result = -EINVAL;
-	if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) {
-		tsk->seccomp.mode = seccomp_mode;
-		set_tsk_thread_flag(tsk, TIF_SECCOMP);
-	} else
-		goto out;
-	result = -EIO;
-	if (unlikely(!(end - __buf)))
-		goto out;
-	result = end - __buf;
-out:
-	put_task_struct(tsk);
-out_no_task:
-	return result;
-}
-
-static const struct file_operations proc_seccomp_operations = {
-	.read		= seccomp_read,
-	.write		= seccomp_write,
-};
-#endif /* CONFIG_SECCOMP */
-
 #ifdef CONFIG_FAULT_INJECTION
 static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
 				      size_t count, loff_t *ppos)
@@ -929,6 +868,69 @@ static const struct file_operations proc_fault_inject_operations = {
 };
 #endif
 
+#ifdef CONFIG_SCHED_DEBUG
+/*
+ * Print out various scheduling related per-task fields:
+ */
+static int sched_show(struct seq_file *m, void *v)
+{
+	struct inode *inode = m->private;
+	struct task_struct *p;
+
+	WARN_ON(!inode);
+
+	p = get_proc_task(inode);
+	if (!p)
+		return -ESRCH;
+	proc_sched_show_task(p, m);
+
+	put_task_struct(p);
+
+	return 0;
+}
+
+static ssize_t
+sched_write(struct file *file, const char __user *buf,
+	    size_t count, loff_t *offset)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct task_struct *p;
+
+	WARN_ON(!inode);
+
+	p = get_proc_task(inode);
+	if (!p)
+		return -ESRCH;
+	proc_sched_set_task(p);
+
+	put_task_struct(p);
+
+	return count;
+}
+
+static int sched_open(struct inode *inode, struct file *filp)
+{
+	int ret;
+
+	ret = single_open(filp, sched_show, NULL);
+	if (!ret) {
+		struct seq_file *m = filp->private_data;
+
+		m->private = inode;
+	}
+	return ret;
+}
+
+static const struct file_operations proc_pid_sched_operations = {
+	.open		= sched_open,
+	.read		= seq_read,
+	.write		= sched_write,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+#endif
+
 static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	struct inode *inode = dentry->d_inode;
@@ -1963,6 +1965,9 @@ static const struct pid_entry tgid_base_stuff[] = {
 	INF("environ",    S_IRUSR, pid_environ),
 	INF("auxv",       S_IRUSR, pid_auxv),
 	INF("status",     S_IRUGO, pid_status),
+#ifdef CONFIG_SCHED_DEBUG
+	REG("sched",      S_IRUGO|S_IWUSR, pid_sched),
+#endif
 	INF("cmdline",    S_IRUGO, pid_cmdline),
 	INF("stat",       S_IRUGO, tgid_stat),
 	INF("statm",      S_IRUGO, pid_statm),
@@ -1971,9 +1976,6 @@ static const struct pid_entry tgid_base_stuff[] = {
 	REG("numa_maps",  S_IRUGO, numa_maps),
 #endif
 	REG("mem",        S_IRUSR|S_IWUSR, mem),
-#ifdef CONFIG_SECCOMP
-	REG("seccomp",    S_IRUSR|S_IWUSR, seccomp),
-#endif
 	LNK("cwd",        cwd),
 	LNK("root",       root),
 	LNK("exe",        exe),
@@ -2247,6 +2249,9 @@ static const struct pid_entry tid_base_stuff[] = {
 	INF("environ",   S_IRUSR, pid_environ),
 	INF("auxv",      S_IRUSR, pid_auxv),
 	INF("status",    S_IRUGO, pid_status),
+#ifdef CONFIG_SCHED_DEBUG
+	REG("sched",     S_IRUGO|S_IWUSR, pid_sched),
+#endif
 	INF("cmdline",   S_IRUGO, pid_cmdline),
 	INF("stat",      S_IRUGO, tid_stat),
 	INF("statm",     S_IRUGO, pid_statm),
@@ -2255,9 +2260,6 @@ static const struct pid_entry tid_base_stuff[] = {
 	REG("numa_maps", S_IRUGO, numa_maps),
 #endif
 	REG("mem",       S_IRUSR|S_IWUSR, mem),
-#ifdef CONFIG_SECCOMP
-	REG("seccomp",   S_IRUSR|S_IWUSR, seccomp),
-#endif
 	LNK("cwd",       cwd),
 	LNK("root",      root),
 	LNK("exe",       exe),
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 8a40e15f5ec..b5e7155d30d 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -20,6 +20,7 @@
 #include <linux/namei.h>
 #include <linux/bitops.h>
 #include <linux/spinlock.h>
+#include <linux/completion.h>
 #include <asm/uaccess.h>
 
 #include "internal.h"
@@ -529,12 +530,6 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
 		return -EAGAIN;
 	dp->low_ino = i;
 
-	spin_lock(&proc_subdir_lock);
-	dp->next = dir->subdir;
-	dp->parent = dir;
-	dir->subdir = dp;
-	spin_unlock(&proc_subdir_lock);
-
 	if (S_ISDIR(dp->mode)) {
 		if (dp->proc_iops == NULL) {
 			dp->proc_fops = &proc_dir_operations;
@@ -550,6 +545,13 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
 		if (dp->proc_iops == NULL)
 			dp->proc_iops = &proc_file_inode_operations;
 	}
+
+	spin_lock(&proc_subdir_lock);
+	dp->next = dir->subdir;
+	dp->parent = dir;
+	dir->subdir = dp;
+	spin_unlock(&proc_subdir_lock);
+
 	return 0;
 }
 
@@ -613,6 +615,9 @@ static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent,
 	ent->namelen = len;
 	ent->mode = mode;
 	ent->nlink = nlink;
+	ent->pde_users = 0;
+	spin_lock_init(&ent->pde_unload_lock);
+	ent->pde_unload_completion = NULL;
  out:
 	return ent;
 }
@@ -649,9 +654,6 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
 
 	ent = proc_create(&parent, name, S_IFDIR | mode, 2);
 	if (ent) {
-		ent->proc_fops = &proc_dir_operations;
-		ent->proc_iops = &proc_dir_inode_operations;
-
 		if (proc_register(parent, ent) < 0) {
 			kfree(ent);
 			ent = NULL;
@@ -686,10 +688,6 @@ struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
 
 	ent = proc_create(&parent,name,mode,nlink);
 	if (ent) {
-		if (S_ISDIR(mode)) {
-			ent->proc_fops = &proc_dir_operations;
-			ent->proc_iops = &proc_dir_inode_operations;
-		}
 		if (proc_register(parent, ent) < 0) {
 			kfree(ent);
 			ent = NULL;
@@ -734,9 +732,35 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
 		de = *p;
 		*p = de->next;
 		de->next = NULL;
+
+		spin_lock(&de->pde_unload_lock);
+		/*
+		 * Stop accepting new callers into module. If you're
+		 * dynamically allocating ->proc_fops, save a pointer somewhere.
+		 */
+		de->proc_fops = NULL;
+		/* Wait until all existing callers into module are done. */
+		if (de->pde_users > 0) {
+			DECLARE_COMPLETION_ONSTACK(c);
+
+			if (!de->pde_unload_completion)
+				de->pde_unload_completion = &c;
+
+			spin_unlock(&de->pde_unload_lock);
+			spin_unlock(&proc_subdir_lock);
+
+			wait_for_completion(de->pde_unload_completion);
+
+			spin_lock(&proc_subdir_lock);
+			goto continue_removing;
+		}
+		spin_unlock(&de->pde_unload_lock);
+
+continue_removing:
 		if (S_ISDIR(de->mode))
 			parent->nlink--;
-		proc_kill_inodes(de);
+		if (!S_ISREG(de->mode))
+			proc_kill_inodes(de);
 		de->nlink = 0;
 		WARN_ON(de->subdir);
 		if (!atomic_read(&de->count))
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index d5ce65c68d7..dd28e86ab42 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -10,6 +10,7 @@
 #include <linux/mm.h>
 #include <linux/string.h>
 #include <linux/stat.h>
+#include <linux/completion.h>
 #include <linux/file.h>
 #include <linux/limits.h>
 #include <linux/init.h>
@@ -140,6 +141,251 @@ static const struct super_operations proc_sops = {
 	.remount_fs	= proc_remount,
 };
 
+static void pde_users_dec(struct proc_dir_entry *pde)
+{
+	spin_lock(&pde->pde_unload_lock);
+	pde->pde_users--;
+	if (pde->pde_unload_completion && pde->pde_users == 0)
+		complete(pde->pde_unload_completion);
+	spin_unlock(&pde->pde_unload_lock);
+}
+
+static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
+{
+	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+	loff_t rv = -EINVAL;
+	loff_t (*llseek)(struct file *, loff_t, int);
+
+	spin_lock(&pde->pde_unload_lock);
+	/*
+	 * remove_proc_entry() is going to delete PDE (as part of module
+	 * cleanup sequence). No new callers into module allowed.
+	 */
+	if (!pde->proc_fops) {
+		spin_unlock(&pde->pde_unload_lock);
+		return rv;
+	}
+	/*
+	 * Bump refcount so that remove_proc_entry will wail for ->llseek to
+	 * complete.
+	 */
+	pde->pde_users++;
+	/*
+	 * Save function pointer under lock, to protect against ->proc_fops
+	 * NULL'ifying right after ->pde_unload_lock is dropped.
+	 */
+	llseek = pde->proc_fops->llseek;
+	spin_unlock(&pde->pde_unload_lock);
+
+	if (!llseek)
+		llseek = default_llseek;
+	rv = llseek(file, offset, whence);
+
+	pde_users_dec(pde);
+	return rv;
+}
+
+static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+{
+	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+	ssize_t rv = -EIO;
+	ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
+
+	spin_lock(&pde->pde_unload_lock);
+	if (!pde->proc_fops) {
+		spin_unlock(&pde->pde_unload_lock);
+		return rv;
+	}
+	pde->pde_users++;
+	read = pde->proc_fops->read;
+	spin_unlock(&pde->pde_unload_lock);
+
+	if (read)
+		rv = read(file, buf, count, ppos);
+
+	pde_users_dec(pde);
+	return rv;
+}
+
+static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
+{
+	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+	ssize_t rv = -EIO;
+	ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
+
+	spin_lock(&pde->pde_unload_lock);
+	if (!pde->proc_fops) {
+		spin_unlock(&pde->pde_unload_lock);
+		return rv;
+	}
+	pde->pde_users++;
+	write = pde->proc_fops->write;
+	spin_unlock(&pde->pde_unload_lock);
+
+	if (write)
+		rv = write(file, buf, count, ppos);
+
+	pde_users_dec(pde);
+	return rv;
+}
+
+static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *pts)
+{
+	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+	unsigned int rv = 0;
+	unsigned int (*poll)(struct file *, struct poll_table_struct *);
+
+	spin_lock(&pde->pde_unload_lock);
+	if (!pde->proc_fops) {
+		spin_unlock(&pde->pde_unload_lock);
+		return rv;
+	}
+	pde->pde_users++;
+	poll = pde->proc_fops->poll;
+	spin_unlock(&pde->pde_unload_lock);
+
+	if (poll)
+		rv = poll(file, pts);
+
+	pde_users_dec(pde);
+	return rv;
+}
+
+static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+	long rv = -ENOTTY;
+	long (*unlocked_ioctl)(struct file *, unsigned int, unsigned long);
+	int (*ioctl)(struct inode *, struct file *, unsigned int, unsigned long);
+
+	spin_lock(&pde->pde_unload_lock);
+	if (!pde->proc_fops) {
+		spin_unlock(&pde->pde_unload_lock);
+		return rv;
+	}
+	pde->pde_users++;
+	unlocked_ioctl = pde->proc_fops->unlocked_ioctl;
+	ioctl = pde->proc_fops->ioctl;
+	spin_unlock(&pde->pde_unload_lock);
+
+	if (unlocked_ioctl) {
+		rv = unlocked_ioctl(file, cmd, arg);
+		if (rv == -ENOIOCTLCMD)
+			rv = -EINVAL;
+	} else if (ioctl) {
+		lock_kernel();
+		rv = ioctl(file->f_path.dentry->d_inode, file, cmd, arg);
+		unlock_kernel();
+	}
+
+	pde_users_dec(pde);
+	return rv;
+}
+
+#ifdef CONFIG_COMPAT
+static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+	long rv = -ENOTTY;
+	long (*compat_ioctl)(struct file *, unsigned int, unsigned long);
+
+	spin_lock(&pde->pde_unload_lock);
+	if (!pde->proc_fops) {
+		spin_unlock(&pde->pde_unload_lock);
+		return rv;
+	}
+	pde->pde_users++;
+	compat_ioctl = pde->proc_fops->compat_ioctl;
+	spin_unlock(&pde->pde_unload_lock);
+
+	if (compat_ioctl)
+		rv = compat_ioctl(file, cmd, arg);
+
+	pde_users_dec(pde);
+	return rv;
+}
+#endif
+
+static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+	int rv = -EIO;
+	int (*mmap)(struct file *, struct vm_area_struct *);
+
+	spin_lock(&pde->pde_unload_lock);
+	if (!pde->proc_fops) {
+		spin_unlock(&pde->pde_unload_lock);
+		return rv;
+	}
+	pde->pde_users++;
+	mmap = pde->proc_fops->mmap;
+	spin_unlock(&pde->pde_unload_lock);
+
+	if (mmap)
+		rv = mmap(file, vma);
+
+	pde_users_dec(pde);
+	return rv;
+}
+
+static int proc_reg_open(struct inode *inode, struct file *file)
+{
+	struct proc_dir_entry *pde = PDE(inode);
+	int rv = 0;
+	int (*open)(struct inode *, struct file *);
+
+	spin_lock(&pde->pde_unload_lock);
+	if (!pde->proc_fops) {
+		spin_unlock(&pde->pde_unload_lock);
+		return rv;
+	}
+	pde->pde_users++;
+	open = pde->proc_fops->open;
+	spin_unlock(&pde->pde_unload_lock);
+
+	if (open)
+		rv = open(inode, file);
+
+	pde_users_dec(pde);
+	return rv;
+}
+
+static int proc_reg_release(struct inode *inode, struct file *file)
+{
+	struct proc_dir_entry *pde = PDE(inode);
+	int rv = 0;
+	int (*release)(struct inode *, struct file *);
+
+	spin_lock(&pde->pde_unload_lock);
+	if (!pde->proc_fops) {
+		spin_unlock(&pde->pde_unload_lock);
+		return rv;
+	}
+	pde->pde_users++;
+	release = pde->proc_fops->release;
+	spin_unlock(&pde->pde_unload_lock);
+
+	if (release)
+		rv = release(inode, file);
+
+	pde_users_dec(pde);
+	return rv;
+}
+
+static const struct file_operations proc_reg_file_ops = {
+	.llseek		= proc_reg_llseek,
+	.read		= proc_reg_read,
+	.write		= proc_reg_write,
+	.poll		= proc_reg_poll,
+	.unlocked_ioctl	= proc_reg_unlocked_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= proc_reg_compat_ioctl,
+#endif
+	.mmap		= proc_reg_mmap,
+	.open		= proc_reg_open,
+	.release	= proc_reg_release,
+};
+
 struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
 				struct proc_dir_entry *de)
 {
@@ -166,8 +412,12 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
 			inode->i_nlink = de->nlink;
 		if (de->proc_iops)
 			inode->i_op = de->proc_iops;
-		if (de->proc_fops)
-			inode->i_fop = de->proc_fops;
+		if (de->proc_fops) {
+			if (S_ISREG(inode->i_mode))
+				inode->i_fop = &proc_reg_file_ops;
+			else
+				inode->i_fop = de->proc_fops;
+		}
 	}
 
 	return inode;
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 5fd49e47f83..d24b8d46059 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -105,6 +105,7 @@ static int uptime_read_proc(char *page, char **start, off_t off,
 	cputime_t idletime = cputime_add(init_task.utime, init_task.stime);
 
 	do_posix_clock_monotonic_gettime(&uptime);
+	monotonic_to_bootbased(&uptime);
 	cputime_to_timespec(idletime, &idle);
 	len = sprintf(page,"%lu.%02lu %lu.%02lu\n",
 			(unsigned long) uptime.tv_sec,
@@ -443,12 +444,12 @@ static int show_stat(struct seq_file *p, void *v)
 	unsigned long jif;
 	cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
 	u64 sum = 0;
+	struct timespec boottime;
 
 	user = nice = system = idle = iowait =
 		irq = softirq = steal = cputime64_zero;
-	jif = - wall_to_monotonic.tv_sec;
-	if (wall_to_monotonic.tv_nsec)
-		--jif;
+	getboottime(&boottime);
+	jif = boottime.tv_sec;
 
 	for_each_possible_cpu(i) {
 		int j;
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index b3a473b0a19..22846225acf 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -69,7 +69,7 @@ static void show_tty_range(struct seq_file *m, struct tty_driver *p,
 
 static int show_tty_driver(struct seq_file *m, void *v)
 {
-	struct tty_driver *p = v;
+	struct tty_driver *p = list_entry(v, struct tty_driver, tty_drivers);
 	dev_t from = MKDEV(p->major, p->minor_start);
 	dev_t to = from + p->num;
 
@@ -106,22 +106,13 @@ static int show_tty_driver(struct seq_file *m, void *v)
 /* iterator */
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
-	struct list_head *p;
-	loff_t l = *pos;
-
 	mutex_lock(&tty_mutex);
-	list_for_each(p, &tty_drivers)
-		if (!l--)
-			return list_entry(p, struct tty_driver, tty_drivers);
-	return NULL;
+	return seq_list_start(&tty_drivers, *pos);
 }
 
 static void *t_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct list_head *p = ((struct tty_driver *)v)->tty_drivers.next;
-	(*pos)++;
-	return p==&tty_drivers ? NULL :
-			list_entry(p, struct tty_driver, tty_drivers);
+	return seq_list_next(v, &tty_drivers, pos);
 }
 
 static void t_stop(struct seq_file *m, void *v)
diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c
index 44649981bbc..867f42b0203 100644
--- a/fs/qnx4/file.c
+++ b/fs/qnx4/file.c
@@ -25,7 +25,7 @@ const struct file_operations qnx4_file_operations =
 	.read		= do_sync_read,
 	.aio_read	= generic_file_aio_read,
 	.mmap		= generic_file_mmap,
-	.sendfile	= generic_file_sendfile,
+	.splice_read	= generic_file_splice_read,
 #ifdef CONFIG_QNX4FS_RW
 	.write		= do_sync_write,
 	.aio_write	= generic_file_aio_write,
diff --git a/fs/quota.c b/fs/quota.c
index 9f237d6182c..e6577ac15a6 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -10,12 +10,14 @@
 #include <linux/slab.h>
 #include <asm/current.h>
 #include <asm/uaccess.h>
+#include <linux/compat.h>
 #include <linux/kernel.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/buffer_head.h>
 #include <linux/capability.h>
 #include <linux/quotaops.h>
+#include <linux/types.h>
 
 /* Check validity of generic quotactl commands */
 static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t id)
@@ -384,3 +386,119 @@ asmlinkage long sys_quotactl(unsigned int cmd, const char __user *special, qid_t
 
 	return ret;
 }
+
+#if defined(CONFIG_X86_64) || defined(CONFIG_IA64)
+/*
+ * This code works only for 32 bit quota tools over 64 bit OS (x86_64, ia64)
+ * and is necessary due to alignment problems.
+ */
+struct compat_if_dqblk {
+	compat_u64 dqb_bhardlimit;
+	compat_u64 dqb_bsoftlimit;
+	compat_u64 dqb_curspace;
+	compat_u64 dqb_ihardlimit;
+	compat_u64 dqb_isoftlimit;
+	compat_u64 dqb_curinodes;
+	compat_u64 dqb_btime;
+	compat_u64 dqb_itime;
+	compat_uint_t dqb_valid;
+};
+
+/* XFS structures */
+struct compat_fs_qfilestat {
+	compat_u64 dqb_bhardlimit;
+	compat_u64 qfs_nblks;
+	compat_uint_t qfs_nextents;
+};
+
+struct compat_fs_quota_stat {
+	__s8		qs_version;
+	__u16		qs_flags;
+	__s8		qs_pad;
+	struct compat_fs_qfilestat	qs_uquota;
+	struct compat_fs_qfilestat	qs_gquota;
+	compat_uint_t	qs_incoredqs;
+	compat_int_t	qs_btimelimit;
+	compat_int_t	qs_itimelimit;
+	compat_int_t	qs_rtbtimelimit;
+	__u16		qs_bwarnlimit;
+	__u16		qs_iwarnlimit;
+};
+
+asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
+						qid_t id, void __user *addr)
+{
+	unsigned int cmds;
+	struct if_dqblk __user *dqblk;
+	struct compat_if_dqblk __user *compat_dqblk;
+	struct fs_quota_stat __user *fsqstat;
+	struct compat_fs_quota_stat __user *compat_fsqstat;
+	compat_uint_t data;
+	u16 xdata;
+	long ret;
+
+	cmds = cmd >> SUBCMDSHIFT;
+
+	switch (cmds) {
+	case Q_GETQUOTA:
+		dqblk = compat_alloc_user_space(sizeof(struct if_dqblk));
+		compat_dqblk = addr;
+		ret = sys_quotactl(cmd, special, id, dqblk);
+		if (ret)
+			break;
+		if (copy_in_user(compat_dqblk, dqblk, sizeof(*compat_dqblk)) ||
+			get_user(data, &dqblk->dqb_valid) ||
+			put_user(data, &compat_dqblk->dqb_valid))
+			ret = -EFAULT;
+		break;
+	case Q_SETQUOTA:
+		dqblk = compat_alloc_user_space(sizeof(struct if_dqblk));
+		compat_dqblk = addr;
+		ret = -EFAULT;
+		if (copy_in_user(dqblk, compat_dqblk, sizeof(*compat_dqblk)) ||
+			get_user(data, &compat_dqblk->dqb_valid) ||
+			put_user(data, &dqblk->dqb_valid))
+			break;
+		ret = sys_quotactl(cmd, special, id, dqblk);
+		break;
+	case Q_XGETQSTAT:
+		fsqstat = compat_alloc_user_space(sizeof(struct fs_quota_stat));
+		compat_fsqstat = addr;
+		ret = sys_quotactl(cmd, special, id, fsqstat);
+		if (ret)
+			break;
+		ret = -EFAULT;
+		/* Copying qs_version, qs_flags, qs_pad */
+		if (copy_in_user(compat_fsqstat, fsqstat,
+			offsetof(struct compat_fs_quota_stat, qs_uquota)))
+			break;
+		/* Copying qs_uquota */
+		if (copy_in_user(&compat_fsqstat->qs_uquota,
+			&fsqstat->qs_uquota,
+			sizeof(compat_fsqstat->qs_uquota)) ||
+			get_user(data, &fsqstat->qs_uquota.qfs_nextents) ||
+			put_user(data, &compat_fsqstat->qs_uquota.qfs_nextents))
+			break;
+		/* Copying qs_gquota */
+		if (copy_in_user(&compat_fsqstat->qs_gquota,
+			&fsqstat->qs_gquota,
+			sizeof(compat_fsqstat->qs_gquota)) ||
+			get_user(data, &fsqstat->qs_gquota.qfs_nextents) ||
+			put_user(data, &compat_fsqstat->qs_gquota.qfs_nextents))
+			break;
+		/* Copying the rest */
+		if (copy_in_user(&compat_fsqstat->qs_incoredqs,
+			&fsqstat->qs_incoredqs,
+			sizeof(struct compat_fs_quota_stat) -
+			offsetof(struct compat_fs_quota_stat, qs_incoredqs)) ||
+			get_user(xdata, &fsqstat->qs_iwarnlimit) ||
+			put_user(xdata, &compat_fsqstat->qs_iwarnlimit))
+			break;
+		ret = 0;
+		break;
+	default:
+		ret = sys_quotactl(cmd, special, id, addr);
+	}
+	return ret;
+}
+#endif
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 2f14774a124..97bdc0b2f9d 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -41,7 +41,7 @@ const struct file_operations ramfs_file_operations = {
 	.aio_write	= generic_file_aio_write,
 	.mmap		= generic_file_mmap,
 	.fsync		= simple_sync_file,
-	.sendfile	= generic_file_sendfile,
+	.splice_read	= generic_file_splice_read,
 	.llseek		= generic_file_llseek,
 };
 
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 5d258c40a2f..cad2b7ace63 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -42,7 +42,7 @@ const struct file_operations ramfs_file_operations = {
 	.write			= do_sync_write,
 	.aio_write		= generic_file_aio_write,
 	.fsync			= simple_sync_file,
-	.sendfile		= generic_file_sendfile,
+	.splice_read		= generic_file_splice_read,
 	.llseek			= generic_file_llseek,
 };
 
diff --git a/fs/read_write.c b/fs/read_write.c
index 4d03008f015..507ddff48a9 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/syscalls.h>
 #include <linux/pagemap.h>
+#include <linux/splice.h>
 #include "read_write.h"
 
 #include <asm/uaccess.h>
@@ -25,7 +26,7 @@ const struct file_operations generic_ro_fops = {
 	.read		= do_sync_read,
 	.aio_read	= generic_file_aio_read,
 	.mmap		= generic_file_readonly_mmap,
-	.sendfile	= generic_file_sendfile,
+	.splice_read	= generic_file_splice_read,
 };
 
 EXPORT_SYMBOL(generic_ro_fops);
@@ -708,7 +709,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 	struct inode * in_inode, * out_inode;
 	loff_t pos;
 	ssize_t retval;
-	int fput_needed_in, fput_needed_out;
+	int fput_needed_in, fput_needed_out, fl;
 
 	/*
 	 * Get input file, and verify that it is ok..
@@ -723,7 +724,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 	in_inode = in_file->f_path.dentry->d_inode;
 	if (!in_inode)
 		goto fput_in;
-	if (!in_file->f_op || !in_file->f_op->sendfile)
+	if (!in_file->f_op || !in_file->f_op->splice_read)
 		goto fput_in;
 	retval = -ESPIPE;
 	if (!ppos)
@@ -776,7 +777,18 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 		count = max - pos;
 	}
 
-	retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file);
+	fl = 0;
+#if 0
+	/*
+	 * We need to debate whether we can enable this or not. The
+	 * man page documents EAGAIN return for the output at least,
+	 * and the application is arguably buggy if it doesn't expect
+	 * EAGAIN on a non-blocking file descriptor.
+	 */
+	if (in_file->f_flags & O_NONBLOCK)
+		fl = SPLICE_F_NONBLOCK;
+#endif
+	retval = do_splice_direct(in_file, ppos, out_file, count, fl);
 
 	if (retval > 0) {
 		add_rchar(current, retval);
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 9e451a68580..2070aeee2a5 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -1305,7 +1305,6 @@ static ssize_t reiserfs_file_write(struct file *file,	/* the file we are going t
 	if (get_inode_item_key_version (inode) == KEY_FORMAT_3_5 &&
 	    *ppos + count > MAX_NON_LFS) {
 		if (*ppos >= MAX_NON_LFS) {
-			send_sig(SIGXFSZ, current, 0);
 			return -EFBIG;
 		}
 		if (count > MAX_NON_LFS - (unsigned long)*ppos)
@@ -1531,7 +1530,6 @@ const struct file_operations reiserfs_file_operations = {
 	.open = generic_file_open,
 	.release = reiserfs_file_release,
 	.fsync = reiserfs_sync_file,
-	.sendfile = generic_file_sendfile,
 	.aio_read = generic_file_aio_read,
 	.aio_write = generic_file_aio_write,
 	.splice_read = generic_file_splice_read,
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 0ac22af7afe..bbb19be260c 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -177,21 +177,23 @@ EXPORT_SYMBOL(seq_read);
 
 static int traverse(struct seq_file *m, loff_t offset)
 {
-	loff_t pos = 0;
+	loff_t pos = 0, index;
 	int error = 0;
 	void *p;
 
 	m->version = 0;
-	m->index = 0;
+	index = 0;
 	m->count = m->from = 0;
-	if (!offset)
+	if (!offset) {
+		m->index = index;
 		return 0;
+	}
 	if (!m->buf) {
 		m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
 		if (!m->buf)
 			return -ENOMEM;
 	}
-	p = m->op->start(m, &m->index);
+	p = m->op->start(m, &index);
 	while (p) {
 		error = PTR_ERR(p);
 		if (IS_ERR(p))
@@ -204,15 +206,17 @@ static int traverse(struct seq_file *m, loff_t offset)
 		if (pos + m->count > offset) {
 			m->from = offset - pos;
 			m->count -= m->from;
+			m->index = index;
 			break;
 		}
 		pos += m->count;
 		m->count = 0;
 		if (pos == offset) {
-			m->index++;
+			index++;
+			m->index = index;
 			break;
 		}
-		p = m->op->next(m, p, &m->index);
+		p = m->op->next(m, p, &index);
 	}
 	m->op->stop(m, p);
 	return error;
@@ -260,8 +264,8 @@ loff_t seq_lseek(struct file *file, loff_t offset, int origin)
 				}
 			}
 	}
-	mutex_unlock(&m->lock);
 	file->f_version = m->version;
+	mutex_unlock(&m->lock);
 	return retval;
 }
 EXPORT_SYMBOL(seq_lseek);
@@ -447,3 +451,37 @@ int seq_puts(struct seq_file *m, const char *s)
 	return -1;
 }
 EXPORT_SYMBOL(seq_puts);
+
+struct list_head *seq_list_start(struct list_head *head, loff_t pos)
+{
+	struct list_head *lh;
+
+	list_for_each(lh, head)
+		if (pos-- == 0)
+			return lh;
+
+	return NULL;
+}
+
+EXPORT_SYMBOL(seq_list_start);
+
+struct list_head *seq_list_start_head(struct list_head *head, loff_t pos)
+{
+	if (!pos)
+		return head;
+
+	return seq_list_start(head, pos - 1);
+}
+
+EXPORT_SYMBOL(seq_list_start_head);
+
+struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos)
+{
+	struct list_head *lh;
+
+	lh = ((struct list_head *)v)->next;
+	++*ppos;
+	return lh == head ? NULL : lh;
+}
+
+EXPORT_SYMBOL(seq_list_next);
diff --git a/fs/signalfd.c b/fs/signalfd.c
index f1da89203a9..3b07f26d984 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -133,7 +133,8 @@ static unsigned int signalfd_poll(struct file *file, poll_table *wait)
 	 * the peer disconnects.
 	 */
 	if (signalfd_lock(ctx, &lk)) {
-		if (next_signal(&lk.tsk->pending, &ctx->sigmask) > 0 ||
+		if ((lk.tsk == current &&
+		     next_signal(&lk.tsk->pending, &ctx->sigmask) > 0) ||
 		    next_signal(&lk.tsk->signal->shared_pending,
 				&ctx->sigmask) > 0)
 			events |= POLLIN;
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index aea3f8aa54c..c5d78a7e492 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -262,8 +262,9 @@ out:
 }
 
 static ssize_t
-smb_file_sendfile(struct file *file, loff_t *ppos,
-		  size_t count, read_actor_t actor, void *target)
+smb_file_splice_read(struct file *file, loff_t *ppos,
+		     struct pipe_inode_info *pipe, size_t count,
+		     unsigned int flags)
 {
 	struct dentry *dentry = file->f_path.dentry;
 	ssize_t status;
@@ -277,7 +278,7 @@ smb_file_sendfile(struct file *file, loff_t *ppos,
 			 DENTRY_PATH(dentry), status);
 		goto out;
 	}
-	status = generic_file_sendfile(file, ppos, count, actor, target);
+	status = generic_file_splice_read(file, ppos, pipe, count, flags);
 out:
 	return status;
 }
@@ -416,7 +417,7 @@ const struct file_operations smb_file_operations =
 	.open		= smb_file_open,
 	.release	= smb_file_release,
 	.fsync		= smb_fsync,
-	.sendfile	= smb_file_sendfile,
+	.splice_read	= smb_file_splice_read,
 };
 
 const struct inode_operations smb_file_inode_operations =
diff --git a/fs/splice.c b/fs/splice.c
index 12f28281d2b..6c9828651e6 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -20,7 +20,7 @@
 #include <linux/fs.h>
 #include <linux/file.h>
 #include <linux/pagemap.h>
-#include <linux/pipe_fs_i.h>
+#include <linux/splice.h>
 #include <linux/mm_inline.h>
 #include <linux/swap.h>
 #include <linux/writeback.h>
@@ -28,22 +28,7 @@
 #include <linux/module.h>
 #include <linux/syscalls.h>
 #include <linux/uio.h>
-
-struct partial_page {
-	unsigned int offset;
-	unsigned int len;
-};
-
-/*
- * Passed to splice_to_pipe
- */
-struct splice_pipe_desc {
-	struct page **pages;		/* page map */
-	struct partial_page *partial;	/* pages[] may not be contig */
-	int nr_pages;			/* number of pages in map */
-	unsigned int flags;		/* splice flags */
-	const struct pipe_buf_operations *ops;/* ops associated with output pipe */
-};
+#include <linux/security.h>
 
 /*
  * Attempt to steal a page from a pipe buffer. This should perhaps go into
@@ -101,8 +86,12 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
 	buf->flags &= ~PIPE_BUF_FLAG_LRU;
 }
 
-static int page_cache_pipe_buf_pin(struct pipe_inode_info *pipe,
-				   struct pipe_buffer *buf)
+/*
+ * Check whether the contents of buf is OK to access. Since the content
+ * is a page cache page, IO may be in flight.
+ */
+static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe,
+				       struct pipe_buffer *buf)
 {
 	struct page *page = buf->page;
 	int err;
@@ -143,7 +132,7 @@ static const struct pipe_buf_operations page_cache_pipe_buf_ops = {
 	.can_merge = 0,
 	.map = generic_pipe_buf_map,
 	.unmap = generic_pipe_buf_unmap,
-	.pin = page_cache_pipe_buf_pin,
+	.confirm = page_cache_pipe_buf_confirm,
 	.release = page_cache_pipe_buf_release,
 	.steal = page_cache_pipe_buf_steal,
 	.get = generic_pipe_buf_get,
@@ -163,19 +152,27 @@ static const struct pipe_buf_operations user_page_pipe_buf_ops = {
 	.can_merge = 0,
 	.map = generic_pipe_buf_map,
 	.unmap = generic_pipe_buf_unmap,
-	.pin = generic_pipe_buf_pin,
+	.confirm = generic_pipe_buf_confirm,
 	.release = page_cache_pipe_buf_release,
 	.steal = user_page_pipe_buf_steal,
 	.get = generic_pipe_buf_get,
 };
 
-/*
- * Pipe output worker. This sets up our pipe format with the page cache
- * pipe buffer operations. Otherwise very similar to the regular pipe_writev().
+/**
+ * splice_to_pipe - fill passed data into a pipe
+ * @pipe:	pipe to fill
+ * @spd:	data to fill
+ *
+ * Description:
+ *    @spd contains a map of pages and len/offset tupples, a long with
+ *    the struct pipe_buf_operations associated with these pages. This
+ *    function will link that data to the pipe.
+ *
  */
-static ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
-			      struct splice_pipe_desc *spd)
+ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
+		       struct splice_pipe_desc *spd)
 {
+	unsigned int spd_pages = spd->nr_pages;
 	int ret, do_wakeup, page_nr;
 
 	ret = 0;
@@ -200,6 +197,7 @@ static ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
 			buf->page = spd->pages[page_nr];
 			buf->offset = spd->partial[page_nr].offset;
 			buf->len = spd->partial[page_nr].len;
+			buf->private = spd->partial[page_nr].private;
 			buf->ops = spd->ops;
 			if (spd->flags & SPLICE_F_GIFT)
 				buf->flags |= PIPE_BUF_FLAG_GIFT;
@@ -244,17 +242,18 @@ static ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
 		pipe->waiting_writers--;
 	}
 
-	if (pipe->inode)
+	if (pipe->inode) {
 		mutex_unlock(&pipe->inode->i_mutex);
 
-	if (do_wakeup) {
-		smp_mb();
-		if (waitqueue_active(&pipe->wait))
-			wake_up_interruptible(&pipe->wait);
-		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
+		if (do_wakeup) {
+			smp_mb();
+			if (waitqueue_active(&pipe->wait))
+				wake_up_interruptible(&pipe->wait);
+			kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
+		}
 	}
 
-	while (page_nr < spd->nr_pages)
+	while (page_nr < spd_pages)
 		page_cache_release(spd->pages[page_nr++]);
 
 	return ret;
@@ -272,7 +271,6 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
 	struct page *page;
 	pgoff_t index, end_index;
 	loff_t isize;
-	size_t total_len;
 	int error, page_nr;
 	struct splice_pipe_desc spd = {
 		.pages = pages,
@@ -295,20 +293,15 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
 	page_cache_readahead(mapping, &in->f_ra, in, index, nr_pages);
 
 	/*
-	 * Now fill in the holes:
-	 */
-	error = 0;
-	total_len = 0;
-
-	/*
 	 * Lookup the (hopefully) full range of pages we need.
 	 */
 	spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages);
 
 	/*
 	 * If find_get_pages_contig() returned fewer pages than we needed,
-	 * allocate the rest.
+	 * allocate the rest and fill in the holes.
 	 */
+	error = 0;
 	index += spd.nr_pages;
 	while (spd.nr_pages < nr_pages) {
 		/*
@@ -415,43 +408,47 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
 
 				break;
 			}
+		}
+fill_it:
+		/*
+		 * i_size must be checked after PageUptodate.
+		 */
+		isize = i_size_read(mapping->host);
+		end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
+		if (unlikely(!isize || index > end_index))
+			break;
+
+		/*
+		 * if this is the last page, see if we need to shrink
+		 * the length and stop
+		 */
+		if (end_index == index) {
+			unsigned int plen;
 
 			/*
-			 * i_size must be checked after ->readpage().
+			 * max good bytes in this page
 			 */
-			isize = i_size_read(mapping->host);
-			end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
-			if (unlikely(!isize || index > end_index))
+			plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
+			if (plen <= loff)
 				break;
 
 			/*
-			 * if this is the last page, see if we need to shrink
-			 * the length and stop
+			 * force quit after adding this page
 			 */
-			if (end_index == index) {
-				loff = PAGE_CACHE_SIZE - (isize & ~PAGE_CACHE_MASK);
-				if (total_len + loff > isize)
-					break;
-				/*
-				 * force quit after adding this page
-				 */
-				len = this_len;
-				this_len = min(this_len, loff);
-				loff = 0;
-			}
+			this_len = min(this_len, plen - loff);
+			len = this_len;
 		}
-fill_it:
+
 		partial[page_nr].offset = loff;
 		partial[page_nr].len = this_len;
 		len -= this_len;
-		total_len += this_len;
 		loff = 0;
 		spd.nr_pages++;
 		index++;
 	}
 
 	/*
-	 * Release any pages at the end, if we quit early. 'i' is how far
+	 * Release any pages at the end, if we quit early. 'page_nr' is how far
 	 * we got, 'nr_pages' is how many pages are in the map.
 	 */
 	while (page_nr < nr_pages)
@@ -466,11 +463,16 @@ fill_it:
 /**
  * generic_file_splice_read - splice data from file to a pipe
  * @in:		file to splice from
+ * @ppos:	position in @in
  * @pipe:	pipe to splice to
  * @len:	number of bytes to splice
  * @flags:	splice modifier flags
  *
- * Will read pages from given file and fill them into a pipe.
+ * Description:
+ *    Will read pages from given file and fill them into a pipe. Can be
+ *    used as long as the address_space operations for the source implements
+ *    a readpage() hook.
+ *
  */
 ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
 				 struct pipe_inode_info *pipe, size_t len,
@@ -478,11 +480,19 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
 {
 	ssize_t spliced;
 	int ret;
+	loff_t isize, left;
+
+	isize = i_size_read(in->f_mapping->host);
+	if (unlikely(*ppos >= isize))
+		return 0;
+
+	left = isize - *ppos;
+	if (unlikely(left < len))
+		len = left;
 
 	ret = 0;
 	spliced = 0;
-
-	while (len) {
+	while (len && !spliced) {
 		ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
 
 		if (ret < 0)
@@ -516,11 +526,11 @@ EXPORT_SYMBOL(generic_file_splice_read);
 static int pipe_to_sendpage(struct pipe_inode_info *pipe,
 			    struct pipe_buffer *buf, struct splice_desc *sd)
 {
-	struct file *file = sd->file;
+	struct file *file = sd->u.file;
 	loff_t pos = sd->pos;
 	int ret, more;
 
-	ret = buf->ops->pin(pipe, buf);
+	ret = buf->ops->confirm(pipe, buf);
 	if (!ret) {
 		more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
 
@@ -554,7 +564,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
 static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 			struct splice_desc *sd)
 {
-	struct file *file = sd->file;
+	struct file *file = sd->u.file;
 	struct address_space *mapping = file->f_mapping;
 	unsigned int offset, this_len;
 	struct page *page;
@@ -564,7 +574,7 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 	/*
 	 * make sure the data in this buffer is uptodate
 	 */
-	ret = buf->ops->pin(pipe, buf);
+	ret = buf->ops->confirm(pipe, buf);
 	if (unlikely(ret))
 		return ret;
 
@@ -644,7 +654,6 @@ find_page:
 	 * accessed, we are now done!
 	 */
 	mark_page_accessed(page);
-	balance_dirty_pages_ratelimited(mapping);
 out:
 	page_cache_release(page);
 	unlock_page(page);
@@ -652,36 +661,37 @@ out_ret:
 	return ret;
 }
 
-/*
- * Pipe input worker. Most of this logic works like a regular pipe, the
- * key here is the 'actor' worker passed in that actually moves the data
- * to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
+/**
+ * __splice_from_pipe - splice data from a pipe to given actor
+ * @pipe:	pipe to splice from
+ * @sd:		information to @actor
+ * @actor:	handler that splices the data
+ *
+ * Description:
+ *    This function does little more than loop over the pipe and call
+ *    @actor to do the actual moving of a single struct pipe_buffer to
+ *    the desired destination. See pipe_to_file, pipe_to_sendpage, or
+ *    pipe_to_user.
+ *
  */
-ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
-			   struct file *out, loff_t *ppos, size_t len,
-			   unsigned int flags, splice_actor *actor)
+ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
+			   splice_actor *actor)
 {
 	int ret, do_wakeup, err;
-	struct splice_desc sd;
 
 	ret = 0;
 	do_wakeup = 0;
 
-	sd.total_len = len;
-	sd.flags = flags;
-	sd.file = out;
-	sd.pos = *ppos;
-
 	for (;;) {
 		if (pipe->nrbufs) {
 			struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
 			const struct pipe_buf_operations *ops = buf->ops;
 
-			sd.len = buf->len;
-			if (sd.len > sd.total_len)
-				sd.len = sd.total_len;
+			sd->len = buf->len;
+			if (sd->len > sd->total_len)
+				sd->len = sd->total_len;
 
-			err = actor(pipe, buf, &sd);
+			err = actor(pipe, buf, sd);
 			if (err <= 0) {
 				if (!ret && err != -ENODATA)
 					ret = err;
@@ -693,10 +703,10 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
 			buf->offset += err;
 			buf->len -= err;
 
-			sd.len -= err;
-			sd.pos += err;
-			sd.total_len -= err;
-			if (sd.len)
+			sd->len -= err;
+			sd->pos += err;
+			sd->total_len -= err;
+			if (sd->len)
 				continue;
 
 			if (!buf->len) {
@@ -708,7 +718,7 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
 					do_wakeup = 1;
 			}
 
-			if (!sd.total_len)
+			if (!sd->total_len)
 				break;
 		}
 
@@ -721,7 +731,7 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
 				break;
 		}
 
-		if (flags & SPLICE_F_NONBLOCK) {
+		if (sd->flags & SPLICE_F_NONBLOCK) {
 			if (!ret)
 				ret = -EAGAIN;
 			break;
@@ -755,12 +765,32 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
 }
 EXPORT_SYMBOL(__splice_from_pipe);
 
+/**
+ * splice_from_pipe - splice data from a pipe to a file
+ * @pipe:	pipe to splice from
+ * @out:	file to splice to
+ * @ppos:	position in @out
+ * @len:	how many bytes to splice
+ * @flags:	splice modifier flags
+ * @actor:	handler that splices the data
+ *
+ * Description:
+ *    See __splice_from_pipe. This function locks the input and output inodes,
+ *    otherwise it's identical to __splice_from_pipe().
+ *
+ */
 ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
 			 loff_t *ppos, size_t len, unsigned int flags,
 			 splice_actor *actor)
 {
 	ssize_t ret;
 	struct inode *inode = out->f_mapping->host;
+	struct splice_desc sd = {
+		.total_len = len,
+		.flags = flags,
+		.pos = *ppos,
+		.u.file = out,
+	};
 
 	/*
 	 * The actor worker might be calling ->prepare_write and
@@ -769,7 +799,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
 	 * pipe->inode, we have to order lock acquiry here.
 	 */
 	inode_double_lock(inode, pipe->inode);
-	ret = __splice_from_pipe(pipe, out, ppos, len, flags, actor);
+	ret = __splice_from_pipe(pipe, &sd, actor);
 	inode_double_unlock(inode, pipe->inode);
 
 	return ret;
@@ -779,12 +809,14 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
  * generic_file_splice_write_nolock - generic_file_splice_write without mutexes
  * @pipe:	pipe info
  * @out:	file to write to
+ * @ppos:	position in @out
  * @len:	number of bytes to splice
  * @flags:	splice modifier flags
  *
- * Will either move or copy pages (determined by @flags options) from
- * the given pipe inode to the given file. The caller is responsible
- * for acquiring i_mutex on both inodes.
+ * Description:
+ *    Will either move or copy pages (determined by @flags options) from
+ *    the given pipe inode to the given file. The caller is responsible
+ *    for acquiring i_mutex on both inodes.
  *
  */
 ssize_t
@@ -793,6 +825,12 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
 {
 	struct address_space *mapping = out->f_mapping;
 	struct inode *inode = mapping->host;
+	struct splice_desc sd = {
+		.total_len = len,
+		.flags = flags,
+		.pos = *ppos,
+		.u.file = out,
+	};
 	ssize_t ret;
 	int err;
 
@@ -800,9 +838,12 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
 	if (unlikely(err))
 		return err;
 
-	ret = __splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
+	ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
 	if (ret > 0) {
+		unsigned long nr_pages;
+
 		*ppos += ret;
+		nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 
 		/*
 		 * If file or inode is SYNC and we actually wrote some data,
@@ -815,6 +856,7 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
 			if (err)
 				ret = err;
 		}
+		balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
 	}
 
 	return ret;
@@ -826,11 +868,13 @@ EXPORT_SYMBOL(generic_file_splice_write_nolock);
  * generic_file_splice_write - splice data from a pipe to a file
  * @pipe:	pipe info
  * @out:	file to write to
+ * @ppos:	position in @out
  * @len:	number of bytes to splice
  * @flags:	splice modifier flags
  *
- * Will either move or copy pages (determined by @flags options) from
- * the given pipe inode to the given file.
+ * Description:
+ *    Will either move or copy pages (determined by @flags options) from
+ *    the given pipe inode to the given file.
  *
  */
 ssize_t
@@ -853,7 +897,10 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 
 	ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
 	if (ret > 0) {
+		unsigned long nr_pages;
+
 		*ppos += ret;
+		nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 
 		/*
 		 * If file or inode is SYNC and we actually wrote some data,
@@ -868,6 +915,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 			if (err)
 				ret = err;
 		}
+		balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
 	}
 
 	return ret;
@@ -877,13 +925,15 @@ EXPORT_SYMBOL(generic_file_splice_write);
 
 /**
  * generic_splice_sendpage - splice data from a pipe to a socket
- * @inode:	pipe inode
+ * @pipe:	pipe to splice from
  * @out:	socket to write to
+ * @ppos:	position in @out
  * @len:	number of bytes to splice
  * @flags:	splice modifier flags
  *
- * Will send @len bytes from the pipe to a network socket. No data copying
- * is involved.
+ * Description:
+ *    Will send @len bytes from the pipe to a network socket. No data copying
+ *    is involved.
  *
  */
 ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
@@ -912,6 +962,10 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
 	if (unlikely(ret < 0))
 		return ret;
 
+	ret = security_file_permission(out, MAY_WRITE);
+	if (unlikely(ret < 0))
+		return ret;
+
 	return out->f_op->splice_write(pipe, out, ppos, len, flags);
 }
 
@@ -922,7 +976,6 @@ static long do_splice_to(struct file *in, loff_t *ppos,
 			 struct pipe_inode_info *pipe, size_t len,
 			 unsigned int flags)
 {
-	loff_t isize, left;
 	int ret;
 
 	if (unlikely(!in->f_op || !in->f_op->splice_read))
@@ -935,25 +988,34 @@ static long do_splice_to(struct file *in, loff_t *ppos,
 	if (unlikely(ret < 0))
 		return ret;
 
-	isize = i_size_read(in->f_mapping->host);
-	if (unlikely(*ppos >= isize))
-		return 0;
-	
-	left = isize - *ppos;
-	if (unlikely(left < len))
-		len = left;
+	ret = security_file_permission(in, MAY_READ);
+	if (unlikely(ret < 0))
+		return ret;
 
 	return in->f_op->splice_read(in, ppos, pipe, len, flags);
 }
 
-long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
-		      size_t len, unsigned int flags)
+/**
+ * splice_direct_to_actor - splices data directly between two non-pipes
+ * @in:		file to splice from
+ * @sd:		actor information on where to splice to
+ * @actor:	handles the data splicing
+ *
+ * Description:
+ *    This is a special case helper to splice directly between two
+ *    points, without requiring an explicit pipe. Internally an allocated
+ *    pipe is cached in the process, and reused during the life time of
+ *    that process.
+ *
+ */
+ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
+			       splice_direct_actor *actor)
 {
 	struct pipe_inode_info *pipe;
 	long ret, bytes;
-	loff_t out_off;
 	umode_t i_mode;
-	int i;
+	size_t len;
+	int i, flags;
 
 	/*
 	 * We require the input being a regular file, as we don't want to
@@ -989,49 +1051,41 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
 	 */
 	ret = 0;
 	bytes = 0;
-	out_off = 0;
+	len = sd->total_len;
+	flags = sd->flags;
 
-	while (len) {
-		size_t read_len, max_read_len;
+	/*
+	 * Don't block on output, we have to drain the direct pipe.
+	 */
+	sd->flags &= ~SPLICE_F_NONBLOCK;
 
-		/*
-		 * Do at most PIPE_BUFFERS pages worth of transfer:
-		 */
-		max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE));
+	while (len) {
+		size_t read_len;
 
-		ret = do_splice_to(in, ppos, pipe, max_read_len, flags);
-		if (unlikely(ret < 0))
+		ret = do_splice_to(in, &sd->pos, pipe, len, flags);
+		if (unlikely(ret <= 0))
 			goto out_release;
 
 		read_len = ret;
+		sd->total_len = read_len;
 
 		/*
 		 * NOTE: nonblocking mode only applies to the input. We
 		 * must not do the output in nonblocking mode as then we
 		 * could get stuck data in the internal pipe:
 		 */
-		ret = do_splice_from(pipe, out, &out_off, read_len,
-				     flags & ~SPLICE_F_NONBLOCK);
-		if (unlikely(ret < 0))
+		ret = actor(pipe, sd);
+		if (unlikely(ret <= 0))
 			goto out_release;
 
 		bytes += ret;
 		len -= ret;
 
-		/*
-		 * In nonblocking mode, if we got back a short read then
-		 * that was due to either an IO error or due to the
-		 * pagecache entry not being there. In the IO error case
-		 * the _next_ splice attempt will produce a clean IO error
-		 * return value (not a short read), so in both cases it's
-		 * correct to break out of the loop here:
-		 */
-		if ((flags & SPLICE_F_NONBLOCK) && (read_len < max_read_len))
-			break;
+		if (ret < read_len)
+			goto out_release;
 	}
 
 	pipe->nrbufs = pipe->curbuf = 0;
-
 	return bytes;
 
 out_release:
@@ -1056,9 +1110,51 @@ out_release:
 		return bytes;
 
 	return ret;
+
 }
+EXPORT_SYMBOL(splice_direct_to_actor);
 
-EXPORT_SYMBOL(do_splice_direct);
+static int direct_splice_actor(struct pipe_inode_info *pipe,
+			       struct splice_desc *sd)
+{
+	struct file *file = sd->u.file;
+
+	return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags);
+}
+
+/**
+ * do_splice_direct - splices data directly between two files
+ * @in:		file to splice from
+ * @ppos:	input file offset
+ * @out:	file to splice to
+ * @len:	number of bytes to splice
+ * @flags:	splice modifier flags
+ *
+ * Description:
+ *    For use by do_sendfile(). splice can easily emulate sendfile, but
+ *    doing it in the application would incur an extra system call
+ *    (splice in + splice out, as compared to just sendfile()). So this helper
+ *    can splice directly through a process-private pipe.
+ *
+ */
+long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
+		      size_t len, unsigned int flags)
+{
+	struct splice_desc sd = {
+		.len		= len,
+		.total_len	= len,
+		.flags		= flags,
+		.pos		= *ppos,
+		.u.file		= out,
+	};
+	long ret;
+
+	ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
+	if (ret > 0)
+		*ppos += ret;
+
+	return ret;
+}
 
 /*
  * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
@@ -1240,28 +1336,131 @@ static int get_iovec_page_array(const struct iovec __user *iov,
 	return error;
 }
 
+static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+			struct splice_desc *sd)
+{
+	char *src;
+	int ret;
+
+	ret = buf->ops->confirm(pipe, buf);
+	if (unlikely(ret))
+		return ret;
+
+	/*
+	 * See if we can use the atomic maps, by prefaulting in the
+	 * pages and doing an atomic copy
+	 */
+	if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) {
+		src = buf->ops->map(pipe, buf, 1);
+		ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset,
+							sd->len);
+		buf->ops->unmap(pipe, buf, src);
+		if (!ret) {
+			ret = sd->len;
+			goto out;
+		}
+	}
+
+	/*
+	 * No dice, use slow non-atomic map and copy
+ 	 */
+	src = buf->ops->map(pipe, buf, 0);
+
+	ret = sd->len;
+	if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len))
+		ret = -EFAULT;
+
+out:
+	if (ret > 0)
+		sd->u.userptr += ret;
+	buf->ops->unmap(pipe, buf, src);
+	return ret;
+}
+
+/*
+ * For lack of a better implementation, implement vmsplice() to userspace
+ * as a simple copy of the pipes pages to the user iov.
+ */
+static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
+			     unsigned long nr_segs, unsigned int flags)
+{
+	struct pipe_inode_info *pipe;
+	struct splice_desc sd;
+	ssize_t size;
+	int error;
+	long ret;
+
+	pipe = pipe_info(file->f_path.dentry->d_inode);
+	if (!pipe)
+		return -EBADF;
+
+	if (pipe->inode)
+		mutex_lock(&pipe->inode->i_mutex);
+
+	error = ret = 0;
+	while (nr_segs) {
+		void __user *base;
+		size_t len;
+
+		/*
+		 * Get user address base and length for this iovec.
+		 */
+		error = get_user(base, &iov->iov_base);
+		if (unlikely(error))
+			break;
+		error = get_user(len, &iov->iov_len);
+		if (unlikely(error))
+			break;
+
+		/*
+		 * Sanity check this iovec. 0 read succeeds.
+		 */
+		if (unlikely(!len))
+			break;
+		if (unlikely(!base)) {
+			error = -EFAULT;
+			break;
+		}
+
+		sd.len = 0;
+		sd.total_len = len;
+		sd.flags = flags;
+		sd.u.userptr = base;
+		sd.pos = 0;
+
+		size = __splice_from_pipe(pipe, &sd, pipe_to_user);
+		if (size < 0) {
+			if (!ret)
+				ret = size;
+
+			break;
+		}
+
+		ret += size;
+
+		if (size < len)
+			break;
+
+		nr_segs--;
+		iov++;
+	}
+
+	if (pipe->inode)
+		mutex_unlock(&pipe->inode->i_mutex);
+
+	if (!ret)
+		ret = error;
+
+	return ret;
+}
+
 /*
  * vmsplice splices a user address range into a pipe. It can be thought of
  * as splice-from-memory, where the regular splice is splice-from-file (or
  * to file). In both cases the output is a pipe, naturally.
- *
- * Note that vmsplice only supports splicing _from_ user memory to a pipe,
- * not the other way around. Splicing from user memory is a simple operation
- * that can be supported without any funky alignment restrictions or nasty
- * vm tricks. We simply map in the user memory and fill them into a pipe.
- * The reverse isn't quite as easy, though. There are two possible solutions
- * for that:
- *
- *	- memcpy() the data internally, at which point we might as well just
- *	  do a regular read() on the buffer anyway.
- *	- Lots of nasty vm tricks, that are neither fast nor flexible (it
- *	  has restriction limitations on both ends of the pipe).
- *
- * Alas, it isn't here.
- *
  */
-static long do_vmsplice(struct file *file, const struct iovec __user *iov,
-			unsigned long nr_segs, unsigned int flags)
+static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
+			     unsigned long nr_segs, unsigned int flags)
 {
 	struct pipe_inode_info *pipe;
 	struct page *pages[PIPE_BUFFERS];
@@ -1276,10 +1475,6 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
 	pipe = pipe_info(file->f_path.dentry->d_inode);
 	if (!pipe)
 		return -EBADF;
-	if (unlikely(nr_segs > UIO_MAXIOV))
-		return -EINVAL;
-	else if (unlikely(!nr_segs))
-		return 0;
 
 	spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial,
 					    flags & SPLICE_F_GIFT);
@@ -1289,6 +1484,22 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
 	return splice_to_pipe(pipe, &spd);
 }
 
+/*
+ * Note that vmsplice only really supports true splicing _from_ user memory
+ * to a pipe, not the other way around. Splicing from user memory is a simple
+ * operation that can be supported without any funky alignment restrictions
+ * or nasty vm tricks. We simply map in the user memory and fill them into
+ * a pipe. The reverse isn't quite as easy, though. There are two possible
+ * solutions for that:
+ *
+ *	- memcpy() the data internally, at which point we might as well just
+ *	  do a regular read() on the buffer anyway.
+ *	- Lots of nasty vm tricks, that are neither fast nor flexible (it
+ *	  has restriction limitations on both ends of the pipe).
+ *
+ * Currently we punt and implement it as a normal copy, see pipe_to_user().
+ *
+ */
 asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
 			     unsigned long nr_segs, unsigned int flags)
 {
@@ -1296,11 +1507,18 @@ asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
 	long error;
 	int fput;
 
+	if (unlikely(nr_segs > UIO_MAXIOV))
+		return -EINVAL;
+	else if (unlikely(!nr_segs))
+		return 0;
+
 	error = -EBADF;
 	file = fget_light(fd, &fput);
 	if (file) {
 		if (file->f_mode & FMODE_WRITE)
-			error = do_vmsplice(file, iov, nr_segs, flags);
+			error = vmsplice_to_pipe(file, iov, nr_segs, flags);
+		else if (file->f_mode & FMODE_READ)
+			error = vmsplice_to_user(file, iov, nr_segs, flags);
 
 		fput_light(file, fput);
 	}
diff --git a/fs/super.c b/fs/super.c
index 5260d620c55..fc8ebedc6be 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -884,6 +884,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
 	error = type->get_sb(type, flags, name, data, mnt);
 	if (error < 0)
 		goto out_free_secdata;
+	BUG_ON(!mnt->mnt_sb);
 
  	error = security_sb_kern_mount(mnt->mnt_sb, secdata);
  	if (error)
diff --git a/fs/sync.c b/fs/sync.c
index 2f97576355b..7cd005ea763 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -236,6 +236,14 @@ out:
 	return ret;
 }
 
+/* It would be nice if people remember that not all the world's an i386
+   when they introduce new system calls */
+asmlinkage long sys_sync_file_range2(int fd, unsigned int flags,
+				     loff_t offset, loff_t nbytes)
+{
+	return sys_sync_file_range(fd, offset, nbytes, flags);
+}
+
 /*
  * `endbyte' is inclusive
  */
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index d3b9f5f07db..135353f8a29 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -20,29 +20,41 @@
 
 #include "sysfs.h"
 
+struct bin_buffer {
+	struct mutex	mutex;
+	void		*buffer;
+	int		mmapped;
+};
+
 static int
 fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count)
 {
-	struct bin_attribute * attr = to_bin_attr(dentry);
-	struct kobject * kobj = to_kobj(dentry->d_parent);
+	struct sysfs_dirent *attr_sd = dentry->d_fsdata;
+	struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
+	struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
+	int rc;
+
+	/* need attr_sd for attr, its parent for kobj */
+	if (!sysfs_get_active_two(attr_sd))
+		return -ENODEV;
 
-	if (!attr->read)
-		return -EIO;
+	rc = -EIO;
+	if (attr->read)
+		rc = attr->read(kobj, attr, buffer, off, count);
 
-	return attr->read(kobj, buffer, off, count);
+	sysfs_put_active_two(attr_sd);
+
+	return rc;
 }
 
 static ssize_t
-read(struct file * file, char __user * userbuf, size_t count, loff_t * off)
+read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
 {
-	char *buffer = file->private_data;
+	struct bin_buffer *bb = file->private_data;
 	struct dentry *dentry = file->f_path.dentry;
 	int size = dentry->d_inode->i_size;
 	loff_t offs = *off;
-	int ret;
-
-	if (count > PAGE_SIZE)
-		count = PAGE_SIZE;
+	int count = min_t(size_t, bytes, PAGE_SIZE);
 
 	if (size) {
 		if (offs > size)
@@ -51,43 +63,56 @@ read(struct file * file, char __user * userbuf, size_t count, loff_t * off)
 			count = size - offs;
 	}
 
-	ret = fill_read(dentry, buffer, offs, count);
-	if (ret < 0) 
-		return ret;
-	count = ret;
+	mutex_lock(&bb->mutex);
+
+	count = fill_read(dentry, bb->buffer, offs, count);
+	if (count < 0)
+		goto out_unlock;
 
-	if (copy_to_user(userbuf, buffer, count))
-		return -EFAULT;
+	if (copy_to_user(userbuf, bb->buffer, count)) {
+		count = -EFAULT;
+		goto out_unlock;
+	}
 
-	pr_debug("offs = %lld, *off = %lld, count = %zd\n", offs, *off, count);
+	pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count);
 
 	*off = offs + count;
 
+ out_unlock:
+	mutex_unlock(&bb->mutex);
 	return count;
 }
 
 static int
 flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count)
 {
-	struct bin_attribute *attr = to_bin_attr(dentry);
-	struct kobject *kobj = to_kobj(dentry->d_parent);
+	struct sysfs_dirent *attr_sd = dentry->d_fsdata;
+	struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
+	struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
+	int rc;
+
+	/* need attr_sd for attr, its parent for kobj */
+	if (!sysfs_get_active_two(attr_sd))
+		return -ENODEV;
+
+	rc = -EIO;
+	if (attr->write)
+		rc = attr->write(kobj, attr, buffer, offset, count);
 
-	if (!attr->write)
-		return -EIO;
+	sysfs_put_active_two(attr_sd);
 
-	return attr->write(kobj, buffer, offset, count);
+	return rc;
 }
 
-static ssize_t write(struct file * file, const char __user * userbuf,
-		     size_t count, loff_t * off)
+static ssize_t write(struct file *file, const char __user *userbuf,
+		     size_t bytes, loff_t *off)
 {
-	char *buffer = file->private_data;
+	struct bin_buffer *bb = file->private_data;
 	struct dentry *dentry = file->f_path.dentry;
 	int size = dentry->d_inode->i_size;
 	loff_t offs = *off;
+	int count = min_t(size_t, bytes, PAGE_SIZE);
 
-	if (count > PAGE_SIZE)
-		count = PAGE_SIZE;
 	if (size) {
 		if (offs > size)
 			return 0;
@@ -95,72 +120,100 @@ static ssize_t write(struct file * file, const char __user * userbuf,
 			count = size - offs;
 	}
 
-	if (copy_from_user(buffer, userbuf, count))
-		return -EFAULT;
+	mutex_lock(&bb->mutex);
 
-	count = flush_write(dentry, buffer, offs, count);
+	if (copy_from_user(bb->buffer, userbuf, count)) {
+		count = -EFAULT;
+		goto out_unlock;
+	}
+
+	count = flush_write(dentry, bb->buffer, offs, count);
 	if (count > 0)
 		*off = offs + count;
+
+ out_unlock:
+	mutex_unlock(&bb->mutex);
 	return count;
 }
 
 static int mmap(struct file *file, struct vm_area_struct *vma)
 {
-	struct dentry *dentry = file->f_path.dentry;
-	struct bin_attribute *attr = to_bin_attr(dentry);
-	struct kobject *kobj = to_kobj(dentry->d_parent);
+	struct bin_buffer *bb = file->private_data;
+	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+	struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
+	struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
+	int rc;
+
+	mutex_lock(&bb->mutex);
+
+	/* need attr_sd for attr, its parent for kobj */
+	if (!sysfs_get_active_two(attr_sd))
+		return -ENODEV;
 
-	if (!attr->mmap)
-		return -EINVAL;
+	rc = -EINVAL;
+	if (attr->mmap)
+		rc = attr->mmap(kobj, attr, vma);
 
-	return attr->mmap(kobj, attr, vma);
+	if (rc == 0 && !bb->mmapped)
+		bb->mmapped = 1;
+	else
+		sysfs_put_active_two(attr_sd);
+
+	mutex_unlock(&bb->mutex);
+
+	return rc;
 }
 
 static int open(struct inode * inode, struct file * file)
 {
-	struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent);
-	struct bin_attribute * attr = to_bin_attr(file->f_path.dentry);
-	int error = -EINVAL;
-
-	if (!kobj || !attr)
-		goto Done;
+	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+	struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
+	struct bin_buffer *bb = NULL;
+	int error;
 
-	/* Grab the module reference for this attribute if we have one */
-	error = -ENODEV;
-	if (!try_module_get(attr->attr.owner)) 
-		goto Done;
+	/* need attr_sd for attr */
+	if (!sysfs_get_active(attr_sd))
+		return -ENODEV;
 
 	error = -EACCES;
 	if ((file->f_mode & FMODE_WRITE) && !(attr->write || attr->mmap))
-		goto Error;
+		goto err_out;
 	if ((file->f_mode & FMODE_READ) && !(attr->read || attr->mmap))
-		goto Error;
+		goto err_out;
 
 	error = -ENOMEM;
-	file->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
-	if (!file->private_data)
-		goto Error;
-
-	error = 0;
-    goto Done;
-
- Error:
-	module_put(attr->attr.owner);
- Done:
-	if (error)
-		kobject_put(kobj);
+	bb = kzalloc(sizeof(*bb), GFP_KERNEL);
+	if (!bb)
+		goto err_out;
+
+	bb->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!bb->buffer)
+		goto err_out;
+
+	mutex_init(&bb->mutex);
+	file->private_data = bb;
+
+	/* open succeeded, put active reference and pin attr_sd */
+	sysfs_put_active(attr_sd);
+	sysfs_get(attr_sd);
+	return 0;
+
+ err_out:
+	sysfs_put_active(attr_sd);
+	kfree(bb);
 	return error;
 }
 
 static int release(struct inode * inode, struct file * file)
 {
-	struct kobject * kobj = to_kobj(file->f_path.dentry->d_parent);
-	struct bin_attribute * attr = to_bin_attr(file->f_path.dentry);
-	u8 * buffer = file->private_data;
-
-	kobject_put(kobj);
-	module_put(attr->attr.owner);
-	kfree(buffer);
+	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+	struct bin_buffer *bb = file->private_data;
+
+	if (bb->mmapped)
+		sysfs_put_active_two(attr_sd);
+	sysfs_put(attr_sd);
+	kfree(bb->buffer);
+	kfree(bb);
 	return 0;
 }
 
@@ -181,9 +234,9 @@ const struct file_operations bin_fops = {
 
 int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
 {
-	BUG_ON(!kobj || !kobj->dentry || !attr);
+	BUG_ON(!kobj || !kobj->sd || !attr);
 
-	return sysfs_add_file(kobj->dentry, &attr->attr, SYSFS_KOBJ_BIN_ATTR);
+	return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR);
 }
 
 
@@ -195,7 +248,7 @@ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
 
 void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
 {
-	if (sysfs_hash_and_remove(kobj->dentry, attr->attr.name) < 0) {
+	if (sysfs_hash_and_remove(kobj->sd, attr->attr.name) < 0) {
 		printk(KERN_ERR "%s: "
 			"bad dentry or inode or no such file: \"%s\"\n",
 			__FUNCTION__, attr->attr.name);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 85a668680f8..aee966c44aa 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -9,18 +9,346 @@
 #include <linux/module.h>
 #include <linux/kobject.h>
 #include <linux/namei.h>
+#include <linux/idr.h>
+#include <linux/completion.h>
 #include <asm/semaphore.h>
 #include "sysfs.h"
 
-DECLARE_RWSEM(sysfs_rename_sem);
+DEFINE_MUTEX(sysfs_mutex);
+spinlock_t sysfs_assoc_lock = SPIN_LOCK_UNLOCKED;
+
+static spinlock_t sysfs_ino_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_IDA(sysfs_ino_ida);
+
+/**
+ *	sysfs_link_sibling - link sysfs_dirent into sibling list
+ *	@sd: sysfs_dirent of interest
+ *
+ *	Link @sd into its sibling list which starts from
+ *	sd->s_parent->s_children.
+ *
+ *	Locking:
+ *	mutex_lock(sysfs_mutex)
+ */
+void sysfs_link_sibling(struct sysfs_dirent *sd)
+{
+	struct sysfs_dirent *parent_sd = sd->s_parent;
+
+	BUG_ON(sd->s_sibling);
+	sd->s_sibling = parent_sd->s_children;
+	parent_sd->s_children = sd;
+}
+
+/**
+ *	sysfs_unlink_sibling - unlink sysfs_dirent from sibling list
+ *	@sd: sysfs_dirent of interest
+ *
+ *	Unlink @sd from its sibling list which starts from
+ *	sd->s_parent->s_children.
+ *
+ *	Locking:
+ *	mutex_lock(sysfs_mutex)
+ */
+void sysfs_unlink_sibling(struct sysfs_dirent *sd)
+{
+	struct sysfs_dirent **pos;
+
+	for (pos = &sd->s_parent->s_children; *pos; pos = &(*pos)->s_sibling) {
+		if (*pos == sd) {
+			*pos = sd->s_sibling;
+			sd->s_sibling = NULL;
+			break;
+		}
+	}
+}
+
+/**
+ *	sysfs_get_dentry - get dentry for the given sysfs_dirent
+ *	@sd: sysfs_dirent of interest
+ *
+ *	Get dentry for @sd.  Dentry is looked up if currently not
+ *	present.  This function climbs sysfs_dirent tree till it
+ *	reaches a sysfs_dirent with valid dentry attached and descends
+ *	down from there looking up dentry for each step.
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep)
+ *
+ *	RETURNS:
+ *	Pointer to found dentry on success, ERR_PTR() value on error.
+ */
+struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
+{
+	struct sysfs_dirent *cur;
+	struct dentry *parent_dentry, *dentry;
+	int i, depth;
+
+	/* Find the first parent which has valid s_dentry and get the
+	 * dentry.
+	 */
+	mutex_lock(&sysfs_mutex);
+ restart0:
+	spin_lock(&sysfs_assoc_lock);
+ restart1:
+	spin_lock(&dcache_lock);
+
+	dentry = NULL;
+	depth = 0;
+	cur = sd;
+	while (!cur->s_dentry || !cur->s_dentry->d_inode) {
+		if (cur->s_flags & SYSFS_FLAG_REMOVED) {
+			dentry = ERR_PTR(-ENOENT);
+			depth = 0;
+			break;
+		}
+		cur = cur->s_parent;
+		depth++;
+	}
+	if (!IS_ERR(dentry))
+		dentry = dget_locked(cur->s_dentry);
+
+	spin_unlock(&dcache_lock);
+	spin_unlock(&sysfs_assoc_lock);
+
+	/* from the found dentry, look up depth times */
+	while (depth--) {
+		/* find and get depth'th ancestor */
+		for (cur = sd, i = 0; cur && i < depth; i++)
+			cur = cur->s_parent;
+
+		/* This can happen if tree structure was modified due
+		 * to move/rename.  Restart.
+		 */
+		if (i != depth) {
+			dput(dentry);
+			goto restart0;
+		}
+
+		sysfs_get(cur);
+
+		mutex_unlock(&sysfs_mutex);
+
+		/* look it up */
+		parent_dentry = dentry;
+		dentry = lookup_one_len_kern(cur->s_name, parent_dentry,
+					     strlen(cur->s_name));
+		dput(parent_dentry);
+
+		if (IS_ERR(dentry)) {
+			sysfs_put(cur);
+			return dentry;
+		}
+
+		mutex_lock(&sysfs_mutex);
+		spin_lock(&sysfs_assoc_lock);
+
+		/* This, again, can happen if tree structure has
+		 * changed and we looked up the wrong thing.  Restart.
+		 */
+		if (cur->s_dentry != dentry) {
+			dput(dentry);
+			sysfs_put(cur);
+			goto restart1;
+		}
+
+		spin_unlock(&sysfs_assoc_lock);
+
+		sysfs_put(cur);
+	}
+
+	mutex_unlock(&sysfs_mutex);
+	return dentry;
+}
+
+/**
+ *	sysfs_get_active - get an active reference to sysfs_dirent
+ *	@sd: sysfs_dirent to get an active reference to
+ *
+ *	Get an active reference of @sd.  This function is noop if @sd
+ *	is NULL.
+ *
+ *	RETURNS:
+ *	Pointer to @sd on success, NULL on failure.
+ */
+struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
+{
+	if (unlikely(!sd))
+		return NULL;
+
+	while (1) {
+		int v, t;
+
+		v = atomic_read(&sd->s_active);
+		if (unlikely(v < 0))
+			return NULL;
+
+		t = atomic_cmpxchg(&sd->s_active, v, v + 1);
+		if (likely(t == v))
+			return sd;
+		if (t < 0)
+			return NULL;
+
+		cpu_relax();
+	}
+}
+
+/**
+ *	sysfs_put_active - put an active reference to sysfs_dirent
+ *	@sd: sysfs_dirent to put an active reference to
+ *
+ *	Put an active reference to @sd.  This function is noop if @sd
+ *	is NULL.
+ */
+void sysfs_put_active(struct sysfs_dirent *sd)
+{
+	struct completion *cmpl;
+	int v;
+
+	if (unlikely(!sd))
+		return;
+
+	v = atomic_dec_return(&sd->s_active);
+	if (likely(v != SD_DEACTIVATED_BIAS))
+		return;
+
+	/* atomic_dec_return() is a mb(), we'll always see the updated
+	 * sd->s_sibling.
+	 */
+	cmpl = (void *)sd->s_sibling;
+	complete(cmpl);
+}
+
+/**
+ *	sysfs_get_active_two - get active references to sysfs_dirent and parent
+ *	@sd: sysfs_dirent of interest
+ *
+ *	Get active reference to @sd and its parent.  Parent's active
+ *	reference is grabbed first.  This function is noop if @sd is
+ *	NULL.
+ *
+ *	RETURNS:
+ *	Pointer to @sd on success, NULL on failure.
+ */
+struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd)
+{
+	if (sd) {
+		if (sd->s_parent && unlikely(!sysfs_get_active(sd->s_parent)))
+			return NULL;
+		if (unlikely(!sysfs_get_active(sd))) {
+			sysfs_put_active(sd->s_parent);
+			return NULL;
+		}
+	}
+	return sd;
+}
+
+/**
+ *	sysfs_put_active_two - put active references to sysfs_dirent and parent
+ *	@sd: sysfs_dirent of interest
+ *
+ *	Put active references to @sd and its parent.  This function is
+ *	noop if @sd is NULL.
+ */
+void sysfs_put_active_two(struct sysfs_dirent *sd)
+{
+	if (sd) {
+		sysfs_put_active(sd);
+		sysfs_put_active(sd->s_parent);
+	}
+}
+
+/**
+ *	sysfs_deactivate - deactivate sysfs_dirent
+ *	@sd: sysfs_dirent to deactivate
+ *
+ *	Deny new active references and drain existing ones.
+ */
+static void sysfs_deactivate(struct sysfs_dirent *sd)
+{
+	DECLARE_COMPLETION_ONSTACK(wait);
+	int v;
+
+	BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED));
+	sd->s_sibling = (void *)&wait;
+
+	/* atomic_add_return() is a mb(), put_active() will always see
+	 * the updated sd->s_sibling.
+	 */
+	v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);
+
+	if (v != SD_DEACTIVATED_BIAS)
+		wait_for_completion(&wait);
+
+	sd->s_sibling = NULL;
+}
+
+static int sysfs_alloc_ino(ino_t *pino)
+{
+	int ino, rc;
+
+ retry:
+	spin_lock(&sysfs_ino_lock);
+	rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino);
+	spin_unlock(&sysfs_ino_lock);
+
+	if (rc == -EAGAIN) {
+		if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL))
+			goto retry;
+		rc = -ENOMEM;
+	}
+
+	*pino = ino;
+	return rc;
+}
+
+static void sysfs_free_ino(ino_t ino)
+{
+	spin_lock(&sysfs_ino_lock);
+	ida_remove(&sysfs_ino_ida, ino);
+	spin_unlock(&sysfs_ino_lock);
+}
+
+void release_sysfs_dirent(struct sysfs_dirent * sd)
+{
+	struct sysfs_dirent *parent_sd;
+
+ repeat:
+	/* Moving/renaming is always done while holding reference.
+	 * sd->s_parent won't change beneath us.
+	 */
+	parent_sd = sd->s_parent;
+
+	if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
+		sysfs_put(sd->s_elem.symlink.target_sd);
+	if (sysfs_type(sd) & SYSFS_COPY_NAME)
+		kfree(sd->s_name);
+	kfree(sd->s_iattr);
+	sysfs_free_ino(sd->s_ino);
+	kmem_cache_free(sysfs_dir_cachep, sd);
+
+	sd = parent_sd;
+	if (sd && atomic_dec_and_test(&sd->s_count))
+		goto repeat;
+}
 
 static void sysfs_d_iput(struct dentry * dentry, struct inode * inode)
 {
 	struct sysfs_dirent * sd = dentry->d_fsdata;
 
 	if (sd) {
-		BUG_ON(sd->s_dentry != dentry);
-		sd->s_dentry = NULL;
+		/* sd->s_dentry is protected with sysfs_assoc_lock.
+		 * This allows sysfs_drop_dentry() to dereference it.
+		 */
+		spin_lock(&sysfs_assoc_lock);
+
+		/* The dentry might have been deleted or another
+		 * lookup could have happened updating sd->s_dentry to
+		 * point the new dentry.  Ignore if it isn't pointing
+		 * to this dentry.
+		 */
+		if (sd->s_dentry == dentry)
+			sd->s_dentry = NULL;
+		spin_unlock(&sysfs_assoc_lock);
 		sysfs_put(sd);
 	}
 	iput(inode);
@@ -30,245 +358,402 @@ static struct dentry_operations sysfs_dentry_ops = {
 	.d_iput		= sysfs_d_iput,
 };
 
-/*
- * Allocates a new sysfs_dirent and links it to the parent sysfs_dirent
- */
-static struct sysfs_dirent * __sysfs_new_dirent(void * element)
+struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
 {
-	struct sysfs_dirent * sd;
+	char *dup_name = NULL;
+	struct sysfs_dirent *sd = NULL;
+
+	if (type & SYSFS_COPY_NAME) {
+		name = dup_name = kstrdup(name, GFP_KERNEL);
+		if (!name)
+			goto err_out;
+	}
 
 	sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
 	if (!sd)
-		return NULL;
+		goto err_out;
+
+	if (sysfs_alloc_ino(&sd->s_ino))
+		goto err_out;
 
 	atomic_set(&sd->s_count, 1);
+	atomic_set(&sd->s_active, 0);
 	atomic_set(&sd->s_event, 1);
-	INIT_LIST_HEAD(&sd->s_children);
-	INIT_LIST_HEAD(&sd->s_sibling);
-	sd->s_element = element;
+
+	sd->s_name = name;
+	sd->s_mode = mode;
+	sd->s_flags = type;
 
 	return sd;
+
+ err_out:
+	kfree(dup_name);
+	kmem_cache_free(sysfs_dir_cachep, sd);
+	return NULL;
 }
 
-static void __sysfs_list_dirent(struct sysfs_dirent *parent_sd,
-			      struct sysfs_dirent *sd)
+/**
+ *	sysfs_attach_dentry - associate sysfs_dirent with dentry
+ *	@sd: target sysfs_dirent
+ *	@dentry: dentry to associate
+ *
+ *	Associate @sd with @dentry.  This is protected by
+ *	sysfs_assoc_lock to avoid race with sysfs_d_iput().
+ *
+ *	LOCKING:
+ *	mutex_lock(sysfs_mutex)
+ */
+static void sysfs_attach_dentry(struct sysfs_dirent *sd, struct dentry *dentry)
 {
-	if (sd)
-		list_add(&sd->s_sibling, &parent_sd->s_children);
+	dentry->d_op = &sysfs_dentry_ops;
+	dentry->d_fsdata = sysfs_get(sd);
+
+	/* protect sd->s_dentry against sysfs_d_iput */
+	spin_lock(&sysfs_assoc_lock);
+	sd->s_dentry = dentry;
+	spin_unlock(&sysfs_assoc_lock);
+
+	d_rehash(dentry);
 }
 
-static struct sysfs_dirent * sysfs_new_dirent(struct sysfs_dirent *parent_sd,
-						void * element)
+static int sysfs_ilookup_test(struct inode *inode, void *arg)
 {
-	struct sysfs_dirent *sd;
-	sd = __sysfs_new_dirent(element);
-	__sysfs_list_dirent(parent_sd, sd);
-	return sd;
+	struct sysfs_dirent *sd = arg;
+	return inode->i_ino == sd->s_ino;
 }
 
-/*
+/**
+ *	sysfs_addrm_start - prepare for sysfs_dirent add/remove
+ *	@acxt: pointer to sysfs_addrm_cxt to be used
+ *	@parent_sd: parent sysfs_dirent
  *
- * Return -EEXIST if there is already a sysfs element with the same name for
- * the same parent.
+ *	This function is called when the caller is about to add or
+ *	remove sysfs_dirent under @parent_sd.  This function acquires
+ *	sysfs_mutex, grabs inode for @parent_sd if available and lock
+ *	i_mutex of it.  @acxt is used to keep and pass context to
+ *	other addrm functions.
  *
- * called with parent inode's i_mutex held
+ *	LOCKING:
+ *	Kernel thread context (may sleep).  sysfs_mutex is locked on
+ *	return.  i_mutex of parent inode is locked on return if
+ *	available.
  */
-int sysfs_dirent_exist(struct sysfs_dirent *parent_sd,
-			  const unsigned char *new)
+void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
+		       struct sysfs_dirent *parent_sd)
 {
-	struct sysfs_dirent * sd;
+	struct inode *inode;
 
-	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
-		if (sd->s_element) {
-			const unsigned char *existing = sysfs_get_name(sd);
-			if (strcmp(existing, new))
-				continue;
-			else
-				return -EEXIST;
-		}
-	}
+	memset(acxt, 0, sizeof(*acxt));
+	acxt->parent_sd = parent_sd;
 
-	return 0;
+	/* Lookup parent inode.  inode initialization and I_NEW
+	 * clearing are protected by sysfs_mutex.  By grabbing it and
+	 * looking up with _nowait variant, inode state can be
+	 * determined reliably.
+	 */
+	mutex_lock(&sysfs_mutex);
+
+	inode = ilookup5_nowait(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test,
+				parent_sd);
+
+	if (inode && !(inode->i_state & I_NEW)) {
+		/* parent inode available */
+		acxt->parent_inode = inode;
+
+		/* sysfs_mutex is below i_mutex in lock hierarchy.
+		 * First, trylock i_mutex.  If fails, unlock
+		 * sysfs_mutex and lock them in order.
+		 */
+		if (!mutex_trylock(&inode->i_mutex)) {
+			mutex_unlock(&sysfs_mutex);
+			mutex_lock(&inode->i_mutex);
+			mutex_lock(&sysfs_mutex);
+		}
+	} else
+		iput(inode);
 }
 
+/**
+ *	sysfs_add_one - add sysfs_dirent to parent
+ *	@acxt: addrm context to use
+ *	@sd: sysfs_dirent to be added
+ *
+ *	Get @acxt->parent_sd and set sd->s_parent to it and increment
+ *	nlink of parent inode if @sd is a directory.  @sd is NOT
+ *	linked into the children list of the parent.  The caller
+ *	should invoke sysfs_link_sibling() after this function
+ *	completes if @sd needs to be on the children list.
+ *
+ *	This function should be called between calls to
+ *	sysfs_addrm_start() and sysfs_addrm_finish() and should be
+ *	passed the same @acxt as passed to sysfs_addrm_start().
+ *
+ *	LOCKING:
+ *	Determined by sysfs_addrm_start().
+ */
+void sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
+{
+	sd->s_parent = sysfs_get(acxt->parent_sd);
+
+	if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
+		inc_nlink(acxt->parent_inode);
+
+	acxt->cnt++;
+}
 
-static struct sysfs_dirent *
-__sysfs_make_dirent(struct dentry *dentry, void *element, mode_t mode, int type)
+/**
+ *	sysfs_remove_one - remove sysfs_dirent from parent
+ *	@acxt: addrm context to use
+ *	@sd: sysfs_dirent to be added
+ *
+ *	Mark @sd removed and drop nlink of parent inode if @sd is a
+ *	directory.  @sd is NOT unlinked from the children list of the
+ *	parent.  The caller is repsonsible for removing @sd from the
+ *	children list before calling this function.
+ *
+ *	This function should be called between calls to
+ *	sysfs_addrm_start() and sysfs_addrm_finish() and should be
+ *	passed the same @acxt as passed to sysfs_addrm_start().
+ *
+ *	LOCKING:
+ *	Determined by sysfs_addrm_start().
+ */
+void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
 {
-	struct sysfs_dirent * sd;
+	BUG_ON(sd->s_sibling || (sd->s_flags & SYSFS_FLAG_REMOVED));
 
-	sd = __sysfs_new_dirent(element);
-	if (!sd)
-		goto out;
+	sd->s_flags |= SYSFS_FLAG_REMOVED;
+	sd->s_sibling = acxt->removed;
+	acxt->removed = sd;
 
-	sd->s_mode = mode;
-	sd->s_type = type;
-	sd->s_dentry = dentry;
-	if (dentry) {
-		dentry->d_fsdata = sysfs_get(sd);
-		dentry->d_op = &sysfs_dentry_ops;
-	}
+	if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
+		drop_nlink(acxt->parent_inode);
 
-out:
-	return sd;
+	acxt->cnt++;
 }
 
-int sysfs_make_dirent(struct sysfs_dirent * parent_sd, struct dentry * dentry,
-			void * element, umode_t mode, int type)
+/**
+ *	sysfs_drop_dentry - drop dentry for the specified sysfs_dirent
+ *	@sd: target sysfs_dirent
+ *
+ *	Drop dentry for @sd.  @sd must have been unlinked from its
+ *	parent on entry to this function such that it can't be looked
+ *	up anymore.
+ *
+ *	@sd->s_dentry which is protected with sysfs_assoc_lock points
+ *	to the currently associated dentry but we're not holding a
+ *	reference to it and racing with dput().  Grab dcache_lock and
+ *	verify dentry before dropping it.  If @sd->s_dentry is NULL or
+ *	dput() beats us, no need to bother.
+ */
+static void sysfs_drop_dentry(struct sysfs_dirent *sd)
 {
-	struct sysfs_dirent *sd;
+	struct dentry *dentry = NULL;
+	struct inode *inode;
+
+	/* We're not holding a reference to ->s_dentry dentry but the
+	 * field will stay valid as long as sysfs_assoc_lock is held.
+	 */
+	spin_lock(&sysfs_assoc_lock);
+	spin_lock(&dcache_lock);
+
+	/* drop dentry if it's there and dput() didn't kill it yet */
+	if (sd->s_dentry && sd->s_dentry->d_inode) {
+		dentry = dget_locked(sd->s_dentry);
+		spin_lock(&dentry->d_lock);
+		__d_drop(dentry);
+		spin_unlock(&dentry->d_lock);
+	}
 
-	sd = __sysfs_make_dirent(dentry, element, mode, type);
-	__sysfs_list_dirent(parent_sd, sd);
+	spin_unlock(&dcache_lock);
+	spin_unlock(&sysfs_assoc_lock);
 
-	return sd ? 0 : -ENOMEM;
+	/* dentries for shadowed inodes are pinned, unpin */
+	if (dentry && sysfs_is_shadowed_inode(dentry->d_inode))
+		dput(dentry);
+	dput(dentry);
+
+	/* adjust nlink and update timestamp */
+	inode = ilookup(sysfs_sb, sd->s_ino);
+	if (inode) {
+		mutex_lock(&inode->i_mutex);
+
+		inode->i_ctime = CURRENT_TIME;
+		drop_nlink(inode);
+		if (sysfs_type(sd) == SYSFS_DIR)
+			drop_nlink(inode);
+
+		mutex_unlock(&inode->i_mutex);
+		iput(inode);
+	}
 }
 
-static int init_dir(struct inode * inode)
+/**
+ *	sysfs_addrm_finish - finish up sysfs_dirent add/remove
+ *	@acxt: addrm context to finish up
+ *
+ *	Finish up sysfs_dirent add/remove.  Resources acquired by
+ *	sysfs_addrm_start() are released and removed sysfs_dirents are
+ *	cleaned up.  Timestamps on the parent inode are updated.
+ *
+ *	LOCKING:
+ *	All mutexes acquired by sysfs_addrm_start() are released.
+ *
+ *	RETURNS:
+ *	Number of added/removed sysfs_dirents since sysfs_addrm_start().
+ */
+int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
 {
-	inode->i_op = &sysfs_dir_inode_operations;
-	inode->i_fop = &sysfs_dir_operations;
+	/* release resources acquired by sysfs_addrm_start() */
+	mutex_unlock(&sysfs_mutex);
+	if (acxt->parent_inode) {
+		struct inode *inode = acxt->parent_inode;
 
-	/* directory inodes start off with i_nlink == 2 (for "." entry) */
-	inc_nlink(inode);
-	return 0;
+		/* if added/removed, update timestamps on the parent */
+		if (acxt->cnt)
+			inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+
+		mutex_unlock(&inode->i_mutex);
+		iput(inode);
+	}
+
+	/* kill removed sysfs_dirents */
+	while (acxt->removed) {
+		struct sysfs_dirent *sd = acxt->removed;
+
+		acxt->removed = sd->s_sibling;
+		sd->s_sibling = NULL;
+
+		sysfs_drop_dentry(sd);
+		sysfs_deactivate(sd);
+		sysfs_put(sd);
+	}
+
+	return acxt->cnt;
 }
 
-static int init_file(struct inode * inode)
+/**
+ *	sysfs_find_dirent - find sysfs_dirent with the given name
+ *	@parent_sd: sysfs_dirent to search under
+ *	@name: name to look for
+ *
+ *	Look for sysfs_dirent with name @name under @parent_sd.
+ *
+ *	LOCKING:
+ *	mutex_lock(sysfs_mutex)
+ *
+ *	RETURNS:
+ *	Pointer to sysfs_dirent if found, NULL if not.
+ */
+struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
+				       const unsigned char *name)
 {
-	inode->i_size = PAGE_SIZE;
-	inode->i_fop = &sysfs_file_operations;
-	return 0;
+	struct sysfs_dirent *sd;
+
+	for (sd = parent_sd->s_children; sd; sd = sd->s_sibling)
+		if (sysfs_type(sd) && !strcmp(sd->s_name, name))
+			return sd;
+	return NULL;
 }
 
-static int init_symlink(struct inode * inode)
+/**
+ *	sysfs_get_dirent - find and get sysfs_dirent with the given name
+ *	@parent_sd: sysfs_dirent to search under
+ *	@name: name to look for
+ *
+ *	Look for sysfs_dirent with name @name under @parent_sd and get
+ *	it if found.
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep).  Grabs sysfs_mutex.
+ *
+ *	RETURNS:
+ *	Pointer to sysfs_dirent if found, NULL if not.
+ */
+struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
+				      const unsigned char *name)
 {
-	inode->i_op = &sysfs_symlink_inode_operations;
-	return 0;
+	struct sysfs_dirent *sd;
+
+	mutex_lock(&sysfs_mutex);
+	sd = sysfs_find_dirent(parent_sd, name);
+	sysfs_get(sd);
+	mutex_unlock(&sysfs_mutex);
+
+	return sd;
 }
 
-static int create_dir(struct kobject * k, struct dentry * p,
-		      const char * n, struct dentry ** d)
+static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
+		      const char *name, struct sysfs_dirent **p_sd)
 {
-	int error;
 	umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
+	struct sysfs_addrm_cxt acxt;
+	struct sysfs_dirent *sd;
 
-	mutex_lock(&p->d_inode->i_mutex);
-	*d = lookup_one_len(n, p, strlen(n));
-	if (!IS_ERR(*d)) {
- 		if (sysfs_dirent_exist(p->d_fsdata, n))
-  			error = -EEXIST;
-  		else
-			error = sysfs_make_dirent(p->d_fsdata, *d, k, mode,
-								SYSFS_DIR);
-		if (!error) {
-			error = sysfs_create(*d, mode, init_dir);
-			if (!error) {
-				inc_nlink(p->d_inode);
-				(*d)->d_op = &sysfs_dentry_ops;
-				d_rehash(*d);
-			}
-		}
-		if (error && (error != -EEXIST)) {
-			struct sysfs_dirent *sd = (*d)->d_fsdata;
-			if (sd) {
- 				list_del_init(&sd->s_sibling);
-				sysfs_put(sd);
-			}
-			d_drop(*d);
-		}
-		dput(*d);
-	} else
-		error = PTR_ERR(*d);
-	mutex_unlock(&p->d_inode->i_mutex);
-	return error;
-}
+	/* allocate */
+	sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
+	if (!sd)
+		return -ENOMEM;
+	sd->s_elem.dir.kobj = kobj;
 
+	/* link in */
+	sysfs_addrm_start(&acxt, parent_sd);
+	if (!sysfs_find_dirent(parent_sd, name)) {
+		sysfs_add_one(&acxt, sd);
+		sysfs_link_sibling(sd);
+	}
+	if (sysfs_addrm_finish(&acxt)) {
+		*p_sd = sd;
+		return 0;
+	}
 
-int sysfs_create_subdir(struct kobject * k, const char * n, struct dentry ** d)
+	sysfs_put(sd);
+	return -EEXIST;
+}
+
+int sysfs_create_subdir(struct kobject *kobj, const char *name,
+			struct sysfs_dirent **p_sd)
 {
-	return create_dir(k,k->dentry,n,d);
+	return create_dir(kobj, kobj->sd, name, p_sd);
 }
 
 /**
  *	sysfs_create_dir - create a directory for an object.
  *	@kobj:		object we're creating directory for. 
- *	@shadow_parent:	parent parent object.
+ *	@shadow_parent:	parent object.
  */
-
-int sysfs_create_dir(struct kobject * kobj, struct dentry *shadow_parent)
+int sysfs_create_dir(struct kobject *kobj,
+		     struct sysfs_dirent *shadow_parent_sd)
 {
-	struct dentry * dentry = NULL;
-	struct dentry * parent;
+	struct sysfs_dirent *parent_sd, *sd;
 	int error = 0;
 
 	BUG_ON(!kobj);
 
-	if (shadow_parent)
-		parent = shadow_parent;
+	if (shadow_parent_sd)
+		parent_sd = shadow_parent_sd;
 	else if (kobj->parent)
-		parent = kobj->parent->dentry;
+		parent_sd = kobj->parent->sd;
 	else if (sysfs_mount && sysfs_mount->mnt_sb)
-		parent = sysfs_mount->mnt_sb->s_root;
+		parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata;
 	else
 		return -EFAULT;
 
-	error = create_dir(kobj,parent,kobject_name(kobj),&dentry);
+	error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd);
 	if (!error)
-		kobj->dentry = dentry;
+		kobj->sd = sd;
 	return error;
 }
 
-/* attaches attribute's sysfs_dirent to the dentry corresponding to the
- * attribute file
- */
-static int sysfs_attach_attr(struct sysfs_dirent * sd, struct dentry * dentry)
+static int sysfs_count_nlink(struct sysfs_dirent *sd)
 {
-	struct attribute * attr = NULL;
-	struct bin_attribute * bin_attr = NULL;
-	int (* init) (struct inode *) = NULL;
-	int error = 0;
+	struct sysfs_dirent *child;
+	int nr = 0;
 
-        if (sd->s_type & SYSFS_KOBJ_BIN_ATTR) {
-                bin_attr = sd->s_element;
-                attr = &bin_attr->attr;
-        } else {
-                attr = sd->s_element;
-                init = init_file;
-        }
-
-	dentry->d_fsdata = sysfs_get(sd);
-	sd->s_dentry = dentry;
-	error = sysfs_create(dentry, (attr->mode & S_IALLUGO) | S_IFREG, init);
-	if (error) {
-		sysfs_put(sd);
-		return error;
-	}
-
-        if (bin_attr) {
-		dentry->d_inode->i_size = bin_attr->size;
-		dentry->d_inode->i_fop = &bin_fops;
-	}
-	dentry->d_op = &sysfs_dentry_ops;
-	d_rehash(dentry);
-
-	return 0;
-}
-
-static int sysfs_attach_link(struct sysfs_dirent * sd, struct dentry * dentry)
-{
-	int err = 0;
-
-	dentry->d_fsdata = sysfs_get(sd);
-	sd->s_dentry = dentry;
-	err = sysfs_create(dentry, S_IFLNK|S_IRWXUGO, init_symlink);
-	if (!err) {
-		dentry->d_op = &sysfs_dentry_ops;
-		d_rehash(dentry);
-	} else
-		sysfs_put(sd);
-
-	return err;
+	for (child = sd->s_children; child; child = child->s_sibling)
+		if (sysfs_type(child) == SYSFS_DIR)
+			nr++;
+	return nr + 2;
 }
 
 static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
@@ -276,24 +761,60 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
 {
 	struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
 	struct sysfs_dirent * sd;
-	int err = 0;
+	struct bin_attribute *bin_attr;
+	struct inode *inode;
+	int found = 0;
 
-	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
-		if (sd->s_type & SYSFS_NOT_PINNED) {
-			const unsigned char * name = sysfs_get_name(sd);
+	for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) {
+		if (sysfs_type(sd) &&
+		    !strcmp(sd->s_name, dentry->d_name.name)) {
+			found = 1;
+			break;
+		}
+	}
+
+	/* no such entry */
+	if (!found)
+		return NULL;
 
-			if (strcmp(name, dentry->d_name.name))
-				continue;
+	/* attach dentry and inode */
+	inode = sysfs_get_inode(sd);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
 
-			if (sd->s_type & SYSFS_KOBJ_LINK)
-				err = sysfs_attach_link(sd, dentry);
-			else
-				err = sysfs_attach_attr(sd, dentry);
+	mutex_lock(&sysfs_mutex);
+
+	if (inode->i_state & I_NEW) {
+		/* initialize inode according to type */
+		switch (sysfs_type(sd)) {
+		case SYSFS_DIR:
+			inode->i_op = &sysfs_dir_inode_operations;
+			inode->i_fop = &sysfs_dir_operations;
+			inode->i_nlink = sysfs_count_nlink(sd);
 			break;
+		case SYSFS_KOBJ_ATTR:
+			inode->i_size = PAGE_SIZE;
+			inode->i_fop = &sysfs_file_operations;
+			break;
+		case SYSFS_KOBJ_BIN_ATTR:
+			bin_attr = sd->s_elem.bin_attr.bin_attr;
+			inode->i_size = bin_attr->size;
+			inode->i_fop = &bin_fops;
+			break;
+		case SYSFS_KOBJ_LINK:
+			inode->i_op = &sysfs_symlink_inode_operations;
+			break;
+		default:
+			BUG();
 		}
 	}
 
-	return ERR_PTR(err);
+	sysfs_instantiate(dentry, inode);
+	sysfs_attach_dentry(sd, dentry);
+
+	mutex_unlock(&sysfs_mutex);
+
+	return NULL;
 }
 
 const struct inode_operations sysfs_dir_inode_operations = {
@@ -301,58 +822,46 @@ const struct inode_operations sysfs_dir_inode_operations = {
 	.setattr	= sysfs_setattr,
 };
 
-static void remove_dir(struct dentry * d)
+static void remove_dir(struct sysfs_dirent *sd)
 {
-	struct dentry * parent = dget(d->d_parent);
-	struct sysfs_dirent * sd;
-
-	mutex_lock(&parent->d_inode->i_mutex);
-	d_delete(d);
-	sd = d->d_fsdata;
- 	list_del_init(&sd->s_sibling);
-	sysfs_put(sd);
-	if (d->d_inode)
-		simple_rmdir(parent->d_inode,d);
+	struct sysfs_addrm_cxt acxt;
 
-	pr_debug(" o %s removing done (%d)\n",d->d_name.name,
-		 atomic_read(&d->d_count));
-
-	mutex_unlock(&parent->d_inode->i_mutex);
-	dput(parent);
+	sysfs_addrm_start(&acxt, sd->s_parent);
+	sysfs_unlink_sibling(sd);
+	sysfs_remove_one(&acxt, sd);
+	sysfs_addrm_finish(&acxt);
 }
 
-void sysfs_remove_subdir(struct dentry * d)
+void sysfs_remove_subdir(struct sysfs_dirent *sd)
 {
-	remove_dir(d);
+	remove_dir(sd);
 }
 
 
-static void __sysfs_remove_dir(struct dentry *dentry)
+static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
 {
-	struct sysfs_dirent * parent_sd;
-	struct sysfs_dirent * sd, * tmp;
+	struct sysfs_addrm_cxt acxt;
+	struct sysfs_dirent **pos;
 
-	dget(dentry);
-	if (!dentry)
+	if (!dir_sd)
 		return;
 
-	pr_debug("sysfs %s: removing dir\n",dentry->d_name.name);
-	mutex_lock(&dentry->d_inode->i_mutex);
-	parent_sd = dentry->d_fsdata;
-	list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) {
-		if (!sd->s_element || !(sd->s_type & SYSFS_NOT_PINNED))
-			continue;
-		list_del_init(&sd->s_sibling);
-		sysfs_drop_dentry(sd, dentry);
-		sysfs_put(sd);
+	pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
+	sysfs_addrm_start(&acxt, dir_sd);
+	pos = &dir_sd->s_children;
+	while (*pos) {
+		struct sysfs_dirent *sd = *pos;
+
+		if (sysfs_type(sd) && sysfs_type(sd) != SYSFS_DIR) {
+			*pos = sd->s_sibling;
+			sd->s_sibling = NULL;
+			sysfs_remove_one(&acxt, sd);
+		} else
+			pos = &(*pos)->s_sibling;
 	}
-	mutex_unlock(&dentry->d_inode->i_mutex);
+	sysfs_addrm_finish(&acxt);
 
-	remove_dir(dentry);
-	/**
-	 * Drop reference from dget() on entrance.
-	 */
-	dput(dentry);
+	remove_dir(dir_sd);
 }
 
 /**
@@ -366,102 +875,166 @@ static void __sysfs_remove_dir(struct dentry *dentry)
 
 void sysfs_remove_dir(struct kobject * kobj)
 {
-	__sysfs_remove_dir(kobj->dentry);
-	kobj->dentry = NULL;
+	struct sysfs_dirent *sd = kobj->sd;
+
+	spin_lock(&sysfs_assoc_lock);
+	kobj->sd = NULL;
+	spin_unlock(&sysfs_assoc_lock);
+
+	__sysfs_remove_dir(sd);
 }
 
-int sysfs_rename_dir(struct kobject * kobj, struct dentry *new_parent,
+int sysfs_rename_dir(struct kobject *kobj, struct sysfs_dirent *new_parent_sd,
 		     const char *new_name)
 {
-	int error = 0;
-	struct dentry * new_dentry;
+	struct sysfs_dirent *sd = kobj->sd;
+	struct dentry *new_parent = NULL;
+	struct dentry *old_dentry = NULL, *new_dentry = NULL;
+	const char *dup_name = NULL;
+	int error;
 
-	if (!new_parent)
-		return -EFAULT;
+	/* get dentries */
+	old_dentry = sysfs_get_dentry(sd);
+	if (IS_ERR(old_dentry)) {
+		error = PTR_ERR(old_dentry);
+		goto out_dput;
+	}
+
+	new_parent = sysfs_get_dentry(new_parent_sd);
+	if (IS_ERR(new_parent)) {
+		error = PTR_ERR(new_parent);
+		goto out_dput;
+	}
 
-	down_write(&sysfs_rename_sem);
+	/* lock new_parent and get dentry for new name */
 	mutex_lock(&new_parent->d_inode->i_mutex);
 
 	new_dentry = lookup_one_len(new_name, new_parent, strlen(new_name));
-	if (!IS_ERR(new_dentry)) {
-		/* By allowing two different directories with the
-		 * same d_parent we allow this routine to move
-		 * between different shadows of the same directory
-		 */
-		if (kobj->dentry->d_parent->d_inode != new_parent->d_inode)
-			return -EINVAL;
-		else if (new_dentry->d_parent->d_inode != new_parent->d_inode)
-			error = -EINVAL;
-		else if (new_dentry == kobj->dentry)
-			error = -EINVAL;
-		else if (!new_dentry->d_inode) {
-			error = kobject_set_name(kobj, "%s", new_name);
-			if (!error) {
-				struct sysfs_dirent *sd, *parent_sd;
-
-				d_add(new_dentry, NULL);
-				d_move(kobj->dentry, new_dentry);
-
-				sd = kobj->dentry->d_fsdata;
-				parent_sd = new_parent->d_fsdata;
-
-				list_del_init(&sd->s_sibling);
-				list_add(&sd->s_sibling, &parent_sd->s_children);
-			}
-			else
-				d_drop(new_dentry);
-		} else
-			error = -EEXIST;
-		dput(new_dentry);
+	if (IS_ERR(new_dentry)) {
+		error = PTR_ERR(new_dentry);
+		goto out_unlock;
 	}
-	mutex_unlock(&new_parent->d_inode->i_mutex);
-	up_write(&sysfs_rename_sem);
 
+	/* By allowing two different directories with the same
+	 * d_parent we allow this routine to move between different
+	 * shadows of the same directory
+	 */
+	error = -EINVAL;
+	if (old_dentry->d_parent->d_inode != new_parent->d_inode ||
+	    new_dentry->d_parent->d_inode != new_parent->d_inode ||
+	    old_dentry == new_dentry)
+		goto out_unlock;
+
+	error = -EEXIST;
+	if (new_dentry->d_inode)
+		goto out_unlock;
+
+	/* rename kobject and sysfs_dirent */
+	error = -ENOMEM;
+	new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
+	if (!new_name)
+		goto out_drop;
+
+	error = kobject_set_name(kobj, "%s", new_name);
+	if (error)
+		goto out_drop;
+
+	dup_name = sd->s_name;
+	sd->s_name = new_name;
+
+	/* move under the new parent */
+	d_add(new_dentry, NULL);
+	d_move(sd->s_dentry, new_dentry);
+
+	mutex_lock(&sysfs_mutex);
+
+	sysfs_unlink_sibling(sd);
+	sysfs_get(new_parent_sd);
+	sysfs_put(sd->s_parent);
+	sd->s_parent = new_parent_sd;
+	sysfs_link_sibling(sd);
+
+	mutex_unlock(&sysfs_mutex);
+
+	error = 0;
+	goto out_unlock;
+
+ out_drop:
+	d_drop(new_dentry);
+ out_unlock:
+	mutex_unlock(&new_parent->d_inode->i_mutex);
+ out_dput:
+	kfree(dup_name);
+	dput(new_parent);
+	dput(old_dentry);
+	dput(new_dentry);
 	return error;
 }
 
-int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent)
+int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
 {
-	struct dentry *old_parent_dentry, *new_parent_dentry, *new_dentry;
-	struct sysfs_dirent *new_parent_sd, *sd;
+	struct sysfs_dirent *sd = kobj->sd;
+	struct sysfs_dirent *new_parent_sd;
+	struct dentry *old_parent, *new_parent = NULL;
+	struct dentry *old_dentry = NULL, *new_dentry = NULL;
 	int error;
 
-	old_parent_dentry = kobj->parent ?
-		kobj->parent->dentry : sysfs_mount->mnt_sb->s_root;
-	new_parent_dentry = new_parent ?
-		new_parent->dentry : sysfs_mount->mnt_sb->s_root;
+	BUG_ON(!sd->s_parent);
+	new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root;
+
+	/* get dentries */
+	old_dentry = sysfs_get_dentry(sd);
+	if (IS_ERR(old_dentry)) {
+		error = PTR_ERR(old_dentry);
+		goto out_dput;
+	}
+	old_parent = sd->s_parent->s_dentry;
+
+	new_parent = sysfs_get_dentry(new_parent_sd);
+	if (IS_ERR(new_parent)) {
+		error = PTR_ERR(new_parent);
+		goto out_dput;
+	}
 
-	if (old_parent_dentry->d_inode == new_parent_dentry->d_inode)
-		return 0;	/* nothing to move */
+	if (old_parent->d_inode == new_parent->d_inode) {
+		error = 0;
+		goto out_dput;	/* nothing to move */
+	}
 again:
-	mutex_lock(&old_parent_dentry->d_inode->i_mutex);
-	if (!mutex_trylock(&new_parent_dentry->d_inode->i_mutex)) {
-		mutex_unlock(&old_parent_dentry->d_inode->i_mutex);
+	mutex_lock(&old_parent->d_inode->i_mutex);
+	if (!mutex_trylock(&new_parent->d_inode->i_mutex)) {
+		mutex_unlock(&old_parent->d_inode->i_mutex);
 		goto again;
 	}
 
-	new_parent_sd = new_parent_dentry->d_fsdata;
-	sd = kobj->dentry->d_fsdata;
-
-	new_dentry = lookup_one_len(kobj->name, new_parent_dentry,
-				    strlen(kobj->name));
+	new_dentry = lookup_one_len(kobj->name, new_parent, strlen(kobj->name));
 	if (IS_ERR(new_dentry)) {
 		error = PTR_ERR(new_dentry);
-		goto out;
+		goto out_unlock;
 	} else
 		error = 0;
 	d_add(new_dentry, NULL);
-	d_move(kobj->dentry, new_dentry);
+	d_move(sd->s_dentry, new_dentry);
 	dput(new_dentry);
 
 	/* Remove from old parent's list and insert into new parent's list. */
-	list_del_init(&sd->s_sibling);
-	list_add(&sd->s_sibling, &new_parent_sd->s_children);
+	mutex_lock(&sysfs_mutex);
 
-out:
-	mutex_unlock(&new_parent_dentry->d_inode->i_mutex);
-	mutex_unlock(&old_parent_dentry->d_inode->i_mutex);
+	sysfs_unlink_sibling(sd);
+	sysfs_get(new_parent_sd);
+	sysfs_put(sd->s_parent);
+	sd->s_parent = new_parent_sd;
+	sysfs_link_sibling(sd);
 
+	mutex_unlock(&sysfs_mutex);
+
+ out_unlock:
+	mutex_unlock(&new_parent->d_inode->i_mutex);
+	mutex_unlock(&old_parent->d_inode->i_mutex);
+ out_dput:
+	dput(new_parent);
+	dput(old_dentry);
+	dput(new_dentry);
 	return error;
 }
 
@@ -469,23 +1042,27 @@ static int sysfs_dir_open(struct inode *inode, struct file *file)
 {
 	struct dentry * dentry = file->f_path.dentry;
 	struct sysfs_dirent * parent_sd = dentry->d_fsdata;
+	struct sysfs_dirent * sd;
 
-	mutex_lock(&dentry->d_inode->i_mutex);
-	file->private_data = sysfs_new_dirent(parent_sd, NULL);
-	mutex_unlock(&dentry->d_inode->i_mutex);
-
-	return file->private_data ? 0 : -ENOMEM;
+	sd = sysfs_new_dirent("_DIR_", 0, 0);
+	if (sd) {
+		mutex_lock(&sysfs_mutex);
+		sd->s_parent = sysfs_get(parent_sd);
+		sysfs_link_sibling(sd);
+		mutex_unlock(&sysfs_mutex);
+	}
 
+	file->private_data = sd;
+	return sd ? 0 : -ENOMEM;
 }
 
 static int sysfs_dir_close(struct inode *inode, struct file *file)
 {
-	struct dentry * dentry = file->f_path.dentry;
 	struct sysfs_dirent * cursor = file->private_data;
 
-	mutex_lock(&dentry->d_inode->i_mutex);
-	list_del_init(&cursor->s_sibling);
-	mutex_unlock(&dentry->d_inode->i_mutex);
+	mutex_lock(&sysfs_mutex);
+	sysfs_unlink_sibling(cursor);
+	mutex_unlock(&sysfs_mutex);
 
 	release_sysfs_dirent(cursor);
 
@@ -503,54 +1080,65 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
 	struct dentry *dentry = filp->f_path.dentry;
 	struct sysfs_dirent * parent_sd = dentry->d_fsdata;
 	struct sysfs_dirent *cursor = filp->private_data;
-	struct list_head *p, *q = &cursor->s_sibling;
+	struct sysfs_dirent **pos;
 	ino_t ino;
 	int i = filp->f_pos;
 
 	switch (i) {
 		case 0:
-			ino = dentry->d_inode->i_ino;
+			ino = parent_sd->s_ino;
 			if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
 				break;
 			filp->f_pos++;
 			i++;
 			/* fallthrough */
 		case 1:
-			ino = parent_ino(dentry);
+			if (parent_sd->s_parent)
+				ino = parent_sd->s_parent->s_ino;
+			else
+				ino = parent_sd->s_ino;
 			if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
 				break;
 			filp->f_pos++;
 			i++;
 			/* fallthrough */
 		default:
+			mutex_lock(&sysfs_mutex);
+
+			pos = &parent_sd->s_children;
+			while (*pos != cursor)
+				pos = &(*pos)->s_sibling;
+
+			/* unlink cursor */
+			*pos = cursor->s_sibling;
+
 			if (filp->f_pos == 2)
-				list_move(q, &parent_sd->s_children);
+				pos = &parent_sd->s_children;
 
-			for (p=q->next; p!= &parent_sd->s_children; p=p->next) {
-				struct sysfs_dirent *next;
+			for ( ; *pos; pos = &(*pos)->s_sibling) {
+				struct sysfs_dirent *next = *pos;
 				const char * name;
 				int len;
 
-				next = list_entry(p, struct sysfs_dirent,
-						   s_sibling);
-				if (!next->s_element)
+				if (!sysfs_type(next))
 					continue;
 
-				name = sysfs_get_name(next);
+				name = next->s_name;
 				len = strlen(name);
-				if (next->s_dentry)
-					ino = next->s_dentry->d_inode->i_ino;
-				else
-					ino = iunique(sysfs_sb, 2);
+				ino = next->s_ino;
 
 				if (filldir(dirent, name, len, filp->f_pos, ino,
 						 dt_type(next)) < 0)
-					return 0;
+					break;
 
-				list_move(q, p);
-				p = q;
 				filp->f_pos++;
 			}
+
+			/* put cursor back in */
+			cursor->s_sibling = *pos;
+			*pos = cursor;
+
+			mutex_unlock(&sysfs_mutex);
 	}
 	return 0;
 }
@@ -559,7 +1147,6 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
 {
 	struct dentry * dentry = file->f_path.dentry;
 
-	mutex_lock(&dentry->d_inode->i_mutex);
 	switch (origin) {
 		case 1:
 			offset += file->f_pos;
@@ -567,31 +1154,35 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
 			if (offset >= 0)
 				break;
 		default:
-			mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
 			return -EINVAL;
 	}
 	if (offset != file->f_pos) {
+		mutex_lock(&sysfs_mutex);
+
 		file->f_pos = offset;
 		if (file->f_pos >= 2) {
 			struct sysfs_dirent *sd = dentry->d_fsdata;
 			struct sysfs_dirent *cursor = file->private_data;
-			struct list_head *p;
+			struct sysfs_dirent **pos;
 			loff_t n = file->f_pos - 2;
 
-			list_del(&cursor->s_sibling);
-			p = sd->s_children.next;
-			while (n && p != &sd->s_children) {
-				struct sysfs_dirent *next;
-				next = list_entry(p, struct sysfs_dirent,
-						   s_sibling);
-				if (next->s_element)
+			sysfs_unlink_sibling(cursor);
+
+			pos = &sd->s_children;
+			while (n && *pos) {
+				struct sysfs_dirent *next = *pos;
+				if (sysfs_type(next))
 					n--;
-				p = p->next;
+				pos = &(*pos)->s_sibling;
 			}
-			list_add_tail(&cursor->s_sibling, p);
+
+			cursor->s_sibling = *pos;
+			*pos = cursor;
 		}
+
+		mutex_unlock(&sysfs_mutex);
 	}
-	mutex_unlock(&dentry->d_inode->i_mutex);
+
 	return offset;
 }
 
@@ -604,12 +1195,20 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
 int sysfs_make_shadowed_dir(struct kobject *kobj,
 	void * (*follow_link)(struct dentry *, struct nameidata *))
 {
+	struct dentry *dentry;
 	struct inode *inode;
 	struct inode_operations *i_op;
 
-	inode = kobj->dentry->d_inode;
-	if (inode->i_op != &sysfs_dir_inode_operations)
+	/* get dentry for @kobj->sd, dentry of a shadowed dir is pinned */
+	dentry = sysfs_get_dentry(kobj->sd);
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+
+	inode = dentry->d_inode;
+	if (inode->i_op != &sysfs_dir_inode_operations) {
+		dput(dentry);
 		return -EINVAL;
+	}
 
 	i_op = kmalloc(sizeof(*i_op), GFP_KERNEL);
 	if (!i_op)
@@ -634,54 +1233,72 @@ int sysfs_make_shadowed_dir(struct kobject *kobj,
  *	directory.
  */
 
-struct dentry *sysfs_create_shadow_dir(struct kobject *kobj)
+struct sysfs_dirent *sysfs_create_shadow_dir(struct kobject *kobj)
 {
-	struct sysfs_dirent *sd;
-	struct dentry *parent, *dir, *shadow;
+	struct sysfs_dirent *parent_sd = kobj->sd->s_parent;
+	struct dentry *dir, *parent, *shadow;
 	struct inode *inode;
+	struct sysfs_dirent *sd;
+	struct sysfs_addrm_cxt acxt;
 
-	dir = kobj->dentry;
-	inode = dir->d_inode;
+	dir = sysfs_get_dentry(kobj->sd);
+	if (IS_ERR(dir)) {
+		sd = (void *)dir;
+		goto out;
+	}
 	parent = dir->d_parent;
-	shadow = ERR_PTR(-EINVAL);
+
+	inode = dir->d_inode;
+	sd = ERR_PTR(-EINVAL);
 	if (!sysfs_is_shadowed_inode(inode))
-		goto out;
+		goto out_dput;
 
 	shadow = d_alloc(parent, &dir->d_name);
 	if (!shadow)
 		goto nomem;
 
-	sd = __sysfs_make_dirent(shadow, kobj, inode->i_mode, SYSFS_DIR);
+	sd = sysfs_new_dirent("_SHADOW_", inode->i_mode, SYSFS_DIR);
 	if (!sd)
 		goto nomem;
+	sd->s_elem.dir.kobj = kobj;
 
+	sysfs_addrm_start(&acxt, parent_sd);
+
+	/* add but don't link into children list */
+	sysfs_add_one(&acxt, sd);
+
+	/* attach and instantiate dentry */
+	sysfs_attach_dentry(sd, shadow);
 	d_instantiate(shadow, igrab(inode));
-	inc_nlink(inode);
-	inc_nlink(parent->d_inode);
-	shadow->d_op = &sysfs_dentry_ops;
+	inc_nlink(inode);	/* tj: synchronization? */
+
+	sysfs_addrm_finish(&acxt);
 
 	dget(shadow);		/* Extra count - pin the dentry in core */
 
-out:
-	return shadow;
-nomem:
+	goto out_dput;
+
+ nomem:
 	dput(shadow);
-	shadow = ERR_PTR(-ENOMEM);
-	goto out;
+	sd = ERR_PTR(-ENOMEM);
+ out_dput:
+	dput(dir);
+ out:
+	return sd;
 }
 
 /**
  *	sysfs_remove_shadow_dir - remove an object's directory.
- *	@shadow: dentry of shadow directory
+ *	@shadow_sd: sysfs_dirent of shadow directory
  *
  *	The only thing special about this is that we remove any files in
  *	the directory before we remove the directory, and we've inlined
  *	what used to be sysfs_rmdir() below, instead of calling separately.
  */
 
-void sysfs_remove_shadow_dir(struct dentry *shadow)
+void sysfs_remove_shadow_dir(struct sysfs_dirent *shadow_sd)
 {
-	__sysfs_remove_dir(shadow);
+	__sysfs_remove_dir(shadow_sd);
 }
 
 const struct file_operations sysfs_dir_operations = {
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index b502c7197ec..cc497994b2a 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -50,29 +50,15 @@ static struct sysfs_ops subsys_sysfs_ops = {
 	.store	= subsys_attr_store,
 };
 
-/**
- *	add_to_collection - add buffer to a collection
- *	@buffer:	buffer to be added
- *	@node:		inode of set to add to
- */
-
-static inline void
-add_to_collection(struct sysfs_buffer *buffer, struct inode *node)
-{
-	struct sysfs_buffer_collection *set = node->i_private;
-
-	mutex_lock(&node->i_mutex);
-	list_add(&buffer->associates, &set->associates);
-	mutex_unlock(&node->i_mutex);
-}
-
-static inline void
-remove_from_collection(struct sysfs_buffer *buffer, struct inode *node)
-{
-	mutex_lock(&node->i_mutex);
-	list_del(&buffer->associates);
-	mutex_unlock(&node->i_mutex);
-}
+struct sysfs_buffer {
+	size_t			count;
+	loff_t			pos;
+	char			* page;
+	struct sysfs_ops	* ops;
+	struct semaphore	sem;
+	int			needs_read_fill;
+	int			event;
+};
 
 /**
  *	fill_read_buffer - allocate and fill buffer from object.
@@ -87,9 +73,8 @@ remove_from_collection(struct sysfs_buffer *buffer, struct inode *node)
  */
 static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer)
 {
-	struct sysfs_dirent * sd = dentry->d_fsdata;
-	struct attribute * attr = to_attr(dentry);
-	struct kobject * kobj = to_kobj(dentry->d_parent);
+	struct sysfs_dirent *attr_sd = dentry->d_fsdata;
+	struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
 	struct sysfs_ops * ops = buffer->ops;
 	int ret = 0;
 	ssize_t count;
@@ -99,8 +84,15 @@ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer
 	if (!buffer->page)
 		return -ENOMEM;
 
-	buffer->event = atomic_read(&sd->s_event);
-	count = ops->show(kobj,attr,buffer->page);
+	/* need attr_sd for attr and ops, its parent for kobj */
+	if (!sysfs_get_active_two(attr_sd))
+		return -ENODEV;
+
+	buffer->event = atomic_read(&attr_sd->s_event);
+	count = ops->show(kobj, attr_sd->s_elem.attr.attr, buffer->page);
+
+	sysfs_put_active_two(attr_sd);
+
 	BUG_ON(count > (ssize_t)PAGE_SIZE);
 	if (count >= 0) {
 		buffer->needs_read_fill = 0;
@@ -138,10 +130,7 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 
 	down(&buffer->sem);
 	if (buffer->needs_read_fill) {
-		if (buffer->orphaned)
-			retval = -ENODEV;
-		else
-			retval = fill_read_buffer(file->f_path.dentry,buffer);
+		retval = fill_read_buffer(file->f_path.dentry,buffer);
 		if (retval)
 			goto out;
 	}
@@ -196,14 +185,23 @@ fill_write_buffer(struct sysfs_buffer * buffer, const char __user * buf, size_t
  *	passing the buffer that we acquired in fill_write_buffer().
  */
 
-static int 
+static int
 flush_write_buffer(struct dentry * dentry, struct sysfs_buffer * buffer, size_t count)
 {
-	struct attribute * attr = to_attr(dentry);
-	struct kobject * kobj = to_kobj(dentry->d_parent);
+	struct sysfs_dirent *attr_sd = dentry->d_fsdata;
+	struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
 	struct sysfs_ops * ops = buffer->ops;
+	int rc;
+
+	/* need attr_sd for attr and ops, its parent for kobj */
+	if (!sysfs_get_active_two(attr_sd))
+		return -ENODEV;
+
+	rc = ops->store(kobj, attr_sd->s_elem.attr.attr, buffer->page, count);
 
-	return ops->store(kobj,attr,buffer->page,count);
+	sysfs_put_active_two(attr_sd);
+
+	return rc;
 }
 
 
@@ -231,37 +229,26 @@ sysfs_write_file(struct file *file, const char __user *buf, size_t count, loff_t
 	ssize_t len;
 
 	down(&buffer->sem);
-	if (buffer->orphaned) {
-		len = -ENODEV;
-		goto out;
-	}
 	len = fill_write_buffer(buffer, buf, count);
 	if (len > 0)
 		len = flush_write_buffer(file->f_path.dentry, buffer, len);
 	if (len > 0)
 		*ppos += len;
-out:
 	up(&buffer->sem);
 	return len;
 }
 
 static int sysfs_open_file(struct inode *inode, struct file *file)
 {
-	struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent);
-	struct attribute * attr = to_attr(file->f_path.dentry);
-	struct sysfs_buffer_collection *set;
+	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+	struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
 	struct sysfs_buffer * buffer;
 	struct sysfs_ops * ops = NULL;
-	int error = 0;
-
-	if (!kobj || !attr)
-		goto Einval;
+	int error;
 
-	/* Grab the module reference for this attribute if we have one */
-	if (!try_module_get(attr->owner)) {
-		error = -ENODEV;
-		goto Done;
-	}
+	/* need attr_sd for attr and ops, its parent for kobj */
+	if (!sysfs_get_active_two(attr_sd))
+		return -ENODEV;
 
 	/* if the kobject has no ktype, then we assume that it is a subsystem
 	 * itself, and use ops for it.
@@ -273,33 +260,21 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
 	else
 		ops = &subsys_sysfs_ops;
 
+	error = -EACCES;
+
 	/* No sysfs operations, either from having no subsystem,
 	 * or the subsystem have no operations.
 	 */
 	if (!ops)
-		goto Eaccess;
-
-	/* make sure we have a collection to add our buffers to */
-	mutex_lock(&inode->i_mutex);
-	if (!(set = inode->i_private)) {
-		if (!(set = inode->i_private = kmalloc(sizeof(struct sysfs_buffer_collection), GFP_KERNEL))) {
-			error = -ENOMEM;
-			goto Done;
-		} else {
-			INIT_LIST_HEAD(&set->associates);
-		}
-	}
-	mutex_unlock(&inode->i_mutex);
+		goto err_out;
 
 	/* File needs write support.
 	 * The inode's perms must say it's ok, 
 	 * and we must have a store method.
 	 */
 	if (file->f_mode & FMODE_WRITE) {
-
 		if (!(inode->i_mode & S_IWUGO) || !ops->store)
-			goto Eaccess;
-
+			goto err_out;
 	}
 
 	/* File needs read support.
@@ -308,48 +283,38 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
 	 */
 	if (file->f_mode & FMODE_READ) {
 		if (!(inode->i_mode & S_IRUGO) || !ops->show)
-			goto Eaccess;
+			goto err_out;
 	}
 
 	/* No error? Great, allocate a buffer for the file, and store it
 	 * it in file->private_data for easy access.
 	 */
+	error = -ENOMEM;
 	buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL);
-	if (buffer) {
-		INIT_LIST_HEAD(&buffer->associates);
-		init_MUTEX(&buffer->sem);
-		buffer->needs_read_fill = 1;
-		buffer->ops = ops;
-		add_to_collection(buffer, inode);
-		file->private_data = buffer;
-	} else
-		error = -ENOMEM;
-	goto Done;
-
- Einval:
-	error = -EINVAL;
-	goto Done;
- Eaccess:
-	error = -EACCES;
-	module_put(attr->owner);
- Done:
-	if (error)
-		kobject_put(kobj);
+	if (!buffer)
+		goto err_out;
+
+	init_MUTEX(&buffer->sem);
+	buffer->needs_read_fill = 1;
+	buffer->ops = ops;
+	file->private_data = buffer;
+
+	/* open succeeded, put active references and pin attr_sd */
+	sysfs_put_active_two(attr_sd);
+	sysfs_get(attr_sd);
+	return 0;
+
+ err_out:
+	sysfs_put_active_two(attr_sd);
 	return error;
 }
 
 static int sysfs_release(struct inode * inode, struct file * filp)
 {
-	struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent);
-	struct attribute * attr = to_attr(filp->f_path.dentry);
-	struct module * owner = attr->owner;
-	struct sysfs_buffer * buffer = filp->private_data;
+	struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
+	struct sysfs_buffer *buffer = filp->private_data;
 
-	if (buffer)
-		remove_from_collection(buffer, inode);
-	kobject_put(kobj);
-	/* After this point, attr should not be accessed. */
-	module_put(owner);
+	sysfs_put(attr_sd);
 
 	if (buffer) {
 		if (buffer->page)
@@ -376,57 +341,43 @@ static int sysfs_release(struct inode * inode, struct file * filp)
 static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
 {
 	struct sysfs_buffer * buffer = filp->private_data;
-	struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent);
-	struct sysfs_dirent * sd = filp->f_path.dentry->d_fsdata;
-	int res = 0;
+	struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
+	struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
+
+	/* need parent for the kobj, grab both */
+	if (!sysfs_get_active_two(attr_sd))
+		goto trigger;
 
 	poll_wait(filp, &kobj->poll, wait);
 
-	if (buffer->event != atomic_read(&sd->s_event)) {
-		res = POLLERR|POLLPRI;
-		buffer->needs_read_fill = 1;
-	}
+	sysfs_put_active_two(attr_sd);
 
-	return res;
-}
+	if (buffer->event != atomic_read(&attr_sd->s_event))
+		goto trigger;
 
+	return 0;
 
-static struct dentry *step_down(struct dentry *dir, const char * name)
-{
-	struct dentry * de;
-
-	if (dir == NULL || dir->d_inode == NULL)
-		return NULL;
-
-	mutex_lock(&dir->d_inode->i_mutex);
-	de = lookup_one_len(name, dir, strlen(name));
-	mutex_unlock(&dir->d_inode->i_mutex);
-	dput(dir);
-	if (IS_ERR(de))
-		return NULL;
-	if (de->d_inode == NULL) {
-		dput(de);
-		return NULL;
-	}
-	return de;
+ trigger:
+	buffer->needs_read_fill = 1;
+	return POLLERR|POLLPRI;
 }
 
-void sysfs_notify(struct kobject * k, char *dir, char *attr)
+void sysfs_notify(struct kobject *k, char *dir, char *attr)
 {
-	struct dentry *de = k->dentry;
-	if (de)
-		dget(de);
-	if (de && dir)
-		de = step_down(de, dir);
-	if (de && attr)
-		de = step_down(de, attr);
-	if (de) {
-		struct sysfs_dirent * sd = de->d_fsdata;
-		if (sd)
-			atomic_inc(&sd->s_event);
+	struct sysfs_dirent *sd = k->sd;
+
+	mutex_lock(&sysfs_mutex);
+
+	if (sd && dir)
+		sd = sysfs_find_dirent(sd, dir);
+	if (sd && attr)
+		sd = sysfs_find_dirent(sd, attr);
+	if (sd) {
+		atomic_inc(&sd->s_event);
 		wake_up_interruptible(&k->poll);
-		dput(de);
 	}
+
+	mutex_unlock(&sysfs_mutex);
 }
 EXPORT_SYMBOL_GPL(sysfs_notify);
 
@@ -440,19 +391,30 @@ const struct file_operations sysfs_file_operations = {
 };
 
 
-int sysfs_add_file(struct dentry * dir, const struct attribute * attr, int type)
+int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr,
+		   int type)
 {
-	struct sysfs_dirent * parent_sd = dir->d_fsdata;
 	umode_t mode = (attr->mode & S_IALLUGO) | S_IFREG;
-	int error = -EEXIST;
+	struct sysfs_addrm_cxt acxt;
+	struct sysfs_dirent *sd;
 
-	mutex_lock(&dir->d_inode->i_mutex);
-	if (!sysfs_dirent_exist(parent_sd, attr->name))
-		error = sysfs_make_dirent(parent_sd, NULL, (void *)attr,
-					  mode, type);
-	mutex_unlock(&dir->d_inode->i_mutex);
+	sd = sysfs_new_dirent(attr->name, mode, type);
+	if (!sd)
+		return -ENOMEM;
+	sd->s_elem.attr.attr = (void *)attr;
 
-	return error;
+	sysfs_addrm_start(&acxt, dir_sd);
+
+	if (!sysfs_find_dirent(dir_sd, attr->name)) {
+		sysfs_add_one(&acxt, sd);
+		sysfs_link_sibling(sd);
+	}
+
+	if (sysfs_addrm_finish(&acxt))
+		return 0;
+
+	sysfs_put(sd);
+	return -EEXIST;
 }
 
 
@@ -464,9 +426,9 @@ int sysfs_add_file(struct dentry * dir, const struct attribute * attr, int type)
 
 int sysfs_create_file(struct kobject * kobj, const struct attribute * attr)
 {
-	BUG_ON(!kobj || !kobj->dentry || !attr);
+	BUG_ON(!kobj || !kobj->sd || !attr);
 
-	return sysfs_add_file(kobj->dentry, attr, SYSFS_KOBJ_ATTR);
+	return sysfs_add_file(kobj->sd, attr, SYSFS_KOBJ_ATTR);
 
 }
 
@@ -480,16 +442,16 @@ int sysfs_create_file(struct kobject * kobj, const struct attribute * attr)
 int sysfs_add_file_to_group(struct kobject *kobj,
 		const struct attribute *attr, const char *group)
 {
-	struct dentry *dir;
+	struct sysfs_dirent *dir_sd;
 	int error;
 
-	dir = lookup_one_len(group, kobj->dentry, strlen(group));
-	if (IS_ERR(dir))
-		error = PTR_ERR(dir);
-	else {
-		error = sysfs_add_file(dir, attr, SYSFS_KOBJ_ATTR);
-		dput(dir);
-	}
+	dir_sd = sysfs_get_dirent(kobj->sd, group);
+	if (!dir_sd)
+		return -ENOENT;
+
+	error = sysfs_add_file(dir_sd, attr, SYSFS_KOBJ_ATTR);
+	sysfs_put(dir_sd);
+
 	return error;
 }
 EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
@@ -502,30 +464,31 @@ EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
  */
 int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
 {
-	struct dentry * dir = kobj->dentry;
-	struct dentry * victim;
-	int res = -ENOENT;
-
-	mutex_lock(&dir->d_inode->i_mutex);
-	victim = lookup_one_len(attr->name, dir, strlen(attr->name));
-	if (!IS_ERR(victim)) {
-		/* make sure dentry is really there */
-		if (victim->d_inode && 
-		    (victim->d_parent->d_inode == dir->d_inode)) {
-			victim->d_inode->i_mtime = CURRENT_TIME;
-			fsnotify_modify(victim);
-			res = 0;
-		} else
-			d_drop(victim);
-		
-		/**
-		 * Drop the reference acquired from lookup_one_len() above.
-		 */
-		dput(victim);
+	struct sysfs_dirent *victim_sd = NULL;
+	struct dentry *victim = NULL;
+	int rc;
+
+	rc = -ENOENT;
+	victim_sd = sysfs_get_dirent(kobj->sd, attr->name);
+	if (!victim_sd)
+		goto out;
+
+	victim = sysfs_get_dentry(victim_sd);
+	if (IS_ERR(victim)) {
+		rc = PTR_ERR(victim);
+		victim = NULL;
+		goto out;
 	}
-	mutex_unlock(&dir->d_inode->i_mutex);
 
-	return res;
+	mutex_lock(&victim->d_inode->i_mutex);
+	victim->d_inode->i_mtime = CURRENT_TIME;
+	fsnotify_modify(victim);
+	mutex_unlock(&victim->d_inode->i_mutex);
+	rc = 0;
+ out:
+	dput(victim);
+	sysfs_put(victim_sd);
+	return rc;
 }
 
 
@@ -538,30 +501,34 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
  */
 int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
 {
-	struct dentry *dir = kobj->dentry;
-	struct dentry *victim;
+	struct sysfs_dirent *victim_sd = NULL;
+	struct dentry *victim = NULL;
 	struct inode * inode;
 	struct iattr newattrs;
-	int res = -ENOENT;
-
-	mutex_lock(&dir->d_inode->i_mutex);
-	victim = lookup_one_len(attr->name, dir, strlen(attr->name));
-	if (!IS_ERR(victim)) {
-		if (victim->d_inode &&
-		    (victim->d_parent->d_inode == dir->d_inode)) {
-			inode = victim->d_inode;
-			mutex_lock(&inode->i_mutex);
-			newattrs.ia_mode = (mode & S_IALLUGO) |
-						(inode->i_mode & ~S_IALLUGO);
-			newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
-			res = notify_change(victim, &newattrs);
-			mutex_unlock(&inode->i_mutex);
-		}
-		dput(victim);
+	int rc;
+
+	rc = -ENOENT;
+	victim_sd = sysfs_get_dirent(kobj->sd, attr->name);
+	if (!victim_sd)
+		goto out;
+
+	victim = sysfs_get_dentry(victim_sd);
+	if (IS_ERR(victim)) {
+		rc = PTR_ERR(victim);
+		victim = NULL;
+		goto out;
 	}
-	mutex_unlock(&dir->d_inode->i_mutex);
 
-	return res;
+	inode = victim->d_inode;
+	mutex_lock(&inode->i_mutex);
+	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
+	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
+	rc = notify_change(victim, &newattrs);
+	mutex_unlock(&inode->i_mutex);
+ out:
+	dput(victim);
+	sysfs_put(victim_sd);
+	return rc;
 }
 EXPORT_SYMBOL_GPL(sysfs_chmod_file);
 
@@ -576,7 +543,7 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file);
 
 void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
 {
-	sysfs_hash_and_remove(kobj->dentry, attr->name);
+	sysfs_hash_and_remove(kobj->sd, attr->name);
 }
 
 
@@ -589,12 +556,12 @@ void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
 void sysfs_remove_file_from_group(struct kobject *kobj,
 		const struct attribute *attr, const char *group)
 {
-	struct dentry *dir;
+	struct sysfs_dirent *dir_sd;
 
-	dir = lookup_one_len(group, kobj->dentry, strlen(group));
-	if (!IS_ERR(dir)) {
-		sysfs_hash_and_remove(dir, attr->name);
-		dput(dir);
+	dir_sd = sysfs_get_dirent(kobj->sd, group);
+	if (dir_sd) {
+		sysfs_hash_and_remove(dir_sd, attr->name);
+		sysfs_put(dir_sd);
 	}
 }
 EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group);
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 52eed2a7a5e..f318b73c790 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -18,26 +18,25 @@
 #include "sysfs.h"
 
 
-static void remove_files(struct dentry * dir, 
-			 const struct attribute_group * grp)
+static void remove_files(struct sysfs_dirent *dir_sd,
+			 const struct attribute_group *grp)
 {
 	struct attribute *const* attr;
 
 	for (attr = grp->attrs; *attr; attr++)
-		sysfs_hash_and_remove(dir,(*attr)->name);
+		sysfs_hash_and_remove(dir_sd, (*attr)->name);
 }
 
-static int create_files(struct dentry * dir,
-			const struct attribute_group * grp)
+static int create_files(struct sysfs_dirent *dir_sd,
+			const struct attribute_group *grp)
 {
 	struct attribute *const* attr;
 	int error = 0;
 
-	for (attr = grp->attrs; *attr && !error; attr++) {
-		error = sysfs_add_file(dir, *attr, SYSFS_KOBJ_ATTR);
-	}
+	for (attr = grp->attrs; *attr && !error; attr++)
+		error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR);
 	if (error)
-		remove_files(dir,grp);
+		remove_files(dir_sd, grp);
 	return error;
 }
 
@@ -45,44 +44,44 @@ static int create_files(struct dentry * dir,
 int sysfs_create_group(struct kobject * kobj, 
 		       const struct attribute_group * grp)
 {
-	struct dentry * dir;
+	struct sysfs_dirent *sd;
 	int error;
 
-	BUG_ON(!kobj || !kobj->dentry);
+	BUG_ON(!kobj || !kobj->sd);
 
 	if (grp->name) {
-		error = sysfs_create_subdir(kobj,grp->name,&dir);
+		error = sysfs_create_subdir(kobj, grp->name, &sd);
 		if (error)
 			return error;
 	} else
-		dir = kobj->dentry;
-	dir = dget(dir);
-	if ((error = create_files(dir,grp))) {
+		sd = kobj->sd;
+	sysfs_get(sd);
+	error = create_files(sd, grp);
+	if (error) {
 		if (grp->name)
-			sysfs_remove_subdir(dir);
+			sysfs_remove_subdir(sd);
 	}
-	dput(dir);
+	sysfs_put(sd);
 	return error;
 }
 
 void sysfs_remove_group(struct kobject * kobj, 
 			const struct attribute_group * grp)
 {
-	struct dentry * dir;
+	struct sysfs_dirent *dir_sd = kobj->sd;
+	struct sysfs_dirent *sd;
 
 	if (grp->name) {
-		dir = lookup_one_len_kern(grp->name, kobj->dentry,
-				strlen(grp->name));
-		BUG_ON(IS_ERR(dir));
-	}
-	else
-		dir = dget(kobj->dentry);
+		sd = sysfs_get_dirent(dir_sd, grp->name);
+		BUG_ON(!sd);
+	} else
+		sd = sysfs_get(dir_sd);
 
-	remove_files(dir,grp);
+	remove_files(sd, grp);
 	if (grp->name)
-		sysfs_remove_subdir(dir);
-	/* release the ref. taken in this routine */
-	dput(dir);
+		sysfs_remove_subdir(sd);
+
+	sysfs_put(sd);
 }
 
 
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index bdd30e74de6..3756e152285 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -133,170 +133,94 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
  */
 static struct lock_class_key sysfs_inode_imutex_key;
 
-struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd)
+void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
 {
-	struct inode * inode = new_inode(sysfs_sb);
-	if (inode) {
-		inode->i_blocks = 0;
-		inode->i_mapping->a_ops = &sysfs_aops;
-		inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
-		inode->i_op = &sysfs_inode_operations;
-		lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key);
-
-		if (sd->s_iattr) {
-			/* sysfs_dirent has non-default attributes
-			 * get them for the new inode from persistent copy
-			 * in sysfs_dirent
-			 */
-			set_inode_attr(inode, sd->s_iattr);
-		} else
-			set_default_inode_attr(inode, mode);
-	}
-	return inode;
-}
-
-int sysfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *))
-{
-	int error = 0;
-	struct inode * inode = NULL;
-	if (dentry) {
-		if (!dentry->d_inode) {
-			struct sysfs_dirent * sd = dentry->d_fsdata;
-			if ((inode = sysfs_new_inode(mode, sd))) {
-				if (dentry->d_parent && dentry->d_parent->d_inode) {
-					struct inode *p_inode = dentry->d_parent->d_inode;
-					p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
-				}
-				goto Proceed;
-			}
-			else 
-				error = -ENOMEM;
-		} else
-			error = -EEXIST;
-	} else 
-		error = -ENOENT;
-	goto Done;
-
- Proceed:
-	if (init)
-		error = init(inode);
-	if (!error) {
-		d_instantiate(dentry, inode);
-		if (S_ISDIR(mode))
-			dget(dentry);  /* pin only directory dentry in core */
+	inode->i_blocks = 0;
+	inode->i_mapping->a_ops = &sysfs_aops;
+	inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
+	inode->i_op = &sysfs_inode_operations;
+	inode->i_ino = sd->s_ino;
+	lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key);
+
+	if (sd->s_iattr) {
+		/* sysfs_dirent has non-default attributes
+		 * get them for the new inode from persistent copy
+		 * in sysfs_dirent
+		 */
+		set_inode_attr(inode, sd->s_iattr);
 	} else
-		iput(inode);
- Done:
-	return error;
+		set_default_inode_attr(inode, sd->s_mode);
 }
 
-/*
- * Get the name for corresponding element represented by the given sysfs_dirent
+/**
+ *	sysfs_get_inode - get inode for sysfs_dirent
+ *	@sd: sysfs_dirent to allocate inode for
+ *
+ *	Get inode for @sd.  If such inode doesn't exist, a new inode
+ *	is allocated and basics are initialized.  New inode is
+ *	returned locked.
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep).
+ *
+ *	RETURNS:
+ *	Pointer to allocated inode on success, NULL on failure.
  */
-const unsigned char * sysfs_get_name(struct sysfs_dirent *sd)
+struct inode * sysfs_get_inode(struct sysfs_dirent *sd)
 {
-	struct attribute * attr;
-	struct bin_attribute * bin_attr;
-	struct sysfs_symlink  * sl;
-
-	BUG_ON(!sd || !sd->s_element);
-
-	switch (sd->s_type) {
-		case SYSFS_DIR:
-			/* Always have a dentry so use that */
-			return sd->s_dentry->d_name.name;
-
-		case SYSFS_KOBJ_ATTR:
-			attr = sd->s_element;
-			return attr->name;
+	struct inode *inode;
 
-		case SYSFS_KOBJ_BIN_ATTR:
-			bin_attr = sd->s_element;
-			return bin_attr->attr.name;
+	inode = iget_locked(sysfs_sb, sd->s_ino);
+	if (inode && (inode->i_state & I_NEW))
+		sysfs_init_inode(sd, inode);
 
-		case SYSFS_KOBJ_LINK:
-			sl = sd->s_element;
-			return sl->link_name;
-	}
-	return NULL;
-}
-
-static inline void orphan_all_buffers(struct inode *node)
-{
-	struct sysfs_buffer_collection *set;
-	struct sysfs_buffer *buf;
-
-	mutex_lock_nested(&node->i_mutex, I_MUTEX_CHILD);
-	set = node->i_private;
-	if (set) {
-		list_for_each_entry(buf, &set->associates, associates) {
-			down(&buf->sem);
-			buf->orphaned = 1;
-			up(&buf->sem);
-		}
-	}
-	mutex_unlock(&node->i_mutex);
+	return inode;
 }
 
-
-/*
- * Unhashes the dentry corresponding to given sysfs_dirent
- * Called with parent inode's i_mutex held.
+/**
+ *	sysfs_instantiate - instantiate dentry
+ *	@dentry: dentry to be instantiated
+ *	@inode: inode associated with @sd
+ *
+ *	Unlock @inode if locked and instantiate @dentry with @inode.
+ *
+ *	LOCKING:
+ *	None.
  */
-void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent)
+void sysfs_instantiate(struct dentry *dentry, struct inode *inode)
 {
-	struct dentry * dentry = sd->s_dentry;
-	struct inode *inode;
+	BUG_ON(!dentry || dentry->d_inode);
 
-	if (dentry) {
-		spin_lock(&dcache_lock);
-		spin_lock(&dentry->d_lock);
-		if (!(d_unhashed(dentry) && dentry->d_inode)) {
-			inode = dentry->d_inode;
-			spin_lock(&inode->i_lock);
-			__iget(inode);
-			spin_unlock(&inode->i_lock);
-			dget_locked(dentry);
-			__d_drop(dentry);
-			spin_unlock(&dentry->d_lock);
-			spin_unlock(&dcache_lock);
-			simple_unlink(parent->d_inode, dentry);
-			orphan_all_buffers(inode);
-			iput(inode);
-		} else {
-			spin_unlock(&dentry->d_lock);
-			spin_unlock(&dcache_lock);
-		}
-	}
+	if (inode->i_state & I_NEW)
+		unlock_new_inode(inode);
+
+	d_instantiate(dentry, inode);
 }
 
-int sysfs_hash_and_remove(struct dentry * dir, const char * name)
+int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
 {
-	struct sysfs_dirent * sd;
-	struct sysfs_dirent * parent_sd;
-	int found = 0;
+	struct sysfs_addrm_cxt acxt;
+	struct sysfs_dirent **pos, *sd;
 
-	if (!dir)
+	if (!dir_sd)
 		return -ENOENT;
 
-	if (dir->d_inode == NULL)
-		/* no inode means this hasn't been made visible yet */
-		return -ENOENT;
+	sysfs_addrm_start(&acxt, dir_sd);
+
+	for (pos = &dir_sd->s_children; *pos; pos = &(*pos)->s_sibling) {
+		sd = *pos;
 
-	parent_sd = dir->d_fsdata;
-	mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
-	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
-		if (!sd->s_element)
+		if (!sysfs_type(sd))
 			continue;
-		if (!strcmp(sysfs_get_name(sd), name)) {
-			list_del_init(&sd->s_sibling);
-			sysfs_drop_dentry(sd, dir);
-			sysfs_put(sd);
-			found = 1;
+		if (!strcmp(sd->s_name, name)) {
+			*pos = sd->s_sibling;
+			sd->s_sibling = NULL;
+			sysfs_remove_one(&acxt, sd);
 			break;
 		}
 	}
-	mutex_unlock(&dir->d_inode->i_mutex);
 
-	return found ? 0 : -ENOENT;
+	if (sysfs_addrm_finish(&acxt))
+		return 0;
+	return -ENOENT;
 }
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 23a48a38e6a..402cc356203 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -19,27 +19,18 @@ struct vfsmount *sysfs_mount;
 struct super_block * sysfs_sb = NULL;
 struct kmem_cache *sysfs_dir_cachep;
 
-static void sysfs_clear_inode(struct inode *inode);
-
 static const struct super_operations sysfs_ops = {
 	.statfs		= simple_statfs,
 	.drop_inode	= sysfs_delete_inode,
-	.clear_inode	= sysfs_clear_inode,
 };
 
-static struct sysfs_dirent sysfs_root = {
-	.s_sibling	= LIST_HEAD_INIT(sysfs_root.s_sibling),
-	.s_children	= LIST_HEAD_INIT(sysfs_root.s_children),
-	.s_element	= NULL,
-	.s_type		= SYSFS_ROOT,
-	.s_iattr	= NULL,
+struct sysfs_dirent sysfs_root = {
+	.s_count	= ATOMIC_INIT(1),
+	.s_flags	= SYSFS_ROOT,
+	.s_mode		= S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
+	.s_ino		= 1,
 };
 
-static void sysfs_clear_inode(struct inode *inode)
-{
-	kfree(inode->i_private);
-}
-
 static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct inode *inode;
@@ -52,24 +43,26 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_time_gran = 1;
 	sysfs_sb = sb;
 
-	inode = sysfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
-				 &sysfs_root);
-	if (inode) {
-		inode->i_op = &sysfs_dir_inode_operations;
-		inode->i_fop = &sysfs_dir_operations;
-		/* directory inodes start off with i_nlink == 2 (for "." entry) */
-		inc_nlink(inode);
-	} else {
+	inode = new_inode(sysfs_sb);
+	if (!inode) {
 		pr_debug("sysfs: could not get root inode\n");
 		return -ENOMEM;
 	}
 
+	sysfs_init_inode(&sysfs_root, inode);
+
+	inode->i_op = &sysfs_dir_inode_operations;
+	inode->i_fop = &sysfs_dir_operations;
+	/* directory inodes start off with i_nlink == 2 (for "." entry) */
+	inc_nlink(inode);
+
 	root = d_alloc_root(inode);
 	if (!root) {
 		pr_debug("%s: could not get root dentry!\n",__FUNCTION__);
 		iput(inode);
 		return -ENOMEM;
 	}
+	sysfs_root.s_dentry = root;
 	root->d_fsdata = &sysfs_root;
 	sb->s_root = root;
 	return 0;
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 7b9c5bfde92..2f86e042229 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -11,71 +11,39 @@
 
 #include "sysfs.h"
 
-static int object_depth(struct kobject * kobj)
+static int object_depth(struct sysfs_dirent *sd)
 {
-	struct kobject * p = kobj;
 	int depth = 0;
-	do { depth++; } while ((p = p->parent));
+
+	for (; sd->s_parent; sd = sd->s_parent)
+		depth++;
+
 	return depth;
 }
 
-static int object_path_length(struct kobject * kobj)
+static int object_path_length(struct sysfs_dirent * sd)
 {
-	struct kobject * p = kobj;
 	int length = 1;
-	do {
-		length += strlen(kobject_name(p)) + 1;
-		p = p->parent;
-	} while (p);
+
+	for (; sd->s_parent; sd = sd->s_parent)
+		length += strlen(sd->s_name) + 1;
+
 	return length;
 }
 
-static void fill_object_path(struct kobject * kobj, char * buffer, int length)
+static void fill_object_path(struct sysfs_dirent *sd, char *buffer, int length)
 {
-	struct kobject * p;
-
 	--length;
-	for (p = kobj; p; p = p->parent) {
-		int cur = strlen(kobject_name(p));
+	for (; sd->s_parent; sd = sd->s_parent) {
+		int cur = strlen(sd->s_name);
 
 		/* back up enough to print this bus id with '/' */
 		length -= cur;
-		strncpy(buffer + length,kobject_name(p),cur);
+		strncpy(buffer + length, sd->s_name, cur);
 		*(buffer + --length) = '/';
 	}
 }
 
-static int sysfs_add_link(struct dentry * parent, const char * name, struct kobject * target)
-{
-	struct sysfs_dirent * parent_sd = parent->d_fsdata;
-	struct sysfs_symlink * sl;
-	int error = 0;
-
-	error = -ENOMEM;
-	sl = kmalloc(sizeof(*sl), GFP_KERNEL);
-	if (!sl)
-		goto exit1;
-
-	sl->link_name = kmalloc(strlen(name) + 1, GFP_KERNEL);
-	if (!sl->link_name)
-		goto exit2;
-
-	strcpy(sl->link_name, name);
-	sl->target_kobj = kobject_get(target);
-
-	error = sysfs_make_dirent(parent_sd, NULL, sl, S_IFLNK|S_IRWXUGO,
-				SYSFS_KOBJ_LINK);
-	if (!error)
-		return 0;
-
-	kobject_put(target);
-	kfree(sl->link_name);
-exit2:
-	kfree(sl);
-exit1:
-	return error;
-}
-
 /**
  *	sysfs_create_link - create symlink between two objects.
  *	@kobj:	object whose directory we're creating the link in.
@@ -84,24 +52,57 @@ exit1:
  */
 int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name)
 {
-	struct dentry *dentry = NULL;
-	int error = -EEXIST;
+	struct sysfs_dirent *parent_sd = NULL;
+	struct sysfs_dirent *target_sd = NULL;
+	struct sysfs_dirent *sd = NULL;
+	struct sysfs_addrm_cxt acxt;
+	int error;
 
 	BUG_ON(!name);
 
 	if (!kobj) {
 		if (sysfs_mount && sysfs_mount->mnt_sb)
-			dentry = sysfs_mount->mnt_sb->s_root;
+			parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata;
 	} else
-		dentry = kobj->dentry;
+		parent_sd = kobj->sd;
+
+	error = -EFAULT;
+	if (!parent_sd)
+		goto out_put;
+
+	/* target->sd can go away beneath us but is protected with
+	 * sysfs_assoc_lock.  Fetch target_sd from it.
+	 */
+	spin_lock(&sysfs_assoc_lock);
+	if (target->sd)
+		target_sd = sysfs_get(target->sd);
+	spin_unlock(&sysfs_assoc_lock);
+
+	error = -ENOENT;
+	if (!target_sd)
+		goto out_put;
+
+	error = -ENOMEM;
+	sd = sysfs_new_dirent(name, S_IFLNK|S_IRWXUGO, SYSFS_KOBJ_LINK);
+	if (!sd)
+		goto out_put;
+	sd->s_elem.symlink.target_sd = target_sd;
 
-	if (!dentry)
-		return -EFAULT;
+	sysfs_addrm_start(&acxt, parent_sd);
 
-	mutex_lock(&dentry->d_inode->i_mutex);
-	if (!sysfs_dirent_exist(dentry->d_fsdata, name))
-		error = sysfs_add_link(dentry, name, target);
-	mutex_unlock(&dentry->d_inode->i_mutex);
+	if (!sysfs_find_dirent(parent_sd, name)) {
+		sysfs_add_one(&acxt, sd);
+		sysfs_link_sibling(sd);
+	}
+
+	if (sysfs_addrm_finish(&acxt))
+		return 0;
+
+	error = -EEXIST;
+	/* fall through */
+ out_put:
+	sysfs_put(target_sd);
+	sysfs_put(sd);
 	return error;
 }
 
@@ -114,17 +115,17 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
 
 void sysfs_remove_link(struct kobject * kobj, const char * name)
 {
-	sysfs_hash_and_remove(kobj->dentry,name);
+	sysfs_hash_and_remove(kobj->sd, name);
 }
 
-static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target,
-				 char *path)
+static int sysfs_get_target_path(struct sysfs_dirent * parent_sd,
+				 struct sysfs_dirent * target_sd, char *path)
 {
 	char * s;
 	int depth, size;
 
-	depth = object_depth(kobj);
-	size = object_path_length(target) + depth * 3 - 1;
+	depth = object_depth(parent_sd);
+	size = object_path_length(target_sd) + depth * 3 - 1;
 	if (size > PATH_MAX)
 		return -ENAMETOOLONG;
 
@@ -133,7 +134,7 @@ static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target,
 	for (s = path; depth--; s += 3)
 		strcpy(s,"../");
 
-	fill_object_path(target, path, size);
+	fill_object_path(target_sd, path, size);
 	pr_debug("%s: path = '%s'\n", __FUNCTION__, path);
 
 	return 0;
@@ -141,27 +142,16 @@ static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target,
 
 static int sysfs_getlink(struct dentry *dentry, char * path)
 {
-	struct kobject *kobj, *target_kobj;
-	int error = 0;
+	struct sysfs_dirent *sd = dentry->d_fsdata;
+	struct sysfs_dirent *parent_sd = sd->s_parent;
+	struct sysfs_dirent *target_sd = sd->s_elem.symlink.target_sd;
+	int error;
 
-	kobj = sysfs_get_kobject(dentry->d_parent);
-	if (!kobj)
-		return -EINVAL;
+	mutex_lock(&sysfs_mutex);
+	error = sysfs_get_target_path(parent_sd, target_sd, path);
+	mutex_unlock(&sysfs_mutex);
 
-	target_kobj = sysfs_get_kobject(dentry);
-	if (!target_kobj) {
-		kobject_put(kobj);
-		return -EINVAL;
-	}
-
-	down_read(&sysfs_rename_sem);
-	error = sysfs_get_target_path(kobj, target_kobj, path);
-	up_read(&sysfs_rename_sem);
-	
-	kobject_put(kobj);
-	kobject_put(target_kobj);
 	return error;
-
 }
 
 static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd)
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index a77c57e5a6d..6a37f2386a8 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -1,38 +1,101 @@
+struct sysfs_elem_dir {
+	struct kobject		* kobj;
+};
+
+struct sysfs_elem_symlink {
+	struct sysfs_dirent	* target_sd;
+};
+
+struct sysfs_elem_attr {
+	struct attribute	* attr;
+};
+
+struct sysfs_elem_bin_attr {
+	struct bin_attribute	* bin_attr;
+};
+
+/*
+ * As long as s_count reference is held, the sysfs_dirent itself is
+ * accessible.  Dereferencing s_elem or any other outer entity
+ * requires s_active reference.
+ */
 struct sysfs_dirent {
 	atomic_t		s_count;
-	struct list_head	s_sibling;
-	struct list_head	s_children;
-	void 			* s_element;
-	int			s_type;
+	atomic_t		s_active;
+	struct sysfs_dirent	* s_parent;
+	struct sysfs_dirent	* s_sibling;
+	struct sysfs_dirent	* s_children;
+	const char		* s_name;
+
+	union {
+		struct sysfs_elem_dir		dir;
+		struct sysfs_elem_symlink	symlink;
+		struct sysfs_elem_attr		attr;
+		struct sysfs_elem_bin_attr	bin_attr;
+	}			s_elem;
+
+	unsigned int		s_flags;
 	umode_t			s_mode;
+	ino_t			s_ino;
 	struct dentry		* s_dentry;
 	struct iattr		* s_iattr;
 	atomic_t		s_event;
 };
 
+#define SD_DEACTIVATED_BIAS	INT_MIN
+
+struct sysfs_addrm_cxt {
+	struct sysfs_dirent	*parent_sd;
+	struct inode		*parent_inode;
+	struct sysfs_dirent	*removed;
+	int			cnt;
+};
+
 extern struct vfsmount * sysfs_mount;
+extern struct sysfs_dirent sysfs_root;
 extern struct kmem_cache *sysfs_dir_cachep;
 
-extern void sysfs_delete_inode(struct inode *inode);
-extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *);
-extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *));
+extern struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd);
+extern void sysfs_link_sibling(struct sysfs_dirent *sd);
+extern void sysfs_unlink_sibling(struct sysfs_dirent *sd);
+extern struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd);
+extern void sysfs_put_active(struct sysfs_dirent *sd);
+extern struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd);
+extern void sysfs_put_active_two(struct sysfs_dirent *sd);
+extern void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
+			      struct sysfs_dirent *parent_sd);
+extern void sysfs_add_one(struct sysfs_addrm_cxt *acxt,
+			  struct sysfs_dirent *sd);
+extern void sysfs_remove_one(struct sysfs_addrm_cxt *acxt,
+			     struct sysfs_dirent *sd);
+extern int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
 
-extern int sysfs_dirent_exist(struct sysfs_dirent *, const unsigned char *);
-extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *,
-				umode_t, int);
-
-extern int sysfs_add_file(struct dentry *, const struct attribute *, int);
-extern int sysfs_hash_and_remove(struct dentry * dir, const char * name);
+extern void sysfs_delete_inode(struct inode *inode);
+extern void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode);
+extern struct inode * sysfs_get_inode(struct sysfs_dirent *sd);
+extern void sysfs_instantiate(struct dentry *dentry, struct inode *inode);
+
+extern void release_sysfs_dirent(struct sysfs_dirent * sd);
+extern struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
+					      const unsigned char *name);
+extern struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
+					     const unsigned char *name);
+extern struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode,
+					     int type);
+
+extern int sysfs_add_file(struct sysfs_dirent *dir_sd,
+			  const struct attribute *attr, int type);
+extern int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
 extern struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name);
 
-extern int sysfs_create_subdir(struct kobject *, const char *, struct dentry **);
-extern void sysfs_remove_subdir(struct dentry *);
+extern int sysfs_create_subdir(struct kobject *kobj, const char *name,
+			       struct sysfs_dirent **p_sd);
+extern void sysfs_remove_subdir(struct sysfs_dirent *sd);
 
-extern const unsigned char * sysfs_get_name(struct sysfs_dirent *sd);
-extern void sysfs_drop_dentry(struct sysfs_dirent *sd, struct dentry *parent);
 extern int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
 
-extern struct rw_semaphore sysfs_rename_sem;
+extern spinlock_t sysfs_assoc_lock;
+extern struct mutex sysfs_mutex;
 extern struct super_block * sysfs_sb;
 extern const struct file_operations sysfs_dir_operations;
 extern const struct file_operations sysfs_file_operations;
@@ -40,73 +103,9 @@ extern const struct file_operations bin_fops;
 extern const struct inode_operations sysfs_dir_inode_operations;
 extern const struct inode_operations sysfs_symlink_inode_operations;
 
-struct sysfs_symlink {
-	char * link_name;
-	struct kobject * target_kobj;
-};
-
-struct sysfs_buffer {
-	struct list_head		associates;
-	size_t				count;
-	loff_t				pos;
-	char				* page;
-	struct sysfs_ops		* ops;
-	struct semaphore		sem;
-	int				orphaned;
-	int				needs_read_fill;
-	int				event;
-};
-
-struct sysfs_buffer_collection {
-	struct list_head	associates;
-};
-
-static inline struct kobject * to_kobj(struct dentry * dentry)
-{
-	struct sysfs_dirent * sd = dentry->d_fsdata;
-	return ((struct kobject *) sd->s_element);
-}
-
-static inline struct attribute * to_attr(struct dentry * dentry)
+static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
 {
-	struct sysfs_dirent * sd = dentry->d_fsdata;
-	return ((struct attribute *) sd->s_element);
-}
-
-static inline struct bin_attribute * to_bin_attr(struct dentry * dentry)
-{
-	struct sysfs_dirent * sd = dentry->d_fsdata;
-	return ((struct bin_attribute *) sd->s_element);
-}
-
-static inline struct kobject *sysfs_get_kobject(struct dentry *dentry)
-{
-	struct kobject * kobj = NULL;
-
-	spin_lock(&dcache_lock);
-	if (!d_unhashed(dentry)) {
-		struct sysfs_dirent * sd = dentry->d_fsdata;
-		if (sd->s_type & SYSFS_KOBJ_LINK) {
-			struct sysfs_symlink * sl = sd->s_element;
-			kobj = kobject_get(sl->target_kobj);
-		} else
-			kobj = kobject_get(sd->s_element);
-	}
-	spin_unlock(&dcache_lock);
-
-	return kobj;
-}
-
-static inline void release_sysfs_dirent(struct sysfs_dirent * sd)
-{
-	if (sd->s_type & SYSFS_KOBJ_LINK) {
-		struct sysfs_symlink * sl = sd->s_element;
-		kfree(sl->link_name);
-		kobject_put(sl->target_kobj);
-		kfree(sl);
-	}
-	kfree(sd->s_iattr);
-	kmem_cache_free(sysfs_dir_cachep, sd);
+	return sd->s_flags & SYSFS_TYPE_MASK;
 }
 
 static inline struct sysfs_dirent * sysfs_get(struct sysfs_dirent * sd)
@@ -120,7 +119,7 @@ static inline struct sysfs_dirent * sysfs_get(struct sysfs_dirent * sd)
 
 static inline void sysfs_put(struct sysfs_dirent * sd)
 {
-	if (atomic_dec_and_test(&sd->s_count))
+	if (sd && atomic_dec_and_test(&sd->s_count))
 		release_sysfs_dirent(sd);
 }
 
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index 0732ddb9020..589be21d884 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -27,7 +27,7 @@ const struct file_operations sysv_file_operations = {
 	.aio_write	= generic_file_aio_write,
 	.mmap		= generic_file_mmap,
 	.fsync		= sysv_sync_file,
-	.sendfile	= generic_file_sendfile,
+	.splice_read	= generic_file_splice_read,
 };
 
 const struct inode_operations sysv_file_inode_operations = {
diff --git a/fs/udf/crc.c b/fs/udf/crc.c
index 1b82a4adc2f..ef2bfaa19d7 100644
--- a/fs/udf/crc.c
+++ b/fs/udf/crc.c
@@ -106,8 +106,8 @@ int main(void)
 {
 	unsigned short x;
 
-	x = udf_crc16(bytes, sizeof bytes);
-	printf("udf_crc16: calculated = %4.4x, correct = %4.4x\n", x, 0x3299U);
+	x = udf_crc(bytes, sizeof bytes);
+	printf("udf_crc: calculated = %4.4x, correct = %4.4x\n", x, 0x3299U);
 
 	return 0;
 }
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 51b5764685e..df070bee8d4 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -261,7 +261,7 @@ const struct file_operations udf_file_operations = {
 	.aio_write		= udf_file_aio_write,
 	.release		= udf_release_file,
 	.fsync			= udf_fsync_file,
-	.sendfile		= generic_file_sendfile,
+	.splice_read		= generic_file_splice_read,
 };
 
 const struct inode_operations udf_file_inode_operations = {
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 8206983f2eb..10f3188738a 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -50,7 +50,7 @@ void udf_free_inode(struct inode * inode)
 		else
 			UDF_SB_LVIDIU(sb)->numFiles =
 				cpu_to_le32(le32_to_cpu(UDF_SB_LVIDIU(sb)->numFiles) - 1);
-		
+
 		mark_buffer_dirty(sbi->s_lvidbh);
 	}
 	mutex_unlock(&sbi->s_alloc_mutex);
@@ -136,6 +136,13 @@ struct inode * udf_new_inode (struct inode *dir, int mode, int * err)
 		UDF_I_EFE(inode) = 0;
 		UDF_I_DATA(inode) = kzalloc(inode->i_sb->s_blocksize - sizeof(struct fileEntry), GFP_KERNEL);
 	}
+	if (!UDF_I_DATA(inode))
+	{
+		iput(inode);
+		*err = -ENOMEM;
+		mutex_unlock(&sbi->s_alloc_mutex);
+		return NULL;
+	}
 	if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB))
 		UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_IN_ICB;
 	else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD))
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 1f0129405cf..5b82e489af7 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -49,6 +49,7 @@ MODULE_LICENSE("GPL");
 static mode_t udf_convert_permissions(struct fileEntry *);
 static int udf_update_inode(struct inode *, int);
 static void udf_fill_inode(struct inode *, struct buffer_head *);
+static int udf_alloc_i_data(struct inode *inode, size_t size);
 static struct buffer_head *inode_getblk(struct inode *, sector_t, int *,
 	long *, int *);
 static int8_t udf_insert_aext(struct inode *, struct extent_position,
@@ -100,14 +101,23 @@ no_delete:
 	clear_inode(inode);
 }
 
+/*
+ * If we are going to release inode from memory, we discard preallocation and
+ * truncate last inode extent to proper length. We could use drop_inode() but
+ * it's called under inode_lock and thus we cannot mark inode dirty there.  We
+ * use clear_inode() but we have to make sure to write inode as it's not written
+ * automatically.
+ */
 void udf_clear_inode(struct inode *inode)
 {
 	if (!(inode->i_sb->s_flags & MS_RDONLY)) {
 		lock_kernel();
+		/* Discard preallocation for directories, symlinks, etc. */
 		udf_discard_prealloc(inode);
+		udf_truncate_tail_extent(inode);
 		unlock_kernel();
+		write_inode_now(inode, 1);
 	}
-
 	kfree(UDF_I_DATA(inode));
 	UDF_I_DATA(inode) = NULL;
 }
@@ -725,7 +735,7 @@ static void udf_split_extents(struct inode *inode, int *c, int offset, int newbl
 			(*c) ++;
 			(*endnum) ++;
 		}
-		
+
 		laarr[curr].extLocation.logicalBlockNum = newblocknum;
 		if (etype == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30))
 			laarr[curr].extLocation.partitionReferenceNum =
@@ -827,7 +837,7 @@ static void udf_prealloc_extents(struct inode *inode, int c, int lastblock,
 				{
 					numalloc -= elen;
 					if (*endnum > (i+1))
-						memmove(&laarr[i], &laarr[i+1], 
+						memmove(&laarr[i], &laarr[i+1],
 							sizeof(long_ad) * (*endnum - (i+1)));
 					i --;
 					(*endnum) --;
@@ -1015,7 +1025,7 @@ void udf_truncate(struct inode * inode)
 	{
 		block_truncate_page(inode->i_mapping, inode->i_size, udf_get_block);
 		udf_truncate_extents(inode);
-	}	
+	}
 
 	inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb);
 	if (IS_SYNC(inode))
@@ -1078,10 +1088,10 @@ __udf_read_inode(struct inode *inode)
 			{
 				kernel_lb_addr loc;
 				ie = (struct indirectEntry *)ibh->b_data;
-	
+
 				loc = lelb_to_cpu(ie->indirectICB.extLocation);
-	
-				if (ie->indirectICB.extLength && 
+
+				if (ie->indirectICB.extLength &&
 					(nbh = udf_read_ptagged(inode->i_sb, loc, 0, &ident)))
 				{
 					if (ident == TAG_IDENT_FE ||
@@ -1147,14 +1157,22 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
 	{
 		UDF_I_EFE(inode) = 1;
 		UDF_I_USE(inode) = 0;
-		UDF_I_DATA(inode) = kmalloc(inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry), GFP_KERNEL);
+		if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry)))
+		{
+			make_bad_inode(inode);
+			return;
+		}
 		memcpy(UDF_I_DATA(inode), bh->b_data + sizeof(struct extendedFileEntry), inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry));
 	}
 	else if (le16_to_cpu(fe->descTag.tagIdent) == TAG_IDENT_FE)
 	{
 		UDF_I_EFE(inode) = 0;
 		UDF_I_USE(inode) = 0;
-		UDF_I_DATA(inode) = kmalloc(inode->i_sb->s_blocksize - sizeof(struct fileEntry), GFP_KERNEL);
+		if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - sizeof(struct fileEntry)))
+		{
+			make_bad_inode(inode);
+			return;
+		}
 		memcpy(UDF_I_DATA(inode), bh->b_data + sizeof(struct fileEntry), inode->i_sb->s_blocksize - sizeof(struct fileEntry));
 	}
 	else if (le16_to_cpu(fe->descTag.tagIdent) == TAG_IDENT_USE)
@@ -1164,7 +1182,11 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
 		UDF_I_LENALLOC(inode) =
 			le32_to_cpu(
 				((struct unallocSpaceEntry *)bh->b_data)->lengthAllocDescs);
-		UDF_I_DATA(inode) = kmalloc(inode->i_sb->s_blocksize - sizeof(struct unallocSpaceEntry), GFP_KERNEL);
+		if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - sizeof(struct unallocSpaceEntry)))
+		{
+			make_bad_inode(inode);
+			return;
+		}
 		memcpy(UDF_I_DATA(inode), bh->b_data + sizeof(struct unallocSpaceEntry), inode->i_sb->s_blocksize - sizeof(struct unallocSpaceEntry));
 		return;
 	}
@@ -1182,7 +1204,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
 	inode->i_nlink = le16_to_cpu(fe->fileLinkCount);
 	if (!inode->i_nlink)
 		inode->i_nlink = 1;
-	
+
 	inode->i_size = le64_to_cpu(fe->informationLength);
 	UDF_I_LENEXTENTS(inode) = inode->i_size;
 
@@ -1234,7 +1256,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
 	}
 	else
 	{
-		inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) << 
+		inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) <<
 			(inode->i_sb->s_blocksize_bits - 9);
 
 		if ( udf_stamp_to_time(&convtime, &convtime_usec,
@@ -1365,6 +1387,20 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
 	}
 }
 
+static int udf_alloc_i_data(struct inode *inode, size_t size)
+{
+	UDF_I_DATA(inode) = kmalloc(size, GFP_KERNEL);
+
+	if (!UDF_I_DATA(inode))
+	{
+		printk(KERN_ERR "udf:udf_alloc_i_data (ino %ld) no free memory\n",
+		       inode->i_ino);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
 static mode_t
 udf_convert_permissions(struct fileEntry *fe)
 {
@@ -2063,7 +2099,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos,
 			mark_buffer_dirty_inode(oepos.bh, inode);
 		}
 	}
-	
+
 	brelse(epos.bh);
 	brelse(oepos.bh);
 	return (elen >> 30);
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index 77975ae291a..60d27764424 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -61,7 +61,11 @@ static void extent_trunc(struct inode * inode, struct extent_position *epos,
 	}
 }
 
-void udf_discard_prealloc(struct inode * inode)
+/*
+ * Truncate the last extent to match i_size. This function assumes
+ * that preallocation extent is already truncated.
+ */
+void udf_truncate_tail_extent(struct inode *inode)
 {
 	struct extent_position epos = { NULL, 0, {0, 0}};
 	kernel_lb_addr eloc;
@@ -71,7 +75,10 @@ void udf_discard_prealloc(struct inode * inode)
 	int adsize;
 
 	if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB ||
-		inode->i_size == UDF_I_LENEXTENTS(inode))
+	    inode->i_size == UDF_I_LENEXTENTS(inode))
+		return;
+	/* Are we going to delete the file anyway? */
+	if (inode->i_nlink == 0)
 		return;
 
 	if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT)
@@ -79,36 +86,76 @@ void udf_discard_prealloc(struct inode * inode)
 	else if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_LONG)
 		adsize = sizeof(long_ad);
 	else
-		adsize = 0;
-
-	epos.block = UDF_I_LOCATION(inode);
+		BUG();
 
 	/* Find the last extent in the file */
 	while ((netype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1)
 	{
 		etype = netype;
 		lbcount += elen;
-		if (lbcount > inode->i_size && lbcount - elen < inode->i_size)
-		{
-			WARN_ON(lbcount - inode->i_size >= inode->i_sb->s_blocksize);
+		if (lbcount > inode->i_size) {
+			if (lbcount - inode->i_size >= inode->i_sb->s_blocksize)
+				printk(KERN_WARNING
+				       "udf_truncate_tail_extent(): Too long "
+				       "extent after EOF in inode %u: i_size: "
+				       "%Ld lbcount: %Ld extent %u+%u\n",
+				       (unsigned)inode->i_ino,
+				       (long long)inode->i_size,
+				       (long long)lbcount,
+				       (unsigned)eloc.logicalBlockNum,
+				       (unsigned)elen);
 			nelen = elen - (lbcount - inode->i_size);
 			epos.offset -= adsize;
 			extent_trunc(inode, &epos, eloc, etype, elen, nelen);
 			epos.offset += adsize;
-			lbcount = inode->i_size;
+			if (udf_next_aext(inode, &epos, &eloc, &elen, 1) != -1)
+				printk(KERN_ERR "udf_truncate_tail_extent(): "
+				       "Extent after EOF in inode %u.\n",
+				       (unsigned)inode->i_ino);
+			break;
 		}
 	}
+	/* This inode entry is in-memory only and thus we don't have to mark
+	 * the inode dirty */
+	UDF_I_LENEXTENTS(inode) = inode->i_size;
+	brelse(epos.bh);
+}
+
+void udf_discard_prealloc(struct inode *inode)
+{
+	struct extent_position epos = { NULL, 0, {0, 0}};
+	kernel_lb_addr eloc;
+	uint32_t elen;
+	uint64_t lbcount = 0;
+	int8_t etype = -1, netype;
+	int adsize;
+
+	if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB ||
+		inode->i_size == UDF_I_LENEXTENTS(inode))
+		return;
+
+	if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT)
+		adsize = sizeof(short_ad);
+	else if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_LONG)
+		adsize = sizeof(long_ad);
+	else
+		adsize = 0;
+
+	epos.block = UDF_I_LOCATION(inode);
+
+	/* Find the last extent in the file */
+	while ((netype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) {
+		etype = netype;
+		lbcount += elen;
+	}
 	if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) {
 		epos.offset -= adsize;
 		lbcount -= elen;
 		extent_trunc(inode, &epos, eloc, etype, elen, 0);
-		if (!epos.bh)
-		{
+		if (!epos.bh) {
 			UDF_I_LENALLOC(inode) = epos.offset - udf_file_entry_alloc_offset(inode);
 			mark_inode_dirty(inode);
-		}
-		else
-		{
+		} else {
 			struct allocExtDesc *aed = (struct allocExtDesc *)(epos.bh->b_data);
 			aed->lengthAllocDescs = cpu_to_le32(epos.offset - sizeof(struct allocExtDesc));
 			if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201)
@@ -118,9 +165,9 @@ void udf_discard_prealloc(struct inode * inode)
 			mark_buffer_dirty_inode(epos.bh, inode);
 		}
 	}
+	/* This inode entry is in-memory only and thus we don't have to mark
+	 * the inode dirty */
 	UDF_I_LENEXTENTS(inode) = lbcount;
-
-	WARN_ON(lbcount != inode->i_size);
 	brelse(epos.bh);
 }
 
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 67ded289497..f581f2f69c0 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -146,6 +146,7 @@ extern void udf_free_inode(struct inode *);
 extern struct inode * udf_new_inode (struct inode *, int, int *);
 
 /* truncate.c */
+extern void udf_truncate_tail_extent(struct inode *);
 extern void udf_discard_prealloc(struct inode *);
 extern void udf_truncate_extents(struct inode *);
 
diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index 1e096323bad..6705d74c6d2 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -60,5 +60,5 @@ const struct file_operations ufs_file_operations = {
 	.mmap		= generic_file_mmap,
 	.open           = generic_file_open,
 	.fsync		= ufs_sync_file,
-	.sendfile	= generic_file_sendfile,
+	.splice_read	= generic_file_splice_read,
 };
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 22ff6ed55ce..2b3011689e8 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -87,6 +87,7 @@
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
 #include <linux/vfs.h>
+#include <linux/log2.h>
 
 #include "swab.h"
 #include "util.h"
@@ -854,7 +855,7 @@ magic_found:
 	uspi->s_fmask = fs32_to_cpu(sb, usb1->fs_fmask);
 	uspi->s_fshift = fs32_to_cpu(sb, usb1->fs_fshift);
 
-	if (uspi->s_fsize & (uspi->s_fsize - 1)) {
+	if (!is_power_of_2(uspi->s_fsize)) {
 		printk(KERN_ERR "ufs_read_super: fragment size %u is not a power of 2\n",
 			uspi->s_fsize);
 			goto failed;
@@ -869,7 +870,7 @@ magic_found:
 			uspi->s_fsize);
 		goto failed;
 	}
-	if (uspi->s_bsize & (uspi->s_bsize - 1)) {
+	if (!is_power_of_2(uspi->s_bsize)) {
 		printk(KERN_ERR "ufs_read_super: block size %u is not a power of 2\n",
 			uspi->s_bsize);
 		goto failed;
diff --git a/fs/utimes.c b/fs/utimes.c
index 480f7c8c29d..b3c88952465 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -106,9 +106,16 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
                 if (IS_IMMUTABLE(inode))
                         goto dput_and_out;
 
-		if (current->fsuid != inode->i_uid &&
-		    (error = vfs_permission(&nd, MAY_WRITE)) != 0)
-			goto dput_and_out;
+		if (current->fsuid != inode->i_uid) {
+			if (f) {
+				if (!(f->f_mode & FMODE_WRITE))
+					goto dput_and_out;
+			} else {
+				error = vfs_permission(&nd, MAY_WRITE);
+				if (error)
+					goto dput_and_out;
+			}
+		}
 	}
 	mutex_lock(&inode->i_mutex);
 	error = notify_change(dentry, &newattrs);
diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6
index b49989bb89a..e7a9a83f008 100644
--- a/fs/xfs/Makefile-linux-2.6
+++ b/fs/xfs/Makefile-linux-2.6
@@ -64,6 +64,7 @@ xfs-y				+= xfs_alloc.o \
 				   xfs_dir2_sf.o \
 				   xfs_error.o \
 				   xfs_extfree_item.o \
+				   xfs_filestream.o \
 				   xfs_fsops.o \
 				   xfs_ialloc.o \
 				   xfs_ialloc_btree.o \
@@ -77,6 +78,7 @@ xfs-y				+= xfs_alloc.o \
 				   xfs_log.o \
 				   xfs_log_recover.o \
 				   xfs_mount.o \
+				   xfs_mru_cache.o \
 				   xfs_rename.o \
 				   xfs_trans.o \
 				   xfs_trans_ail.o \
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index 9ebabdf7829..4b6470cf87f 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -100,25 +100,6 @@ kmem_zone_destroy(kmem_zone_t *zone)
 extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
 extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
 
-/*
- * Low memory cache shrinkers
- */
-
-typedef struct shrinker *kmem_shaker_t;
-typedef int (*kmem_shake_func_t)(int, gfp_t);
-
-static inline kmem_shaker_t
-kmem_shake_register(kmem_shake_func_t sfunc)
-{
-	return set_shrinker(DEFAULT_SEEKS, sfunc);
-}
-
-static inline void
-kmem_shake_deregister(kmem_shaker_t shrinker)
-{
-	remove_shrinker(shrinker);
-}
-
 static inline int
 kmem_shake_allow(gfp_t gfp_mask)
 {
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 7361861e3aa..fd4105d662e 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -108,14 +108,19 @@ xfs_page_trace(
 
 /*
  * Schedule IO completion handling on a xfsdatad if this was
- * the final hold on this ioend.
+ * the final hold on this ioend. If we are asked to wait,
+ * flush the workqueue.
  */
 STATIC void
 xfs_finish_ioend(
-	xfs_ioend_t		*ioend)
+	xfs_ioend_t	*ioend,
+	int		wait)
 {
-	if (atomic_dec_and_test(&ioend->io_remaining))
+	if (atomic_dec_and_test(&ioend->io_remaining)) {
 		queue_work(xfsdatad_workqueue, &ioend->io_work);
+		if (wait)
+			flush_workqueue(xfsdatad_workqueue);
+	}
 }
 
 /*
@@ -156,6 +161,8 @@ xfs_setfilesize(
 	xfs_fsize_t		bsize;
 
 	ip = xfs_vtoi(ioend->io_vnode);
+	if (!ip)
+		return;
 
 	ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
 	ASSERT(ioend->io_type != IOMAP_READ);
@@ -334,7 +341,7 @@ xfs_end_bio(
 	bio->bi_end_io = NULL;
 	bio_put(bio);
 
-	xfs_finish_ioend(ioend);
+	xfs_finish_ioend(ioend, 0);
 	return 0;
 }
 
@@ -470,7 +477,7 @@ xfs_submit_ioend(
 		}
 		if (bio)
 			xfs_submit_ioend_bio(ioend, bio);
-		xfs_finish_ioend(ioend);
+		xfs_finish_ioend(ioend, 0);
 	} while ((ioend = next) != NULL);
 }
 
@@ -1003,6 +1010,8 @@ xfs_page_state_convert(
 		if (buffer_unwritten(bh) || buffer_delay(bh) ||
 		    ((buffer_uptodate(bh) || PageUptodate(page)) &&
 		     !buffer_mapped(bh) && (unmapped || startio))) {
+			int new_ioend = 0;
+
 			/*
 			 * Make sure we don't use a read-only iomap
 			 */
@@ -1021,6 +1030,15 @@ xfs_page_state_convert(
 			}
 
 			if (!iomap_valid) {
+				/*
+				 * if we didn't have a valid mapping then we
+				 * need to ensure that we put the new mapping
+				 * in a new ioend structure. This needs to be
+				 * done to ensure that the ioends correctly
+				 * reflect the block mappings at io completion
+				 * for unwritten extent conversion.
+				 */
+				new_ioend = 1;
 				if (type == IOMAP_NEW) {
 					size = xfs_probe_cluster(inode,
 							page, bh, head, 0);
@@ -1040,7 +1058,7 @@ xfs_page_state_convert(
 				if (startio) {
 					xfs_add_to_ioend(inode, bh, offset,
 							type, &ioend,
-							!iomap_valid);
+							new_ioend);
 				} else {
 					set_buffer_dirty(bh);
 					unlock_buffer(bh);
@@ -1416,6 +1434,13 @@ xfs_end_io_direct(
 	 * This is not necessary for synchronous direct I/O, but we do
 	 * it anyway to keep the code uniform and simpler.
 	 *
+	 * Well, if only it were that simple. Because synchronous direct I/O
+	 * requires extent conversion to occur *before* we return to userspace,
+	 * we have to wait for extent conversion to complete. Look at the
+	 * iocb that has been passed to us to determine if this is AIO or
+	 * not. If it is synchronous, tell xfs_finish_ioend() to kick the
+	 * workqueue and wait for it to complete.
+	 *
 	 * The core direct I/O code might be changed to always call the
 	 * completion handler in the future, in which case all this can
 	 * go away.
@@ -1423,9 +1448,9 @@ xfs_end_io_direct(
 	ioend->io_offset = offset;
 	ioend->io_size = size;
 	if (ioend->io_type == IOMAP_READ) {
-		xfs_finish_ioend(ioend);
+		xfs_finish_ioend(ioend, 0);
 	} else if (private && size > 0) {
-		xfs_finish_ioend(ioend);
+		xfs_finish_ioend(ioend, is_sync_kiocb(iocb));
 	} else {
 		/*
 		 * A direct I/O write ioend starts it's life in unwritten
@@ -1434,7 +1459,7 @@ xfs_end_io_direct(
 		 * handler.
 		 */
 		INIT_WORK(&ioend->io_work, xfs_end_bio_written);
-		xfs_finish_ioend(ioend);
+		xfs_finish_ioend(ioend, 0);
 	}
 
 	/*
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index fe4f66a5af1..2df63622354 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -35,7 +35,7 @@
 #include <linux/freezer.h>
 
 static kmem_zone_t *xfs_buf_zone;
-static kmem_shaker_t xfs_buf_shake;
+static struct shrinker *xfs_buf_shake;
 STATIC int xfsbufd(void *);
 STATIC int xfsbufd_wakeup(int, gfp_t);
 STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
@@ -314,7 +314,7 @@ xfs_buf_free(
 
 	ASSERT(list_empty(&bp->b_hash_list));
 
-	if (bp->b_flags & _XBF_PAGE_CACHE) {
+	if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
 		uint		i;
 
 		if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1))
@@ -323,18 +323,11 @@ xfs_buf_free(
 		for (i = 0; i < bp->b_page_count; i++) {
 			struct page	*page = bp->b_pages[i];
 
-			ASSERT(!PagePrivate(page));
+			if (bp->b_flags & _XBF_PAGE_CACHE)
+				ASSERT(!PagePrivate(page));
 			page_cache_release(page);
 		}
 		_xfs_buf_free_pages(bp);
-	} else if (bp->b_flags & _XBF_KMEM_ALLOC) {
-		 /*
-		  * XXX(hch): bp->b_count_desired might be incorrect (see
-		  * xfs_buf_associate_memory for details), but fortunately
-		  * the Linux version of kmem_free ignores the len argument..
-		  */
-		kmem_free(bp->b_addr, bp->b_count_desired);
-		_xfs_buf_free_pages(bp);
 	}
 
 	xfs_buf_deallocate(bp);
@@ -764,43 +757,44 @@ xfs_buf_get_noaddr(
 	size_t			len,
 	xfs_buftarg_t		*target)
 {
-	size_t			malloc_len = len;
+	unsigned long		page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
+	int			error, i;
 	xfs_buf_t		*bp;
-	void			*data;
-	int			error;
 
 	bp = xfs_buf_allocate(0);
 	if (unlikely(bp == NULL))
 		goto fail;
 	_xfs_buf_initialize(bp, target, 0, len, 0);
 
- try_again:
-	data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL | KM_LARGE);
-	if (unlikely(data == NULL))
+	error = _xfs_buf_get_pages(bp, page_count, 0);
+	if (error)
 		goto fail_free_buf;
 
-	/* check whether alignment matches.. */
-	if ((__psunsigned_t)data !=
-	    ((__psunsigned_t)data & ~target->bt_smask)) {
-		/* .. else double the size and try again */
-		kmem_free(data, malloc_len);
-		malloc_len <<= 1;
-		goto try_again;
+	for (i = 0; i < page_count; i++) {
+		bp->b_pages[i] = alloc_page(GFP_KERNEL);
+		if (!bp->b_pages[i])
+			goto fail_free_mem;
 	}
+	bp->b_flags |= _XBF_PAGES;
 
-	error = xfs_buf_associate_memory(bp, data, len);
-	if (error)
+	error = _xfs_buf_map_pages(bp, XBF_MAPPED);
+	if (unlikely(error)) {
+		printk(KERN_WARNING "%s: failed to map pages\n",
+				__FUNCTION__);
 		goto fail_free_mem;
-	bp->b_flags |= _XBF_KMEM_ALLOC;
+	}
 
 	xfs_buf_unlock(bp);
 
-	XB_TRACE(bp, "no_daddr", data);
+	XB_TRACE(bp, "no_daddr", len);
 	return bp;
+
  fail_free_mem:
-	kmem_free(data, malloc_len);
+	while (--i >= 0)
+		__free_page(bp->b_pages[i]);
+	_xfs_buf_free_pages(bp);
  fail_free_buf:
-	xfs_buf_free(bp);
+	xfs_buf_deallocate(bp);
  fail:
 	return NULL;
 }
@@ -1453,6 +1447,7 @@ xfs_free_buftarg(
 	int			external)
 {
 	xfs_flush_buftarg(btp, 1);
+	xfs_blkdev_issue_flush(btp);
 	if (external)
 		xfs_blkdev_put(btp->bt_bdev);
 	xfs_free_bufhash(btp);
@@ -1837,7 +1832,7 @@ xfs_buf_init(void)
 	if (!xfsdatad_workqueue)
 		goto out_destroy_xfslogd_workqueue;
 
-	xfs_buf_shake = kmem_shake_register(xfsbufd_wakeup);
+	xfs_buf_shake = set_shrinker(DEFAULT_SEEKS, xfsbufd_wakeup);
 	if (!xfs_buf_shake)
 		goto out_destroy_xfsdatad_workqueue;
 
@@ -1859,7 +1854,7 @@ xfs_buf_init(void)
 void
 xfs_buf_terminate(void)
 {
-	kmem_shake_deregister(xfs_buf_shake);
+	remove_shrinker(xfs_buf_shake);
 	destroy_workqueue(xfsdatad_workqueue);
 	destroy_workqueue(xfslogd_workqueue);
 	kmem_zone_destroy(xfs_buf_zone);
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index b6241f6201a..b5908a34b15 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -63,7 +63,7 @@ typedef enum {
 
 	/* flags used only internally */
 	_XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache		   */
-	_XBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc()		   */
+	_XBF_PAGES = (1 << 18),	    /* backed by refcounted pages	   */
 	_XBF_RUN_QUEUES = (1 << 19),/* run block device task queue	   */
 	_XBF_DELWRI_Q = (1 << 21),   /* buffer on delwri queue		   */
 } xfs_buf_flags_t;
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index cb51dc96135..cbcd40c8c2a 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -124,30 +124,6 @@ xfs_file_aio_write_invis(
 }
 
 STATIC ssize_t
-xfs_file_sendfile(
-	struct file		*filp,
-	loff_t			*pos,
-	size_t			count,
-	read_actor_t		actor,
-	void			*target)
-{
-	return bhv_vop_sendfile(vn_from_inode(filp->f_path.dentry->d_inode),
-				filp, pos, 0, count, actor, target, NULL);
-}
-
-STATIC ssize_t
-xfs_file_sendfile_invis(
-	struct file		*filp,
-	loff_t			*pos,
-	size_t			count,
-	read_actor_t		actor,
-	void			*target)
-{
-	return bhv_vop_sendfile(vn_from_inode(filp->f_path.dentry->d_inode),
-				filp, pos, IO_INVIS, count, actor, target, NULL);
-}
-
-STATIC ssize_t
 xfs_file_splice_read(
 	struct file		*infilp,
 	loff_t			*ppos,
@@ -208,15 +184,6 @@ xfs_file_open(
 }
 
 STATIC int
-xfs_file_close(
-	struct file	*filp,
-	fl_owner_t	id)
-{
-	return -bhv_vop_close(vn_from_inode(filp->f_path.dentry->d_inode), 0,
-				file_count(filp) > 1 ? L_FALSE : L_TRUE, NULL);
-}
-
-STATIC int
 xfs_file_release(
 	struct inode	*inode,
 	struct file	*filp)
@@ -452,7 +419,6 @@ const struct file_operations xfs_file_operations = {
 	.write		= do_sync_write,
 	.aio_read	= xfs_file_aio_read,
 	.aio_write	= xfs_file_aio_write,
-	.sendfile	= xfs_file_sendfile,
 	.splice_read	= xfs_file_splice_read,
 	.splice_write	= xfs_file_splice_write,
 	.unlocked_ioctl	= xfs_file_ioctl,
@@ -461,7 +427,6 @@ const struct file_operations xfs_file_operations = {
 #endif
 	.mmap		= xfs_file_mmap,
 	.open		= xfs_file_open,
-	.flush		= xfs_file_close,
 	.release	= xfs_file_release,
 	.fsync		= xfs_file_fsync,
 #ifdef HAVE_FOP_OPEN_EXEC
@@ -475,7 +440,6 @@ const struct file_operations xfs_invis_file_operations = {
 	.write		= do_sync_write,
 	.aio_read	= xfs_file_aio_read_invis,
 	.aio_write	= xfs_file_aio_write_invis,
-	.sendfile	= xfs_file_sendfile_invis,
 	.splice_read	= xfs_file_splice_read_invis,
 	.splice_write	= xfs_file_splice_write_invis,
 	.unlocked_ioctl	= xfs_file_ioctl_invis,
@@ -484,7 +448,6 @@ const struct file_operations xfs_invis_file_operations = {
 #endif
 	.mmap		= xfs_file_mmap,
 	.open		= xfs_file_open,
-	.flush		= xfs_file_close,
 	.release	= xfs_file_release,
 	.fsync		= xfs_file_fsync,
 };
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
index ed3a5e1b4b6..bb72c3d4141 100644
--- a/fs/xfs/linux-2.6/xfs_globals.c
+++ b/fs/xfs/linux-2.6/xfs_globals.c
@@ -46,6 +46,7 @@ xfs_param_t xfs_params = {
 	.inherit_nosym	= {	0,		0,		1	},
 	.rotorstep	= {	1,		1,		255	},
 	.inherit_nodfrg	= {	0,		1,		1	},
+	.fstrm_timer	= {	1,		50,		3600*100},
 };
 
 /*
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index ff5c41ff8d4..5917808abbd 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -1019,7 +1019,7 @@ xfs_ioc_bulkstat(
 
 	if (cmd == XFS_IOC_FSINUMBERS)
 		error = xfs_inumbers(mp, &inlast, &count,
-						bulkreq.ubuffer);
+					bulkreq.ubuffer, xfs_inumbers_fmt);
 	else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE)
 		error = xfs_bulkstat_single(mp, &inlast,
 						bulkreq.ubuffer, &done);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index b83cebc165f..141cf15067c 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -23,10 +23,25 @@
 #include <linux/fs.h>
 #include <asm/uaccess.h>
 #include "xfs.h"
-#include "xfs_types.h"
 #include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir2.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dir2_sf.h"
 #include "xfs_vfs.h"
 #include "xfs_vnode.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_itable.h"
+#include "xfs_error.h"
 #include "xfs_dfrag.h"
 
 #define  _NATIVE_IOC(cmd, type) \
@@ -34,6 +49,7 @@
 
 #if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
 #define BROKEN_X86_ALIGNMENT
+#define _PACKED __attribute__((packed))
 /* on ia32 l_start is on a 32-bit boundary */
 typedef struct xfs_flock64_32 {
 	__s16		l_type;
@@ -75,35 +91,276 @@ xfs_ioctl32_flock(
 	return (unsigned long)p;
 }
 
+typedef struct compat_xfs_fsop_geom_v1 {
+	__u32		blocksize;	/* filesystem (data) block size */
+	__u32		rtextsize;	/* realtime extent size		*/
+	__u32		agblocks;	/* fsblocks in an AG		*/
+	__u32		agcount;	/* number of allocation groups	*/
+	__u32		logblocks;	/* fsblocks in the log		*/
+	__u32		sectsize;	/* (data) sector size, bytes	*/
+	__u32		inodesize;	/* inode size in bytes		*/
+	__u32		imaxpct;	/* max allowed inode space(%)	*/
+	__u64		datablocks;	/* fsblocks in data subvolume	*/
+	__u64		rtblocks;	/* fsblocks in realtime subvol	*/
+	__u64		rtextents;	/* rt extents in realtime subvol*/
+	__u64		logstart;	/* starting fsblock of the log	*/
+	unsigned char	uuid[16];	/* unique id of the filesystem	*/
+	__u32		sunit;		/* stripe unit, fsblocks	*/
+	__u32		swidth;		/* stripe width, fsblocks	*/
+	__s32		version;	/* structure version		*/
+	__u32		flags;		/* superblock version flags	*/
+	__u32		logsectsize;	/* log sector size, bytes	*/
+	__u32		rtsectsize;	/* realtime sector size, bytes	*/
+	__u32		dirblocksize;	/* directory block size, bytes	*/
+} __attribute__((packed)) compat_xfs_fsop_geom_v1_t;
+
+#define XFS_IOC_FSGEOMETRY_V1_32  \
+	_IOR ('X', 100, struct compat_xfs_fsop_geom_v1)
+
+STATIC unsigned long xfs_ioctl32_geom_v1(unsigned long arg)
+{
+	compat_xfs_fsop_geom_v1_t __user *p32 = (void __user *)arg;
+	xfs_fsop_geom_v1_t __user *p = compat_alloc_user_space(sizeof(*p));
+
+	if (copy_in_user(p, p32, sizeof(*p32)))
+		return -EFAULT;
+	return (unsigned long)p;
+}
+
+typedef struct compat_xfs_inogrp {
+	__u64		xi_startino;	/* starting inode number	*/
+	__s32		xi_alloccount;	/* # bits set in allocmask	*/
+	__u64		xi_allocmask;	/* mask of allocated inodes	*/
+} __attribute__((packed)) compat_xfs_inogrp_t;
+
+STATIC int xfs_inumbers_fmt_compat(
+	void __user *ubuffer,
+	const xfs_inogrp_t *buffer,
+	long count,
+	long *written)
+{
+	compat_xfs_inogrp_t *p32 = ubuffer;
+	long i;
+
+	for (i = 0; i < count; i++) {
+		if (put_user(buffer[i].xi_startino,   &p32[i].xi_startino) ||
+		    put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) ||
+		    put_user(buffer[i].xi_allocmask,  &p32[i].xi_allocmask))
+			return -EFAULT;
+	}
+	*written = count * sizeof(*p32);
+	return 0;
+}
+
 #else
 
-typedef struct xfs_fsop_bulkreq32 {
+#define xfs_inumbers_fmt_compat xfs_inumbers_fmt
+#define _PACKED
+
+#endif
+
+/* XFS_IOC_FSBULKSTAT and friends */
+
+typedef struct compat_xfs_bstime {
+	__s32		tv_sec;		/* seconds		*/
+	__s32		tv_nsec;	/* and nanoseconds	*/
+} compat_xfs_bstime_t;
+
+STATIC int xfs_bstime_store_compat(
+	compat_xfs_bstime_t __user *p32,
+	const xfs_bstime_t *p)
+{
+	__s32 sec32;
+
+	sec32 = p->tv_sec;
+	if (put_user(sec32, &p32->tv_sec) ||
+	    put_user(p->tv_nsec, &p32->tv_nsec))
+		return -EFAULT;
+	return 0;
+}
+
+typedef struct compat_xfs_bstat {
+	__u64		bs_ino;		/* inode number			*/
+	__u16		bs_mode;	/* type and mode		*/
+	__u16		bs_nlink;	/* number of links		*/
+	__u32		bs_uid;		/* user id			*/
+	__u32		bs_gid;		/* group id			*/
+	__u32		bs_rdev;	/* device value			*/
+	__s32		bs_blksize;	/* block size			*/
+	__s64		bs_size;	/* file size			*/
+	compat_xfs_bstime_t bs_atime;	/* access time			*/
+	compat_xfs_bstime_t bs_mtime;	/* modify time			*/
+	compat_xfs_bstime_t bs_ctime;	/* inode change time		*/
+	int64_t		bs_blocks;	/* number of blocks		*/
+	__u32		bs_xflags;	/* extended flags		*/
+	__s32		bs_extsize;	/* extent size			*/
+	__s32		bs_extents;	/* number of extents		*/
+	__u32		bs_gen;		/* generation count		*/
+	__u16		bs_projid;	/* project id			*/
+	unsigned char	bs_pad[14];	/* pad space, unused		*/
+	__u32		bs_dmevmask;	/* DMIG event mask		*/
+	__u16		bs_dmstate;	/* DMIG state info		*/
+	__u16		bs_aextents;	/* attribute number of extents	*/
+} _PACKED compat_xfs_bstat_t;
+
+STATIC int xfs_bulkstat_one_fmt_compat(
+	void			__user *ubuffer,
+	const xfs_bstat_t	*buffer)
+{
+	compat_xfs_bstat_t __user *p32 = ubuffer;
+
+	if (put_user(buffer->bs_ino, &p32->bs_ino) ||
+	    put_user(buffer->bs_mode, &p32->bs_mode) ||
+	    put_user(buffer->bs_nlink, &p32->bs_nlink) ||
+	    put_user(buffer->bs_uid, &p32->bs_uid) ||
+	    put_user(buffer->bs_gid, &p32->bs_gid) ||
+	    put_user(buffer->bs_rdev, &p32->bs_rdev) ||
+	    put_user(buffer->bs_blksize, &p32->bs_blksize) ||
+	    put_user(buffer->bs_size, &p32->bs_size) ||
+	    xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) ||
+	    xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) ||
+	    xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) ||
+	    put_user(buffer->bs_blocks, &p32->bs_blocks) ||
+	    put_user(buffer->bs_xflags, &p32->bs_xflags) ||
+	    put_user(buffer->bs_extsize, &p32->bs_extsize) ||
+	    put_user(buffer->bs_extents, &p32->bs_extents) ||
+	    put_user(buffer->bs_gen, &p32->bs_gen) ||
+	    put_user(buffer->bs_projid, &p32->bs_projid) ||
+	    put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) ||
+	    put_user(buffer->bs_dmstate, &p32->bs_dmstate) ||
+	    put_user(buffer->bs_aextents, &p32->bs_aextents))
+		return -EFAULT;
+	return sizeof(*p32);
+}
+
+
+
+typedef struct compat_xfs_fsop_bulkreq {
 	compat_uptr_t	lastip;		/* last inode # pointer		*/
 	__s32		icount;		/* count of entries in buffer	*/
 	compat_uptr_t	ubuffer;	/* user buffer for inode desc.	*/
-	__s32		ocount;		/* output count pointer		*/
-} xfs_fsop_bulkreq32_t;
+	compat_uptr_t	ocount;		/* output count pointer		*/
+} compat_xfs_fsop_bulkreq_t;
 
-STATIC unsigned long
-xfs_ioctl32_bulkstat(
-	unsigned long		arg)
+#define XFS_IOC_FSBULKSTAT_32 \
+	_IOWR('X', 101, struct compat_xfs_fsop_bulkreq)
+#define XFS_IOC_FSBULKSTAT_SINGLE_32 \
+	_IOWR('X', 102, struct compat_xfs_fsop_bulkreq)
+#define XFS_IOC_FSINUMBERS_32 \
+	_IOWR('X', 103, struct compat_xfs_fsop_bulkreq)
+
+/* copied from xfs_ioctl.c */
+STATIC int
+xfs_ioc_bulkstat_compat(
+	xfs_mount_t		*mp,
+	unsigned int		cmd,
+	void			__user *arg)
 {
-	xfs_fsop_bulkreq32_t	__user *p32 = (void __user *)arg;
-	xfs_fsop_bulkreq_t	__user *p = compat_alloc_user_space(sizeof(*p));
+	compat_xfs_fsop_bulkreq_t __user *p32 = (void __user *)arg;
 	u32			addr;
+	xfs_fsop_bulkreq_t	bulkreq;
+	int			count;	/* # of records returned */
+	xfs_ino_t		inlast;	/* last inode number */
+	int			done;
+	int			error;
+
+	/* done = 1 if there are more stats to get and if bulkstat */
+	/* should be called again (unused here, but used in dmapi) */
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -XFS_ERROR(EIO);
+
+	if (get_user(addr, &p32->lastip))
+		return -EFAULT;
+	bulkreq.lastip = compat_ptr(addr);
+	if (get_user(bulkreq.icount, &p32->icount) ||
+	    get_user(addr, &p32->ubuffer))
+		return -EFAULT;
+	bulkreq.ubuffer = compat_ptr(addr);
+	if (get_user(addr, &p32->ocount))
+		return -EFAULT;
+	bulkreq.ocount = compat_ptr(addr);
+
+	if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
+		return -XFS_ERROR(EFAULT);
+
+	if ((count = bulkreq.icount) <= 0)
+		return -XFS_ERROR(EINVAL);
+
+	if (cmd == XFS_IOC_FSINUMBERS)
+		error = xfs_inumbers(mp, &inlast, &count,
+				bulkreq.ubuffer, xfs_inumbers_fmt_compat);
+	else {
+		/* declare a var to get a warning in case the type changes */
+		bulkstat_one_fmt_pf formatter = xfs_bulkstat_one_fmt_compat;
+		error = xfs_bulkstat(mp, &inlast, &count,
+			xfs_bulkstat_one, formatter,
+			sizeof(compat_xfs_bstat_t), bulkreq.ubuffer,
+			BULKSTAT_FG_QUICK, &done);
+	}
+	if (error)
+		return -error;
+
+	if (bulkreq.ocount != NULL) {
+		if (copy_to_user(bulkreq.lastip, &inlast,
+						sizeof(xfs_ino_t)))
+			return -XFS_ERROR(EFAULT);
+
+		if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
+			return -XFS_ERROR(EFAULT);
+	}
+
+	return 0;
+}
+
+
+
+typedef struct compat_xfs_fsop_handlereq {
+	__u32		fd;		/* fd for FD_TO_HANDLE		*/
+	compat_uptr_t	path;		/* user pathname		*/
+	__u32		oflags;		/* open flags			*/
+	compat_uptr_t	ihandle;	/* user supplied handle		*/
+	__u32		ihandlen;	/* user supplied length		*/
+	compat_uptr_t	ohandle;	/* user buffer for handle	*/
+	compat_uptr_t	ohandlen;	/* user buffer length		*/
+} compat_xfs_fsop_handlereq_t;
+
+#define XFS_IOC_PATH_TO_FSHANDLE_32 \
+	_IOWR('X', 104, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_PATH_TO_HANDLE_32 \
+	_IOWR('X', 105, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_FD_TO_HANDLE_32 \
+	_IOWR('X', 106, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_OPEN_BY_HANDLE_32 \
+	_IOWR('X', 107, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_READLINK_BY_HANDLE_32 \
+	_IOWR('X', 108, struct compat_xfs_fsop_handlereq)
+
+STATIC unsigned long xfs_ioctl32_fshandle(unsigned long arg)
+{
+	compat_xfs_fsop_handlereq_t __user *p32 = (void __user *)arg;
+	xfs_fsop_handlereq_t __user *p = compat_alloc_user_space(sizeof(*p));
+	u32 addr;
 
-	if (get_user(addr, &p32->lastip) ||
-	    put_user(compat_ptr(addr), &p->lastip) ||
-	    copy_in_user(&p->icount, &p32->icount, sizeof(s32)) ||
-	    get_user(addr, &p32->ubuffer) ||
-	    put_user(compat_ptr(addr), &p->ubuffer) ||
-	    get_user(addr, &p32->ocount) ||
-	    put_user(compat_ptr(addr), &p->ocount))
+	if (copy_in_user(&p->fd, &p32->fd, sizeof(__u32)) ||
+	    get_user(addr, &p32->path) ||
+	    put_user(compat_ptr(addr), &p->path) ||
+	    copy_in_user(&p->oflags, &p32->oflags, sizeof(__u32)) ||
+	    get_user(addr, &p32->ihandle) ||
+	    put_user(compat_ptr(addr), &p->ihandle) ||
+	    copy_in_user(&p->ihandlen, &p32->ihandlen, sizeof(__u32)) ||
+	    get_user(addr, &p32->ohandle) ||
+	    put_user(compat_ptr(addr), &p->ohandle) ||
+	    get_user(addr, &p32->ohandlen) ||
+	    put_user(compat_ptr(addr), &p->ohandlen))
 		return -EFAULT;
 
 	return (unsigned long)p;
 }
-#endif
+
 
 STATIC long
 xfs_compat_ioctl(
@@ -118,7 +375,6 @@ xfs_compat_ioctl(
 
 	switch (cmd) {
 	case XFS_IOC_DIOINFO:
-	case XFS_IOC_FSGEOMETRY_V1:
 	case XFS_IOC_FSGEOMETRY:
 	case XFS_IOC_GETVERSION:
 	case XFS_IOC_GETXFLAGS:
@@ -131,12 +387,7 @@ xfs_compat_ioctl(
 	case XFS_IOC_GETBMAPA:
 	case XFS_IOC_GETBMAPX:
 /* not handled
-	case XFS_IOC_FD_TO_HANDLE:
-	case XFS_IOC_PATH_TO_HANDLE:
-	case XFS_IOC_PATH_TO_FSHANDLE:
-	case XFS_IOC_OPEN_BY_HANDLE:
 	case XFS_IOC_FSSETDM_BY_HANDLE:
-	case XFS_IOC_READLINK_BY_HANDLE:
 	case XFS_IOC_ATTRLIST_BY_HANDLE:
 	case XFS_IOC_ATTRMULTI_BY_HANDLE:
 */
@@ -166,6 +417,10 @@ xfs_compat_ioctl(
 		arg = xfs_ioctl32_flock(arg);
 		cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
 		break;
+	case XFS_IOC_FSGEOMETRY_V1_32:
+		arg = xfs_ioctl32_geom_v1(arg);
+		cmd = _NATIVE_IOC(cmd, struct xfs_fsop_geom_v1);
+		break;
 
 #else /* These are handled fine if no alignment issues */
 	case XFS_IOC_ALLOCSP:
@@ -176,18 +431,28 @@ xfs_compat_ioctl(
 	case XFS_IOC_FREESP64:
 	case XFS_IOC_RESVSP64:
 	case XFS_IOC_UNRESVSP64:
+	case XFS_IOC_FSGEOMETRY_V1:
 		break;
 
 	/* xfs_bstat_t still has wrong u32 vs u64 alignment */
 	case XFS_IOC_SWAPEXT:
 		break;
 
-	case XFS_IOC_FSBULKSTAT_SINGLE:
-	case XFS_IOC_FSBULKSTAT:
-	case XFS_IOC_FSINUMBERS:
-		arg = xfs_ioctl32_bulkstat(arg);
-		break;
 #endif
+	case XFS_IOC_FSBULKSTAT_32:
+	case XFS_IOC_FSBULKSTAT_SINGLE_32:
+	case XFS_IOC_FSINUMBERS_32:
+		cmd = _NATIVE_IOC(cmd, struct xfs_fsop_bulkreq);
+		return xfs_ioc_bulkstat_compat(XFS_BHVTOI(VNHEAD(vp))->i_mount,
+				cmd, (void*)arg);
+	case XFS_IOC_FD_TO_HANDLE_32:
+	case XFS_IOC_PATH_TO_HANDLE_32:
+	case XFS_IOC_PATH_TO_FSHANDLE_32:
+	case XFS_IOC_OPEN_BY_HANDLE_32:
+	case XFS_IOC_READLINK_BY_HANDLE_32:
+		arg = xfs_ioctl32_fshandle(arg);
+		cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq);
+		break;
 	default:
 		return -ENOIOCTLCMD;
 	}
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 715adad7dd4..330c4ba9d40 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -101,7 +101,6 @@
  * Feature macros (disable/enable)
  */
 #undef  HAVE_REFCACHE	/* reference cache not needed for NFS in 2.6 */
-#define HAVE_SENDFILE	/* sendfile(2) exists in 2.6, but not in 2.4 */
 #define HAVE_SPLICE	/* a splice(2) exists in 2.6, but not in 2.4 */
 #ifdef CONFIG_SMP
 #define HAVE_PERCPU_SB	/* per cpu superblock counters are a 2.6 feature */
@@ -124,6 +123,7 @@
 #define xfs_inherit_nosymlinks	xfs_params.inherit_nosym.val
 #define xfs_rotorstep		xfs_params.rotorstep.val
 #define xfs_inherit_nodefrag	xfs_params.inherit_nodfrg.val
+#define xfs_fstrm_centisecs	xfs_params.fstrm_timer.val
 
 #define current_cpu()		(raw_smp_processor_id())
 #define current_pid()		(current->pid)
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 86fb671a8bc..765ec16a6e3 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -159,7 +159,7 @@ xfs_iozero(
 		if (status)
 			goto unlock;
 
-		memclear_highpage_flush(page, offset, bytes);
+		zero_user_page(page, offset, bytes, KM_USER0);
 
 		status = mapping->a_ops->commit_write(NULL, page, offset,
 							offset + bytes);
@@ -287,50 +287,6 @@ xfs_read(
 }
 
 ssize_t
-xfs_sendfile(
-	bhv_desc_t		*bdp,
-	struct file		*filp,
-	loff_t			*offset,
-	int			ioflags,
-	size_t			count,
-	read_actor_t		actor,
-	void			*target,
-	cred_t			*credp)
-{
-	xfs_inode_t		*ip = XFS_BHVTOI(bdp);
-	xfs_mount_t		*mp = ip->i_mount;
-	ssize_t			ret;
-
-	XFS_STATS_INC(xs_read_calls);
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return -EIO;
-
-	xfs_ilock(ip, XFS_IOLOCK_SHARED);
-
-	if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) &&
-	    (!(ioflags & IO_INVIS))) {
-		bhv_vrwlock_t locktype = VRWLOCK_READ;
-		int error;
-
-		error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp),
-				      *offset, count,
-				      FILP_DELAY_FLAG(filp), &locktype);
-		if (error) {
-			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-			return -error;
-		}
-	}
-	xfs_rw_enter_trace(XFS_SENDFILE_ENTER, &ip->i_iocore,
-		   (void *)(unsigned long)target, count, *offset, ioflags);
-	ret = generic_file_sendfile(filp, offset, count, actor, target);
-	if (ret > 0)
-		XFS_STATS_ADD(xs_read_bytes, ret);
-
-	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-	return ret;
-}
-
-ssize_t
 xfs_splice_read(
 	bhv_desc_t		*bdp,
 	struct file		*infilp,
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index 7ac51b1d216..7c60a1eed88 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -90,9 +90,6 @@ extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *,
 extern ssize_t xfs_write(struct bhv_desc *, struct kiocb *,
 				const struct iovec *, unsigned int,
 				loff_t *, int, struct cred *);
-extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *,
-				loff_t *, int, size_t, read_actor_t,
-				void *, struct cred *);
 extern ssize_t xfs_splice_read(struct bhv_desc *, struct file *, loff_t *,
 				struct pipe_inode_info *, size_t, int, int,
 				struct cred *);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index bf9a9d5909b..06894cf00b1 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -547,7 +547,8 @@ vfs_sync_worker(
 
 	if (!(vfsp->vfs_flag & VFS_RDONLY))
 		error = bhv_vfs_sync(vfsp, SYNC_FSDATA | SYNC_BDFLUSH | \
-					SYNC_ATTR | SYNC_REFCACHE, NULL);
+					SYNC_ATTR | SYNC_REFCACHE | SYNC_SUPER,
+					NULL);
 	vfsp->vfs_sync_seq++;
 	wake_up(&vfsp->vfs_wait_single_sync_task);
 }
@@ -663,7 +664,7 @@ xfs_fs_sync_super(
 		 * occur here so don't bother flushing the buftarg (i.e
 		 * SYNC_QUIESCE) because it'll just get dirty again.
 		 */
-		flags = SYNC_FSDATA | SYNC_DELWRI | SYNC_WAIT | SYNC_IOWAIT;
+		flags = SYNC_DATA_QUIESCE;
 	} else
 		flags = SYNC_FSDATA | (wait ? SYNC_WAIT : 0);
 
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index cd6eaa44aa2..bb997d75c05 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -210,6 +210,17 @@ static ctl_table xfs_table[] = {
 		.extra1		= &xfs_params.inherit_nodfrg.min,
 		.extra2		= &xfs_params.inherit_nodfrg.max
 	},
+	{
+		.ctl_name	= XFS_FILESTREAM_TIMER,
+		.procname	= "filestream_centisecs",
+		.data		= &xfs_params.fstrm_timer.val,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &xfs_params.fstrm_timer.min,
+		.extra2		= &xfs_params.fstrm_timer.max,
+	},
 	/* please keep this the last entry */
 #ifdef CONFIG_PROC_FS
 	{
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h
index a631fb8cc5a..98b97e399d6 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.h
+++ b/fs/xfs/linux-2.6/xfs_sysctl.h
@@ -47,6 +47,7 @@ typedef struct xfs_param {
 	xfs_sysctl_val_t inherit_nosym;	/* Inherit the "nosymlinks" flag. */
 	xfs_sysctl_val_t rotorstep;	/* inode32 AG rotoring control knob */
 	xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */
+	xfs_sysctl_val_t fstrm_timer;	/* Filestream dir-AG assoc'n timeout. */
 } xfs_param_t;
 
 /*
@@ -86,6 +87,7 @@ enum {
 	XFS_INHERIT_NOSYM = 19,
 	XFS_ROTORSTEP = 20,
 	XFS_INHERIT_NODFRG = 21,
+	XFS_FILESTREAM_TIMER = 22,
 };
 
 extern xfs_param_t	xfs_params;
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
index e2c2ce98ab5..dca3481aaaf 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ b/fs/xfs/linux-2.6/xfs_vfs.h
@@ -92,6 +92,21 @@ typedef enum {
 #define SYNC_REFCACHE		0x0040  /* prune some of the nfs ref cache */
 #define SYNC_REMOUNT		0x0080  /* remount readonly, no dummy LRs */
 #define SYNC_IOWAIT		0x0100  /* wait for all I/O to complete */
+#define SYNC_SUPER		0x0200  /* flush superblock to disk */
+
+/*
+ * When remounting a filesystem read-only or freezing the filesystem,
+ * we have two phases to execute. This first phase is syncing the data
+ * before we quiesce the fielsystem, and the second is flushing all the
+ * inodes out after we've waited for all the transactions created by
+ * the first phase to complete. The second phase uses SYNC_INODE_QUIESCE
+ * to ensure that the inodes are written to their location on disk
+ * rather than just existing in transactions in the log. This means
+ * after a quiesce there is no log replay required to write the inodes
+ * to disk (this is the main difference between a sync and a quiesce).
+ */
+#define SYNC_DATA_QUIESCE	(SYNC_DELWRI|SYNC_FSDATA|SYNC_WAIT|SYNC_IOWAIT)
+#define SYNC_INODE_QUIESCE	(SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT)
 
 #define SHUTDOWN_META_IO_ERROR	0x0001	/* write attempt to metadata failed */
 #define SHUTDOWN_LOG_IO_ERROR	0x0002	/* write attempt to the log failed */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index d1b2d01843d..5742d65f078 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -129,19 +129,13 @@ typedef enum bhv_vchange {
 	VCHANGE_FLAGS_IOEXCL_COUNT	= 4
 } bhv_vchange_t;
 
-typedef enum { L_FALSE, L_TRUE } lastclose_t;
-
 typedef int	(*vop_open_t)(bhv_desc_t *, struct cred *);
-typedef int	(*vop_close_t)(bhv_desc_t *, int, lastclose_t, struct cred *);
 typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *,
 				const struct iovec *, unsigned int,
 				loff_t *, int, struct cred *);
 typedef ssize_t (*vop_write_t)(bhv_desc_t *, struct kiocb *,
 				const struct iovec *, unsigned int,
 				loff_t *, int, struct cred *);
-typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *,
-				loff_t *, int, size_t, read_actor_t,
-				void *, struct cred *);
 typedef ssize_t (*vop_splice_read_t)(bhv_desc_t *, struct file *, loff_t *,
 				struct pipe_inode_info *, size_t, int, int,
 				struct cred *);
@@ -203,10 +197,8 @@ typedef int	(*vop_iflush_t)(bhv_desc_t *, int);
 typedef struct bhv_vnodeops {
 	bhv_position_t  vn_position;    /* position within behavior chain */
 	vop_open_t		vop_open;
-	vop_close_t		vop_close;
 	vop_read_t		vop_read;
 	vop_write_t		vop_write;
-	vop_sendfile_t		vop_sendfile;
 	vop_splice_read_t	vop_splice_read;
 	vop_splice_write_t	vop_splice_write;
 	vop_ioctl_t		vop_ioctl;
@@ -249,13 +241,10 @@ typedef struct bhv_vnodeops {
 #define VNHEAD(vp)	((vp)->v_bh.bh_first)
 #define VOP(op, vp)	(*((bhv_vnodeops_t *)VNHEAD(vp)->bd_ops)->op)
 #define bhv_vop_open(vp, cr)		VOP(vop_open, vp)(VNHEAD(vp),cr)
-#define bhv_vop_close(vp, f,last,cr)	VOP(vop_close, vp)(VNHEAD(vp),f,last,cr)
 #define bhv_vop_read(vp,file,iov,segs,offset,ioflags,cr)		\
 		VOP(vop_read, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr)
 #define bhv_vop_write(vp,file,iov,segs,offset,ioflags,cr)		\
 		VOP(vop_write, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr)
-#define bhv_vop_sendfile(vp,f,off,ioflags,cnt,act,targ,cr)		\
-		VOP(vop_sendfile, vp)(VNHEAD(vp),f,off,ioflags,cnt,act,targ,cr)
 #define bhv_vop_splice_read(vp,f,o,pipe,cnt,fl,iofl,cr)			\
 		VOP(vop_splice_read, vp)(VNHEAD(vp),f,o,pipe,cnt,fl,iofl,cr)
 #define bhv_vop_splice_write(vp,f,o,pipe,cnt,fl,iofl,cr)		\
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 3e4a8ad8a34..7def4c69934 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -62,10 +62,9 @@ uint		ndquot;
 
 kmem_zone_t	*qm_dqzone;
 kmem_zone_t	*qm_dqtrxzone;
-static kmem_shaker_t	xfs_qm_shaker;
+static struct shrinker *xfs_qm_shaker;
 
 static cred_t	xfs_zerocr;
-static xfs_inode_t	xfs_zeroino;
 
 STATIC void	xfs_qm_list_init(xfs_dqlist_t *, char *, int);
 STATIC void	xfs_qm_list_destroy(xfs_dqlist_t *);
@@ -150,7 +149,7 @@ xfs_Gqm_init(void)
 	} else
 		xqm->qm_dqzone = qm_dqzone;
 
-	xfs_qm_shaker = kmem_shake_register(xfs_qm_shake);
+	xfs_qm_shaker = set_shrinker(DEFAULT_SEEKS, xfs_qm_shake);
 
 	/*
 	 * The t_dqinfo portion of transactions.
@@ -182,7 +181,7 @@ xfs_qm_destroy(
 
 	ASSERT(xqm != NULL);
 	ASSERT(xqm->qm_nrefs == 0);
-	kmem_shake_deregister(xfs_qm_shaker);
+	remove_shrinker(xfs_qm_shaker);
 	hsize = xqm->qm_dqhashmask + 1;
 	for (i = 0; i < hsize; i++) {
 		xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
@@ -1415,7 +1414,7 @@ xfs_qm_qino_alloc(
 		return error;
 	}
 
-	if ((error = xfs_dir_ialloc(&tp, &xfs_zeroino, S_IFREG, 1, 0,
+	if ((error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0,
 				   &xfs_zerocr, 0, 1, ip, &committed))) {
 		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
 				 XFS_TRANS_ABORT);
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index bf0a12040b1..b5a7d92c684 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -38,6 +38,7 @@
 #define XFS_RW_TRACE 1
 #define XFS_BUF_TRACE 1
 #define XFS_VNODE_TRACE 1
+#define XFS_FILESTREAMS_TRACE 1
 #endif
 
 #include <linux-2.6/xfs_linux.h>
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 9ece7f87ec5..51c09c114a2 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -68,6 +68,7 @@ typedef struct xfs_agf {
 	__be32		agf_flcount;	/* count of blocks in freelist */
 	__be32		agf_freeblks;	/* total free blocks */
 	__be32		agf_longest;	/* longest free space */
+	__be32		agf_btreeblks;	/* # of blocks held in AGF btrees */
 } xfs_agf_t;
 
 #define	XFS_AGF_MAGICNUM	0x00000001
@@ -81,7 +82,8 @@ typedef struct xfs_agf {
 #define	XFS_AGF_FLCOUNT		0x00000100
 #define	XFS_AGF_FREEBLKS	0x00000200
 #define	XFS_AGF_LONGEST		0x00000400
-#define	XFS_AGF_NUM_BITS	11
+#define	XFS_AGF_BTREEBLKS	0x00000800
+#define	XFS_AGF_NUM_BITS	12
 #define	XFS_AGF_ALL_BITS	((1 << XFS_AGF_NUM_BITS) - 1)
 
 /* disk block (xfs_daddr_t) in the AG */
@@ -186,12 +188,15 @@ typedef struct xfs_perag
 	__uint32_t	pagf_flcount;	/* count of blocks in freelist */
 	xfs_extlen_t	pagf_freeblks;	/* total free blocks */
 	xfs_extlen_t	pagf_longest;	/* longest free space */
+	__uint32_t	pagf_btreeblks;	/* # of blocks held in AGF btrees */
 	xfs_agino_t	pagi_freecount;	/* number of free inodes */
+	xfs_agino_t	pagi_count;	/* number of allocated inodes */
+	int		pagb_count;	/* pagb slots in use */
 #ifdef __KERNEL__
 	lock_t		pagb_lock;	/* lock for pagb_list */
 #endif
-	int		pagb_count;	/* pagb slots in use */
 	xfs_perag_busy_t *pagb_list;	/* unstable blocks */
+	atomic_t        pagf_fstrms;    /* # of filestreams active in this AG */
 } xfs_perag_t;
 
 #define	XFS_AG_MAXLEVELS(mp)		((mp)->m_ag_maxlevels)
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 8e9a40aa0cd..012a649a19c 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -55,17 +55,17 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
 ktrace_t *xfs_alloc_trace_buf;
 
 #define	TRACE_ALLOC(s,a)	\
-	xfs_alloc_trace_alloc(fname, s, a, __LINE__)
+	xfs_alloc_trace_alloc(__FUNCTION__, s, a, __LINE__)
 #define	TRACE_FREE(s,a,b,x,f)	\
-	xfs_alloc_trace_free(fname, s, mp, a, b, x, f, __LINE__)
+	xfs_alloc_trace_free(__FUNCTION__, s, mp, a, b, x, f, __LINE__)
 #define	TRACE_MODAGF(s,a,f)	\
-	xfs_alloc_trace_modagf(fname, s, mp, a, f, __LINE__)
-#define	TRACE_BUSY(fname,s,ag,agb,l,sl,tp)	\
-	xfs_alloc_trace_busy(fname, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__)
-#define	TRACE_UNBUSY(fname,s,ag,sl,tp)	\
-	xfs_alloc_trace_busy(fname, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__)
-#define	TRACE_BUSYSEARCH(fname,s,ag,agb,l,sl,tp)	\
-	xfs_alloc_trace_busy(fname, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__)
+	xfs_alloc_trace_modagf(__FUNCTION__, s, mp, a, f, __LINE__)
+#define	TRACE_BUSY(__FUNCTION__,s,ag,agb,l,sl,tp)	\
+	xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__)
+#define	TRACE_UNBUSY(__FUNCTION__,s,ag,sl,tp)	\
+	xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__)
+#define	TRACE_BUSYSEARCH(__FUNCTION__,s,ag,agb,l,sl,tp)	\
+	xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__)
 #else
 #define	TRACE_ALLOC(s,a)
 #define	TRACE_FREE(s,a,b,x,f)
@@ -420,7 +420,7 @@ xfs_alloc_read_agfl(
  */
 STATIC void
 xfs_alloc_trace_alloc(
-	char		*name,		/* function tag string */
+	const char	*name,		/* function tag string */
 	char		*str,		/* additional string */
 	xfs_alloc_arg_t	*args,		/* allocation argument structure */
 	int		line)		/* source line number */
@@ -453,7 +453,7 @@ xfs_alloc_trace_alloc(
  */
 STATIC void
 xfs_alloc_trace_free(
-	char		*name,		/* function tag string */
+	const char	*name,		/* function tag string */
 	char		*str,		/* additional string */
 	xfs_mount_t	*mp,		/* file system mount point */
 	xfs_agnumber_t	agno,		/* allocation group number */
@@ -479,7 +479,7 @@ xfs_alloc_trace_free(
  */
 STATIC void
 xfs_alloc_trace_modagf(
-	char		*name,		/* function tag string */
+	const char	*name,		/* function tag string */
 	char		*str,		/* additional string */
 	xfs_mount_t	*mp,		/* file system mount point */
 	xfs_agf_t	*agf,		/* new agf value */
@@ -507,7 +507,7 @@ xfs_alloc_trace_modagf(
 
 STATIC void
 xfs_alloc_trace_busy(
-	char		*name,		/* function tag string */
+	const char	*name,		/* function tag string */
 	char		*str,		/* additional string */
 	xfs_mount_t	*mp,		/* file system mount point */
 	xfs_agnumber_t	agno,		/* allocation group number */
@@ -549,9 +549,6 @@ xfs_alloc_ag_vextent(
 	xfs_alloc_arg_t	*args)	/* argument structure for allocation */
 {
 	int		error=0;
-#ifdef XFS_ALLOC_TRACE
-	static char	fname[] = "xfs_alloc_ag_vextent";
-#endif
 
 	ASSERT(args->minlen > 0);
 	ASSERT(args->maxlen > 0);
@@ -635,9 +632,6 @@ xfs_alloc_ag_vextent_exact(
 	xfs_agblock_t	fbno;	/* start block of found extent */
 	xfs_agblock_t	fend;	/* end block of found extent */
 	xfs_extlen_t	flen;	/* length of found extent */
-#ifdef XFS_ALLOC_TRACE
-	static char	fname[] = "xfs_alloc_ag_vextent_exact";
-#endif
 	int		i;	/* success/failure of operation */
 	xfs_agblock_t	maxend;	/* end of maximal extent */
 	xfs_agblock_t	minend;	/* end of minimal extent */
@@ -737,9 +731,6 @@ xfs_alloc_ag_vextent_near(
 	xfs_btree_cur_t	*bno_cur_gt;	/* cursor for bno btree, right side */
 	xfs_btree_cur_t	*bno_cur_lt;	/* cursor for bno btree, left side */
 	xfs_btree_cur_t	*cnt_cur;	/* cursor for count btree */
-#ifdef XFS_ALLOC_TRACE
-	static char	fname[] = "xfs_alloc_ag_vextent_near";
-#endif
 	xfs_agblock_t	gtbno;		/* start bno of right side entry */
 	xfs_agblock_t	gtbnoa;		/* aligned ... */
 	xfs_extlen_t	gtdiff;		/* difference to right side entry */
@@ -1270,9 +1261,6 @@ xfs_alloc_ag_vextent_size(
 	int		error;		/* error result */
 	xfs_agblock_t	fbno;		/* start of found freespace */
 	xfs_extlen_t	flen;		/* length of found freespace */
-#ifdef XFS_ALLOC_TRACE
-	static char	fname[] = "xfs_alloc_ag_vextent_size";
-#endif
 	int		i;		/* temp status variable */
 	xfs_agblock_t	rbno;		/* returned block number */
 	xfs_extlen_t	rlen;		/* length of returned extent */
@@ -1427,9 +1415,6 @@ xfs_alloc_ag_vextent_small(
 	int		error;
 	xfs_agblock_t	fbno;
 	xfs_extlen_t	flen;
-#ifdef XFS_ALLOC_TRACE
-	static char	fname[] = "xfs_alloc_ag_vextent_small";
-#endif
 	int		i;
 
 	if ((error = xfs_alloc_decrement(ccur, 0, &i)))
@@ -1447,7 +1432,8 @@ xfs_alloc_ag_vextent_small(
 	else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
 		 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
 		  > args->minleft)) {
-		if ((error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno)))
+		error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
+		if (error)
 			goto error0;
 		if (fbno != NULLAGBLOCK) {
 			if (args->userdata) {
@@ -1515,9 +1501,6 @@ xfs_free_ag_extent(
 	xfs_btree_cur_t	*bno_cur;	/* cursor for by-block btree */
 	xfs_btree_cur_t	*cnt_cur;	/* cursor for by-size btree */
 	int		error;		/* error return value */
-#ifdef XFS_ALLOC_TRACE
-	static char	fname[] = "xfs_free_ag_extent";
-#endif
 	xfs_agblock_t	gtbno;		/* start of right neighbor block */
 	xfs_extlen_t	gtlen;		/* length of right neighbor block */
 	int		haveleft;	/* have a left neighbor block */
@@ -1923,7 +1906,8 @@ xfs_alloc_fix_freelist(
 	while (be32_to_cpu(agf->agf_flcount) > need) {
 		xfs_buf_t	*bp;
 
-		if ((error = xfs_alloc_get_freelist(tp, agbp, &bno)))
+		error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
+		if (error)
 			return error;
 		if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1)))
 			return error;
@@ -1973,8 +1957,9 @@ xfs_alloc_fix_freelist(
 		 * Put each allocated block on the list.
 		 */
 		for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
-			if ((error = xfs_alloc_put_freelist(tp, agbp, agflbp,
-					bno)))
+			error = xfs_alloc_put_freelist(tp, agbp,
+							agflbp, bno, 0);
+			if (error)
 				return error;
 		}
 	}
@@ -1991,16 +1976,15 @@ int				/* error */
 xfs_alloc_get_freelist(
 	xfs_trans_t	*tp,	/* transaction pointer */
 	xfs_buf_t	*agbp,	/* buffer containing the agf structure */
-	xfs_agblock_t	*bnop)	/* block address retrieved from freelist */
+	xfs_agblock_t	*bnop,	/* block address retrieved from freelist */
+	int		btreeblk) /* destination is a AGF btree */
 {
 	xfs_agf_t	*agf;	/* a.g. freespace structure */
 	xfs_agfl_t	*agfl;	/* a.g. freelist structure */
 	xfs_buf_t	*agflbp;/* buffer for a.g. freelist structure */
 	xfs_agblock_t	bno;	/* block number returned */
 	int		error;
-#ifdef XFS_ALLOC_TRACE
-	static char	fname[] = "xfs_alloc_get_freelist";
-#endif
+	int		logflags;
 	xfs_mount_t	*mp;	/* mount structure */
 	xfs_perag_t	*pag;	/* per allocation group data */
 
@@ -2032,8 +2016,16 @@ xfs_alloc_get_freelist(
 	be32_add(&agf->agf_flcount, -1);
 	xfs_trans_agflist_delta(tp, -1);
 	pag->pagf_flcount--;
-	TRACE_MODAGF(NULL, agf, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT);
-	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT);
+
+	logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
+	if (btreeblk) {
+		be32_add(&agf->agf_btreeblks, 1);
+		pag->pagf_btreeblks++;
+		logflags |= XFS_AGF_BTREEBLKS;
+	}
+
+	TRACE_MODAGF(NULL, agf, logflags);
+	xfs_alloc_log_agf(tp, agbp, logflags);
 	*bnop = bno;
 
 	/*
@@ -2071,6 +2063,7 @@ xfs_alloc_log_agf(
 		offsetof(xfs_agf_t, agf_flcount),
 		offsetof(xfs_agf_t, agf_freeblks),
 		offsetof(xfs_agf_t, agf_longest),
+		offsetof(xfs_agf_t, agf_btreeblks),
 		sizeof(xfs_agf_t)
 	};
 
@@ -2106,15 +2099,14 @@ xfs_alloc_put_freelist(
 	xfs_trans_t		*tp,	/* transaction pointer */
 	xfs_buf_t		*agbp,	/* buffer for a.g. freelist header */
 	xfs_buf_t		*agflbp,/* buffer for a.g. free block array */
-	xfs_agblock_t		bno)	/* block being freed */
+	xfs_agblock_t		bno,	/* block being freed */
+	int			btreeblk) /* block came from a AGF btree */
 {
 	xfs_agf_t		*agf;	/* a.g. freespace structure */
 	xfs_agfl_t		*agfl;	/* a.g. free block array */
 	__be32			*blockp;/* pointer to array entry */
 	int			error;
-#ifdef XFS_ALLOC_TRACE
-	static char		fname[] = "xfs_alloc_put_freelist";
-#endif
+	int			logflags;
 	xfs_mount_t		*mp;	/* mount structure */
 	xfs_perag_t		*pag;	/* per allocation group data */
 
@@ -2132,11 +2124,22 @@ xfs_alloc_put_freelist(
 	be32_add(&agf->agf_flcount, 1);
 	xfs_trans_agflist_delta(tp, 1);
 	pag->pagf_flcount++;
+
+	logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
+	if (btreeblk) {
+		be32_add(&agf->agf_btreeblks, -1);
+		pag->pagf_btreeblks--;
+		logflags |= XFS_AGF_BTREEBLKS;
+	}
+
+	TRACE_MODAGF(NULL, agf, logflags);
+	xfs_alloc_log_agf(tp, agbp, logflags);
+
 	ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
 	blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)];
 	*blockp = cpu_to_be32(bno);
-	TRACE_MODAGF(NULL, agf, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT);
-	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT);
+	TRACE_MODAGF(NULL, agf, logflags);
+	xfs_alloc_log_agf(tp, agbp, logflags);
 	xfs_trans_log_buf(tp, agflbp,
 		(int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl),
 		(int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl +
@@ -2196,6 +2199,7 @@ xfs_alloc_read_agf(
 	pag = &mp->m_perag[agno];
 	if (!pag->pagf_init) {
 		pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
+		pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
 		pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
 		pag->pagf_longest = be32_to_cpu(agf->agf_longest);
 		pag->pagf_levels[XFS_BTNUM_BNOi] =
@@ -2235,9 +2239,6 @@ xfs_alloc_vextent(
 	xfs_agblock_t	agsize;	/* allocation group size */
 	int		error;
 	int		flags;	/* XFS_ALLOC_FLAG_... locking flags */
-#ifdef XFS_ALLOC_TRACE
-	static char	fname[] = "xfs_alloc_vextent";
-#endif
 	xfs_extlen_t	minleft;/* minimum left value, temp copy */
 	xfs_mount_t	*mp;	/* mount structure pointer */
 	xfs_agnumber_t	sagno;	/* starting allocation group number */
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 5a4256120cc..5aec15d0651 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -136,7 +136,8 @@ int				/* error */
 xfs_alloc_get_freelist(
 	struct xfs_trans *tp,	/* transaction pointer */
 	struct xfs_buf	*agbp,	/* buffer containing the agf structure */
-	xfs_agblock_t	*bnop);	/* block address retrieved from freelist */
+	xfs_agblock_t	*bnop,	/* block address retrieved from freelist */
+	int		btreeblk); /* destination is a AGF btree */
 
 /*
  * Log the given fields from the agf structure.
@@ -165,7 +166,8 @@ xfs_alloc_put_freelist(
 	struct xfs_trans *tp,	/* transaction pointer */
 	struct xfs_buf	*agbp,	/* buffer for a.g. freelist header */
 	struct xfs_buf	*agflbp,/* buffer for a.g. free block array */
-	xfs_agblock_t	bno);	/* block being freed */
+	xfs_agblock_t	bno,	/* block being freed */
+	int		btreeblk); /* owner was a AGF btree */
 
 /*
  * Read in the allocation group header (free/alloc section).
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 74cadf95d4e..1603ce59585 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -226,8 +226,9 @@ xfs_alloc_delrec(
 			/*
 			 * Put this buffer/block on the ag's freelist.
 			 */
-			if ((error = xfs_alloc_put_freelist(cur->bc_tp,
-					cur->bc_private.a.agbp, NULL, bno)))
+			error = xfs_alloc_put_freelist(cur->bc_tp,
+					cur->bc_private.a.agbp, NULL, bno, 1);
+			if (error)
 				return error;
 			/*
 			 * Since blocks move to the free list without the
@@ -549,8 +550,9 @@ xfs_alloc_delrec(
 	/*
 	 * Free the deleting block by putting it on the freelist.
 	 */
-	if ((error = xfs_alloc_put_freelist(cur->bc_tp, cur->bc_private.a.agbp,
-			NULL, rbno)))
+	error = xfs_alloc_put_freelist(cur->bc_tp,
+					 cur->bc_private.a.agbp, NULL, rbno, 1);
+	if (error)
 		return error;
 	/*
 	 * Since blocks move to the free list without the coordination
@@ -1320,8 +1322,9 @@ xfs_alloc_newroot(
 	/*
 	 * Get a buffer from the freelist blocks, for the new root.
 	 */
-	if ((error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
-			&nbno)))
+	error = xfs_alloc_get_freelist(cur->bc_tp,
+					cur->bc_private.a.agbp, &nbno, 1);
+	if (error)
 		return error;
 	/*
 	 * None available, we fail.
@@ -1604,8 +1607,9 @@ xfs_alloc_split(
 	 * Allocate the new block from the freelist.
 	 * If we can't do it, we're toast.  Give up.
 	 */
-	if ((error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
-			&rbno)))
+	error = xfs_alloc_get_freelist(cur->bc_tp,
+					 cur->bc_private.a.agbp, &rbno, 1);
+	if (error)
 		return error;
 	if (rbno == NULLAGBLOCK) {
 		*stat = 0;
diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c
index 1afe07f67e3..fab0b6d5a41 100644
--- a/fs/xfs/xfs_bit.c
+++ b/fs/xfs/xfs_bit.c
@@ -66,44 +66,6 @@ static const char xfs_highbit[256] = {
 #endif
 
 /*
- * Count of bits set in byte, 0..8.
- */
-static const char xfs_countbit[256] = {
-	0, 1, 1, 2, 1, 2, 2, 3,			/* 00 .. 07 */
-	1, 2, 2, 3, 2, 3, 3, 4,			/* 08 .. 0f */
-	1, 2, 2, 3, 2, 3, 3, 4,			/* 10 .. 17 */
-	2, 3, 3, 4, 3, 4, 4, 5,			/* 18 .. 1f */
-	1, 2, 2, 3, 2, 3, 3, 4,			/* 20 .. 27 */
-	2, 3, 3, 4, 3, 4, 4, 5,			/* 28 .. 2f */
-	2, 3, 3, 4, 3, 4, 4, 5,			/* 30 .. 37 */
-	3, 4, 4, 5, 4, 5, 5, 6,			/* 38 .. 3f */
-	1, 2, 2, 3, 2, 3, 3, 4,			/* 40 .. 47 */
-	2, 3, 3, 4, 3, 4, 4, 5,			/* 48 .. 4f */
-	2, 3, 3, 4, 3, 4, 4, 5,			/* 50 .. 57 */
-	3, 4, 4, 5, 4, 5, 5, 6,			/* 58 .. 5f */
-	2, 3, 3, 4, 3, 4, 4, 5,			/* 60 .. 67 */
-	3, 4, 4, 5, 4, 5, 5, 6,			/* 68 .. 6f */
-	3, 4, 4, 5, 4, 5, 5, 6,			/* 70 .. 77 */
-	4, 5, 5, 6, 5, 6, 6, 7,			/* 78 .. 7f */
-	1, 2, 2, 3, 2, 3, 3, 4,			/* 80 .. 87 */
-	2, 3, 3, 4, 3, 4, 4, 5,			/* 88 .. 8f */
-	2, 3, 3, 4, 3, 4, 4, 5,			/* 90 .. 97 */
-	3, 4, 4, 5, 4, 5, 5, 6,			/* 98 .. 9f */
-	2, 3, 3, 4, 3, 4, 4, 5,			/* a0 .. a7 */
-	3, 4, 4, 5, 4, 5, 5, 6,			/* a8 .. af */
-	3, 4, 4, 5, 4, 5, 5, 6,			/* b0 .. b7 */
-	4, 5, 5, 6, 5, 6, 6, 7,			/* b8 .. bf */
-	2, 3, 3, 4, 3, 4, 4, 5,			/* c0 .. c7 */
-	3, 4, 4, 5, 4, 5, 5, 6,			/* c8 .. cf */
-	3, 4, 4, 5, 4, 5, 5, 6,			/* d0 .. d7 */
-	4, 5, 5, 6, 5, 6, 6, 7,			/* d8 .. df */
-	3, 4, 4, 5, 4, 5, 5, 6,			/* e0 .. e7 */
-	4, 5, 5, 6, 5, 6, 6, 7,			/* e8 .. ef */
-	4, 5, 5, 6, 5, 6, 6, 7,			/* f0 .. f7 */
-	5, 6, 6, 7, 6, 7, 7, 8,			/* f8 .. ff */
-};
-
-/*
  * xfs_highbit32: get high bit set out of 32-bit argument, -1 if none set.
  */
 inline int
@@ -167,56 +129,21 @@ xfs_highbit64(
 
 
 /*
- * Count the number of bits set in the bitmap starting with bit
- * start_bit.  Size is the size of the bitmap in words.
- *
- * Do the counting by mapping a byte value to the number of set
- * bits for that value using the xfs_countbit array, i.e.
- * xfs_countbit[0] == 0, xfs_countbit[1] == 1, xfs_countbit[2] == 1,
- * xfs_countbit[3] == 2, etc.
+ * Return whether bitmap is empty.
+ * Size is number of words in the bitmap, which is padded to word boundary
+ * Returns 1 for empty, 0 for non-empty.
  */
 int
-xfs_count_bits(uint *map, uint size, uint start_bit)
+xfs_bitmap_empty(uint *map, uint size)
 {
-	register int	bits;
-	register unsigned char	*bytep;
-	register unsigned char	*end_map;
-	int		byte_bit;
-
-	bits = 0;
-	end_map = (char*)(map + size);
-	bytep = (char*)(map + (start_bit & ~0x7));
-	byte_bit = start_bit & 0x7;
-
-	/*
-	 * If the caller fell off the end of the map, return 0.
-	 */
-	if (bytep >= end_map) {
-		return (0);
-	}
-
-	/*
-	 * If start_bit is not byte aligned, then process the
-	 * first byte separately.
-	 */
-	if (byte_bit != 0) {
-		/*
-		 * Shift off the bits we don't want to look at,
-		 * before indexing into xfs_countbit.
-		 */
-		bits += xfs_countbit[(*bytep >> byte_bit)];
-		bytep++;
-	}
+	uint i;
+	uint ret = 0;
 
-	/*
-	 * Count the bits in each byte until the end of the bitmap.
-	 */
-	while (bytep < end_map) {
-		bits += xfs_countbit[*bytep];
-		bytep++;
+	for (i = 0; i < size; i++) {
+		ret |= map[i];
 	}
 
-	return (bits);
+	return (ret == 0);
 }
 
 /*
diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h
index 0bbe5681754..082641a9782 100644
--- a/fs/xfs/xfs_bit.h
+++ b/fs/xfs/xfs_bit.h
@@ -55,8 +55,8 @@ extern int xfs_lowbit64(__uint64_t v);
 /* Get high bit set out of 64-bit argument, -1 if none set */
 extern int xfs_highbit64(__uint64_t);
 
-/* Count set bits in map starting with start_bit */
-extern int xfs_count_bits(uint *map, uint size, uint start_bit);
+/* Return whether bitmap is empty (1 == empty) */
+extern int xfs_bitmap_empty(uint *map, uint size);
 
 /* Count continuous one bits in map starting with start_bit */
 extern int xfs_contig_bits(uint *map, uint size, uint start_bit);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index b1ea26e40aa..94b5c5fe268 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -52,6 +52,7 @@
 #include "xfs_quota.h"
 #include "xfs_trans_space.h"
 #include "xfs_buf_item.h"
+#include "xfs_filestream.h"
 
 
 #ifdef DEBUG
@@ -277,7 +278,7 @@ xfs_bmap_isaeof(
 STATIC void
 xfs_bmap_trace_addentry(
 	int		opcode,		/* operation */
-	char		*fname,		/* function name */
+	const char	*fname,		/* function name */
 	char		*desc,		/* operation description */
 	xfs_inode_t	*ip,		/* incore inode pointer */
 	xfs_extnum_t	idx,		/* index of entry(ies) */
@@ -291,7 +292,7 @@ xfs_bmap_trace_addentry(
  */
 STATIC void
 xfs_bmap_trace_delete(
-	char		*fname,		/* function name */
+	const char	*fname,		/* function name */
 	char		*desc,		/* operation description */
 	xfs_inode_t	*ip,		/* incore inode pointer */
 	xfs_extnum_t	idx,		/* index of entry(entries) deleted */
@@ -304,7 +305,7 @@ xfs_bmap_trace_delete(
  */
 STATIC void
 xfs_bmap_trace_insert(
-	char		*fname,		/* function name */
+	const char	*fname,		/* function name */
 	char		*desc,		/* operation description */
 	xfs_inode_t	*ip,		/* incore inode pointer */
 	xfs_extnum_t	idx,		/* index of entry(entries) inserted */
@@ -318,7 +319,7 @@ xfs_bmap_trace_insert(
  */
 STATIC void
 xfs_bmap_trace_post_update(
-	char		*fname,		/* function name */
+	const char	*fname,		/* function name */
 	char		*desc,		/* operation description */
 	xfs_inode_t	*ip,		/* incore inode pointer */
 	xfs_extnum_t	idx,		/* index of entry updated */
@@ -329,17 +330,25 @@ xfs_bmap_trace_post_update(
  */
 STATIC void
 xfs_bmap_trace_pre_update(
-	char		*fname,		/* function name */
+	const char	*fname,		/* function name */
 	char		*desc,		/* operation description */
 	xfs_inode_t	*ip,		/* incore inode pointer */
 	xfs_extnum_t	idx,		/* index of entry to be updated */
 	int		whichfork);	/* data or attr fork */
 
+#define	XFS_BMAP_TRACE_DELETE(d,ip,i,c,w)	\
+	xfs_bmap_trace_delete(__FUNCTION__,d,ip,i,c,w)
+#define	XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w)	\
+	xfs_bmap_trace_insert(__FUNCTION__,d,ip,i,c,r1,r2,w)
+#define	XFS_BMAP_TRACE_POST_UPDATE(d,ip,i,w)	\
+	xfs_bmap_trace_post_update(__FUNCTION__,d,ip,i,w)
+#define	XFS_BMAP_TRACE_PRE_UPDATE(d,ip,i,w)	\
+	xfs_bmap_trace_pre_update(__FUNCTION__,d,ip,i,w)
 #else
-#define	xfs_bmap_trace_delete(f,d,ip,i,c,w)
-#define	xfs_bmap_trace_insert(f,d,ip,i,c,r1,r2,w)
-#define	xfs_bmap_trace_post_update(f,d,ip,i,w)
-#define	xfs_bmap_trace_pre_update(f,d,ip,i,w)
+#define	XFS_BMAP_TRACE_DELETE(d,ip,i,c,w)
+#define	XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w)
+#define	XFS_BMAP_TRACE_POST_UPDATE(d,ip,i,w)
+#define	XFS_BMAP_TRACE_PRE_UPDATE(d,ip,i,w)
 #endif	/* XFS_BMAP_TRACE */
 
 /*
@@ -531,9 +540,6 @@ xfs_bmap_add_extent(
 	xfs_filblks_t		da_new; /* new count del alloc blocks used */
 	xfs_filblks_t		da_old; /* old count del alloc blocks used */
 	int			error;	/* error return value */
-#ifdef XFS_BMAP_TRACE
-	static char		fname[] = "xfs_bmap_add_extent";
-#endif
 	xfs_ifork_t		*ifp;	/* inode fork ptr */
 	int			logflags; /* returned value */
 	xfs_extnum_t		nextents; /* number of extents in file now */
@@ -551,8 +557,8 @@ xfs_bmap_add_extent(
 	 * already extents in the list.
 	 */
 	if (nextents == 0) {
-		xfs_bmap_trace_insert(fname, "insert empty", ip, 0, 1, new,
-			NULL, whichfork);
+		XFS_BMAP_TRACE_INSERT("insert empty", ip, 0, 1, new, NULL,
+			whichfork);
 		xfs_iext_insert(ifp, 0, 1, new);
 		ASSERT(cur == NULL);
 		ifp->if_lastex = 0;
@@ -710,9 +716,6 @@ xfs_bmap_add_extent_delay_real(
 	int			diff;	/* temp value */
 	xfs_bmbt_rec_t		*ep;	/* extent entry for idx */
 	int			error;	/* error return value */
-#ifdef XFS_BMAP_TRACE
-	static char		fname[] = "xfs_bmap_add_extent_delay_real";
-#endif
 	int			i;	/* temp state */
 	xfs_ifork_t		*ifp;	/* inode fork pointer */
 	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
@@ -808,15 +811,14 @@ xfs_bmap_add_extent_delay_real(
 		 * Filling in all of a previously delayed allocation extent.
 		 * The left and right neighbors are both contiguous with new.
 		 */
-		xfs_bmap_trace_pre_update(fname, "LF|RF|LC|RC", ip, idx - 1,
+		XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC|RC", ip, idx - 1,
 			XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			LEFT.br_blockcount + PREV.br_blockcount +
 			RIGHT.br_blockcount);
-		xfs_bmap_trace_post_update(fname, "LF|RF|LC|RC", ip, idx - 1,
-			XFS_DATA_FORK);
-		xfs_bmap_trace_delete(fname, "LF|RF|LC|RC", ip, idx, 2,
+		XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC|RC", ip, idx - 1,
 			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_DELETE("LF|RF|LC|RC", ip, idx, 2, XFS_DATA_FORK);
 		xfs_iext_remove(ifp, idx, 2);
 		ip->i_df.if_lastex = idx - 1;
 		ip->i_d.di_nextents--;
@@ -855,15 +857,14 @@ xfs_bmap_add_extent_delay_real(
 		 * Filling in all of a previously delayed allocation extent.
 		 * The left neighbor is contiguous, the right is not.
 		 */
-		xfs_bmap_trace_pre_update(fname, "LF|RF|LC", ip, idx - 1,
+		XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC", ip, idx - 1,
 			XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			LEFT.br_blockcount + PREV.br_blockcount);
-		xfs_bmap_trace_post_update(fname, "LF|RF|LC", ip, idx - 1,
+		XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC", ip, idx - 1,
 			XFS_DATA_FORK);
 		ip->i_df.if_lastex = idx - 1;
-		xfs_bmap_trace_delete(fname, "LF|RF|LC", ip, idx, 1,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_DELETE("LF|RF|LC", ip, idx, 1, XFS_DATA_FORK);
 		xfs_iext_remove(ifp, idx, 1);
 		if (cur == NULL)
 			rval = XFS_ILOG_DEXT;
@@ -892,16 +893,13 @@ xfs_bmap_add_extent_delay_real(
 		 * Filling in all of a previously delayed allocation extent.
 		 * The right neighbor is contiguous, the left is not.
 		 */
-		xfs_bmap_trace_pre_update(fname, "LF|RF|RC", ip, idx,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|RC", ip, idx, XFS_DATA_FORK);
 		xfs_bmbt_set_startblock(ep, new->br_startblock);
 		xfs_bmbt_set_blockcount(ep,
 			PREV.br_blockcount + RIGHT.br_blockcount);
-		xfs_bmap_trace_post_update(fname, "LF|RF|RC", ip, idx,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_POST_UPDATE("LF|RF|RC", ip, idx, XFS_DATA_FORK);
 		ip->i_df.if_lastex = idx;
-		xfs_bmap_trace_delete(fname, "LF|RF|RC", ip, idx + 1, 1,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_DELETE("LF|RF|RC", ip, idx + 1, 1, XFS_DATA_FORK);
 		xfs_iext_remove(ifp, idx + 1, 1);
 		if (cur == NULL)
 			rval = XFS_ILOG_DEXT;
@@ -931,11 +929,9 @@ xfs_bmap_add_extent_delay_real(
 		 * Neither the left nor right neighbors are contiguous with
 		 * the new one.
 		 */
-		xfs_bmap_trace_pre_update(fname, "LF|RF", ip, idx,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_PRE_UPDATE("LF|RF", ip, idx, XFS_DATA_FORK);
 		xfs_bmbt_set_startblock(ep, new->br_startblock);
-		xfs_bmap_trace_post_update(fname, "LF|RF", ip, idx,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_POST_UPDATE("LF|RF", ip, idx, XFS_DATA_FORK);
 		ip->i_df.if_lastex = idx;
 		ip->i_d.di_nextents++;
 		if (cur == NULL)
@@ -963,17 +959,14 @@ xfs_bmap_add_extent_delay_real(
 		 * Filling in the first part of a previous delayed allocation.
 		 * The left neighbor is contiguous.
 		 */
-		xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx - 1,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx - 1, XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			LEFT.br_blockcount + new->br_blockcount);
 		xfs_bmbt_set_startoff(ep,
 			PREV.br_startoff + new->br_blockcount);
-		xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx - 1,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx - 1, XFS_DATA_FORK);
 		temp = PREV.br_blockcount - new->br_blockcount;
-		xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx, XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(ep, temp);
 		ip->i_df.if_lastex = idx - 1;
 		if (cur == NULL)
@@ -995,8 +988,7 @@ xfs_bmap_add_extent_delay_real(
 		temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
 			STARTBLOCKVAL(PREV.br_startblock));
 		xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
-		xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx, XFS_DATA_FORK);
 		*dnew = temp;
 		/* DELTA: The boundary between two in-core extents moved. */
 		temp = LEFT.br_startoff;
@@ -1009,11 +1001,11 @@ xfs_bmap_add_extent_delay_real(
 		 * Filling in the first part of a previous delayed allocation.
 		 * The left neighbor is not contiguous.
 		 */
-		xfs_bmap_trace_pre_update(fname, "LF", ip, idx, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_PRE_UPDATE("LF", ip, idx, XFS_DATA_FORK);
 		xfs_bmbt_set_startoff(ep, new_endoff);
 		temp = PREV.br_blockcount - new->br_blockcount;
 		xfs_bmbt_set_blockcount(ep, temp);
-		xfs_bmap_trace_insert(fname, "LF", ip, idx, 1, new, NULL,
+		XFS_BMAP_TRACE_INSERT("LF", ip, idx, 1, new, NULL,
 			XFS_DATA_FORK);
 		xfs_iext_insert(ifp, idx, 1, new);
 		ip->i_df.if_lastex = idx;
@@ -1046,8 +1038,7 @@ xfs_bmap_add_extent_delay_real(
 			(cur ? cur->bc_private.b.allocated : 0));
 		ep = xfs_iext_get_ext(ifp, idx + 1);
 		xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
-		xfs_bmap_trace_post_update(fname, "LF", ip, idx + 1,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_POST_UPDATE("LF", ip, idx + 1, XFS_DATA_FORK);
 		*dnew = temp;
 		/* DELTA: One in-core extent is split in two. */
 		temp = PREV.br_startoff;
@@ -1060,17 +1051,14 @@ xfs_bmap_add_extent_delay_real(
 		 * The right neighbor is contiguous with the new allocation.
 		 */
 		temp = PREV.br_blockcount - new->br_blockcount;
-		xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx,
-			XFS_DATA_FORK);
-		xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx + 1,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx + 1, XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(ep, temp);
 		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1),
 			new->br_startoff, new->br_startblock,
 			new->br_blockcount + RIGHT.br_blockcount,
 			RIGHT.br_state);
-		xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx + 1,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx + 1, XFS_DATA_FORK);
 		ip->i_df.if_lastex = idx + 1;
 		if (cur == NULL)
 			rval = XFS_ILOG_DEXT;
@@ -1091,8 +1079,7 @@ xfs_bmap_add_extent_delay_real(
 		temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
 			STARTBLOCKVAL(PREV.br_startblock));
 		xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
-		xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx, XFS_DATA_FORK);
 		*dnew = temp;
 		/* DELTA: The boundary between two in-core extents moved. */
 		temp = PREV.br_startoff;
@@ -1106,10 +1093,10 @@ xfs_bmap_add_extent_delay_real(
 		 * The right neighbor is not contiguous.
 		 */
 		temp = PREV.br_blockcount - new->br_blockcount;
-		xfs_bmap_trace_pre_update(fname, "RF", ip, idx, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_PRE_UPDATE("RF", ip, idx, XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(ep, temp);
-		xfs_bmap_trace_insert(fname, "RF", ip, idx + 1, 1,
-			new, NULL, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_INSERT("RF", ip, idx + 1, 1, new, NULL,
+			XFS_DATA_FORK);
 		xfs_iext_insert(ifp, idx + 1, 1, new);
 		ip->i_df.if_lastex = idx + 1;
 		ip->i_d.di_nextents++;
@@ -1141,7 +1128,7 @@ xfs_bmap_add_extent_delay_real(
 			(cur ? cur->bc_private.b.allocated : 0));
 		ep = xfs_iext_get_ext(ifp, idx);
 		xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
-		xfs_bmap_trace_post_update(fname, "RF", ip, idx, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_POST_UPDATE("RF", ip, idx, XFS_DATA_FORK);
 		*dnew = temp;
 		/* DELTA: One in-core extent is split in two. */
 		temp = PREV.br_startoff;
@@ -1155,7 +1142,7 @@ xfs_bmap_add_extent_delay_real(
 		 * This case is avoided almost all the time.
 		 */
 		temp = new->br_startoff - PREV.br_startoff;
-		xfs_bmap_trace_pre_update(fname, "0", ip, idx, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(ep, temp);
 		r[0] = *new;
 		r[1].br_state = PREV.br_state;
@@ -1163,7 +1150,7 @@ xfs_bmap_add_extent_delay_real(
 		r[1].br_startoff = new_endoff;
 		temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
 		r[1].br_blockcount = temp2;
-		xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 2, &r[0], &r[1],
+		XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 2, &r[0], &r[1],
 			XFS_DATA_FORK);
 		xfs_iext_insert(ifp, idx + 1, 2, &r[0]);
 		ip->i_df.if_lastex = idx + 1;
@@ -1222,13 +1209,11 @@ xfs_bmap_add_extent_delay_real(
 		}
 		ep = xfs_iext_get_ext(ifp, idx);
 		xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
-		xfs_bmap_trace_post_update(fname, "0", ip, idx, XFS_DATA_FORK);
-		xfs_bmap_trace_pre_update(fname, "0", ip, idx + 2,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx + 2, XFS_DATA_FORK);
 		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx + 2),
 			NULLSTARTBLOCK((int)temp2));
-		xfs_bmap_trace_post_update(fname, "0", ip, idx + 2,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx + 2, XFS_DATA_FORK);
 		*dnew = temp + temp2;
 		/* DELTA: One in-core extent is split in three. */
 		temp = PREV.br_startoff;
@@ -1287,9 +1272,6 @@ xfs_bmap_add_extent_unwritten_real(
 	xfs_btree_cur_t		*cur;	/* btree cursor */
 	xfs_bmbt_rec_t		*ep;	/* extent entry for idx */
 	int			error;	/* error return value */
-#ifdef XFS_BMAP_TRACE
-	static char		fname[] = "xfs_bmap_add_extent_unwritten_real";
-#endif
 	int			i;	/* temp state */
 	xfs_ifork_t		*ifp;	/* inode fork pointer */
 	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
@@ -1390,15 +1372,14 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Setting all of a previous oldext extent to newext.
 		 * The left and right neighbors are both contiguous with new.
 		 */
-		xfs_bmap_trace_pre_update(fname, "LF|RF|LC|RC", ip, idx - 1,
+		XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC|RC", ip, idx - 1,
 			XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			LEFT.br_blockcount + PREV.br_blockcount +
 			RIGHT.br_blockcount);
-		xfs_bmap_trace_post_update(fname, "LF|RF|LC|RC", ip, idx - 1,
-			XFS_DATA_FORK);
-		xfs_bmap_trace_delete(fname, "LF|RF|LC|RC", ip, idx, 2,
+		XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC|RC", ip, idx - 1,
 			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_DELETE("LF|RF|LC|RC", ip, idx, 2, XFS_DATA_FORK);
 		xfs_iext_remove(ifp, idx, 2);
 		ip->i_df.if_lastex = idx - 1;
 		ip->i_d.di_nextents -= 2;
@@ -1441,15 +1422,14 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Setting all of a previous oldext extent to newext.
 		 * The left neighbor is contiguous, the right is not.
 		 */
-		xfs_bmap_trace_pre_update(fname, "LF|RF|LC", ip, idx - 1,
+		XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC", ip, idx - 1,
 			XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			LEFT.br_blockcount + PREV.br_blockcount);
-		xfs_bmap_trace_post_update(fname, "LF|RF|LC", ip, idx - 1,
+		XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC", ip, idx - 1,
 			XFS_DATA_FORK);
 		ip->i_df.if_lastex = idx - 1;
-		xfs_bmap_trace_delete(fname, "LF|RF|LC", ip, idx, 1,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_DELETE("LF|RF|LC", ip, idx, 1, XFS_DATA_FORK);
 		xfs_iext_remove(ifp, idx, 1);
 		ip->i_d.di_nextents--;
 		if (cur == NULL)
@@ -1484,16 +1464,15 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Setting all of a previous oldext extent to newext.
 		 * The right neighbor is contiguous, the left is not.
 		 */
-		xfs_bmap_trace_pre_update(fname, "LF|RF|RC", ip, idx,
+		XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|RC", ip, idx,
 			XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(ep,
 			PREV.br_blockcount + RIGHT.br_blockcount);
 		xfs_bmbt_set_state(ep, newext);
-		xfs_bmap_trace_post_update(fname, "LF|RF|RC", ip, idx,
+		XFS_BMAP_TRACE_POST_UPDATE("LF|RF|RC", ip, idx,
 			XFS_DATA_FORK);
 		ip->i_df.if_lastex = idx;
-		xfs_bmap_trace_delete(fname, "LF|RF|RC", ip, idx + 1, 1,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_DELETE("LF|RF|RC", ip, idx + 1, 1, XFS_DATA_FORK);
 		xfs_iext_remove(ifp, idx + 1, 1);
 		ip->i_d.di_nextents--;
 		if (cur == NULL)
@@ -1529,10 +1508,10 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Neither the left nor right neighbors are contiguous with
 		 * the new one.
 		 */
-		xfs_bmap_trace_pre_update(fname, "LF|RF", ip, idx,
+		XFS_BMAP_TRACE_PRE_UPDATE("LF|RF", ip, idx,
 			XFS_DATA_FORK);
 		xfs_bmbt_set_state(ep, newext);
-		xfs_bmap_trace_post_update(fname, "LF|RF", ip, idx,
+		XFS_BMAP_TRACE_POST_UPDATE("LF|RF", ip, idx,
 			XFS_DATA_FORK);
 		ip->i_df.if_lastex = idx;
 		if (cur == NULL)
@@ -1559,21 +1538,21 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Setting the first part of a previous oldext extent to newext.
 		 * The left neighbor is contiguous.
 		 */
-		xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx - 1,
+		XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx - 1,
 			XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			LEFT.br_blockcount + new->br_blockcount);
 		xfs_bmbt_set_startoff(ep,
 			PREV.br_startoff + new->br_blockcount);
-		xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx - 1,
+		XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx - 1,
 			XFS_DATA_FORK);
-		xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx,
+		XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx,
 			XFS_DATA_FORK);
 		xfs_bmbt_set_startblock(ep,
 			new->br_startblock + new->br_blockcount);
 		xfs_bmbt_set_blockcount(ep,
 			PREV.br_blockcount - new->br_blockcount);
-		xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx,
+		XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx,
 			XFS_DATA_FORK);
 		ip->i_df.if_lastex = idx - 1;
 		if (cur == NULL)
@@ -1610,15 +1589,15 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Setting the first part of a previous oldext extent to newext.
 		 * The left neighbor is not contiguous.
 		 */
-		xfs_bmap_trace_pre_update(fname, "LF", ip, idx, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_PRE_UPDATE("LF", ip, idx, XFS_DATA_FORK);
 		ASSERT(ep && xfs_bmbt_get_state(ep) == oldext);
 		xfs_bmbt_set_startoff(ep, new_endoff);
 		xfs_bmbt_set_blockcount(ep,
 			PREV.br_blockcount - new->br_blockcount);
 		xfs_bmbt_set_startblock(ep,
 			new->br_startblock + new->br_blockcount);
-		xfs_bmap_trace_post_update(fname, "LF", ip, idx, XFS_DATA_FORK);
-		xfs_bmap_trace_insert(fname, "LF", ip, idx, 1, new, NULL,
+		XFS_BMAP_TRACE_POST_UPDATE("LF", ip, idx, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_INSERT("LF", ip, idx, 1, new, NULL,
 			XFS_DATA_FORK);
 		xfs_iext_insert(ifp, idx, 1, new);
 		ip->i_df.if_lastex = idx;
@@ -1653,18 +1632,18 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Setting the last part of a previous oldext extent to newext.
 		 * The right neighbor is contiguous with the new allocation.
 		 */
-		xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx,
+		XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx,
 			XFS_DATA_FORK);
-		xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx + 1,
+		XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx + 1,
 			XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(ep,
 			PREV.br_blockcount - new->br_blockcount);
-		xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx,
+		XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx,
 			XFS_DATA_FORK);
 		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1),
 			new->br_startoff, new->br_startblock,
 			new->br_blockcount + RIGHT.br_blockcount, newext);
-		xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx + 1,
+		XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx + 1,
 			XFS_DATA_FORK);
 		ip->i_df.if_lastex = idx + 1;
 		if (cur == NULL)
@@ -1700,12 +1679,12 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Setting the last part of a previous oldext extent to newext.
 		 * The right neighbor is not contiguous.
 		 */
-		xfs_bmap_trace_pre_update(fname, "RF", ip, idx, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_PRE_UPDATE("RF", ip, idx, XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(ep,
 			PREV.br_blockcount - new->br_blockcount);
-		xfs_bmap_trace_post_update(fname, "RF", ip, idx, XFS_DATA_FORK);
-		xfs_bmap_trace_insert(fname, "RF", ip, idx + 1, 1,
-			new, NULL, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_POST_UPDATE("RF", ip, idx, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_INSERT("RF", ip, idx + 1, 1, new, NULL,
+			XFS_DATA_FORK);
 		xfs_iext_insert(ifp, idx + 1, 1, new);
 		ip->i_df.if_lastex = idx + 1;
 		ip->i_d.di_nextents++;
@@ -1744,17 +1723,17 @@ xfs_bmap_add_extent_unwritten_real(
 		 * newext.  Contiguity is impossible here.
 		 * One extent becomes three extents.
 		 */
-		xfs_bmap_trace_pre_update(fname, "0", ip, idx, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(ep,
 			new->br_startoff - PREV.br_startoff);
-		xfs_bmap_trace_post_update(fname, "0", ip, idx, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, XFS_DATA_FORK);
 		r[0] = *new;
 		r[1].br_startoff = new_endoff;
 		r[1].br_blockcount =
 			PREV.br_startoff + PREV.br_blockcount - new_endoff;
 		r[1].br_startblock = new->br_startblock + new->br_blockcount;
 		r[1].br_state = oldext;
-		xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 2, &r[0], &r[1],
+		XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 2, &r[0], &r[1],
 			XFS_DATA_FORK);
 		xfs_iext_insert(ifp, idx + 1, 2, &r[0]);
 		ip->i_df.if_lastex = idx + 1;
@@ -1845,9 +1824,6 @@ xfs_bmap_add_extent_hole_delay(
 	int			rsvd)		/* OK to allocate reserved blocks */
 {
 	xfs_bmbt_rec_t		*ep;	/* extent record for idx */
-#ifdef XFS_BMAP_TRACE
-	static char		fname[] = "xfs_bmap_add_extent_hole_delay";
-#endif
 	xfs_ifork_t		*ifp;	/* inode fork pointer */
 	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
 	xfs_filblks_t		newlen=0;	/* new indirect size */
@@ -1919,7 +1895,7 @@ xfs_bmap_add_extent_hole_delay(
 		 */
 		temp = left.br_blockcount + new->br_blockcount +
 			right.br_blockcount;
-		xfs_bmap_trace_pre_update(fname, "LC|RC", ip, idx - 1,
+		XFS_BMAP_TRACE_PRE_UPDATE("LC|RC", ip, idx - 1,
 			XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp);
 		oldlen = STARTBLOCKVAL(left.br_startblock) +
@@ -1928,10 +1904,9 @@ xfs_bmap_add_extent_hole_delay(
 		newlen = xfs_bmap_worst_indlen(ip, temp);
 		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1),
 			NULLSTARTBLOCK((int)newlen));
-		xfs_bmap_trace_post_update(fname, "LC|RC", ip, idx - 1,
-			XFS_DATA_FORK);
-		xfs_bmap_trace_delete(fname, "LC|RC", ip, idx, 1,
+		XFS_BMAP_TRACE_POST_UPDATE("LC|RC", ip, idx - 1,
 			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_DELETE("LC|RC", ip, idx, 1, XFS_DATA_FORK);
 		xfs_iext_remove(ifp, idx, 1);
 		ip->i_df.if_lastex = idx - 1;
 		/* DELTA: Two in-core extents were replaced by one. */
@@ -1946,7 +1921,7 @@ xfs_bmap_add_extent_hole_delay(
 		 * Merge the new allocation with the left neighbor.
 		 */
 		temp = left.br_blockcount + new->br_blockcount;
-		xfs_bmap_trace_pre_update(fname, "LC", ip, idx - 1,
+		XFS_BMAP_TRACE_PRE_UPDATE("LC", ip, idx - 1,
 			XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp);
 		oldlen = STARTBLOCKVAL(left.br_startblock) +
@@ -1954,7 +1929,7 @@ xfs_bmap_add_extent_hole_delay(
 		newlen = xfs_bmap_worst_indlen(ip, temp);
 		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1),
 			NULLSTARTBLOCK((int)newlen));
-		xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1,
+		XFS_BMAP_TRACE_POST_UPDATE("LC", ip, idx - 1,
 			XFS_DATA_FORK);
 		ip->i_df.if_lastex = idx - 1;
 		/* DELTA: One in-core extent grew into a hole. */
@@ -1968,14 +1943,14 @@ xfs_bmap_add_extent_hole_delay(
 		 * on the right.
 		 * Merge the new allocation with the right neighbor.
 		 */
-		xfs_bmap_trace_pre_update(fname, "RC", ip, idx, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_PRE_UPDATE("RC", ip, idx, XFS_DATA_FORK);
 		temp = new->br_blockcount + right.br_blockcount;
 		oldlen = STARTBLOCKVAL(new->br_startblock) +
 			STARTBLOCKVAL(right.br_startblock);
 		newlen = xfs_bmap_worst_indlen(ip, temp);
 		xfs_bmbt_set_allf(ep, new->br_startoff,
 			NULLSTARTBLOCK((int)newlen), temp, right.br_state);
-		xfs_bmap_trace_post_update(fname, "RC", ip, idx, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_POST_UPDATE("RC", ip, idx, XFS_DATA_FORK);
 		ip->i_df.if_lastex = idx;
 		/* DELTA: One in-core extent grew into a hole. */
 		temp2 = temp;
@@ -1989,7 +1964,7 @@ xfs_bmap_add_extent_hole_delay(
 		 * Insert a new entry.
 		 */
 		oldlen = newlen = 0;
-		xfs_bmap_trace_insert(fname, "0", ip, idx, 1, new, NULL,
+		XFS_BMAP_TRACE_INSERT("0", ip, idx, 1, new, NULL,
 			XFS_DATA_FORK);
 		xfs_iext_insert(ifp, idx, 1, new);
 		ip->i_df.if_lastex = idx;
@@ -2039,9 +2014,6 @@ xfs_bmap_add_extent_hole_real(
 {
 	xfs_bmbt_rec_t		*ep;	/* pointer to extent entry ins. point */
 	int			error;	/* error return value */
-#ifdef XFS_BMAP_TRACE
-	static char		fname[] = "xfs_bmap_add_extent_hole_real";
-#endif
 	int			i;	/* temp state */
 	xfs_ifork_t		*ifp;	/* inode fork pointer */
 	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
@@ -2118,15 +2090,14 @@ xfs_bmap_add_extent_hole_real(
 		 * left and on the right.
 		 * Merge all three into a single extent record.
 		 */
-		xfs_bmap_trace_pre_update(fname, "LC|RC", ip, idx - 1,
+		XFS_BMAP_TRACE_PRE_UPDATE("LC|RC", ip, idx - 1,
 			whichfork);
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			left.br_blockcount + new->br_blockcount +
 			right.br_blockcount);
-		xfs_bmap_trace_post_update(fname, "LC|RC", ip, idx - 1,
+		XFS_BMAP_TRACE_POST_UPDATE("LC|RC", ip, idx - 1,
 			whichfork);
-		xfs_bmap_trace_delete(fname, "LC|RC", ip,
-			idx, 1, whichfork);
+		XFS_BMAP_TRACE_DELETE("LC|RC", ip, idx, 1, whichfork);
 		xfs_iext_remove(ifp, idx, 1);
 		ifp->if_lastex = idx - 1;
 		XFS_IFORK_NEXT_SET(ip, whichfork,
@@ -2168,10 +2139,10 @@ xfs_bmap_add_extent_hole_real(
 		 * on the left.
 		 * Merge the new allocation with the left neighbor.
 		 */
-		xfs_bmap_trace_pre_update(fname, "LC", ip, idx - 1, whichfork);
+		XFS_BMAP_TRACE_PRE_UPDATE("LC", ip, idx - 1, whichfork);
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			left.br_blockcount + new->br_blockcount);
-		xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1, whichfork);
+		XFS_BMAP_TRACE_POST_UPDATE("LC", ip, idx - 1, whichfork);
 		ifp->if_lastex = idx - 1;
 		if (cur == NULL) {
 			rval = XFS_ILOG_FEXT(whichfork);
@@ -2202,11 +2173,11 @@ xfs_bmap_add_extent_hole_real(
 		 * on the right.
 		 * Merge the new allocation with the right neighbor.
 		 */
-		xfs_bmap_trace_pre_update(fname, "RC", ip, idx, whichfork);
+		XFS_BMAP_TRACE_PRE_UPDATE("RC", ip, idx, whichfork);
 		xfs_bmbt_set_allf(ep, new->br_startoff, new->br_startblock,
 			new->br_blockcount + right.br_blockcount,
 			right.br_state);
-		xfs_bmap_trace_post_update(fname, "RC", ip, idx, whichfork);
+		XFS_BMAP_TRACE_POST_UPDATE("RC", ip, idx, whichfork);
 		ifp->if_lastex = idx;
 		if (cur == NULL) {
 			rval = XFS_ILOG_FEXT(whichfork);
@@ -2237,8 +2208,7 @@ xfs_bmap_add_extent_hole_real(
 		 * real allocation.
 		 * Insert a new entry.
 		 */
-		xfs_bmap_trace_insert(fname, "0", ip, idx, 1, new, NULL,
-			whichfork);
+		XFS_BMAP_TRACE_INSERT("0", ip, idx, 1, new, NULL, whichfork);
 		xfs_iext_insert(ifp, idx, 1, new);
 		ifp->if_lastex = idx;
 		XFS_IFORK_NEXT_SET(ip, whichfork,
@@ -2605,12 +2575,10 @@ xfs_bmap_rtalloc(
 	xfs_extlen_t	prod = 0;	/* product factor for allocators */
 	xfs_extlen_t	ralen = 0;	/* realtime allocation length */
 	xfs_extlen_t	align;		/* minimum allocation alignment */
-	xfs_rtblock_t	rtx;		/* realtime extent number */
 	xfs_rtblock_t	rtb;
 
 	mp = ap->ip->i_mount;
-	align = ap->ip->i_d.di_extsize ?
-		ap->ip->i_d.di_extsize : mp->m_sb.sb_rextsize;
+	align = xfs_get_extsz_hint(ap->ip);
 	prod = align / mp->m_sb.sb_rextsize;
 	error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
 					align, 1, ap->eof, 0,
@@ -2644,6 +2612,8 @@ xfs_bmap_rtalloc(
 	 * pick an extent that will space things out in the rt area.
 	 */
 	if (ap->eof && ap->off == 0) {
+		xfs_rtblock_t uninitialized_var(rtx); /* realtime extent no */
+
 		error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx);
 		if (error)
 			return error;
@@ -2715,9 +2685,7 @@ xfs_bmap_btalloc(
 	int		error;
 
 	mp = ap->ip->i_mount;
-	align = (ap->userdata && ap->ip->i_d.di_extsize &&
-		(ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)) ?
-		ap->ip->i_d.di_extsize : 0;
+	align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
 	if (unlikely(align)) {
 		error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
 						align, 0, ap->eof, 0, ap->conv,
@@ -2727,9 +2695,15 @@ xfs_bmap_btalloc(
 	}
 	nullfb = ap->firstblock == NULLFSBLOCK;
 	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
-	if (nullfb)
-		ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
-	else
+	if (nullfb) {
+		if (ap->userdata && xfs_inode_is_filestream(ap->ip)) {
+			ag = xfs_filestream_lookup_ag(ap->ip);
+			ag = (ag != NULLAGNUMBER) ? ag : 0;
+			ap->rval = XFS_AGB_TO_FSB(mp, ag, 0);
+		} else {
+			ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
+		}
+	} else
 		ap->rval = ap->firstblock;
 
 	xfs_bmap_adjacent(ap);
@@ -2753,13 +2727,22 @@ xfs_bmap_btalloc(
 	args.firstblock = ap->firstblock;
 	blen = 0;
 	if (nullfb) {
-		args.type = XFS_ALLOCTYPE_START_BNO;
+		if (ap->userdata && xfs_inode_is_filestream(ap->ip))
+			args.type = XFS_ALLOCTYPE_NEAR_BNO;
+		else
+			args.type = XFS_ALLOCTYPE_START_BNO;
 		args.total = ap->total;
+
 		/*
-		 * Find the longest available space.
-		 * We're going to try for the whole allocation at once.
+		 * Search for an allocation group with a single extent
+		 * large enough for the request.
+		 *
+		 * If one isn't found, then adjust the minimum allocation
+		 * size to the largest space found.
 		 */
 		startag = ag = XFS_FSB_TO_AGNO(mp, args.fsbno);
+		if (startag == NULLAGNUMBER)
+			startag = ag = 0;
 		notinit = 0;
 		down_read(&mp->m_peraglock);
 		while (blen < ap->alen) {
@@ -2785,6 +2768,35 @@ xfs_bmap_btalloc(
 					blen = longest;
 			} else
 				notinit = 1;
+
+			if (xfs_inode_is_filestream(ap->ip)) {
+				if (blen >= ap->alen)
+					break;
+
+				if (ap->userdata) {
+					/*
+					 * If startag is an invalid AG, we've
+					 * come here once before and
+					 * xfs_filestream_new_ag picked the
+					 * best currently available.
+					 *
+					 * Don't continue looping, since we
+					 * could loop forever.
+					 */
+					if (startag == NULLAGNUMBER)
+						break;
+
+					error = xfs_filestream_new_ag(ap, &ag);
+					if (error) {
+						up_read(&mp->m_peraglock);
+						return error;
+					}
+
+					/* loop again to set 'blen'*/
+					startag = NULLAGNUMBER;
+					continue;
+				}
+			}
 			if (++ag == mp->m_sb.sb_agcount)
 				ag = 0;
 			if (ag == startag)
@@ -2809,17 +2821,27 @@ xfs_bmap_btalloc(
 		 */
 		else
 			args.minlen = ap->alen;
+
+		/*
+		 * set the failure fallback case to look in the selected
+		 * AG as the stream may have moved.
+		 */
+		if (xfs_inode_is_filestream(ap->ip))
+			ap->rval = args.fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
 	} else if (ap->low) {
-		args.type = XFS_ALLOCTYPE_START_BNO;
+		if (xfs_inode_is_filestream(ap->ip))
+			args.type = XFS_ALLOCTYPE_FIRST_AG;
+		else
+			args.type = XFS_ALLOCTYPE_START_BNO;
 		args.total = args.minlen = ap->minlen;
 	} else {
 		args.type = XFS_ALLOCTYPE_NEAR_BNO;
 		args.total = ap->total;
 		args.minlen = ap->minlen;
 	}
-	if (unlikely(ap->userdata && ap->ip->i_d.di_extsize &&
-		    (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE))) {
-		args.prod = ap->ip->i_d.di_extsize;
+	/* apply extent size hints if obtained earlier */
+	if (unlikely(align)) {
+		args.prod = align;
 		if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod)))
 			args.mod = (xfs_extlen_t)(args.prod - args.mod);
 	} else if (mp->m_sb.sb_blocksize >= NBPP) {
@@ -3051,9 +3073,6 @@ xfs_bmap_del_extent(
 	xfs_bmbt_rec_t		*ep;	/* current extent entry pointer */
 	int			error;	/* error return value */
 	int			flags;	/* inode logging flags */
-#ifdef XFS_BMAP_TRACE
-	static char		fname[] = "xfs_bmap_del_extent";
-#endif
 	xfs_bmbt_irec_t		got;	/* current extent entry */
 	xfs_fileoff_t		got_endoff;	/* first offset past got */
 	int			i;	/* temp state */
@@ -3147,7 +3166,7 @@ xfs_bmap_del_extent(
 		/*
 		 * Matches the whole extent.  Delete the entry.
 		 */
-		xfs_bmap_trace_delete(fname, "3", ip, idx, 1, whichfork);
+		XFS_BMAP_TRACE_DELETE("3", ip, idx, 1, whichfork);
 		xfs_iext_remove(ifp, idx, 1);
 		ifp->if_lastex = idx;
 		if (delay)
@@ -3168,7 +3187,7 @@ xfs_bmap_del_extent(
 		/*
 		 * Deleting the first part of the extent.
 		 */
-		xfs_bmap_trace_pre_update(fname, "2", ip, idx, whichfork);
+		XFS_BMAP_TRACE_PRE_UPDATE("2", ip, idx, whichfork);
 		xfs_bmbt_set_startoff(ep, del_endoff);
 		temp = got.br_blockcount - del->br_blockcount;
 		xfs_bmbt_set_blockcount(ep, temp);
@@ -3177,13 +3196,13 @@ xfs_bmap_del_extent(
 			temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
 				da_old);
 			xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
-			xfs_bmap_trace_post_update(fname, "2", ip, idx,
+			XFS_BMAP_TRACE_POST_UPDATE("2", ip, idx,
 				whichfork);
 			da_new = temp;
 			break;
 		}
 		xfs_bmbt_set_startblock(ep, del_endblock);
-		xfs_bmap_trace_post_update(fname, "2", ip, idx, whichfork);
+		XFS_BMAP_TRACE_POST_UPDATE("2", ip, idx, whichfork);
 		if (!cur) {
 			flags |= XFS_ILOG_FEXT(whichfork);
 			break;
@@ -3199,19 +3218,19 @@ xfs_bmap_del_extent(
 		 * Deleting the last part of the extent.
 		 */
 		temp = got.br_blockcount - del->br_blockcount;
-		xfs_bmap_trace_pre_update(fname, "1", ip, idx, whichfork);
+		XFS_BMAP_TRACE_PRE_UPDATE("1", ip, idx, whichfork);
 		xfs_bmbt_set_blockcount(ep, temp);
 		ifp->if_lastex = idx;
 		if (delay) {
 			temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
 				da_old);
 			xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
-			xfs_bmap_trace_post_update(fname, "1", ip, idx,
+			XFS_BMAP_TRACE_POST_UPDATE("1", ip, idx,
 				whichfork);
 			da_new = temp;
 			break;
 		}
-		xfs_bmap_trace_post_update(fname, "1", ip, idx, whichfork);
+		XFS_BMAP_TRACE_POST_UPDATE("1", ip, idx, whichfork);
 		if (!cur) {
 			flags |= XFS_ILOG_FEXT(whichfork);
 			break;
@@ -3228,7 +3247,7 @@ xfs_bmap_del_extent(
 		 * Deleting the middle of the extent.
 		 */
 		temp = del->br_startoff - got.br_startoff;
-		xfs_bmap_trace_pre_update(fname, "0", ip, idx, whichfork);
+		XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, whichfork);
 		xfs_bmbt_set_blockcount(ep, temp);
 		new.br_startoff = del_endoff;
 		temp2 = got_endoff - del_endoff;
@@ -3315,8 +3334,8 @@ xfs_bmap_del_extent(
 				}
 			}
 		}
-		xfs_bmap_trace_post_update(fname, "0", ip, idx, whichfork);
-		xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 1, &new, NULL,
+		XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, whichfork);
+		XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 1, &new, NULL,
 			whichfork);
 		xfs_iext_insert(ifp, idx + 1, 1, &new);
 		ifp->if_lastex = idx + 1;
@@ -3556,9 +3575,6 @@ xfs_bmap_local_to_extents(
 {
 	int		error;		/* error return value */
 	int		flags;		/* logging flags returned */
-#ifdef XFS_BMAP_TRACE
-	static char	fname[] = "xfs_bmap_local_to_extents";
-#endif
 	xfs_ifork_t	*ifp;		/* inode fork pointer */
 
 	/*
@@ -3613,7 +3629,7 @@ xfs_bmap_local_to_extents(
 		xfs_iext_add(ifp, 0, 1);
 		ep = xfs_iext_get_ext(ifp, 0);
 		xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
-		xfs_bmap_trace_post_update(fname, "new", ip, 0, whichfork);
+		XFS_BMAP_TRACE_POST_UPDATE("new", ip, 0, whichfork);
 		XFS_IFORK_NEXT_SET(ip, whichfork, 1);
 		ip->i_d.di_nblocks = 1;
 		XFS_TRANS_MOD_DQUOT_BYINO(args.mp, tp, ip,
@@ -3736,7 +3752,7 @@ ktrace_t	*xfs_bmap_trace_buf;
 STATIC void
 xfs_bmap_trace_addentry(
 	int		opcode,		/* operation */
-	char		*fname,		/* function name */
+	const char	*fname,		/* function name */
 	char		*desc,		/* operation description */
 	xfs_inode_t	*ip,		/* incore inode pointer */
 	xfs_extnum_t	idx,		/* index of entry(ies) */
@@ -3795,7 +3811,7 @@ xfs_bmap_trace_addentry(
  */
 STATIC void
 xfs_bmap_trace_delete(
-	char		*fname,		/* function name */
+	const char	*fname,		/* function name */
 	char		*desc,		/* operation description */
 	xfs_inode_t	*ip,		/* incore inode pointer */
 	xfs_extnum_t	idx,		/* index of entry(entries) deleted */
@@ -3817,7 +3833,7 @@ xfs_bmap_trace_delete(
  */
 STATIC void
 xfs_bmap_trace_insert(
-	char		*fname,		/* function name */
+	const char	*fname,		/* function name */
 	char		*desc,		/* operation description */
 	xfs_inode_t	*ip,		/* incore inode pointer */
 	xfs_extnum_t	idx,		/* index of entry(entries) inserted */
@@ -3846,7 +3862,7 @@ xfs_bmap_trace_insert(
  */
 STATIC void
 xfs_bmap_trace_post_update(
-	char		*fname,		/* function name */
+	const char	*fname,		/* function name */
 	char		*desc,		/* operation description */
 	xfs_inode_t	*ip,		/* incore inode pointer */
 	xfs_extnum_t	idx,		/* index of entry updated */
@@ -3864,7 +3880,7 @@ xfs_bmap_trace_post_update(
  */
 STATIC void
 xfs_bmap_trace_pre_update(
-	char		*fname,		/* function name */
+	const char	*fname,		/* function name */
 	char		*desc,		/* operation description */
 	xfs_inode_t	*ip,		/* incore inode pointer */
 	xfs_extnum_t	idx,		/* index of entry to be updated */
@@ -4481,9 +4497,6 @@ xfs_bmap_read_extents(
 	xfs_buf_t		*bp;	/* buffer for "block" */
 	int			error;	/* error return value */
 	xfs_exntfmt_t		exntf;	/* XFS_EXTFMT_NOSTATE, if checking */
-#ifdef XFS_BMAP_TRACE
-	static char		fname[] = "xfs_bmap_read_extents";
-#endif
 	xfs_extnum_t		i, j;	/* index into the extents list */
 	xfs_ifork_t		*ifp;	/* fork structure */
 	int			level;	/* btree level, for checking */
@@ -4600,7 +4613,7 @@ xfs_bmap_read_extents(
 	}
 	ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
 	ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
-	xfs_bmap_trace_exlist(fname, ip, i, whichfork);
+	XFS_BMAP_TRACE_EXLIST(ip, i, whichfork);
 	return 0;
 error0:
 	xfs_trans_brelse(tp, bp);
@@ -4613,7 +4626,7 @@ error0:
  */
 void
 xfs_bmap_trace_exlist(
-	char		*fname,		/* function name */
+	const char	*fname,		/* function name */
 	xfs_inode_t	*ip,		/* incore inode pointer */
 	xfs_extnum_t	cnt,		/* count of entries in the list */
 	int		whichfork)	/* data or attr fork */
@@ -4628,7 +4641,7 @@ xfs_bmap_trace_exlist(
 	for (idx = 0; idx < cnt; idx++) {
 		ep = xfs_iext_get_ext(ifp, idx);
 		xfs_bmbt_get_all(ep, &s);
-		xfs_bmap_trace_insert(fname, "exlist", ip, idx, 1, &s, NULL,
+		XFS_BMAP_TRACE_INSERT("exlist", ip, idx, 1, &s, NULL,
 			whichfork);
 	}
 }
@@ -4868,12 +4881,7 @@ xfs_bmapi(
 				xfs_extlen_t	extsz;
 
 				/* Figure out the extent size, adjust alen */
-				if (rt) {
-					if (!(extsz = ip->i_d.di_extsize))
-						extsz = mp->m_sb.sb_rextsize;
-				} else {
-					extsz = ip->i_d.di_extsize;
-				}
+				extsz = xfs_get_extsz_hint(ip);
 				if (extsz) {
 					error = xfs_bmap_extsize_align(mp,
 							&got, &prev, extsz,
@@ -5219,10 +5227,10 @@ xfs_bmapi(
 		 * Else go on to the next record.
 		 */
 		ep = xfs_iext_get_ext(ifp, ++lastx);
-		if (lastx >= nextents) {
+		prev = got;
+		if (lastx >= nextents)
 			eof = 1;
-			prev = got;
-		} else
+		else
 			xfs_bmbt_get_all(ep, &got);
 	}
 	ifp->if_lastex = lastx;
@@ -5813,8 +5821,7 @@ xfs_getbmap(
 		   ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
 		return XFS_ERROR(EINVAL);
 	if (whichfork == XFS_DATA_FORK) {
-		if ((ip->i_d.di_extsize && (ip->i_d.di_flags &
-				(XFS_DIFLAG_REALTIME|XFS_DIFLAG_EXTSIZE))) ||
+		if (xfs_get_extsz_hint(ip) ||
 		    ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
 			prealloced = 1;
 			fixlen = XFS_MAXIOFFSET(mp);
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 4f24c7e39b3..524b1c9d524 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -144,12 +144,14 @@ extern ktrace_t	*xfs_bmap_trace_buf;
  */
 void
 xfs_bmap_trace_exlist(
-	char			*fname,		/* function name */
+	const char		*fname,		/* function name */
 	struct xfs_inode	*ip,		/* incore inode pointer */
 	xfs_extnum_t		cnt,		/* count of entries in list */
 	int			whichfork);	/* data or attr fork */
+#define	XFS_BMAP_TRACE_EXLIST(ip,c,w)	\
+	xfs_bmap_trace_exlist(__FUNCTION__,ip,c,w)
 #else
-#define	xfs_bmap_trace_exlist(f,ip,c,w)
+#define	XFS_BMAP_TRACE_EXLIST(ip,c,w)
 #endif
 
 /*
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 0bf192fea3e..89b891f51cf 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -76,7 +76,7 @@ static char	EXIT[] = "exit";
  */
 STATIC void
 xfs_bmbt_trace_enter(
-	char		*func,
+	const char	*func,
 	xfs_btree_cur_t	*cur,
 	char		*s,
 	int		type,
@@ -117,7 +117,7 @@ xfs_bmbt_trace_enter(
  */
 STATIC void
 xfs_bmbt_trace_argbi(
-	char		*func,
+	const char	*func,
 	xfs_btree_cur_t	*cur,
 	xfs_buf_t	*b,
 	int		i,
@@ -134,7 +134,7 @@ xfs_bmbt_trace_argbi(
  */
 STATIC void
 xfs_bmbt_trace_argbii(
-	char		*func,
+	const char	*func,
 	xfs_btree_cur_t	*cur,
 	xfs_buf_t	*b,
 	int		i0,
@@ -153,7 +153,7 @@ xfs_bmbt_trace_argbii(
  */
 STATIC void
 xfs_bmbt_trace_argfffi(
-	char			*func,
+	const char		*func,
 	xfs_btree_cur_t		*cur,
 	xfs_dfiloff_t		o,
 	xfs_dfsbno_t		b,
@@ -172,7 +172,7 @@ xfs_bmbt_trace_argfffi(
  */
 STATIC void
 xfs_bmbt_trace_argi(
-	char		*func,
+	const char	*func,
 	xfs_btree_cur_t	*cur,
 	int		i,
 	int		line)
@@ -188,7 +188,7 @@ xfs_bmbt_trace_argi(
  */
 STATIC void
 xfs_bmbt_trace_argifk(
-	char			*func,
+	const char		*func,
 	xfs_btree_cur_t		*cur,
 	int			i,
 	xfs_fsblock_t		f,
@@ -206,7 +206,7 @@ xfs_bmbt_trace_argifk(
  */
 STATIC void
 xfs_bmbt_trace_argifr(
-	char			*func,
+	const char		*func,
 	xfs_btree_cur_t		*cur,
 	int			i,
 	xfs_fsblock_t		f,
@@ -235,7 +235,7 @@ xfs_bmbt_trace_argifr(
  */
 STATIC void
 xfs_bmbt_trace_argik(
-	char			*func,
+	const char		*func,
 	xfs_btree_cur_t		*cur,
 	int			i,
 	xfs_bmbt_key_t		*k,
@@ -255,7 +255,7 @@ xfs_bmbt_trace_argik(
  */
 STATIC void
 xfs_bmbt_trace_cursor(
-	char		*func,
+	const char	*func,
 	xfs_btree_cur_t	*cur,
 	char		*s,
 	int		line)
@@ -274,21 +274,21 @@ xfs_bmbt_trace_cursor(
 }
 
 #define	XFS_BMBT_TRACE_ARGBI(c,b,i)	\
-	xfs_bmbt_trace_argbi(fname, c, b, i, __LINE__)
+	xfs_bmbt_trace_argbi(__FUNCTION__, c, b, i, __LINE__)
 #define	XFS_BMBT_TRACE_ARGBII(c,b,i,j)	\
-	xfs_bmbt_trace_argbii(fname, c, b, i, j, __LINE__)
+	xfs_bmbt_trace_argbii(__FUNCTION__, c, b, i, j, __LINE__)
 #define	XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j)	\
-	xfs_bmbt_trace_argfffi(fname, c, o, b, i, j, __LINE__)
+	xfs_bmbt_trace_argfffi(__FUNCTION__, c, o, b, i, j, __LINE__)
 #define	XFS_BMBT_TRACE_ARGI(c,i)	\
-	xfs_bmbt_trace_argi(fname, c, i, __LINE__)
+	xfs_bmbt_trace_argi(__FUNCTION__, c, i, __LINE__)
 #define	XFS_BMBT_TRACE_ARGIFK(c,i,f,s)	\
-	xfs_bmbt_trace_argifk(fname, c, i, f, s, __LINE__)
+	xfs_bmbt_trace_argifk(__FUNCTION__, c, i, f, s, __LINE__)
 #define	XFS_BMBT_TRACE_ARGIFR(c,i,f,r)	\
-	xfs_bmbt_trace_argifr(fname, c, i, f, r, __LINE__)
+	xfs_bmbt_trace_argifr(__FUNCTION__, c, i, f, r, __LINE__)
 #define	XFS_BMBT_TRACE_ARGIK(c,i,k)	\
-	xfs_bmbt_trace_argik(fname, c, i, k, __LINE__)
+	xfs_bmbt_trace_argik(__FUNCTION__, c, i, k, __LINE__)
 #define	XFS_BMBT_TRACE_CURSOR(c,s)	\
-	xfs_bmbt_trace_cursor(fname, c, s, __LINE__)
+	xfs_bmbt_trace_cursor(__FUNCTION__, c, s, __LINE__)
 #else
 #define	XFS_BMBT_TRACE_ARGBI(c,b,i)
 #define	XFS_BMBT_TRACE_ARGBII(c,b,i,j)
@@ -318,9 +318,6 @@ xfs_bmbt_delrec(
 	xfs_fsblock_t		bno;		/* fs-relative block number */
 	xfs_buf_t		*bp;		/* buffer for block */
 	int			error;		/* error return value */
-#ifdef XFS_BMBT_TRACE
-	static char		fname[] = "xfs_bmbt_delrec";
-#endif
 	int			i;		/* loop counter */
 	int			j;		/* temp state */
 	xfs_bmbt_key_t		key;		/* bmap btree key */
@@ -694,9 +691,6 @@ xfs_bmbt_insrec(
 	xfs_bmbt_block_t	*block;		/* bmap btree block */
 	xfs_buf_t		*bp;		/* buffer for block */
 	int			error;		/* error return value */
-#ifdef XFS_BMBT_TRACE
-	static char		fname[] = "xfs_bmbt_insrec";
-#endif
 	int			i;		/* loop index */
 	xfs_bmbt_key_t		key;		/* bmap btree key */
 	xfs_bmbt_key_t		*kp=NULL;	/* pointer to bmap btree key */
@@ -881,9 +875,6 @@ xfs_bmbt_killroot(
 #ifdef DEBUG
 	int			error;
 #endif
-#ifdef XFS_BMBT_TRACE
-	static char		fname[] = "xfs_bmbt_killroot";
-#endif
 	int			i;
 	xfs_bmbt_key_t		*kp;
 	xfs_inode_t		*ip;
@@ -973,9 +964,6 @@ xfs_bmbt_log_keys(
 	int		kfirst,
 	int		klast)
 {
-#ifdef XFS_BMBT_TRACE
-	static char	fname[] = "xfs_bmbt_log_keys";
-#endif
 	xfs_trans_t	*tp;
 
 	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
@@ -1012,9 +1000,6 @@ xfs_bmbt_log_ptrs(
 	int		pfirst,
 	int		plast)
 {
-#ifdef XFS_BMBT_TRACE
-	static char	fname[] = "xfs_bmbt_log_ptrs";
-#endif
 	xfs_trans_t	*tp;
 
 	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
@@ -1055,9 +1040,6 @@ xfs_bmbt_lookup(
 	xfs_daddr_t		d;
 	xfs_sfiloff_t		diff;
 	int			error;		/* error return value */
-#ifdef XFS_BMBT_TRACE
-	static char	fname[] = "xfs_bmbt_lookup";
-#endif
 	xfs_fsblock_t		fsbno=0;
 	int			high;
 	int			i;
@@ -1195,9 +1177,6 @@ xfs_bmbt_lshift(
 	int			*stat)		/* success/failure */
 {
 	int			error;		/* error return value */
-#ifdef XFS_BMBT_TRACE
-	static char		fname[] = "xfs_bmbt_lshift";
-#endif
 #ifdef DEBUG
 	int			i;		/* loop counter */
 #endif
@@ -1331,9 +1310,6 @@ xfs_bmbt_rshift(
 	int			*stat)		/* success/failure */
 {
 	int			error;		/* error return value */
-#ifdef XFS_BMBT_TRACE
-	static char		fname[] = "xfs_bmbt_rshift";
-#endif
 	int			i;		/* loop counter */
 	xfs_bmbt_key_t		key;		/* bmap btree key */
 	xfs_buf_t		*lbp;		/* left buffer pointer */
@@ -1492,9 +1468,6 @@ xfs_bmbt_split(
 {
 	xfs_alloc_arg_t		args;		/* block allocation args */
 	int			error;		/* error return value */
-#ifdef XFS_BMBT_TRACE
-	static char		fname[] = "xfs_bmbt_split";
-#endif
 	int			i;		/* loop counter */
 	xfs_fsblock_t		lbno;		/* left sibling block number */
 	xfs_buf_t		*lbp;		/* left buffer pointer */
@@ -1641,9 +1614,6 @@ xfs_bmbt_updkey(
 #ifdef DEBUG
 	int			error;
 #endif
-#ifdef XFS_BMBT_TRACE
-	static char		fname[] = "xfs_bmbt_updkey";
-#endif
 	xfs_bmbt_key_t		*kp;
 	int			ptr;
 
@@ -1712,9 +1682,6 @@ xfs_bmbt_decrement(
 	xfs_bmbt_block_t	*block;
 	xfs_buf_t		*bp;
 	int			error;		/* error return value */
-#ifdef XFS_BMBT_TRACE
-	static char		fname[] = "xfs_bmbt_decrement";
-#endif
 	xfs_fsblock_t		fsbno;
 	int			lev;
 	xfs_mount_t		*mp;
@@ -1785,9 +1752,6 @@ xfs_bmbt_delete(
 	int		*stat)		/* success/failure */
 {
 	int		error;		/* error return value */
-#ifdef XFS_BMBT_TRACE
-	static char	fname[] = "xfs_bmbt_delete";
-#endif
 	int		i;
 	int		level;
 
@@ -2000,9 +1964,6 @@ xfs_bmbt_increment(
 	xfs_bmbt_block_t	*block;
 	xfs_buf_t		*bp;
 	int			error;		/* error return value */
-#ifdef XFS_BMBT_TRACE
-	static char		fname[] = "xfs_bmbt_increment";
-#endif
 	xfs_fsblock_t		fsbno;
 	int			lev;
 	xfs_mount_t		*mp;
@@ -2080,9 +2041,6 @@ xfs_bmbt_insert(
 	int		*stat)		/* success/failure */
 {
 	int		error;		/* error return value */
-#ifdef XFS_BMBT_TRACE
-	static char	fname[] = "xfs_bmbt_insert";
-#endif
 	int		i;
 	int		level;
 	xfs_fsblock_t	nbno;
@@ -2142,9 +2100,6 @@ xfs_bmbt_log_block(
 	int			fields)
 {
 	int			first;
-#ifdef XFS_BMBT_TRACE
-	static char		fname[] = "xfs_bmbt_log_block";
-#endif
 	int			last;
 	xfs_trans_t		*tp;
 	static const short	offsets[] = {
@@ -2181,9 +2136,6 @@ xfs_bmbt_log_recs(
 {
 	xfs_bmbt_block_t	*block;
 	int			first;
-#ifdef XFS_BMBT_TRACE
-	static char		fname[] = "xfs_bmbt_log_recs";
-#endif
 	int			last;
 	xfs_bmbt_rec_t		*rp;
 	xfs_trans_t		*tp;
@@ -2245,9 +2197,6 @@ xfs_bmbt_newroot(
 	xfs_bmbt_key_t		*ckp;		/* child key pointer */
 	xfs_bmbt_ptr_t		*cpp;		/* child ptr pointer */
 	int			error;		/* error return code */
-#ifdef XFS_BMBT_TRACE
-	static char		fname[] = "xfs_bmbt_newroot";
-#endif
 #ifdef DEBUG
 	int			i;		/* loop counter */
 #endif
@@ -2630,9 +2579,6 @@ xfs_bmbt_update(
 	xfs_bmbt_block_t	*block;
 	xfs_buf_t		*bp;
 	int			error;
-#ifdef XFS_BMBT_TRACE
-	static char		fname[] = "xfs_bmbt_update";
-#endif
 	xfs_bmbt_key_t		key;
 	int			ptr;
 	xfs_bmbt_rec_t		*rp;
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 4e27d55a1e7..6e40a0a198f 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -444,30 +444,14 @@ xfs_btree_setbuf(
 /*
  * Min and max functions for extlen, agblock, fileoff, and filblks types.
  */
-#define	XFS_EXTLEN_MIN(a,b)	\
-	((xfs_extlen_t)(a) < (xfs_extlen_t)(b) ? \
-		(xfs_extlen_t)(a) : (xfs_extlen_t)(b))
-#define	XFS_EXTLEN_MAX(a,b)	\
-	((xfs_extlen_t)(a) > (xfs_extlen_t)(b) ? \
-		(xfs_extlen_t)(a) : (xfs_extlen_t)(b))
-#define	XFS_AGBLOCK_MIN(a,b)	\
-	((xfs_agblock_t)(a) < (xfs_agblock_t)(b) ? \
-		(xfs_agblock_t)(a) : (xfs_agblock_t)(b))
-#define	XFS_AGBLOCK_MAX(a,b)	\
-	((xfs_agblock_t)(a) > (xfs_agblock_t)(b) ? \
-		(xfs_agblock_t)(a) : (xfs_agblock_t)(b))
-#define	XFS_FILEOFF_MIN(a,b)	\
-	((xfs_fileoff_t)(a) < (xfs_fileoff_t)(b) ? \
-		(xfs_fileoff_t)(a) : (xfs_fileoff_t)(b))
-#define	XFS_FILEOFF_MAX(a,b)	\
-	((xfs_fileoff_t)(a) > (xfs_fileoff_t)(b) ? \
-		(xfs_fileoff_t)(a) : (xfs_fileoff_t)(b))
-#define	XFS_FILBLKS_MIN(a,b)	\
-	((xfs_filblks_t)(a) < (xfs_filblks_t)(b) ? \
-		(xfs_filblks_t)(a) : (xfs_filblks_t)(b))
-#define	XFS_FILBLKS_MAX(a,b)	\
-	((xfs_filblks_t)(a) > (xfs_filblks_t)(b) ? \
-		(xfs_filblks_t)(a) : (xfs_filblks_t)(b))
+#define	XFS_EXTLEN_MIN(a,b)	min_t(xfs_extlen_t, (a), (b))
+#define	XFS_EXTLEN_MAX(a,b)	max_t(xfs_extlen_t, (a), (b))
+#define	XFS_AGBLOCK_MIN(a,b)	min_t(xfs_agblock_t, (a), (b))
+#define	XFS_AGBLOCK_MAX(a,b)	max_t(xfs_agblock_t, (a), (b))
+#define	XFS_FILEOFF_MIN(a,b)	min_t(xfs_fileoff_t, (a), (b))
+#define	XFS_FILEOFF_MAX(a,b)	max_t(xfs_fileoff_t, (a), (b))
+#define	XFS_FILBLKS_MIN(a,b)	min_t(xfs_filblks_t, (a), (b))
+#define	XFS_FILBLKS_MAX(a,b)	max_t(xfs_filblks_t, (a), (b))
 
 #define	XFS_FSB_SANITY_CHECK(mp,fsb)	\
 	(XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 6c1bddc04e3..b0667cb27d6 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -580,8 +580,8 @@ xfs_buf_item_unlock(
 	 * If the buf item isn't tracking any data, free it.
 	 * Otherwise, if XFS_BLI_HOLD is set clear it.
 	 */
-	if (xfs_count_bits(bip->bli_format.blf_data_map,
-			      bip->bli_format.blf_map_size, 0) == 0) {
+	if (xfs_bitmap_empty(bip->bli_format.blf_data_map,
+			     bip->bli_format.blf_map_size)) {
 		xfs_buf_item_relse(bp);
 	} else if (hold) {
 		bip->bli_flags &= ~XFS_BLI_HOLD;
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index 5b7eb81453b..f89196cb08d 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -99,5 +99,7 @@ struct xfs_mount_args {
  */
 #define XFSMNT2_COMPAT_IOSIZE	0x00000001	/* don't report large preferred
 						 * I/O size in stat(2) */
+#define XFSMNT2_FILESTREAMS	0x00000002	/* enable the filestreams
+						 * allocator */
 
 #endif	/* __XFS_CLNT_H__ */
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index b33826961c4..fefd0116bac 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -257,6 +257,7 @@ typedef enum xfs_dinode_fmt
 #define XFS_DIFLAG_EXTSIZE_BIT      11	/* inode extent size allocator hint */
 #define XFS_DIFLAG_EXTSZINHERIT_BIT 12	/* inherit inode extent size */
 #define XFS_DIFLAG_NODEFRAG_BIT     13	/* do not reorganize/defragment */
+#define XFS_DIFLAG_FILESTREAM_BIT   14  /* use filestream allocator */
 #define XFS_DIFLAG_REALTIME      (1 << XFS_DIFLAG_REALTIME_BIT)
 #define XFS_DIFLAG_PREALLOC      (1 << XFS_DIFLAG_PREALLOC_BIT)
 #define XFS_DIFLAG_NEWRTBM       (1 << XFS_DIFLAG_NEWRTBM_BIT)
@@ -271,12 +272,13 @@ typedef enum xfs_dinode_fmt
 #define XFS_DIFLAG_EXTSIZE       (1 << XFS_DIFLAG_EXTSIZE_BIT)
 #define XFS_DIFLAG_EXTSZINHERIT  (1 << XFS_DIFLAG_EXTSZINHERIT_BIT)
 #define XFS_DIFLAG_NODEFRAG      (1 << XFS_DIFLAG_NODEFRAG_BIT)
+#define XFS_DIFLAG_FILESTREAM    (1 << XFS_DIFLAG_FILESTREAM_BIT)
 
 #define XFS_DIFLAG_ANY \
 	(XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \
 	 XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \
 	 XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \
 	 XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \
-	 XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG)
+	 XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM)
 
 #endif	/* __XFS_DINODE_H__ */
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 8e8e5279334..29e091914df 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -55,9 +55,9 @@ xfs_dir_mount(
 	       XFS_MAX_BLOCKSIZE);
 	mp->m_dirblksize = 1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog);
 	mp->m_dirblkfsbs = 1 << mp->m_sb.sb_dirblklog;
-	mp->m_dirdatablk = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_DATA_FIRSTDB(mp));
-	mp->m_dirleafblk = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_LEAF_FIRSTDB(mp));
-	mp->m_dirfreeblk = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_FREE_FIRSTDB(mp));
+	mp->m_dirdatablk = xfs_dir2_db_to_da(mp, XFS_DIR2_DATA_FIRSTDB(mp));
+	mp->m_dirleafblk = xfs_dir2_db_to_da(mp, XFS_DIR2_LEAF_FIRSTDB(mp));
+	mp->m_dirfreeblk = xfs_dir2_db_to_da(mp, XFS_DIR2_FREE_FIRSTDB(mp));
 	mp->m_attr_node_ents =
 		(mp->m_sb.sb_blocksize - (uint)sizeof(xfs_da_node_hdr_t)) /
 		(uint)sizeof(xfs_da_node_entry_t);
@@ -554,7 +554,7 @@ xfs_dir2_grow_inode(
 	 */
 	if (mapp != &map)
 		kmem_free(mapp, sizeof(*mapp) * count);
-	*dbp = XFS_DIR2_DA_TO_DB(mp, (xfs_dablk_t)bno);
+	*dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno);
 	/*
 	 * Update file's size if this is the data space and it grew.
 	 */
@@ -706,7 +706,7 @@ xfs_dir2_shrink_inode(
 	dp = args->dp;
 	mp = dp->i_mount;
 	tp = args->trans;
-	da = XFS_DIR2_DB_TO_DA(mp, db);
+	da = xfs_dir2_db_to_da(mp, db);
 	/*
 	 * Unmap the fsblock(s).
 	 */
@@ -742,7 +742,7 @@ xfs_dir2_shrink_inode(
 	/*
 	 * If the block isn't the last one in the directory, we're done.
 	 */
-	if (dp->i_d.di_size > XFS_DIR2_DB_OFF_TO_BYTE(mp, db + 1, 0))
+	if (dp->i_d.di_size > xfs_dir2_db_off_to_byte(mp, db + 1, 0))
 		return 0;
 	bno = da;
 	if ((error = xfs_bmap_last_before(tp, dp, &bno, XFS_DATA_FORK))) {
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 3accc1dcd6c..e4df1aaae2a 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -115,13 +115,13 @@ xfs_dir2_block_addname(
 		xfs_da_brelse(tp, bp);
 		return XFS_ERROR(EFSCORRUPTED);
 	}
-	len = XFS_DIR2_DATA_ENTSIZE(args->namelen);
+	len = xfs_dir2_data_entsize(args->namelen);
 	/*
 	 * Set up pointers to parts of the block.
 	 */
 	bf = block->hdr.bestfree;
-	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
-	blp = XFS_DIR2_BLOCK_LEAF_P(btp);
+	btp = xfs_dir2_block_tail_p(mp, block);
+	blp = xfs_dir2_block_leaf_p(btp);
 	/*
 	 * No stale entries?  Need space for entry and new leaf.
 	 */
@@ -396,7 +396,7 @@ xfs_dir2_block_addname(
 	 * Fill in the leaf entry.
 	 */
 	blp[mid].hashval = cpu_to_be32(args->hashval);
-	blp[mid].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp,
+	blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
 				(char *)dep - (char *)block));
 	xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh);
 	/*
@@ -411,7 +411,7 @@ xfs_dir2_block_addname(
 	dep->inumber = cpu_to_be64(args->inumber);
 	dep->namelen = args->namelen;
 	memcpy(dep->name, args->name, args->namelen);
-	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+	tagp = xfs_dir2_data_entry_tag_p(dep);
 	*tagp = cpu_to_be16((char *)dep - (char *)block);
 	/*
 	 * Clean up the bestfree array and log the header, tail, and entry.
@@ -455,7 +455,7 @@ xfs_dir2_block_getdents(
 	/*
 	 * If the block number in the offset is out of range, we're done.
 	 */
-	if (XFS_DIR2_DATAPTR_TO_DB(mp, uio->uio_offset) > mp->m_dirdatablk) {
+	if (xfs_dir2_dataptr_to_db(mp, uio->uio_offset) > mp->m_dirdatablk) {
 		*eofp = 1;
 		return 0;
 	}
@@ -471,15 +471,15 @@ xfs_dir2_block_getdents(
 	 * Extract the byte offset we start at from the seek pointer.
 	 * We'll skip entries before this.
 	 */
-	wantoff = XFS_DIR2_DATAPTR_TO_OFF(mp, uio->uio_offset);
+	wantoff = xfs_dir2_dataptr_to_off(mp, uio->uio_offset);
 	block = bp->data;
 	xfs_dir2_data_check(dp, bp);
 	/*
 	 * Set up values for the loop.
 	 */
-	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+	btp = xfs_dir2_block_tail_p(mp, block);
 	ptr = (char *)block->u;
-	endptr = (char *)XFS_DIR2_BLOCK_LEAF_P(btp);
+	endptr = (char *)xfs_dir2_block_leaf_p(btp);
 	p.dbp = dbp;
 	p.put = put;
 	p.uio = uio;
@@ -502,7 +502,7 @@ xfs_dir2_block_getdents(
 		/*
 		 * Bump pointer for the next iteration.
 		 */
-		ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+		ptr += xfs_dir2_data_entsize(dep->namelen);
 		/*
 		 * The entry is before the desired starting point, skip it.
 		 */
@@ -513,7 +513,7 @@ xfs_dir2_block_getdents(
 		 */
 		p.namelen = dep->namelen;
 
-		p.cook = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
+		p.cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
 						    ptr - (char *)block);
 		p.ino = be64_to_cpu(dep->inumber);
 #if XFS_BIG_INUMS
@@ -531,7 +531,7 @@ xfs_dir2_block_getdents(
 		 */
 		if (!p.done) {
 			uio->uio_offset =
-				XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
+				xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
 					(char *)dep - (char *)block);
 			xfs_da_brelse(tp, bp);
 			return error;
@@ -545,7 +545,7 @@ xfs_dir2_block_getdents(
 	*eofp = 1;
 
 	uio->uio_offset =
-		XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk + 1, 0);
+		xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0);
 
 	xfs_da_brelse(tp, bp);
 
@@ -569,8 +569,8 @@ xfs_dir2_block_log_leaf(
 
 	mp = tp->t_mountp;
 	block = bp->data;
-	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
-	blp = XFS_DIR2_BLOCK_LEAF_P(btp);
+	btp = xfs_dir2_block_tail_p(mp, block);
+	blp = xfs_dir2_block_leaf_p(btp);
 	xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)block),
 		(uint)((char *)&blp[last + 1] - (char *)block - 1));
 }
@@ -589,7 +589,7 @@ xfs_dir2_block_log_tail(
 
 	mp = tp->t_mountp;
 	block = bp->data;
-	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+	btp = xfs_dir2_block_tail_p(mp, block);
 	xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)block),
 		(uint)((char *)(btp + 1) - (char *)block - 1));
 }
@@ -623,13 +623,13 @@ xfs_dir2_block_lookup(
 	mp = dp->i_mount;
 	block = bp->data;
 	xfs_dir2_data_check(dp, bp);
-	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
-	blp = XFS_DIR2_BLOCK_LEAF_P(btp);
+	btp = xfs_dir2_block_tail_p(mp, block);
+	blp = xfs_dir2_block_leaf_p(btp);
 	/*
 	 * Get the offset from the leaf entry, to point to the data.
 	 */
 	dep = (xfs_dir2_data_entry_t *)
-	      ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(blp[ent].address)));
+	      ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
 	/*
 	 * Fill in inode number, release the block.
 	 */
@@ -675,8 +675,8 @@ xfs_dir2_block_lookup_int(
 	ASSERT(bp != NULL);
 	block = bp->data;
 	xfs_dir2_data_check(dp, bp);
-	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
-	blp = XFS_DIR2_BLOCK_LEAF_P(btp);
+	btp = xfs_dir2_block_tail_p(mp, block);
+	blp = xfs_dir2_block_leaf_p(btp);
 	/*
 	 * Loop doing a binary search for our hash value.
 	 * Find our entry, ENOENT if it's not there.
@@ -713,7 +713,7 @@ xfs_dir2_block_lookup_int(
 		 * Get pointer to the entry from the leaf.
 		 */
 		dep = (xfs_dir2_data_entry_t *)
-			((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, addr));
+			((char *)block + xfs_dir2_dataptr_to_off(mp, addr));
 		/*
 		 * Compare, if it's right give back buffer & entry number.
 		 */
@@ -768,20 +768,20 @@ xfs_dir2_block_removename(
 	tp = args->trans;
 	mp = dp->i_mount;
 	block = bp->data;
-	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
-	blp = XFS_DIR2_BLOCK_LEAF_P(btp);
+	btp = xfs_dir2_block_tail_p(mp, block);
+	blp = xfs_dir2_block_leaf_p(btp);
 	/*
 	 * Point to the data entry using the leaf entry.
 	 */
 	dep = (xfs_dir2_data_entry_t *)
-	      ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(blp[ent].address)));
+	      ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
 	/*
 	 * Mark the data entry's space free.
 	 */
 	needlog = needscan = 0;
 	xfs_dir2_data_make_free(tp, bp,
 		(xfs_dir2_data_aoff_t)((char *)dep - (char *)block),
-		XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan);
+		xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
 	/*
 	 * Fix up the block tail.
 	 */
@@ -843,13 +843,13 @@ xfs_dir2_block_replace(
 	dp = args->dp;
 	mp = dp->i_mount;
 	block = bp->data;
-	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
-	blp = XFS_DIR2_BLOCK_LEAF_P(btp);
+	btp = xfs_dir2_block_tail_p(mp, block);
+	blp = xfs_dir2_block_leaf_p(btp);
 	/*
 	 * Point to the data entry we need to change.
 	 */
 	dep = (xfs_dir2_data_entry_t *)
-	      ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(blp[ent].address)));
+	      ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
 	ASSERT(be64_to_cpu(dep->inumber) != args->inumber);
 	/*
 	 * Change the inode number to the new value.
@@ -912,7 +912,7 @@ xfs_dir2_leaf_to_block(
 	mp = dp->i_mount;
 	leaf = lbp->data;
 	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
-	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
 	/*
 	 * If there are data blocks other than the first one, take this
 	 * opportunity to remove trailing empty data blocks that may have
@@ -920,7 +920,7 @@ xfs_dir2_leaf_to_block(
 	 * These will show up in the leaf bests table.
 	 */
 	while (dp->i_d.di_size > mp->m_dirblksize) {
-		bestsp = XFS_DIR2_LEAF_BESTS_P(ltp);
+		bestsp = xfs_dir2_leaf_bests_p(ltp);
 		if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) ==
 		    mp->m_dirblksize - (uint)sizeof(block->hdr)) {
 			if ((error =
@@ -974,14 +974,14 @@ xfs_dir2_leaf_to_block(
 	/*
 	 * Initialize the block tail.
 	 */
-	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+	btp = xfs_dir2_block_tail_p(mp, block);
 	btp->count = cpu_to_be32(be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale));
 	btp->stale = 0;
 	xfs_dir2_block_log_tail(tp, dbp);
 	/*
 	 * Initialize the block leaf area.  We compact out stale entries.
 	 */
-	lep = XFS_DIR2_BLOCK_LEAF_P(btp);
+	lep = xfs_dir2_block_leaf_p(btp);
 	for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) {
 		if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR)
 			continue;
@@ -1067,7 +1067,7 @@ xfs_dir2_sf_to_block(
 	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
 	ASSERT(dp->i_df.if_u1.if_data != NULL);
 	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-	ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
 	/*
 	 * Copy the directory into the stack buffer.
 	 * Then pitch the incore inode data so we can make extents.
@@ -1119,10 +1119,10 @@ xfs_dir2_sf_to_block(
 	/*
 	 * Fill in the tail.
 	 */
-	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+	btp = xfs_dir2_block_tail_p(mp, block);
 	btp->count = cpu_to_be32(sfp->hdr.count + 2);	/* ., .. */
 	btp->stale = 0;
-	blp = XFS_DIR2_BLOCK_LEAF_P(btp);
+	blp = xfs_dir2_block_leaf_p(btp);
 	endoffset = (uint)((char *)blp - (char *)block);
 	/*
 	 * Remove the freespace, we'll manage it.
@@ -1138,25 +1138,25 @@ xfs_dir2_sf_to_block(
 	dep->inumber = cpu_to_be64(dp->i_ino);
 	dep->namelen = 1;
 	dep->name[0] = '.';
-	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+	tagp = xfs_dir2_data_entry_tag_p(dep);
 	*tagp = cpu_to_be16((char *)dep - (char *)block);
 	xfs_dir2_data_log_entry(tp, bp, dep);
 	blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot);
-	blp[0].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp,
+	blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
 				(char *)dep - (char *)block));
 	/*
 	 * Create entry for ..
 	 */
 	dep = (xfs_dir2_data_entry_t *)
 		((char *)block + XFS_DIR2_DATA_DOTDOT_OFFSET);
-	dep->inumber = cpu_to_be64(XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent));
+	dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent));
 	dep->namelen = 2;
 	dep->name[0] = dep->name[1] = '.';
-	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+	tagp = xfs_dir2_data_entry_tag_p(dep);
 	*tagp = cpu_to_be16((char *)dep - (char *)block);
 	xfs_dir2_data_log_entry(tp, bp, dep);
 	blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
-	blp[1].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp,
+	blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
 				(char *)dep - (char *)block));
 	offset = XFS_DIR2_DATA_FIRST_OFFSET;
 	/*
@@ -1165,7 +1165,7 @@ xfs_dir2_sf_to_block(
 	if ((i = 0) == sfp->hdr.count)
 		sfep = NULL;
 	else
-		sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+		sfep = xfs_dir2_sf_firstentry(sfp);
 	/*
 	 * Need to preserve the existing offset values in the sf directory.
 	 * Insert holes (unused entries) where necessary.
@@ -1177,7 +1177,7 @@ xfs_dir2_sf_to_block(
 		if (sfep == NULL)
 			newoffset = endoffset;
 		else
-			newoffset = XFS_DIR2_SF_GET_OFFSET(sfep);
+			newoffset = xfs_dir2_sf_get_offset(sfep);
 		/*
 		 * There should be a hole here, make one.
 		 */
@@ -1186,7 +1186,7 @@ xfs_dir2_sf_to_block(
 			      ((char *)block + offset);
 			dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
 			dup->length = cpu_to_be16(newoffset - offset);
-			*XFS_DIR2_DATA_UNUSED_TAG_P(dup) = cpu_to_be16(
+			*xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16(
 				((char *)dup - (char *)block));
 			xfs_dir2_data_log_unused(tp, bp, dup);
 			(void)xfs_dir2_data_freeinsert((xfs_dir2_data_t *)block,
@@ -1198,22 +1198,22 @@ xfs_dir2_sf_to_block(
 		 * Copy a real entry.
 		 */
 		dep = (xfs_dir2_data_entry_t *)((char *)block + newoffset);
-		dep->inumber = cpu_to_be64(XFS_DIR2_SF_GET_INUMBER(sfp,
-				XFS_DIR2_SF_INUMBERP(sfep)));
+		dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp,
+				xfs_dir2_sf_inumberp(sfep)));
 		dep->namelen = sfep->namelen;
 		memcpy(dep->name, sfep->name, dep->namelen);
-		tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+		tagp = xfs_dir2_data_entry_tag_p(dep);
 		*tagp = cpu_to_be16((char *)dep - (char *)block);
 		xfs_dir2_data_log_entry(tp, bp, dep);
 		blp[2 + i].hashval = cpu_to_be32(xfs_da_hashname(
 					(char *)sfep->name, sfep->namelen));
-		blp[2 + i].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp,
+		blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
 						 (char *)dep - (char *)block));
 		offset = (int)((char *)(tagp + 1) - (char *)block);
 		if (++i == sfp->hdr.count)
 			sfep = NULL;
 		else
-			sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep);
+			sfep = xfs_dir2_sf_nextentry(sfp, sfep);
 	}
 	/* Done with the temporary buffer */
 	kmem_free(buf, buf_len);
diff --git a/fs/xfs/xfs_dir2_block.h b/fs/xfs/xfs_dir2_block.h
index 6722effd0b2..e7c2606161e 100644
--- a/fs/xfs/xfs_dir2_block.h
+++ b/fs/xfs/xfs_dir2_block.h
@@ -60,7 +60,6 @@ typedef struct xfs_dir2_block {
 /*
  * Pointer to the leaf header embedded in a data block (1-block format)
  */
-#define	XFS_DIR2_BLOCK_TAIL_P(mp,block)	xfs_dir2_block_tail_p(mp,block)
 static inline xfs_dir2_block_tail_t *
 xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block)
 {
@@ -71,7 +70,6 @@ xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block)
 /*
  * Pointer to the leaf entries embedded in a data block (1-block format)
  */
-#define	XFS_DIR2_BLOCK_LEAF_P(btp)	xfs_dir2_block_leaf_p(btp)
 static inline struct xfs_dir2_leaf_entry *
 xfs_dir2_block_leaf_p(xfs_dir2_block_tail_t *btp)
 {
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index c211c37ef67..7ebe295bd6d 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -72,8 +72,8 @@ xfs_dir2_data_check(
 	bf = d->hdr.bestfree;
 	p = (char *)d->u;
 	if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
-		btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d);
-		lep = XFS_DIR2_BLOCK_LEAF_P(btp);
+		btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
+		lep = xfs_dir2_block_leaf_p(btp);
 		endp = (char *)lep;
 	} else
 		endp = (char *)d + mp->m_dirblksize;
@@ -107,7 +107,7 @@ xfs_dir2_data_check(
 		 */
 		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
 			ASSERT(lastfree == 0);
-			ASSERT(be16_to_cpu(*XFS_DIR2_DATA_UNUSED_TAG_P(dup)) ==
+			ASSERT(be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) ==
 			       (char *)dup - (char *)d);
 			dfp = xfs_dir2_data_freefind(d, dup);
 			if (dfp) {
@@ -131,12 +131,12 @@ xfs_dir2_data_check(
 		dep = (xfs_dir2_data_entry_t *)p;
 		ASSERT(dep->namelen != 0);
 		ASSERT(xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)) == 0);
-		ASSERT(be16_to_cpu(*XFS_DIR2_DATA_ENTRY_TAG_P(dep)) ==
+		ASSERT(be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)) ==
 		       (char *)dep - (char *)d);
 		count++;
 		lastfree = 0;
 		if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
-			addr = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
+			addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
 				(xfs_dir2_data_aoff_t)
 				((char *)dep - (char *)d));
 			hash = xfs_da_hashname((char *)dep->name, dep->namelen);
@@ -147,7 +147,7 @@ xfs_dir2_data_check(
 			}
 			ASSERT(i < be32_to_cpu(btp->count));
 		}
-		p += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+		p += xfs_dir2_data_entsize(dep->namelen);
 	}
 	/*
 	 * Need to have seen all the entries and all the bestfree slots.
@@ -346,8 +346,8 @@ xfs_dir2_data_freescan(
 	 */
 	p = (char *)d->u;
 	if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
-		btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d);
-		endp = (char *)XFS_DIR2_BLOCK_LEAF_P(btp);
+		btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
+		endp = (char *)xfs_dir2_block_leaf_p(btp);
 	} else
 		endp = (char *)d + mp->m_dirblksize;
 	/*
@@ -360,7 +360,7 @@ xfs_dir2_data_freescan(
 		 */
 		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
 			ASSERT((char *)dup - (char *)d ==
-			       be16_to_cpu(*XFS_DIR2_DATA_UNUSED_TAG_P(dup)));
+			       be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
 			xfs_dir2_data_freeinsert(d, dup, loghead);
 			p += be16_to_cpu(dup->length);
 		}
@@ -370,8 +370,8 @@ xfs_dir2_data_freescan(
 		else {
 			dep = (xfs_dir2_data_entry_t *)p;
 			ASSERT((char *)dep - (char *)d ==
-			       be16_to_cpu(*XFS_DIR2_DATA_ENTRY_TAG_P(dep)));
-			p += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+			       be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)));
+			p += xfs_dir2_data_entsize(dep->namelen);
 		}
 	}
 }
@@ -402,7 +402,7 @@ xfs_dir2_data_init(
 	/*
 	 * Get the buffer set up for the block.
 	 */
-	error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, blkno), -1, &bp,
+	error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, blkno), -1, &bp,
 		XFS_DATA_FORK);
 	if (error) {
 		return error;
@@ -427,7 +427,7 @@ xfs_dir2_data_init(
 	t=mp->m_dirblksize - (uint)sizeof(d->hdr);
 	d->hdr.bestfree[0].length = cpu_to_be16(t);
 	dup->length = cpu_to_be16(t);
-	*XFS_DIR2_DATA_UNUSED_TAG_P(dup) = cpu_to_be16((char *)dup - (char *)d);
+	*xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)d);
 	/*
 	 * Log it and return it.
 	 */
@@ -452,7 +452,7 @@ xfs_dir2_data_log_entry(
 	ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
 	       be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
 	xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)d),
-		(uint)((char *)(XFS_DIR2_DATA_ENTRY_TAG_P(dep) + 1) -
+		(uint)((char *)(xfs_dir2_data_entry_tag_p(dep) + 1) -
 		       (char *)d - 1));
 }
 
@@ -497,8 +497,8 @@ xfs_dir2_data_log_unused(
 	 * Log the end (tag) of the unused entry.
 	 */
 	xfs_da_log_buf(tp, bp,
-		(uint)((char *)XFS_DIR2_DATA_UNUSED_TAG_P(dup) - (char *)d),
-		(uint)((char *)XFS_DIR2_DATA_UNUSED_TAG_P(dup) - (char *)d +
+		(uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d),
+		(uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d +
 		       sizeof(xfs_dir2_data_off_t) - 1));
 }
 
@@ -535,8 +535,8 @@ xfs_dir2_data_make_free(
 		xfs_dir2_block_tail_t	*btp;	/* block tail */
 
 		ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
-		btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d);
-		endptr = (char *)XFS_DIR2_BLOCK_LEAF_P(btp);
+		btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
+		endptr = (char *)xfs_dir2_block_leaf_p(btp);
 	}
 	/*
 	 * If this isn't the start of the block, then back up to
@@ -587,7 +587,7 @@ xfs_dir2_data_make_free(
 		 * Fix up the new big freespace.
 		 */
 		be16_add(&prevdup->length, len + be16_to_cpu(postdup->length));
-		*XFS_DIR2_DATA_UNUSED_TAG_P(prevdup) =
+		*xfs_dir2_data_unused_tag_p(prevdup) =
 			cpu_to_be16((char *)prevdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, prevdup);
 		if (!needscan) {
@@ -621,7 +621,7 @@ xfs_dir2_data_make_free(
 	else if (prevdup) {
 		dfp = xfs_dir2_data_freefind(d, prevdup);
 		be16_add(&prevdup->length, len);
-		*XFS_DIR2_DATA_UNUSED_TAG_P(prevdup) =
+		*xfs_dir2_data_unused_tag_p(prevdup) =
 			cpu_to_be16((char *)prevdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, prevdup);
 		/*
@@ -649,7 +649,7 @@ xfs_dir2_data_make_free(
 		newdup = (xfs_dir2_data_unused_t *)((char *)d + offset);
 		newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
 		newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length));
-		*XFS_DIR2_DATA_UNUSED_TAG_P(newdup) =
+		*xfs_dir2_data_unused_tag_p(newdup) =
 			cpu_to_be16((char *)newdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, newdup);
 		/*
@@ -676,7 +676,7 @@ xfs_dir2_data_make_free(
 		newdup = (xfs_dir2_data_unused_t *)((char *)d + offset);
 		newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
 		newdup->length = cpu_to_be16(len);
-		*XFS_DIR2_DATA_UNUSED_TAG_P(newdup) =
+		*xfs_dir2_data_unused_tag_p(newdup) =
 			cpu_to_be16((char *)newdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, newdup);
 		(void)xfs_dir2_data_freeinsert(d, newdup, needlogp);
@@ -712,7 +712,7 @@ xfs_dir2_data_use_free(
 	ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG);
 	ASSERT(offset >= (char *)dup - (char *)d);
 	ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)d);
-	ASSERT((char *)dup - (char *)d == be16_to_cpu(*XFS_DIR2_DATA_UNUSED_TAG_P(dup)));
+	ASSERT((char *)dup - (char *)d == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
 	/*
 	 * Look up the entry in the bestfree table.
 	 */
@@ -745,7 +745,7 @@ xfs_dir2_data_use_free(
 		newdup = (xfs_dir2_data_unused_t *)((char *)d + offset + len);
 		newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
 		newdup->length = cpu_to_be16(oldlen - len);
-		*XFS_DIR2_DATA_UNUSED_TAG_P(newdup) =
+		*xfs_dir2_data_unused_tag_p(newdup) =
 			cpu_to_be16((char *)newdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, newdup);
 		/*
@@ -772,7 +772,7 @@ xfs_dir2_data_use_free(
 	else if (matchback) {
 		newdup = dup;
 		newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup);
-		*XFS_DIR2_DATA_UNUSED_TAG_P(newdup) =
+		*xfs_dir2_data_unused_tag_p(newdup) =
 			cpu_to_be16((char *)newdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, newdup);
 		/*
@@ -799,13 +799,13 @@ xfs_dir2_data_use_free(
 	else {
 		newdup = dup;
 		newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup);
-		*XFS_DIR2_DATA_UNUSED_TAG_P(newdup) =
+		*xfs_dir2_data_unused_tag_p(newdup) =
 			cpu_to_be16((char *)newdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, newdup);
 		newdup2 = (xfs_dir2_data_unused_t *)((char *)d + offset + len);
 		newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
 		newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length));
-		*XFS_DIR2_DATA_UNUSED_TAG_P(newdup2) =
+		*xfs_dir2_data_unused_tag_p(newdup2) =
 			cpu_to_be16((char *)newdup2 - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, newdup2);
 		/*
diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h
index c94c9099cfb..b816e025273 100644
--- a/fs/xfs/xfs_dir2_data.h
+++ b/fs/xfs/xfs_dir2_data.h
@@ -44,7 +44,7 @@ struct xfs_trans;
 #define	XFS_DIR2_DATA_SPACE	0
 #define	XFS_DIR2_DATA_OFFSET	(XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE)
 #define	XFS_DIR2_DATA_FIRSTDB(mp)	\
-	XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_DATA_OFFSET)
+	xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET)
 
 /*
  * Offsets of . and .. in data space (always block 0)
@@ -52,9 +52,9 @@ struct xfs_trans;
 #define	XFS_DIR2_DATA_DOT_OFFSET	\
 	((xfs_dir2_data_aoff_t)sizeof(xfs_dir2_data_hdr_t))
 #define	XFS_DIR2_DATA_DOTDOT_OFFSET	\
-	(XFS_DIR2_DATA_DOT_OFFSET + XFS_DIR2_DATA_ENTSIZE(1))
+	(XFS_DIR2_DATA_DOT_OFFSET + xfs_dir2_data_entsize(1))
 #define	XFS_DIR2_DATA_FIRST_OFFSET		\
-	(XFS_DIR2_DATA_DOTDOT_OFFSET + XFS_DIR2_DATA_ENTSIZE(2))
+	(XFS_DIR2_DATA_DOTDOT_OFFSET + xfs_dir2_data_entsize(2))
 
 /*
  * Structures.
@@ -123,7 +123,6 @@ typedef struct xfs_dir2_data {
 /*
  * Size of a data entry.
  */
-#define XFS_DIR2_DATA_ENTSIZE(n)	xfs_dir2_data_entsize(n)
 static inline int xfs_dir2_data_entsize(int n)
 {
 	return (int)roundup(offsetof(xfs_dir2_data_entry_t, name[0]) + (n) + \
@@ -133,19 +132,16 @@ static inline int xfs_dir2_data_entsize(int n)
 /*
  * Pointer to an entry's tag word.
  */
-#define	XFS_DIR2_DATA_ENTRY_TAG_P(dep)	xfs_dir2_data_entry_tag_p(dep)
 static inline __be16 *
 xfs_dir2_data_entry_tag_p(xfs_dir2_data_entry_t *dep)
 {
 	return (__be16 *)((char *)dep +
-		XFS_DIR2_DATA_ENTSIZE(dep->namelen) - sizeof(__be16));
+		xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16));
 }
 
 /*
  * Pointer to a freespace's tag word.
  */
-#define	XFS_DIR2_DATA_UNUSED_TAG_P(dup) \
-	xfs_dir2_data_unused_tag_p(dup)
 static inline __be16 *
 xfs_dir2_data_unused_tag_p(xfs_dir2_data_unused_t *dup)
 {
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index db14ea71459..1b73c9ad646 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -92,7 +92,7 @@ xfs_dir2_block_to_leaf(
 	if ((error = xfs_da_grow_inode(args, &blkno))) {
 		return error;
 	}
-	ldb = XFS_DIR2_DA_TO_DB(mp, blkno);
+	ldb = xfs_dir2_da_to_db(mp, blkno);
 	ASSERT(ldb == XFS_DIR2_LEAF_FIRSTDB(mp));
 	/*
 	 * Initialize the leaf block, get a buffer for it.
@@ -104,8 +104,8 @@ xfs_dir2_block_to_leaf(
 	leaf = lbp->data;
 	block = dbp->data;
 	xfs_dir2_data_check(dp, dbp);
-	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
-	blp = XFS_DIR2_BLOCK_LEAF_P(btp);
+	btp = xfs_dir2_block_tail_p(mp, block);
+	blp = xfs_dir2_block_leaf_p(btp);
 	/*
 	 * Set the counts in the leaf header.
 	 */
@@ -137,9 +137,9 @@ xfs_dir2_block_to_leaf(
 	/*
 	 * Set up leaf tail and bests table.
 	 */
-	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
 	ltp->bestcount = cpu_to_be32(1);
-	bestsp = XFS_DIR2_LEAF_BESTS_P(ltp);
+	bestsp = xfs_dir2_leaf_bests_p(ltp);
 	bestsp[0] =  block->hdr.bestfree[0].length;
 	/*
 	 * Log the data header and leaf bests table.
@@ -209,9 +209,9 @@ xfs_dir2_leaf_addname(
 	 */
 	index = xfs_dir2_leaf_search_hash(args, lbp);
 	leaf = lbp->data;
-	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
-	bestsp = XFS_DIR2_LEAF_BESTS_P(ltp);
-	length = XFS_DIR2_DATA_ENTSIZE(args->namelen);
+	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
+	bestsp = xfs_dir2_leaf_bests_p(ltp);
+	length = xfs_dir2_data_entsize(args->namelen);
 	/*
 	 * See if there are any entries with the same hash value
 	 * and space in their block for the new entry.
@@ -223,7 +223,7 @@ xfs_dir2_leaf_addname(
 	     index++, lep++) {
 		if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
 			continue;
-		i = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address));
+		i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
 		ASSERT(i < be32_to_cpu(ltp->bestcount));
 		ASSERT(be16_to_cpu(bestsp[i]) != NULLDATAOFF);
 		if (be16_to_cpu(bestsp[i]) >= length) {
@@ -378,7 +378,7 @@ xfs_dir2_leaf_addname(
 	 */
 	else {
 		if ((error =
-		    xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, use_block),
+		    xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, use_block),
 			    -1, &dbp, XFS_DATA_FORK))) {
 			xfs_da_brelse(tp, lbp);
 			return error;
@@ -407,7 +407,7 @@ xfs_dir2_leaf_addname(
 	dep->inumber = cpu_to_be64(args->inumber);
 	dep->namelen = args->namelen;
 	memcpy(dep->name, args->name, dep->namelen);
-	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+	tagp = xfs_dir2_data_entry_tag_p(dep);
 	*tagp = cpu_to_be16((char *)dep - (char *)data);
 	/*
 	 * Need to scan fix up the bestfree table.
@@ -529,7 +529,7 @@ xfs_dir2_leaf_addname(
 	 * Fill in the new leaf entry.
 	 */
 	lep->hashval = cpu_to_be32(args->hashval);
-	lep->address = cpu_to_be32(XFS_DIR2_DB_OFF_TO_DATAPTR(mp, use_block,
+	lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, use_block,
 				be16_to_cpu(*tagp)));
 	/*
 	 * Log the leaf fields and give up the buffers.
@@ -567,13 +567,13 @@ xfs_dir2_leaf_check(
 	 * Should factor in the size of the bests table as well.
 	 * We can deduce a value for that from di_size.
 	 */
-	ASSERT(be16_to_cpu(leaf->hdr.count) <= XFS_DIR2_MAX_LEAF_ENTS(mp));
-	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+	ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp));
+	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
 	/*
 	 * Leaves and bests don't overlap.
 	 */
 	ASSERT((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <=
-	       (char *)XFS_DIR2_LEAF_BESTS_P(ltp));
+	       (char *)xfs_dir2_leaf_bests_p(ltp));
 	/*
 	 * Check hash value order, count stale entries.
 	 */
@@ -815,12 +815,12 @@ xfs_dir2_leaf_getdents(
 	 * Inside the loop we keep the main offset value as a byte offset
 	 * in the directory file.
 	 */
-	curoff = XFS_DIR2_DATAPTR_TO_BYTE(mp, uio->uio_offset);
+	curoff = xfs_dir2_dataptr_to_byte(mp, uio->uio_offset);
 	/*
 	 * Force this conversion through db so we truncate the offset
 	 * down to get the start of the data block.
 	 */
-	map_off = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_BYTE_TO_DB(mp, curoff));
+	map_off = xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, curoff));
 	/*
 	 * Loop over directory entries until we reach the end offset.
 	 * Get more blocks and readahead as necessary.
@@ -870,7 +870,7 @@ xfs_dir2_leaf_getdents(
 			 */
 			if (1 + ra_want > map_blocks &&
 			    map_off <
-			    XFS_DIR2_BYTE_TO_DA(mp, XFS_DIR2_LEAF_OFFSET)) {
+			    xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET)) {
 				/*
 				 * Get more bmaps, fill in after the ones
 				 * we already have in the table.
@@ -878,7 +878,7 @@ xfs_dir2_leaf_getdents(
 				nmap = map_size - map_valid;
 				error = xfs_bmapi(tp, dp,
 					map_off,
-					XFS_DIR2_BYTE_TO_DA(mp,
+					xfs_dir2_byte_to_da(mp,
 						XFS_DIR2_LEAF_OFFSET) - map_off,
 					XFS_BMAPI_METADATA, NULL, 0,
 					&map[map_valid], &nmap, NULL, NULL);
@@ -903,7 +903,7 @@ xfs_dir2_leaf_getdents(
 					map[map_valid + nmap - 1].br_blockcount;
 				else
 					map_off =
-						XFS_DIR2_BYTE_TO_DA(mp,
+						xfs_dir2_byte_to_da(mp,
 							XFS_DIR2_LEAF_OFFSET);
 				/*
 				 * Look for holes in the mapping, and
@@ -931,14 +931,14 @@ xfs_dir2_leaf_getdents(
 			 * No valid mappings, so no more data blocks.
 			 */
 			if (!map_valid) {
-				curoff = XFS_DIR2_DA_TO_BYTE(mp, map_off);
+				curoff = xfs_dir2_da_to_byte(mp, map_off);
 				break;
 			}
 			/*
 			 * Read the directory block starting at the first
 			 * mapping.
 			 */
-			curdb = XFS_DIR2_DA_TO_DB(mp, map->br_startoff);
+			curdb = xfs_dir2_da_to_db(mp, map->br_startoff);
 			error = xfs_da_read_buf(tp, dp, map->br_startoff,
 				map->br_blockcount >= mp->m_dirblkfsbs ?
 				    XFS_FSB_TO_DADDR(mp, map->br_startblock) :
@@ -1014,7 +1014,7 @@ xfs_dir2_leaf_getdents(
 			/*
 			 * Having done a read, we need to set a new offset.
 			 */
-			newoff = XFS_DIR2_DB_OFF_TO_BYTE(mp, curdb, 0);
+			newoff = xfs_dir2_db_off_to_byte(mp, curdb, 0);
 			/*
 			 * Start of the current block.
 			 */
@@ -1024,7 +1024,7 @@ xfs_dir2_leaf_getdents(
 			 * Make sure we're in the right block.
 			 */
 			else if (curoff > newoff)
-				ASSERT(XFS_DIR2_BYTE_TO_DB(mp, curoff) ==
+				ASSERT(xfs_dir2_byte_to_db(mp, curoff) ==
 				       curdb);
 			data = bp->data;
 			xfs_dir2_data_check(dp, bp);
@@ -1032,7 +1032,7 @@ xfs_dir2_leaf_getdents(
 			 * Find our position in the block.
 			 */
 			ptr = (char *)&data->u;
-			byteoff = XFS_DIR2_BYTE_TO_OFF(mp, curoff);
+			byteoff = xfs_dir2_byte_to_off(mp, curoff);
 			/*
 			 * Skip past the header.
 			 */
@@ -1054,15 +1054,15 @@ xfs_dir2_leaf_getdents(
 					}
 					dep = (xfs_dir2_data_entry_t *)ptr;
 					length =
-					   XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+					   xfs_dir2_data_entsize(dep->namelen);
 					ptr += length;
 				}
 				/*
 				 * Now set our real offset.
 				 */
 				curoff =
-					XFS_DIR2_DB_OFF_TO_BYTE(mp,
-					    XFS_DIR2_BYTE_TO_DB(mp, curoff),
+					xfs_dir2_db_off_to_byte(mp,
+					    xfs_dir2_byte_to_db(mp, curoff),
 					    (char *)ptr - (char *)data);
 				if (ptr >= (char *)data + mp->m_dirblksize) {
 					continue;
@@ -1091,9 +1091,9 @@ xfs_dir2_leaf_getdents(
 
 		p->namelen = dep->namelen;
 
-		length = XFS_DIR2_DATA_ENTSIZE(p->namelen);
+		length = xfs_dir2_data_entsize(p->namelen);
 
-		p->cook = XFS_DIR2_BYTE_TO_DATAPTR(mp, curoff + length);
+		p->cook = xfs_dir2_byte_to_dataptr(mp, curoff + length);
 
 		p->ino = be64_to_cpu(dep->inumber);
 #if XFS_BIG_INUMS
@@ -1121,10 +1121,10 @@ xfs_dir2_leaf_getdents(
 	 * All done.  Set output offset value to current offset.
 	 */
 	*eofp = eof;
-	if (curoff > XFS_DIR2_DATAPTR_TO_BYTE(mp, XFS_DIR2_MAX_DATAPTR))
+	if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR))
 		uio->uio_offset = XFS_DIR2_MAX_DATAPTR;
 	else
-		uio->uio_offset = XFS_DIR2_BYTE_TO_DATAPTR(mp, curoff);
+		uio->uio_offset = xfs_dir2_byte_to_dataptr(mp, curoff);
 	kmem_free(map, map_size * sizeof(*map));
 	kmem_free(p, sizeof(*p));
 	if (bp)
@@ -1159,7 +1159,7 @@ xfs_dir2_leaf_init(
 	/*
 	 * Get the buffer for the block.
 	 */
-	error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, bno), -1, &bp,
+	error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, bno), -1, &bp,
 		XFS_DATA_FORK);
 	if (error) {
 		return error;
@@ -1181,7 +1181,7 @@ xfs_dir2_leaf_init(
 	 * the block.
 	 */
 	if (magic == XFS_DIR2_LEAF1_MAGIC) {
-		ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+		ltp = xfs_dir2_leaf_tail_p(mp, leaf);
 		ltp->bestcount = 0;
 		xfs_dir2_leaf_log_tail(tp, bp);
 	}
@@ -1206,9 +1206,9 @@ xfs_dir2_leaf_log_bests(
 
 	leaf = bp->data;
 	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
-	ltp = XFS_DIR2_LEAF_TAIL_P(tp->t_mountp, leaf);
-	firstb = XFS_DIR2_LEAF_BESTS_P(ltp) + first;
-	lastb = XFS_DIR2_LEAF_BESTS_P(ltp) + last;
+	ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf);
+	firstb = xfs_dir2_leaf_bests_p(ltp) + first;
+	lastb = xfs_dir2_leaf_bests_p(ltp) + last;
 	xfs_da_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf),
 		(uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1));
 }
@@ -1268,7 +1268,7 @@ xfs_dir2_leaf_log_tail(
 	mp = tp->t_mountp;
 	leaf = bp->data;
 	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
-	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
 	xfs_da_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf),
 		(uint)(mp->m_dirblksize - 1));
 }
@@ -1312,7 +1312,7 @@ xfs_dir2_leaf_lookup(
 	 */
 	dep = (xfs_dir2_data_entry_t *)
 	      ((char *)dbp->data +
-	       XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, be32_to_cpu(lep->address)));
+	       xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
 	/*
 	 * Return the found inode number.
 	 */
@@ -1381,7 +1381,7 @@ xfs_dir2_leaf_lookup_int(
 		/*
 		 * Get the new data block number.
 		 */
-		newdb = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address));
+		newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
 		/*
 		 * If it's not the same as the old data block number,
 		 * need to pitch the old one and read the new one.
@@ -1391,7 +1391,7 @@ xfs_dir2_leaf_lookup_int(
 				xfs_da_brelse(tp, dbp);
 			if ((error =
 			    xfs_da_read_buf(tp, dp,
-				    XFS_DIR2_DB_TO_DA(mp, newdb), -1, &dbp,
+				    xfs_dir2_db_to_da(mp, newdb), -1, &dbp,
 				    XFS_DATA_FORK))) {
 				xfs_da_brelse(tp, lbp);
 				return error;
@@ -1404,7 +1404,7 @@ xfs_dir2_leaf_lookup_int(
 		 */
 		dep = (xfs_dir2_data_entry_t *)
 		      ((char *)dbp->data +
-		       XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address)));
+		       xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
 		/*
 		 * If it matches then return it.
 		 */
@@ -1469,20 +1469,20 @@ xfs_dir2_leaf_removename(
 	 * Point to the leaf entry, use that to point to the data entry.
 	 */
 	lep = &leaf->ents[index];
-	db = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address));
+	db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
 	dep = (xfs_dir2_data_entry_t *)
-	      ((char *)data + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address)));
+	      ((char *)data + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
 	needscan = needlog = 0;
 	oldbest = be16_to_cpu(data->hdr.bestfree[0].length);
-	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
-	bestsp = XFS_DIR2_LEAF_BESTS_P(ltp);
+	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
+	bestsp = xfs_dir2_leaf_bests_p(ltp);
 	ASSERT(be16_to_cpu(bestsp[db]) == oldbest);
 	/*
 	 * Mark the former data entry unused.
 	 */
 	xfs_dir2_data_make_free(tp, dbp,
 		(xfs_dir2_data_aoff_t)((char *)dep - (char *)data),
-		XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan);
+		xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
 	/*
 	 * We just mark the leaf entry stale by putting a null in it.
 	 */
@@ -1602,7 +1602,7 @@ xfs_dir2_leaf_replace(
 	 */
 	dep = (xfs_dir2_data_entry_t *)
 	      ((char *)dbp->data +
-	       XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, be32_to_cpu(lep->address)));
+	       xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
 	ASSERT(args->inumber != be64_to_cpu(dep->inumber));
 	/*
 	 * Put the new inode number in, log it.
@@ -1698,7 +1698,7 @@ xfs_dir2_leaf_trim_data(
 	/*
 	 * Read the offending data block.  We need its buffer.
 	 */
-	if ((error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, db), -1, &dbp,
+	if ((error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, db), -1, &dbp,
 			XFS_DATA_FORK))) {
 		return error;
 	}
@@ -1712,7 +1712,7 @@ xfs_dir2_leaf_trim_data(
 	 */
 
 	leaf = lbp->data;
-	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
 	ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) ==
 	       mp->m_dirblksize - (uint)sizeof(data->hdr));
 	ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);
@@ -1727,7 +1727,7 @@ xfs_dir2_leaf_trim_data(
 	/*
 	 * Eliminate the last bests entry from the table.
 	 */
-	bestsp = XFS_DIR2_LEAF_BESTS_P(ltp);
+	bestsp = xfs_dir2_leaf_bests_p(ltp);
 	be32_add(&ltp->bestcount, -1);
 	memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp));
 	xfs_dir2_leaf_log_tail(tp, lbp);
@@ -1838,12 +1838,12 @@ xfs_dir2_node_to_leaf(
 	/*
 	 * Set up the leaf tail from the freespace block.
 	 */
-	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
 	ltp->bestcount = free->hdr.nvalid;
 	/*
 	 * Set up the leaf bests table.
 	 */
-	memcpy(XFS_DIR2_LEAF_BESTS_P(ltp), free->bests,
+	memcpy(xfs_dir2_leaf_bests_p(ltp), free->bests,
 		be32_to_cpu(ltp->bestcount) * sizeof(leaf->bests[0]));
 	xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
 	xfs_dir2_leaf_log_tail(tp, lbp);
diff --git a/fs/xfs/xfs_dir2_leaf.h b/fs/xfs/xfs_dir2_leaf.h
index f57ca116241..70c97f3f815 100644
--- a/fs/xfs/xfs_dir2_leaf.h
+++ b/fs/xfs/xfs_dir2_leaf.h
@@ -32,7 +32,7 @@ struct xfs_trans;
 #define	XFS_DIR2_LEAF_SPACE	1
 #define	XFS_DIR2_LEAF_OFFSET	(XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE)
 #define	XFS_DIR2_LEAF_FIRSTDB(mp)	\
-	XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_LEAF_OFFSET)
+	xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET)
 
 /*
  * Offset in data space of a data entry.
@@ -82,7 +82,6 @@ typedef struct xfs_dir2_leaf {
  * DB blocks here are logical directory block numbers, not filesystem blocks.
  */
 
-#define	XFS_DIR2_MAX_LEAF_ENTS(mp)	xfs_dir2_max_leaf_ents(mp)
 static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp)
 {
 	return (int)(((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_leaf_hdr_t)) /
@@ -92,7 +91,6 @@ static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp)
 /*
  * Get address of the bestcount field in the single-leaf block.
  */
-#define	XFS_DIR2_LEAF_TAIL_P(mp,lp)	xfs_dir2_leaf_tail_p(mp, lp)
 static inline xfs_dir2_leaf_tail_t *
 xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp)
 {
@@ -104,7 +102,6 @@ xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp)
 /*
  * Get address of the bests array in the single-leaf block.
  */
-#define	XFS_DIR2_LEAF_BESTS_P(ltp)	xfs_dir2_leaf_bests_p(ltp)
 static inline __be16 *
 xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp)
 {
@@ -114,7 +111,6 @@ xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp)
 /*
  * Convert dataptr to byte in file space
  */
-#define	XFS_DIR2_DATAPTR_TO_BYTE(mp,dp)	xfs_dir2_dataptr_to_byte(mp, dp)
 static inline xfs_dir2_off_t
 xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
 {
@@ -124,7 +120,6 @@ xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
 /*
  * Convert byte in file space to dataptr.  It had better be aligned.
  */
-#define	XFS_DIR2_BYTE_TO_DATAPTR(mp,by)	xfs_dir2_byte_to_dataptr(mp,by)
 static inline xfs_dir2_dataptr_t
 xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by)
 {
@@ -134,7 +129,6 @@ xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by)
 /*
  * Convert byte in space to (DB) block
  */
-#define	XFS_DIR2_BYTE_TO_DB(mp,by)	xfs_dir2_byte_to_db(mp, by)
 static inline xfs_dir2_db_t
 xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by)
 {
@@ -145,17 +139,15 @@ xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by)
 /*
  * Convert dataptr to a block number
  */
-#define	XFS_DIR2_DATAPTR_TO_DB(mp,dp)	xfs_dir2_dataptr_to_db(mp, dp)
 static inline xfs_dir2_db_t
 xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
 {
-	return XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_DATAPTR_TO_BYTE(mp, dp));
+	return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp));
 }
 
 /*
  * Convert byte in space to offset in a block
  */
-#define	XFS_DIR2_BYTE_TO_OFF(mp,by)	xfs_dir2_byte_to_off(mp, by)
 static inline xfs_dir2_data_aoff_t
 xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by)
 {
@@ -166,18 +158,15 @@ xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by)
 /*
  * Convert dataptr to a byte offset in a block
  */
-#define	XFS_DIR2_DATAPTR_TO_OFF(mp,dp)	xfs_dir2_dataptr_to_off(mp, dp)
 static inline xfs_dir2_data_aoff_t
 xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
 {
-	return XFS_DIR2_BYTE_TO_OFF(mp, XFS_DIR2_DATAPTR_TO_BYTE(mp, dp));
+	return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp));
 }
 
 /*
  * Convert block and offset to byte in space
  */
-#define	XFS_DIR2_DB_OFF_TO_BYTE(mp,db,o)	\
-	xfs_dir2_db_off_to_byte(mp, db, o)
 static inline xfs_dir2_off_t
 xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db,
 			xfs_dir2_data_aoff_t o)
@@ -189,7 +178,6 @@ xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db,
 /*
  * Convert block (DB) to block (dablk)
  */
-#define	XFS_DIR2_DB_TO_DA(mp,db)	xfs_dir2_db_to_da(mp, db)
 static inline xfs_dablk_t
 xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db)
 {
@@ -199,29 +187,25 @@ xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db)
 /*
  * Convert byte in space to (DA) block
  */
-#define	XFS_DIR2_BYTE_TO_DA(mp,by)	xfs_dir2_byte_to_da(mp, by)
 static inline xfs_dablk_t
 xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by)
 {
-	return XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_BYTE_TO_DB(mp, by));
+	return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by));
 }
 
 /*
  * Convert block and offset to dataptr
  */
-#define	XFS_DIR2_DB_OFF_TO_DATAPTR(mp,db,o)	\
-	xfs_dir2_db_off_to_dataptr(mp, db, o)
 static inline xfs_dir2_dataptr_t
 xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db,
 			   xfs_dir2_data_aoff_t o)
 {
-	return XFS_DIR2_BYTE_TO_DATAPTR(mp, XFS_DIR2_DB_OFF_TO_BYTE(mp, db, o));
+	return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o));
 }
 
 /*
  * Convert block (dablk) to block (DB)
  */
-#define	XFS_DIR2_DA_TO_DB(mp,da)	xfs_dir2_da_to_db(mp, da)
 static inline xfs_dir2_db_t
 xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da)
 {
@@ -231,11 +215,10 @@ xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da)
 /*
  * Convert block (dablk) to byte offset in space
  */
-#define XFS_DIR2_DA_TO_BYTE(mp,da)	xfs_dir2_da_to_byte(mp, da)
 static inline xfs_dir2_off_t
 xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da)
 {
-	return XFS_DIR2_DB_OFF_TO_BYTE(mp, XFS_DIR2_DA_TO_DB(mp, da), 0);
+	return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0);
 }
 
 /*
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index d083c381993..91c61d9632c 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -136,14 +136,14 @@ xfs_dir2_leaf_to_node(
 	/*
 	 * Get the buffer for the new freespace block.
 	 */
-	if ((error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, fdb), -1, &fbp,
+	if ((error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, fdb), -1, &fbp,
 			XFS_DATA_FORK))) {
 		return error;
 	}
 	ASSERT(fbp != NULL);
 	free = fbp->data;
 	leaf = lbp->data;
-	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
 	/*
 	 * Initialize the freespace block header.
 	 */
@@ -155,7 +155,7 @@ xfs_dir2_leaf_to_node(
 	 * Copy freespace entries from the leaf block to the new block.
 	 * Count active entries.
 	 */
-	for (i = n = 0, from = XFS_DIR2_LEAF_BESTS_P(ltp), to = free->bests;
+	for (i = n = 0, from = xfs_dir2_leaf_bests_p(ltp), to = free->bests;
 	     i < be32_to_cpu(ltp->bestcount); i++, from++, to++) {
 		if ((off = be16_to_cpu(*from)) != NULLDATAOFF)
 			n++;
@@ -215,7 +215,7 @@ xfs_dir2_leafn_add(
 	 * a compact.
 	 */
 
-	if (be16_to_cpu(leaf->hdr.count) == XFS_DIR2_MAX_LEAF_ENTS(mp)) {
+	if (be16_to_cpu(leaf->hdr.count) == xfs_dir2_max_leaf_ents(mp)) {
 		if (!leaf->hdr.stale)
 			return XFS_ERROR(ENOSPC);
 		compact = be16_to_cpu(leaf->hdr.stale) > 1;
@@ -327,7 +327,7 @@ xfs_dir2_leafn_add(
 	 * Insert the new entry, log everything.
 	 */
 	lep->hashval = cpu_to_be32(args->hashval);
-	lep->address = cpu_to_be32(XFS_DIR2_DB_OFF_TO_DATAPTR(mp,
+	lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp,
 				args->blkno, args->index));
 	xfs_dir2_leaf_log_header(tp, bp);
 	xfs_dir2_leaf_log_ents(tp, bp, lfloglow, lfloghigh);
@@ -352,7 +352,7 @@ xfs_dir2_leafn_check(
 	leaf = bp->data;
 	mp = dp->i_mount;
 	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
-	ASSERT(be16_to_cpu(leaf->hdr.count) <= XFS_DIR2_MAX_LEAF_ENTS(mp));
+	ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp));
 	for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) {
 		if (i + 1 < be16_to_cpu(leaf->hdr.count)) {
 			ASSERT(be32_to_cpu(leaf->ents[i].hashval) <=
@@ -440,7 +440,7 @@ xfs_dir2_leafn_lookup_int(
 	if (args->addname) {
 		curfdb = curbp ? state->extrablk.blkno : -1;
 		curdb = -1;
-		length = XFS_DIR2_DATA_ENTSIZE(args->namelen);
+		length = xfs_dir2_data_entsize(args->namelen);
 		if ((free = (curbp ? curbp->data : NULL)))
 			ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
 	}
@@ -465,7 +465,7 @@ xfs_dir2_leafn_lookup_int(
 		/*
 		 * Pull the data block number from the entry.
 		 */
-		newdb = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address));
+		newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
 		/*
 		 * For addname, we're looking for a place to put the new entry.
 		 * We want to use a data block with an entry of equal
@@ -482,7 +482,7 @@ xfs_dir2_leafn_lookup_int(
 				 * Convert the data block to the free block
 				 * holding its freespace information.
 				 */
-				newfdb = XFS_DIR2_DB_TO_FDB(mp, newdb);
+				newfdb = xfs_dir2_db_to_fdb(mp, newdb);
 				/*
 				 * If it's not the one we have in hand,
 				 * read it in.
@@ -497,7 +497,7 @@ xfs_dir2_leafn_lookup_int(
 					 * Read the free block.
 					 */
 					if ((error = xfs_da_read_buf(tp, dp,
-							XFS_DIR2_DB_TO_DA(mp,
+							xfs_dir2_db_to_da(mp,
 								newfdb),
 							-1, &curbp,
 							XFS_DATA_FORK))) {
@@ -517,7 +517,7 @@ xfs_dir2_leafn_lookup_int(
 				/*
 				 * Get the index for our entry.
 				 */
-				fi = XFS_DIR2_DB_TO_FDINDEX(mp, curdb);
+				fi = xfs_dir2_db_to_fdindex(mp, curdb);
 				/*
 				 * If it has room, return it.
 				 */
@@ -561,7 +561,7 @@ xfs_dir2_leafn_lookup_int(
 				 */
 				if ((error =
 				    xfs_da_read_buf(tp, dp,
-					    XFS_DIR2_DB_TO_DA(mp, newdb), -1,
+					    xfs_dir2_db_to_da(mp, newdb), -1,
 					    &curbp, XFS_DATA_FORK))) {
 					return error;
 				}
@@ -573,7 +573,7 @@ xfs_dir2_leafn_lookup_int(
 			 */
 			dep = (xfs_dir2_data_entry_t *)
 			      ((char *)curbp->data +
-			       XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address)));
+			       xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
 			/*
 			 * Compare the entry, return it if it matches.
 			 */
@@ -876,9 +876,9 @@ xfs_dir2_leafn_remove(
 	/*
 	 * Extract the data block and offset from the entry.
 	 */
-	db = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address));
+	db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
 	ASSERT(dblk->blkno == db);
-	off = XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address));
+	off = xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address));
 	ASSERT(dblk->index == off);
 	/*
 	 * Kill the leaf entry by marking it stale.
@@ -898,7 +898,7 @@ xfs_dir2_leafn_remove(
 	longest = be16_to_cpu(data->hdr.bestfree[0].length);
 	needlog = needscan = 0;
 	xfs_dir2_data_make_free(tp, dbp, off,
-		XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan);
+		xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
 	/*
 	 * Rescan the data block freespaces for bestfree.
 	 * Log the data block header if needed.
@@ -924,8 +924,8 @@ xfs_dir2_leafn_remove(
 		 * Convert the data block number to a free block,
 		 * read in the free block.
 		 */
-		fdb = XFS_DIR2_DB_TO_FDB(mp, db);
-		if ((error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, fdb),
+		fdb = xfs_dir2_db_to_fdb(mp, db);
+		if ((error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, fdb),
 				-1, &fbp, XFS_DATA_FORK))) {
 			return error;
 		}
@@ -937,7 +937,7 @@ xfs_dir2_leafn_remove(
 		/*
 		 * Calculate which entry we need to fix.
 		 */
-		findex = XFS_DIR2_DB_TO_FDINDEX(mp, db);
+		findex = xfs_dir2_db_to_fdindex(mp, db);
 		longest = be16_to_cpu(data->hdr.bestfree[0].length);
 		/*
 		 * If the data block is now empty we can get rid of it
@@ -1073,7 +1073,7 @@ xfs_dir2_leafn_split(
 	/*
 	 * Initialize the new leaf block.
 	 */
-	error = xfs_dir2_leaf_init(args, XFS_DIR2_DA_TO_DB(mp, blkno),
+	error = xfs_dir2_leaf_init(args, xfs_dir2_da_to_db(mp, blkno),
 		&newblk->bp, XFS_DIR2_LEAFN_MAGIC);
 	if (error) {
 		return error;
@@ -1385,7 +1385,7 @@ xfs_dir2_node_addname_int(
 	dp = args->dp;
 	mp = dp->i_mount;
 	tp = args->trans;
-	length = XFS_DIR2_DATA_ENTSIZE(args->namelen);
+	length = xfs_dir2_data_entsize(args->namelen);
 	/*
 	 * If we came in with a freespace block that means that lookup
 	 * found an entry with our hash value.  This is the freespace
@@ -1438,7 +1438,7 @@ xfs_dir2_node_addname_int(
 
 		if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK)))
 			return error;
-		lastfbno = XFS_DIR2_DA_TO_DB(mp, (xfs_dablk_t)fo);
+		lastfbno = xfs_dir2_da_to_db(mp, (xfs_dablk_t)fo);
 		fbno = ifbno;
 	}
 	/*
@@ -1474,7 +1474,7 @@ xfs_dir2_node_addname_int(
 			 * to avoid it.
 			 */
 			if ((error = xfs_da_read_buf(tp, dp,
-					XFS_DIR2_DB_TO_DA(mp, fbno), -2, &fbp,
+					xfs_dir2_db_to_da(mp, fbno), -2, &fbp,
 					XFS_DATA_FORK))) {
 				return error;
 			}
@@ -1550,9 +1550,9 @@ xfs_dir2_node_addname_int(
 		 * Get the freespace block corresponding to the data block
 		 * that was just allocated.
 		 */
-		fbno = XFS_DIR2_DB_TO_FDB(mp, dbno);
+		fbno = xfs_dir2_db_to_fdb(mp, dbno);
 		if (unlikely(error = xfs_da_read_buf(tp, dp,
-				XFS_DIR2_DB_TO_DA(mp, fbno), -2, &fbp,
+				xfs_dir2_db_to_da(mp, fbno), -2, &fbp,
 				XFS_DATA_FORK))) {
 			xfs_da_buf_done(dbp);
 			return error;
@@ -1567,14 +1567,14 @@ xfs_dir2_node_addname_int(
 				return error;
 			}
 
-			if (unlikely(XFS_DIR2_DB_TO_FDB(mp, dbno) != fbno)) {
+			if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) {
 				cmn_err(CE_ALERT,
 					"xfs_dir2_node_addname_int: dir ino "
 					"%llu needed freesp block %lld for\n"
 					"  data block %lld, got %lld\n"
 					"  ifbno %llu lastfbno %d\n",
 					(unsigned long long)dp->i_ino,
-					(long long)XFS_DIR2_DB_TO_FDB(mp, dbno),
+					(long long)xfs_dir2_db_to_fdb(mp, dbno),
 					(long long)dbno, (long long)fbno,
 					(unsigned long long)ifbno, lastfbno);
 				if (fblk) {
@@ -1598,7 +1598,7 @@ xfs_dir2_node_addname_int(
 			 * Get a buffer for the new block.
 			 */
 			if ((error = xfs_da_get_buf(tp, dp,
-						   XFS_DIR2_DB_TO_DA(mp, fbno),
+						   xfs_dir2_db_to_da(mp, fbno),
 						   -1, &fbp, XFS_DATA_FORK))) {
 				return error;
 			}
@@ -1623,7 +1623,7 @@ xfs_dir2_node_addname_int(
 		/*
 		 * Set the freespace block index from the data block number.
 		 */
-		findex = XFS_DIR2_DB_TO_FDINDEX(mp, dbno);
+		findex = xfs_dir2_db_to_fdindex(mp, dbno);
 		/*
 		 * If it's after the end of the current entries in the
 		 * freespace block, extend that table.
@@ -1669,7 +1669,7 @@ xfs_dir2_node_addname_int(
 		 * Read the data block in.
 		 */
 		if (unlikely(
-		    error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, dbno),
+		    error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, dbno),
 				-1, &dbp, XFS_DATA_FORK))) {
 			if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
 				xfs_da_buf_done(fbp);
@@ -1698,7 +1698,7 @@ xfs_dir2_node_addname_int(
 	dep->inumber = cpu_to_be64(args->inumber);
 	dep->namelen = args->namelen;
 	memcpy(dep->name, args->name, dep->namelen);
-	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+	tagp = xfs_dir2_data_entry_tag_p(dep);
 	*tagp = cpu_to_be16((char *)dep - (char *)data);
 	xfs_dir2_data_log_entry(tp, dbp, dep);
 	/*
@@ -1904,7 +1904,7 @@ xfs_dir2_node_replace(
 		ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC);
 		dep = (xfs_dir2_data_entry_t *)
 		      ((char *)data +
-		       XFS_DIR2_DATAPTR_TO_OFF(state->mp, be32_to_cpu(lep->address)));
+		       xfs_dir2_dataptr_to_off(state->mp, be32_to_cpu(lep->address)));
 		ASSERT(inum != be64_to_cpu(dep->inumber));
 		/*
 		 * Fill in the new inode number and log the entry.
@@ -1980,7 +1980,7 @@ xfs_dir2_node_trim_free(
 	 * Blow the block away.
 	 */
 	if ((error =
-	    xfs_dir2_shrink_inode(args, XFS_DIR2_DA_TO_DB(mp, (xfs_dablk_t)fo),
+	    xfs_dir2_shrink_inode(args, xfs_dir2_da_to_db(mp, (xfs_dablk_t)fo),
 		    bp))) {
 		/*
 		 * Can't fail with ENOSPC since that only happens with no
diff --git a/fs/xfs/xfs_dir2_node.h b/fs/xfs/xfs_dir2_node.h
index c7c870ee785..dde72db3d69 100644
--- a/fs/xfs/xfs_dir2_node.h
+++ b/fs/xfs/xfs_dir2_node.h
@@ -36,7 +36,7 @@ struct xfs_trans;
 #define	XFS_DIR2_FREE_SPACE	2
 #define	XFS_DIR2_FREE_OFFSET	(XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE)
 #define	XFS_DIR2_FREE_FIRSTDB(mp)	\
-	XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_FREE_OFFSET)
+	xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET)
 
 #define	XFS_DIR2_FREE_MAGIC	0x58443246	/* XD2F */
 
@@ -60,7 +60,6 @@ typedef struct xfs_dir2_free {
 /*
  * Convert data space db to the corresponding free db.
  */
-#define	XFS_DIR2_DB_TO_FDB(mp,db)	xfs_dir2_db_to_fdb(mp, db)
 static inline xfs_dir2_db_t
 xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
 {
@@ -70,7 +69,6 @@ xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
 /*
  * Convert data space db to the corresponding index in a free db.
  */
-#define	XFS_DIR2_DB_TO_FDINDEX(mp,db)	xfs_dir2_db_to_fdindex(mp, db)
 static inline int
 xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
 {
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index 0cd77b17bf9..38fc4f22b76 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -89,8 +89,8 @@ xfs_dir2_block_sfsize(
 	mp = dp->i_mount;
 
 	count = i8count = namelen = 0;
-	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
-	blp = XFS_DIR2_BLOCK_LEAF_P(btp);
+	btp = xfs_dir2_block_tail_p(mp, block);
+	blp = xfs_dir2_block_leaf_p(btp);
 
 	/*
 	 * Iterate over the block's data entries by using the leaf pointers.
@@ -102,7 +102,7 @@ xfs_dir2_block_sfsize(
 		 * Calculate the pointer to the entry at hand.
 		 */
 		dep = (xfs_dir2_data_entry_t *)
-		      ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, addr));
+		      ((char *)block + xfs_dir2_dataptr_to_off(mp, addr));
 		/*
 		 * Detect . and .., so we can special-case them.
 		 * . is not included in sf directories.
@@ -124,7 +124,7 @@ xfs_dir2_block_sfsize(
 		/*
 		 * Calculate the new size, see if we should give up yet.
 		 */
-		size = XFS_DIR2_SF_HDR_SIZE(i8count) +		/* header */
+		size = xfs_dir2_sf_hdr_size(i8count) +		/* header */
 		       count +					/* namelen */
 		       count * (uint)sizeof(xfs_dir2_sf_off_t) + /* offset */
 		       namelen +				/* name */
@@ -139,7 +139,7 @@ xfs_dir2_block_sfsize(
 	 */
 	sfhp->count = count;
 	sfhp->i8count = i8count;
-	XFS_DIR2_SF_PUT_INUMBER((xfs_dir2_sf_t *)sfhp, &parent, &sfhp->parent);
+	xfs_dir2_sf_put_inumber((xfs_dir2_sf_t *)sfhp, &parent, &sfhp->parent);
 	return size;
 }
 
@@ -199,15 +199,15 @@ xfs_dir2_block_to_sf(
 	 * Copy the header into the newly allocate local space.
 	 */
 	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-	memcpy(sfp, sfhp, XFS_DIR2_SF_HDR_SIZE(sfhp->i8count));
+	memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count));
 	dp->i_d.di_size = size;
 	/*
 	 * Set up to loop over the block's entries.
 	 */
-	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+	btp = xfs_dir2_block_tail_p(mp, block);
 	ptr = (char *)block->u;
-	endptr = (char *)XFS_DIR2_BLOCK_LEAF_P(btp);
-	sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+	endptr = (char *)xfs_dir2_block_leaf_p(btp);
+	sfep = xfs_dir2_sf_firstentry(sfp);
 	/*
 	 * Loop over the active and unused entries.
 	 * Stop when we reach the leaf/tail portion of the block.
@@ -233,22 +233,22 @@ xfs_dir2_block_to_sf(
 		else if (dep->namelen == 2 &&
 			 dep->name[0] == '.' && dep->name[1] == '.')
 			ASSERT(be64_to_cpu(dep->inumber) ==
-			       XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent));
+			       xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent));
 		/*
 		 * Normal entry, copy it into shortform.
 		 */
 		else {
 			sfep->namelen = dep->namelen;
-			XFS_DIR2_SF_PUT_OFFSET(sfep,
+			xfs_dir2_sf_put_offset(sfep,
 				(xfs_dir2_data_aoff_t)
 				((char *)dep - (char *)block));
 			memcpy(sfep->name, dep->name, dep->namelen);
 			temp = be64_to_cpu(dep->inumber);
-			XFS_DIR2_SF_PUT_INUMBER(sfp, &temp,
-				XFS_DIR2_SF_INUMBERP(sfep));
-			sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep);
+			xfs_dir2_sf_put_inumber(sfp, &temp,
+				xfs_dir2_sf_inumberp(sfep));
+			sfep = xfs_dir2_sf_nextentry(sfp, sfep);
 		}
-		ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+		ptr += xfs_dir2_data_entsize(dep->namelen);
 	}
 	ASSERT((char *)sfep - (char *)sfp == size);
 	xfs_dir2_sf_check(args);
@@ -294,11 +294,11 @@ xfs_dir2_sf_addname(
 	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
 	ASSERT(dp->i_df.if_u1.if_data != NULL);
 	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-	ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
 	/*
 	 * Compute entry (and change in) size.
 	 */
-	add_entsize = XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, args->namelen);
+	add_entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen);
 	incr_isize = add_entsize;
 	objchange = 0;
 #if XFS_BIG_INUMS
@@ -392,7 +392,7 @@ xfs_dir2_sf_addname_easy(
 	/*
 	 * Grow the in-inode space.
 	 */
-	xfs_idata_realloc(dp, XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, args->namelen),
+	xfs_idata_realloc(dp, xfs_dir2_sf_entsize_byname(sfp, args->namelen),
 		XFS_DATA_FORK);
 	/*
 	 * Need to set up again due to realloc of the inode data.
@@ -403,10 +403,10 @@ xfs_dir2_sf_addname_easy(
 	 * Fill in the new entry.
 	 */
 	sfep->namelen = args->namelen;
-	XFS_DIR2_SF_PUT_OFFSET(sfep, offset);
+	xfs_dir2_sf_put_offset(sfep, offset);
 	memcpy(sfep->name, args->name, sfep->namelen);
-	XFS_DIR2_SF_PUT_INUMBER(sfp, &args->inumber,
-		XFS_DIR2_SF_INUMBERP(sfep));
+	xfs_dir2_sf_put_inumber(sfp, &args->inumber,
+		xfs_dir2_sf_inumberp(sfep));
 	/*
 	 * Update the header and inode.
 	 */
@@ -463,14 +463,14 @@ xfs_dir2_sf_addname_hard(
 	 * If it's going to end up at the end then oldsfep will point there.
 	 */
 	for (offset = XFS_DIR2_DATA_FIRST_OFFSET,
-	      oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp),
-	      add_datasize = XFS_DIR2_DATA_ENTSIZE(args->namelen),
+	      oldsfep = xfs_dir2_sf_firstentry(oldsfp),
+	      add_datasize = xfs_dir2_data_entsize(args->namelen),
 	      eof = (char *)oldsfep == &buf[old_isize];
 	     !eof;
-	     offset = new_offset + XFS_DIR2_DATA_ENTSIZE(oldsfep->namelen),
-	      oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep),
+	     offset = new_offset + xfs_dir2_data_entsize(oldsfep->namelen),
+	      oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep),
 	      eof = (char *)oldsfep == &buf[old_isize]) {
-		new_offset = XFS_DIR2_SF_GET_OFFSET(oldsfep);
+		new_offset = xfs_dir2_sf_get_offset(oldsfep);
 		if (offset + add_datasize <= new_offset)
 			break;
 	}
@@ -495,10 +495,10 @@ xfs_dir2_sf_addname_hard(
 	 * Fill in the new entry, and update the header counts.
 	 */
 	sfep->namelen = args->namelen;
-	XFS_DIR2_SF_PUT_OFFSET(sfep, offset);
+	xfs_dir2_sf_put_offset(sfep, offset);
 	memcpy(sfep->name, args->name, sfep->namelen);
-	XFS_DIR2_SF_PUT_INUMBER(sfp, &args->inumber,
-		XFS_DIR2_SF_INUMBERP(sfep));
+	xfs_dir2_sf_put_inumber(sfp, &args->inumber,
+		xfs_dir2_sf_inumberp(sfep));
 	sfp->hdr.count++;
 #if XFS_BIG_INUMS
 	if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange)
@@ -508,7 +508,7 @@ xfs_dir2_sf_addname_hard(
 	 * If there's more left to copy, do that.
 	 */
 	if (!eof) {
-		sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep);
+		sfep = xfs_dir2_sf_nextentry(sfp, sfep);
 		memcpy(sfep, oldsfep, old_isize - nbytes);
 	}
 	kmem_free(buf, old_isize);
@@ -544,9 +544,9 @@ xfs_dir2_sf_addname_pick(
 	mp = dp->i_mount;
 
 	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-	size = XFS_DIR2_DATA_ENTSIZE(args->namelen);
+	size = xfs_dir2_data_entsize(args->namelen);
 	offset = XFS_DIR2_DATA_FIRST_OFFSET;
-	sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+	sfep = xfs_dir2_sf_firstentry(sfp);
 	holefit = 0;
 	/*
 	 * Loop over sf entries.
@@ -555,10 +555,10 @@ xfs_dir2_sf_addname_pick(
 	 */
 	for (i = 0; i < sfp->hdr.count; i++) {
 		if (!holefit)
-			holefit = offset + size <= XFS_DIR2_SF_GET_OFFSET(sfep);
-		offset = XFS_DIR2_SF_GET_OFFSET(sfep) +
-			 XFS_DIR2_DATA_ENTSIZE(sfep->namelen);
-		sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep);
+			holefit = offset + size <= xfs_dir2_sf_get_offset(sfep);
+		offset = xfs_dir2_sf_get_offset(sfep) +
+			 xfs_dir2_data_entsize(sfep->namelen);
+		sfep = xfs_dir2_sf_nextentry(sfp, sfep);
 	}
 	/*
 	 * Calculate data bytes used excluding the new entry, if this
@@ -617,18 +617,18 @@ xfs_dir2_sf_check(
 
 	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
 	offset = XFS_DIR2_DATA_FIRST_OFFSET;
-	ino = XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent);
+	ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
 	i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
 
-	for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
 	     i < sfp->hdr.count;
-	     i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) {
-		ASSERT(XFS_DIR2_SF_GET_OFFSET(sfep) >= offset);
-		ino = XFS_DIR2_SF_GET_INUMBER(sfp, XFS_DIR2_SF_INUMBERP(sfep));
+	     i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
+		ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset);
+		ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep));
 		i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
 		offset =
-			XFS_DIR2_SF_GET_OFFSET(sfep) +
-			XFS_DIR2_DATA_ENTSIZE(sfep->namelen);
+			xfs_dir2_sf_get_offset(sfep) +
+			xfs_dir2_data_entsize(sfep->namelen);
 	}
 	ASSERT(i8count == sfp->hdr.i8count);
 	ASSERT(XFS_BIG_INUMS || i8count == 0);
@@ -671,7 +671,7 @@ xfs_dir2_sf_create(
 	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
 	ASSERT(dp->i_df.if_bytes == 0);
 	i8count = pino > XFS_DIR2_MAX_SHORT_INUM;
-	size = XFS_DIR2_SF_HDR_SIZE(i8count);
+	size = xfs_dir2_sf_hdr_size(i8count);
 	/*
 	 * Make a buffer for the data.
 	 */
@@ -684,7 +684,7 @@ xfs_dir2_sf_create(
 	/*
 	 * Now can put in the inode number, since i8count is set.
 	 */
-	XFS_DIR2_SF_PUT_INUMBER(sfp, &pino, &sfp->hdr.parent);
+	xfs_dir2_sf_put_inumber(sfp, &pino, &sfp->hdr.parent);
 	sfp->hdr.count = 0;
 	dp->i_d.di_size = size;
 	xfs_dir2_sf_check(args);
@@ -727,12 +727,12 @@ xfs_dir2_sf_getdents(
 
 	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
 
-	ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
 
 	/*
 	 * If the block number in the offset is out of range, we're done.
 	 */
-	if (XFS_DIR2_DATAPTR_TO_DB(mp, dir_offset) > mp->m_dirdatablk) {
+	if (xfs_dir2_dataptr_to_db(mp, dir_offset) > mp->m_dirdatablk) {
 		*eofp = 1;
 		return 0;
 	}
@@ -747,9 +747,9 @@ xfs_dir2_sf_getdents(
 	 * Put . entry unless we're starting past it.
 	 */
 	if (dir_offset <=
-		    XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
+		    xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
 					       XFS_DIR2_DATA_DOT_OFFSET)) {
-		p.cook = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, 0,
+		p.cook = xfs_dir2_db_off_to_dataptr(mp, 0,
 						XFS_DIR2_DATA_DOTDOT_OFFSET);
 		p.ino = dp->i_ino;
 #if XFS_BIG_INUMS
@@ -762,7 +762,7 @@ xfs_dir2_sf_getdents(
 
 		if (!p.done) {
 			uio->uio_offset =
-				XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
+				xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
 						XFS_DIR2_DATA_DOT_OFFSET);
 			return error;
 		}
@@ -772,11 +772,11 @@ xfs_dir2_sf_getdents(
 	 * Put .. entry unless we're starting past it.
 	 */
 	if (dir_offset <=
-		    XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
+		    xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
 					       XFS_DIR2_DATA_DOTDOT_OFFSET)) {
-		p.cook = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
+		p.cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
 						XFS_DIR2_DATA_FIRST_OFFSET);
-		p.ino = XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent);
+		p.ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
 #if XFS_BIG_INUMS
 		p.ino += mp->m_inoadd;
 #endif
@@ -787,7 +787,7 @@ xfs_dir2_sf_getdents(
 
 		if (!p.done) {
 			uio->uio_offset =
-				XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
+				xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
 					XFS_DIR2_DATA_DOTDOT_OFFSET);
 			return error;
 		}
@@ -796,23 +796,23 @@ xfs_dir2_sf_getdents(
 	/*
 	 * Loop while there are more entries and put'ing works.
 	 */
-	for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
 		     i < sfp->hdr.count;
-			     i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) {
+			     i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
 
-		off = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
-				XFS_DIR2_SF_GET_OFFSET(sfep));
+		off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
+				xfs_dir2_sf_get_offset(sfep));
 
 		if (dir_offset > off)
 			continue;
 
 		p.namelen = sfep->namelen;
 
-		p.cook = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
-			XFS_DIR2_SF_GET_OFFSET(sfep) +
-			XFS_DIR2_DATA_ENTSIZE(p.namelen));
+		p.cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
+			xfs_dir2_sf_get_offset(sfep) +
+			xfs_dir2_data_entsize(p.namelen));
 
-		p.ino = XFS_DIR2_SF_GET_INUMBER(sfp, XFS_DIR2_SF_INUMBERP(sfep));
+		p.ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep));
 #if XFS_BIG_INUMS
 		p.ino += mp->m_inoadd;
 #endif
@@ -832,7 +832,7 @@ xfs_dir2_sf_getdents(
 	*eofp = 1;
 
 	uio->uio_offset =
-		XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk + 1, 0);
+		xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0);
 
 	return 0;
 }
@@ -865,7 +865,7 @@ xfs_dir2_sf_lookup(
 	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
 	ASSERT(dp->i_df.if_u1.if_data != NULL);
 	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-	ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
 	/*
 	 * Special case for .
 	 */
@@ -878,21 +878,21 @@ xfs_dir2_sf_lookup(
 	 */
 	if (args->namelen == 2 &&
 	    args->name[0] == '.' && args->name[1] == '.') {
-		args->inumber = XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent);
+		args->inumber = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
 		return XFS_ERROR(EEXIST);
 	}
 	/*
 	 * Loop over all the entries trying to match ours.
 	 */
-	for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
 	     i < sfp->hdr.count;
-	     i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) {
+	     i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
 		if (sfep->namelen == args->namelen &&
 		    sfep->name[0] == args->name[0] &&
 		    memcmp(args->name, sfep->name, args->namelen) == 0) {
 			args->inumber =
-				XFS_DIR2_SF_GET_INUMBER(sfp,
-					XFS_DIR2_SF_INUMBERP(sfep));
+				xfs_dir2_sf_get_inumber(sfp,
+					xfs_dir2_sf_inumberp(sfep));
 			return XFS_ERROR(EEXIST);
 		}
 	}
@@ -934,19 +934,19 @@ xfs_dir2_sf_removename(
 	ASSERT(dp->i_df.if_bytes == oldsize);
 	ASSERT(dp->i_df.if_u1.if_data != NULL);
 	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-	ASSERT(oldsize >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+	ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
 	/*
 	 * Loop over the old directory entries.
 	 * Find the one we're deleting.
 	 */
-	for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
 	     i < sfp->hdr.count;
-	     i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) {
+	     i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
 		if (sfep->namelen == args->namelen &&
 		    sfep->name[0] == args->name[0] &&
 		    memcmp(sfep->name, args->name, args->namelen) == 0) {
-			ASSERT(XFS_DIR2_SF_GET_INUMBER(sfp,
-					XFS_DIR2_SF_INUMBERP(sfep)) ==
+			ASSERT(xfs_dir2_sf_get_inumber(sfp,
+					xfs_dir2_sf_inumberp(sfep)) ==
 				args->inumber);
 			break;
 		}
@@ -961,7 +961,7 @@ xfs_dir2_sf_removename(
 	 * Calculate sizes.
 	 */
 	byteoff = (int)((char *)sfep - (char *)sfp);
-	entsize = XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, args->namelen);
+	entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen);
 	newsize = oldsize - entsize;
 	/*
 	 * Copy the part if any after the removed entry, sliding it down.
@@ -1027,7 +1027,7 @@ xfs_dir2_sf_replace(
 	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
 	ASSERT(dp->i_df.if_u1.if_data != NULL);
 	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-	ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
 #if XFS_BIG_INUMS
 	/*
 	 * New inode number is large, and need to convert to 8-byte inodes.
@@ -1067,28 +1067,28 @@ xfs_dir2_sf_replace(
 	if (args->namelen == 2 &&
 	    args->name[0] == '.' && args->name[1] == '.') {
 #if XFS_BIG_INUMS || defined(DEBUG)
-		ino = XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent);
+		ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
 		ASSERT(args->inumber != ino);
 #endif
-		XFS_DIR2_SF_PUT_INUMBER(sfp, &args->inumber, &sfp->hdr.parent);
+		xfs_dir2_sf_put_inumber(sfp, &args->inumber, &sfp->hdr.parent);
 	}
 	/*
 	 * Normal entry, look for the name.
 	 */
 	else {
-		for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+		for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
 		     i < sfp->hdr.count;
-		     i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) {
+		     i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
 			if (sfep->namelen == args->namelen &&
 			    sfep->name[0] == args->name[0] &&
 			    memcmp(args->name, sfep->name, args->namelen) == 0) {
 #if XFS_BIG_INUMS || defined(DEBUG)
-				ino = XFS_DIR2_SF_GET_INUMBER(sfp,
-					XFS_DIR2_SF_INUMBERP(sfep));
+				ino = xfs_dir2_sf_get_inumber(sfp,
+					xfs_dir2_sf_inumberp(sfep));
 				ASSERT(args->inumber != ino);
 #endif
-				XFS_DIR2_SF_PUT_INUMBER(sfp, &args->inumber,
-					XFS_DIR2_SF_INUMBERP(sfep));
+				xfs_dir2_sf_put_inumber(sfp, &args->inumber,
+					xfs_dir2_sf_inumberp(sfep));
 				break;
 			}
 		}
@@ -1189,22 +1189,22 @@ xfs_dir2_sf_toino4(
 	 */
 	sfp->hdr.count = oldsfp->hdr.count;
 	sfp->hdr.i8count = 0;
-	ino = XFS_DIR2_SF_GET_INUMBER(oldsfp, &oldsfp->hdr.parent);
-	XFS_DIR2_SF_PUT_INUMBER(sfp, &ino, &sfp->hdr.parent);
+	ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent);
+	xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent);
 	/*
 	 * Copy the entries field by field.
 	 */
-	for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp),
-		    oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp);
+	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
+		    oldsfep = xfs_dir2_sf_firstentry(oldsfp);
 	     i < sfp->hdr.count;
-	     i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep),
-		  oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep)) {
+	     i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep),
+		  oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) {
 		sfep->namelen = oldsfep->namelen;
 		sfep->offset = oldsfep->offset;
 		memcpy(sfep->name, oldsfep->name, sfep->namelen);
-		ino = XFS_DIR2_SF_GET_INUMBER(oldsfp,
-			XFS_DIR2_SF_INUMBERP(oldsfep));
-		XFS_DIR2_SF_PUT_INUMBER(sfp, &ino, XFS_DIR2_SF_INUMBERP(sfep));
+		ino = xfs_dir2_sf_get_inumber(oldsfp,
+			xfs_dir2_sf_inumberp(oldsfep));
+		xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep));
 	}
 	/*
 	 * Clean up the inode.
@@ -1266,22 +1266,22 @@ xfs_dir2_sf_toino8(
 	 */
 	sfp->hdr.count = oldsfp->hdr.count;
 	sfp->hdr.i8count = 1;
-	ino = XFS_DIR2_SF_GET_INUMBER(oldsfp, &oldsfp->hdr.parent);
-	XFS_DIR2_SF_PUT_INUMBER(sfp, &ino, &sfp->hdr.parent);
+	ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent);
+	xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent);
 	/*
 	 * Copy the entries field by field.
 	 */
-	for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp),
-		    oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp);
+	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
+		    oldsfep = xfs_dir2_sf_firstentry(oldsfp);
 	     i < sfp->hdr.count;
-	     i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep),
-		  oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep)) {
+	     i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep),
+		  oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) {
 		sfep->namelen = oldsfep->namelen;
 		sfep->offset = oldsfep->offset;
 		memcpy(sfep->name, oldsfep->name, sfep->namelen);
-		ino = XFS_DIR2_SF_GET_INUMBER(oldsfp,
-			XFS_DIR2_SF_INUMBERP(oldsfep));
-		XFS_DIR2_SF_PUT_INUMBER(sfp, &ino, XFS_DIR2_SF_INUMBERP(sfep));
+		ino = xfs_dir2_sf_get_inumber(oldsfp,
+			xfs_dir2_sf_inumberp(oldsfep));
+		xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep));
 	}
 	/*
 	 * Clean up the inode.
diff --git a/fs/xfs/xfs_dir2_sf.h b/fs/xfs/xfs_dir2_sf.h
index 42f015b7001..11e503209af 100644
--- a/fs/xfs/xfs_dir2_sf.h
+++ b/fs/xfs/xfs_dir2_sf.h
@@ -90,7 +90,6 @@ typedef struct xfs_dir2_sf {
 	xfs_dir2_sf_entry_t	list[1];	/* shortform entries */
 } xfs_dir2_sf_t;
 
-#define	XFS_DIR2_SF_HDR_SIZE(i8count)	xfs_dir2_sf_hdr_size(i8count)
 static inline int xfs_dir2_sf_hdr_size(int i8count)
 {
 	return ((uint)sizeof(xfs_dir2_sf_hdr_t) - \
@@ -98,14 +97,11 @@ static inline int xfs_dir2_sf_hdr_size(int i8count)
 		((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
 }
 
-#define	XFS_DIR2_SF_INUMBERP(sfep)	xfs_dir2_sf_inumberp(sfep)
 static inline xfs_dir2_inou_t *xfs_dir2_sf_inumberp(xfs_dir2_sf_entry_t *sfep)
 {
 	return (xfs_dir2_inou_t *)&(sfep)->name[(sfep)->namelen];
 }
 
-#define	XFS_DIR2_SF_GET_INUMBER(sfp, from) \
-	xfs_dir2_sf_get_inumber(sfp, from)
 static inline xfs_intino_t
 xfs_dir2_sf_get_inumber(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from)
 {
@@ -114,8 +110,6 @@ xfs_dir2_sf_get_inumber(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from)
 		(xfs_intino_t)XFS_GET_DIR_INO8((from)->i8));
 }
 
-#define	XFS_DIR2_SF_PUT_INUMBER(sfp,from,to) \
-	xfs_dir2_sf_put_inumber(sfp,from,to)
 static inline void xfs_dir2_sf_put_inumber(xfs_dir2_sf_t *sfp, xfs_ino_t *from,
 						xfs_dir2_inou_t *to)
 {
@@ -125,24 +119,18 @@ static inline void xfs_dir2_sf_put_inumber(xfs_dir2_sf_t *sfp, xfs_ino_t *from,
 		XFS_PUT_DIR_INO8(*(from), (to)->i8);
 }
 
-#define	XFS_DIR2_SF_GET_OFFSET(sfep)	\
-	xfs_dir2_sf_get_offset(sfep)
 static inline xfs_dir2_data_aoff_t
 xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep)
 {
 	return INT_GET_UNALIGNED_16_BE(&(sfep)->offset.i);
 }
 
-#define	XFS_DIR2_SF_PUT_OFFSET(sfep,off) \
-	xfs_dir2_sf_put_offset(sfep,off)
 static inline void
 xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off)
 {
 	INT_SET_UNALIGNED_16_BE(&(sfep)->offset.i, off);
 }
 
-#define XFS_DIR2_SF_ENTSIZE_BYNAME(sfp,len)	\
-	xfs_dir2_sf_entsize_byname(sfp,len)
 static inline int xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len)
 {
 	return ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (len) - \
@@ -150,8 +138,6 @@ static inline int xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len)
 		((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
 }
 
-#define XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp,sfep)	\
-	xfs_dir2_sf_entsize_byentry(sfp,sfep)
 static inline int
 xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
 {
@@ -160,19 +146,17 @@ xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
 		((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
 }
 
-#define XFS_DIR2_SF_FIRSTENTRY(sfp)	xfs_dir2_sf_firstentry(sfp)
 static inline xfs_dir2_sf_entry_t *xfs_dir2_sf_firstentry(xfs_dir2_sf_t *sfp)
 {
 	return ((xfs_dir2_sf_entry_t *) \
-		((char *)(sfp) + XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count)));
+		((char *)(sfp) + xfs_dir2_sf_hdr_size(sfp->hdr.i8count)));
 }
 
-#define XFS_DIR2_SF_NEXTENTRY(sfp,sfep)	xfs_dir2_sf_nextentry(sfp,sfep)
 static inline xfs_dir2_sf_entry_t *
 xfs_dir2_sf_nextentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
 {
 	return ((xfs_dir2_sf_entry_t *) \
-		((char *)(sfep) + XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp,sfep)));
+		((char *)(sfep) + xfs_dir2_sf_entsize_byentry(sfp,sfep)));
 }
 
 /*
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
new file mode 100644
index 00000000000..ce2278611bb
--- /dev/null
+++ b/fs/xfs/xfs_filestream.c
@@ -0,0 +1,771 @@
+/*
+ * Copyright (c) 2006-2007 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inum.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_ag.h"
+#include "xfs_dmapi.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_bmap.h"
+#include "xfs_alloc.h"
+#include "xfs_utils.h"
+#include "xfs_mru_cache.h"
+#include "xfs_filestream.h"
+
+#ifdef XFS_FILESTREAMS_TRACE
+
+ktrace_t *xfs_filestreams_trace_buf;
+
+STATIC void
+xfs_filestreams_trace(
+	xfs_mount_t	*mp,	/* mount point */
+	int		type,	/* type of trace */
+	const char	*func,	/* source function */
+	int		line,	/* source line number */
+	__psunsigned_t	arg0,
+	__psunsigned_t	arg1,
+	__psunsigned_t	arg2,
+	__psunsigned_t	arg3,
+	__psunsigned_t	arg4,
+	__psunsigned_t	arg5)
+{
+	ktrace_enter(xfs_filestreams_trace_buf,
+		(void *)(__psint_t)(type | (line << 16)),
+		(void *)func,
+		(void *)(__psunsigned_t)current_pid(),
+		(void *)mp,
+		(void *)(__psunsigned_t)arg0,
+		(void *)(__psunsigned_t)arg1,
+		(void *)(__psunsigned_t)arg2,
+		(void *)(__psunsigned_t)arg3,
+		(void *)(__psunsigned_t)arg4,
+		(void *)(__psunsigned_t)arg5,
+		NULL, NULL, NULL, NULL, NULL, NULL);
+}
+
+#define TRACE0(mp,t)			TRACE6(mp,t,0,0,0,0,0,0)
+#define TRACE1(mp,t,a0)			TRACE6(mp,t,a0,0,0,0,0,0)
+#define TRACE2(mp,t,a0,a1)		TRACE6(mp,t,a0,a1,0,0,0,0)
+#define TRACE3(mp,t,a0,a1,a2)		TRACE6(mp,t,a0,a1,a2,0,0,0)
+#define TRACE4(mp,t,a0,a1,a2,a3)	TRACE6(mp,t,a0,a1,a2,a3,0,0)
+#define TRACE5(mp,t,a0,a1,a2,a3,a4)	TRACE6(mp,t,a0,a1,a2,a3,a4,0)
+#define TRACE6(mp,t,a0,a1,a2,a3,a4,a5) \
+	xfs_filestreams_trace(mp, t, __FUNCTION__, __LINE__, \
+				(__psunsigned_t)a0, (__psunsigned_t)a1, \
+				(__psunsigned_t)a2, (__psunsigned_t)a3, \
+				(__psunsigned_t)a4, (__psunsigned_t)a5)
+
+#define TRACE_AG_SCAN(mp, ag, ag2) \
+		TRACE2(mp, XFS_FSTRM_KTRACE_AGSCAN, ag, ag2);
+#define TRACE_AG_PICK1(mp, max_ag, maxfree) \
+		TRACE2(mp, XFS_FSTRM_KTRACE_AGPICK1, max_ag, maxfree);
+#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag) \
+		TRACE6(mp, XFS_FSTRM_KTRACE_AGPICK2, ag, ag2, \
+			 cnt, free, scan, flag)
+#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2) \
+		TRACE5(mp, XFS_FSTRM_KTRACE_UPDATE, ip, ag, cnt, ag2, cnt2)
+#define TRACE_FREE(mp, ip, pip, ag, cnt) \
+		TRACE4(mp, XFS_FSTRM_KTRACE_FREE, ip, pip, ag, cnt)
+#define TRACE_LOOKUP(mp, ip, pip, ag, cnt) \
+		TRACE4(mp, XFS_FSTRM_KTRACE_ITEM_LOOKUP, ip, pip, ag, cnt)
+#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt) \
+		TRACE4(mp, XFS_FSTRM_KTRACE_ASSOCIATE, ip, pip, ag, cnt)
+#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt) \
+		TRACE6(mp, XFS_FSTRM_KTRACE_MOVEAG, ip, pip, oag, ocnt, nag, ncnt)
+#define TRACE_ORPHAN(mp, ip, ag) \
+		TRACE2(mp, XFS_FSTRM_KTRACE_ORPHAN, ip, ag);
+
+
+#else
+#define TRACE_AG_SCAN(mp, ag, ag2)
+#define TRACE_AG_PICK1(mp, max_ag, maxfree)
+#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag)
+#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2)
+#define TRACE_FREE(mp, ip, pip, ag, cnt)
+#define TRACE_LOOKUP(mp, ip, pip, ag, cnt)
+#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt)
+#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt)
+#define TRACE_ORPHAN(mp, ip, ag)
+#endif
+
+static kmem_zone_t *item_zone;
+
+/*
+ * Structure for associating a file or a directory with an allocation group.
+ * The parent directory pointer is only needed for files, but since there will
+ * generally be vastly more files than directories in the cache, using the same
+ * data structure simplifies the code with very little memory overhead.
+ */
+typedef struct fstrm_item
+{
+	xfs_agnumber_t	ag;	/* AG currently in use for the file/directory. */
+	xfs_inode_t	*ip;	/* inode self-pointer. */
+	xfs_inode_t	*pip;	/* Parent directory inode pointer. */
+} fstrm_item_t;
+
+
+/*
+ * Scan the AGs starting at startag looking for an AG that isn't in use and has
+ * at least minlen blocks free.
+ */
+static int
+_xfs_filestream_pick_ag(
+	xfs_mount_t	*mp,
+	xfs_agnumber_t	startag,
+	xfs_agnumber_t	*agp,
+	int		flags,
+	xfs_extlen_t	minlen)
+{
+	int		err, trylock, nscan;
+	xfs_extlen_t	delta, longest, need, free, minfree, maxfree = 0;
+	xfs_agnumber_t	ag, max_ag = NULLAGNUMBER;
+	struct xfs_perag *pag;
+
+	/* 2% of an AG's blocks must be free for it to be chosen. */
+	minfree = mp->m_sb.sb_agblocks / 50;
+
+	ag = startag;
+	*agp = NULLAGNUMBER;
+
+	/* For the first pass, don't sleep trying to init the per-AG. */
+	trylock = XFS_ALLOC_FLAG_TRYLOCK;
+
+	for (nscan = 0; 1; nscan++) {
+
+		TRACE_AG_SCAN(mp, ag, xfs_filestream_peek_ag(mp, ag));
+
+		pag = mp->m_perag + ag;
+
+		if (!pag->pagf_init) {
+			err = xfs_alloc_pagf_init(mp, NULL, ag, trylock);
+			if (err && !trylock)
+				return err;
+		}
+
+		/* Might fail sometimes during the 1st pass with trylock set. */
+		if (!pag->pagf_init)
+			goto next_ag;
+
+		/* Keep track of the AG with the most free blocks. */
+		if (pag->pagf_freeblks > maxfree) {
+			maxfree = pag->pagf_freeblks;
+			max_ag = ag;
+		}
+
+		/*
+		 * The AG reference count does two things: it enforces mutual
+		 * exclusion when examining the suitability of an AG in this
+		 * loop, and it guards against two filestreams being established
+		 * in the same AG as each other.
+		 */
+		if (xfs_filestream_get_ag(mp, ag) > 1) {
+			xfs_filestream_put_ag(mp, ag);
+			goto next_ag;
+		}
+
+		need = XFS_MIN_FREELIST_PAG(pag, mp);
+		delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0;
+		longest = (pag->pagf_longest > delta) ?
+		          (pag->pagf_longest - delta) :
+		          (pag->pagf_flcount > 0 || pag->pagf_longest > 0);
+
+		if (((minlen && longest >= minlen) ||
+		     (!minlen && pag->pagf_freeblks >= minfree)) &&
+		    (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) ||
+		     (flags & XFS_PICK_LOWSPACE))) {
+
+			/* Break out, retaining the reference on the AG. */
+			free = pag->pagf_freeblks;
+			*agp = ag;
+			break;
+		}
+
+		/* Drop the reference on this AG, it's not usable. */
+		xfs_filestream_put_ag(mp, ag);
+next_ag:
+		/* Move to the next AG, wrapping to AG 0 if necessary. */
+		if (++ag >= mp->m_sb.sb_agcount)
+			ag = 0;
+
+		/* If a full pass of the AGs hasn't been done yet, continue. */
+		if (ag != startag)
+			continue;
+
+		/* Allow sleeping in xfs_alloc_pagf_init() on the 2nd pass. */
+		if (trylock != 0) {
+			trylock = 0;
+			continue;
+		}
+
+		/* Finally, if lowspace wasn't set, set it for the 3rd pass. */
+		if (!(flags & XFS_PICK_LOWSPACE)) {
+			flags |= XFS_PICK_LOWSPACE;
+			continue;
+		}
+
+		/*
+		 * Take the AG with the most free space, regardless of whether
+		 * it's already in use by another filestream.
+		 */
+		if (max_ag != NULLAGNUMBER) {
+			xfs_filestream_get_ag(mp, max_ag);
+			TRACE_AG_PICK1(mp, max_ag, maxfree);
+			free = maxfree;
+			*agp = max_ag;
+			break;
+		}
+
+		/* take AG 0 if none matched */
+		TRACE_AG_PICK1(mp, max_ag, maxfree);
+		*agp = 0;
+		return 0;
+	}
+
+	TRACE_AG_PICK2(mp, startag, *agp, xfs_filestream_peek_ag(mp, *agp),
+			free, nscan, flags);
+
+	return 0;
+}
+
+/*
+ * Set the allocation group number for a file or a directory, updating inode
+ * references and per-AG references as appropriate.  Must be called with the
+ * m_peraglock held in read mode.
+ */
+static int
+_xfs_filestream_update_ag(
+	xfs_inode_t	*ip,
+	xfs_inode_t	*pip,
+	xfs_agnumber_t	ag)
+{
+	int		err = 0;
+	xfs_mount_t	*mp;
+	xfs_mru_cache_t	*cache;
+	fstrm_item_t	*item;
+	xfs_agnumber_t	old_ag;
+	xfs_inode_t	*old_pip;
+
+	/*
+	 * Either ip is a regular file and pip is a directory, or ip is a
+	 * directory and pip is NULL.
+	 */
+	ASSERT(ip && (((ip->i_d.di_mode & S_IFREG) && pip &&
+	               (pip->i_d.di_mode & S_IFDIR)) ||
+	              ((ip->i_d.di_mode & S_IFDIR) && !pip)));
+
+	mp = ip->i_mount;
+	cache = mp->m_filestream;
+
+	item = xfs_mru_cache_lookup(cache, ip->i_ino);
+	if (item) {
+		ASSERT(item->ip == ip);
+		old_ag = item->ag;
+		item->ag = ag;
+		old_pip = item->pip;
+		item->pip = pip;
+		xfs_mru_cache_done(cache);
+
+		/*
+		 * If the AG has changed, drop the old ref and take a new one,
+		 * effectively transferring the reference from old to new AG.
+		 */
+		if (ag != old_ag) {
+			xfs_filestream_put_ag(mp, old_ag);
+			xfs_filestream_get_ag(mp, ag);
+		}
+
+		/*
+		 * If ip is a file and its pip has changed, drop the old ref and
+		 * take a new one.
+		 */
+		if (pip && pip != old_pip) {
+			IRELE(old_pip);
+			IHOLD(pip);
+		}
+
+		TRACE_UPDATE(mp, ip, old_ag, xfs_filestream_peek_ag(mp, old_ag),
+				ag, xfs_filestream_peek_ag(mp, ag));
+		return 0;
+	}
+
+	item = kmem_zone_zalloc(item_zone, KM_MAYFAIL);
+	if (!item)
+		return ENOMEM;
+
+	item->ag = ag;
+	item->ip = ip;
+	item->pip = pip;
+
+	err = xfs_mru_cache_insert(cache, ip->i_ino, item);
+	if (err) {
+		kmem_zone_free(item_zone, item);
+		return err;
+	}
+
+	/* Take a reference on the AG. */
+	xfs_filestream_get_ag(mp, ag);
+
+	/*
+	 * Take a reference on the inode itself regardless of whether it's a
+	 * regular file or a directory.
+	 */
+	IHOLD(ip);
+
+	/*
+	 * In the case of a regular file, take a reference on the parent inode
+	 * as well to ensure it remains in-core.
+	 */
+	if (pip)
+		IHOLD(pip);
+
+	TRACE_UPDATE(mp, ip, ag, xfs_filestream_peek_ag(mp, ag),
+			ag, xfs_filestream_peek_ag(mp, ag));
+
+	return 0;
+}
+
+/* xfs_fstrm_free_func(): callback for freeing cached stream items. */
+void
+xfs_fstrm_free_func(
+	xfs_ino_t	ino,
+	fstrm_item_t	*item)
+{
+	xfs_inode_t	*ip = item->ip;
+	int ref;
+
+	ASSERT(ip->i_ino == ino);
+
+	xfs_iflags_clear(ip, XFS_IFILESTREAM);
+
+	/* Drop the reference taken on the AG when the item was added. */
+	ref = xfs_filestream_put_ag(ip->i_mount, item->ag);
+
+	ASSERT(ref >= 0);
+	TRACE_FREE(ip->i_mount, ip, item->pip, item->ag,
+		xfs_filestream_peek_ag(ip->i_mount, item->ag));
+
+	/*
+	 * _xfs_filestream_update_ag() always takes a reference on the inode
+	 * itself, whether it's a file or a directory.  Release it here.
+	 * This can result in the inode being freed and so we must
+	 * not hold any inode locks when freeing filesstreams objects
+	 * otherwise we can deadlock here.
+	 */
+	IRELE(ip);
+
+	/*
+	 * In the case of a regular file, _xfs_filestream_update_ag() also
+	 * takes a ref on the parent inode to keep it in-core.  Release that
+	 * too.
+	 */
+	if (item->pip)
+		IRELE(item->pip);
+
+	/* Finally, free the memory allocated for the item. */
+	kmem_zone_free(item_zone, item);
+}
+
+/*
+ * xfs_filestream_init() is called at xfs initialisation time to set up the
+ * memory zone that will be used for filestream data structure allocation.
+ */
+int
+xfs_filestream_init(void)
+{
+	item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item");
+#ifdef XFS_FILESTREAMS_TRACE
+	xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_SLEEP);
+#endif
+	return item_zone ? 0 : -ENOMEM;
+}
+
+/*
+ * xfs_filestream_uninit() is called at xfs termination time to destroy the
+ * memory zone that was used for filestream data structure allocation.
+ */
+void
+xfs_filestream_uninit(void)
+{
+#ifdef XFS_FILESTREAMS_TRACE
+	ktrace_free(xfs_filestreams_trace_buf);
+#endif
+	kmem_zone_destroy(item_zone);
+}
+
+/*
+ * xfs_filestream_mount() is called when a file system is mounted with the
+ * filestream option.  It is responsible for allocating the data structures
+ * needed to track the new file system's file streams.
+ */
+int
+xfs_filestream_mount(
+	xfs_mount_t	*mp)
+{
+	int		err;
+	unsigned int	lifetime, grp_count;
+
+	/*
+	 * The filestream timer tunable is currently fixed within the range of
+	 * one second to four minutes, with five seconds being the default.  The
+	 * group count is somewhat arbitrary, but it'd be nice to adhere to the
+	 * timer tunable to within about 10 percent.  This requires at least 10
+	 * groups.
+	 */
+	lifetime  = xfs_fstrm_centisecs * 10;
+	grp_count = 10;
+
+	err = xfs_mru_cache_create(&mp->m_filestream, lifetime, grp_count,
+	                     (xfs_mru_cache_free_func_t)xfs_fstrm_free_func);
+
+	return err;
+}
+
+/*
+ * xfs_filestream_unmount() is called when a file system that was mounted with
+ * the filestream option is unmounted.  It drains the data structures created
+ * to track the file system's file streams and frees all the memory that was
+ * allocated.
+ */
+void
+xfs_filestream_unmount(
+	xfs_mount_t	*mp)
+{
+	xfs_mru_cache_destroy(mp->m_filestream);
+}
+
+/*
+ * If the mount point's m_perag array is going to be reallocated, all
+ * outstanding cache entries must be flushed to avoid accessing reference count
+ * addresses that have been freed.  The call to xfs_filestream_flush() must be
+ * made inside the block that holds the m_peraglock in write mode to do the
+ * reallocation.
+ */
+void
+xfs_filestream_flush(
+	xfs_mount_t	*mp)
+{
+	/* point in time flush, so keep the reaper running */
+	xfs_mru_cache_flush(mp->m_filestream, 1);
+}
+
+/*
+ * Return the AG of the filestream the file or directory belongs to, or
+ * NULLAGNUMBER otherwise.
+ */
+xfs_agnumber_t
+xfs_filestream_lookup_ag(
+	xfs_inode_t	*ip)
+{
+	xfs_mru_cache_t	*cache;
+	fstrm_item_t	*item;
+	xfs_agnumber_t	ag;
+	int		ref;
+
+	if (!(ip->i_d.di_mode & (S_IFREG | S_IFDIR))) {
+		ASSERT(0);
+		return NULLAGNUMBER;
+	}
+
+	cache = ip->i_mount->m_filestream;
+	item = xfs_mru_cache_lookup(cache, ip->i_ino);
+	if (!item) {
+		TRACE_LOOKUP(ip->i_mount, ip, NULL, NULLAGNUMBER, 0);
+		return NULLAGNUMBER;
+	}
+
+	ASSERT(ip == item->ip);
+	ag = item->ag;
+	ref = xfs_filestream_peek_ag(ip->i_mount, ag);
+	xfs_mru_cache_done(cache);
+
+	TRACE_LOOKUP(ip->i_mount, ip, item->pip, ag, ref);
+	return ag;
+}
+
+/*
+ * xfs_filestream_associate() should only be called to associate a regular file
+ * with its parent directory.  Calling it with a child directory isn't
+ * appropriate because filestreams don't apply to entire directory hierarchies.
+ * Creating a file in a child directory of an existing filestream directory
+ * starts a new filestream with its own allocation group association.
+ *
+ * Returns < 0 on error, 0 if successful association occurred, > 0 if
+ * we failed to get an association because of locking issues.
+ */
+int
+xfs_filestream_associate(
+	xfs_inode_t	*pip,
+	xfs_inode_t	*ip)
+{
+	xfs_mount_t	*mp;
+	xfs_mru_cache_t	*cache;
+	fstrm_item_t	*item;
+	xfs_agnumber_t	ag, rotorstep, startag;
+	int		err = 0;
+
+	ASSERT(pip->i_d.di_mode & S_IFDIR);
+	ASSERT(ip->i_d.di_mode & S_IFREG);
+	if (!(pip->i_d.di_mode & S_IFDIR) || !(ip->i_d.di_mode & S_IFREG))
+		return -EINVAL;
+
+	mp = pip->i_mount;
+	cache = mp->m_filestream;
+	down_read(&mp->m_peraglock);
+
+	/*
+	 * We have a problem, Houston.
+	 *
+	 * Taking the iolock here violates inode locking order - we already
+	 * hold the ilock. Hence if we block getting this lock we may never
+	 * wake. Unfortunately, that means if we can't get the lock, we're
+	 * screwed in terms of getting a stream association - we can't spin
+	 * waiting for the lock because someone else is waiting on the lock we
+	 * hold and we cannot drop that as we are in a transaction here.
+	 *
+	 * Lucky for us, this inversion is rarely a problem because it's a
+	 * directory inode that we are trying to lock here and that means the
+	 * only place that matters is xfs_sync_inodes() and SYNC_DELWRI is
+	 * used. i.e. freeze, remount-ro, quotasync or unmount.
+	 *
+	 * So, if we can't get the iolock without sleeping then just give up
+	 */
+	if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL)) {
+		up_read(&mp->m_peraglock);
+		return 1;
+	}
+
+	/* If the parent directory is already in the cache, use its AG. */
+	item = xfs_mru_cache_lookup(cache, pip->i_ino);
+	if (item) {
+		ASSERT(item->ip == pip);
+		ag = item->ag;
+		xfs_mru_cache_done(cache);
+
+		TRACE_LOOKUP(mp, pip, pip, ag, xfs_filestream_peek_ag(mp, ag));
+		err = _xfs_filestream_update_ag(ip, pip, ag);
+
+		goto exit;
+	}
+
+	/*
+	 * Set the starting AG using the rotor for inode32, otherwise
+	 * use the directory inode's AG.
+	 */
+	if (mp->m_flags & XFS_MOUNT_32BITINODES) {
+		rotorstep = xfs_rotorstep;
+		startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount;
+		mp->m_agfrotor = (mp->m_agfrotor + 1) %
+		                 (mp->m_sb.sb_agcount * rotorstep);
+	} else
+		startag = XFS_INO_TO_AGNO(mp, pip->i_ino);
+
+	/* Pick a new AG for the parent inode starting at startag. */
+	err = _xfs_filestream_pick_ag(mp, startag, &ag, 0, 0);
+	if (err || ag == NULLAGNUMBER)
+		goto exit_did_pick;
+
+	/* Associate the parent inode with the AG. */
+	err = _xfs_filestream_update_ag(pip, NULL, ag);
+	if (err)
+		goto exit_did_pick;
+
+	/* Associate the file inode with the AG. */
+	err = _xfs_filestream_update_ag(ip, pip, ag);
+	if (err)
+		goto exit_did_pick;
+
+	TRACE_ASSOCIATE(mp, ip, pip, ag, xfs_filestream_peek_ag(mp, ag));
+
+exit_did_pick:
+	/*
+	 * If _xfs_filestream_pick_ag() returned a valid AG, remove the
+	 * reference it took on it, since the file and directory will have taken
+	 * their own now if they were successfully cached.
+	 */
+	if (ag != NULLAGNUMBER)
+		xfs_filestream_put_ag(mp, ag);
+
+exit:
+	xfs_iunlock(pip, XFS_IOLOCK_EXCL);
+	up_read(&mp->m_peraglock);
+	return -err;
+}
+
+/*
+ * Pick a new allocation group for the current file and its file stream.  This
+ * function is called by xfs_bmap_filestreams() with the mount point's per-ag
+ * lock held.
+ */
+int
+xfs_filestream_new_ag(
+	xfs_bmalloca_t	*ap,
+	xfs_agnumber_t	*agp)
+{
+	int		flags, err;
+	xfs_inode_t	*ip, *pip = NULL;
+	xfs_mount_t	*mp;
+	xfs_mru_cache_t	*cache;
+	xfs_extlen_t	minlen;
+	fstrm_item_t	*dir, *file;
+	xfs_agnumber_t	ag = NULLAGNUMBER;
+
+	ip = ap->ip;
+	mp = ip->i_mount;
+	cache = mp->m_filestream;
+	minlen = ap->alen;
+	*agp = NULLAGNUMBER;
+
+	/*
+	 * Look for the file in the cache, removing it if it's found.  Doing
+	 * this allows it to be held across the dir lookup that follows.
+	 */
+	file = xfs_mru_cache_remove(cache, ip->i_ino);
+	if (file) {
+		ASSERT(ip == file->ip);
+
+		/* Save the file's parent inode and old AG number for later. */
+		pip = file->pip;
+		ag = file->ag;
+
+		/* Look for the file's directory in the cache. */
+		dir = xfs_mru_cache_lookup(cache, pip->i_ino);
+		if (dir) {
+			ASSERT(pip == dir->ip);
+
+			/*
+			 * If the directory has already moved on to a new AG,
+			 * use that AG as the new AG for the file. Don't
+			 * forget to twiddle the AG refcounts to match the
+			 * movement.
+			 */
+			if (dir->ag != file->ag) {
+				xfs_filestream_put_ag(mp, file->ag);
+				xfs_filestream_get_ag(mp, dir->ag);
+				*agp = file->ag = dir->ag;
+			}
+
+			xfs_mru_cache_done(cache);
+		}
+
+		/*
+		 * Put the file back in the cache.  If this fails, the free
+		 * function needs to be called to tidy up in the same way as if
+		 * the item had simply expired from the cache.
+		 */
+		err = xfs_mru_cache_insert(cache, ip->i_ino, file);
+		if (err) {
+			xfs_fstrm_free_func(ip->i_ino, file);
+			return err;
+		}
+
+		/*
+		 * If the file's AG was moved to the directory's new AG, there's
+		 * nothing more to be done.
+		 */
+		if (*agp != NULLAGNUMBER) {
+			TRACE_MOVEAG(mp, ip, pip,
+					ag, xfs_filestream_peek_ag(mp, ag),
+					*agp, xfs_filestream_peek_ag(mp, *agp));
+			return 0;
+		}
+	}
+
+	/*
+	 * If the file's parent directory is known, take its iolock in exclusive
+	 * mode to prevent two sibling files from racing each other to migrate
+	 * themselves and their parent to different AGs.
+	 */
+	if (pip)
+		xfs_ilock(pip, XFS_IOLOCK_EXCL);
+
+	/*
+	 * A new AG needs to be found for the file.  If the file's parent
+	 * directory is also known, it will be moved to the new AG as well to
+	 * ensure that files created inside it in future use the new AG.
+	 */
+	ag = (ag == NULLAGNUMBER) ? 0 : (ag + 1) % mp->m_sb.sb_agcount;
+	flags = (ap->userdata ? XFS_PICK_USERDATA : 0) |
+	        (ap->low ? XFS_PICK_LOWSPACE : 0);
+
+	err = _xfs_filestream_pick_ag(mp, ag, agp, flags, minlen);
+	if (err || *agp == NULLAGNUMBER)
+		goto exit;
+
+	/*
+	 * If the file wasn't found in the file cache, then its parent directory
+	 * inode isn't known.  For this to have happened, the file must either
+	 * be pre-existing, or it was created long enough ago that its cache
+	 * entry has expired.  This isn't the sort of usage that the filestreams
+	 * allocator is trying to optimise, so there's no point trying to track
+	 * its new AG somehow in the filestream data structures.
+	 */
+	if (!pip) {
+		TRACE_ORPHAN(mp, ip, *agp);
+		goto exit;
+	}
+
+	/* Associate the parent inode with the AG. */
+	err = _xfs_filestream_update_ag(pip, NULL, *agp);
+	if (err)
+		goto exit;
+
+	/* Associate the file inode with the AG. */
+	err = _xfs_filestream_update_ag(ip, pip, *agp);
+	if (err)
+		goto exit;
+
+	TRACE_MOVEAG(mp, ip, pip, NULLAGNUMBER, 0,
+			*agp, xfs_filestream_peek_ag(mp, *agp));
+
+exit:
+	/*
+	 * If _xfs_filestream_pick_ag() returned a valid AG, remove the
+	 * reference it took on it, since the file and directory will have taken
+	 * their own now if they were successfully cached.
+	 */
+	if (*agp != NULLAGNUMBER)
+		xfs_filestream_put_ag(mp, *agp);
+	else
+		*agp = 0;
+
+	if (pip)
+		xfs_iunlock(pip, XFS_IOLOCK_EXCL);
+
+	return err;
+}
+
+/*
+ * Remove an association between an inode and a filestream object.
+ * Typically this is done on last close of an unlinked file.
+ */
+void
+xfs_filestream_deassociate(
+	xfs_inode_t	*ip)
+{
+	xfs_mru_cache_t	*cache = ip->i_mount->m_filestream;
+
+	xfs_mru_cache_delete(cache, ip->i_ino);
+}
diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h
new file mode 100644
index 00000000000..f655f7dc334
--- /dev/null
+++ b/fs/xfs/xfs_filestream.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2006-2007 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_FILESTREAM_H__
+#define __XFS_FILESTREAM_H__
+
+#ifdef __KERNEL__
+
+struct xfs_mount;
+struct xfs_inode;
+struct xfs_perag;
+struct xfs_bmalloca;
+
+#ifdef XFS_FILESTREAMS_TRACE
+#define XFS_FSTRM_KTRACE_INFO		1
+#define XFS_FSTRM_KTRACE_AGSCAN		2
+#define XFS_FSTRM_KTRACE_AGPICK1	3
+#define XFS_FSTRM_KTRACE_AGPICK2	4
+#define XFS_FSTRM_KTRACE_UPDATE		5
+#define XFS_FSTRM_KTRACE_FREE		6
+#define	XFS_FSTRM_KTRACE_ITEM_LOOKUP	7
+#define	XFS_FSTRM_KTRACE_ASSOCIATE	8
+#define	XFS_FSTRM_KTRACE_MOVEAG		9
+#define	XFS_FSTRM_KTRACE_ORPHAN		10
+
+#define XFS_FSTRM_KTRACE_SIZE	16384
+extern ktrace_t *xfs_filestreams_trace_buf;
+
+#endif
+
+/*
+ * Allocation group filestream associations are tracked with per-ag atomic
+ * counters.  These counters allow _xfs_filestream_pick_ag() to tell whether a
+ * particular AG already has active filestreams associated with it. The mount
+ * point's m_peraglock is used to protect these counters from per-ag array
+ * re-allocation during a growfs operation.  When xfs_growfs_data_private() is
+ * about to reallocate the array, it calls xfs_filestream_flush() with the
+ * m_peraglock held in write mode.
+ *
+ * Since xfs_mru_cache_flush() guarantees that all the free functions for all
+ * the cache elements have finished executing before it returns, it's safe for
+ * the free functions to use the atomic counters without m_peraglock protection.
+ * This allows the implementation of xfs_fstrm_free_func() to be agnostic about
+ * whether it was called with the m_peraglock held in read mode, write mode or
+ * not held at all.  The race condition this addresses is the following:
+ *
+ *  - The work queue scheduler fires and pulls a filestream directory cache
+ *    element off the LRU end of the cache for deletion, then gets pre-empted.
+ *  - A growfs operation grabs the m_peraglock in write mode, flushes all the
+ *    remaining items from the cache and reallocates the mount point's per-ag
+ *    array, resetting all the counters to zero.
+ *  - The work queue thread resumes and calls the free function for the element
+ *    it started cleaning up earlier.  In the process it decrements the
+ *    filestreams counter for an AG that now has no references.
+ *
+ * With a shrinkfs feature, the above scenario could panic the system.
+ *
+ * All other uses of the following macros should be protected by either the
+ * m_peraglock held in read mode, or the cache's internal locking exposed by the
+ * interval between a call to xfs_mru_cache_lookup() and a call to
+ * xfs_mru_cache_done().  In addition, the m_peraglock must be held in read mode
+ * when new elements are added to the cache.
+ *
+ * Combined, these locking rules ensure that no associations will ever exist in
+ * the cache that reference per-ag array elements that have since been
+ * reallocated.
+ */
+STATIC_INLINE int
+xfs_filestream_peek_ag(
+	xfs_mount_t	*mp,
+	xfs_agnumber_t	agno)
+{
+	return atomic_read(&mp->m_perag[agno].pagf_fstrms);
+}
+
+STATIC_INLINE int
+xfs_filestream_get_ag(
+	xfs_mount_t	*mp,
+	xfs_agnumber_t	agno)
+{
+	return atomic_inc_return(&mp->m_perag[agno].pagf_fstrms);
+}
+
+STATIC_INLINE int
+xfs_filestream_put_ag(
+	xfs_mount_t	*mp,
+	xfs_agnumber_t	agno)
+{
+	return atomic_dec_return(&mp->m_perag[agno].pagf_fstrms);
+}
+
+/* allocation selection flags */
+typedef enum xfs_fstrm_alloc {
+	XFS_PICK_USERDATA = 1,
+	XFS_PICK_LOWSPACE = 2,
+} xfs_fstrm_alloc_t;
+
+/* prototypes for filestream.c */
+int xfs_filestream_init(void);
+void xfs_filestream_uninit(void);
+int xfs_filestream_mount(struct xfs_mount *mp);
+void xfs_filestream_unmount(struct xfs_mount *mp);
+void xfs_filestream_flush(struct xfs_mount *mp);
+xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip);
+int xfs_filestream_associate(struct xfs_inode *dip, struct xfs_inode *ip);
+void xfs_filestream_deassociate(struct xfs_inode *ip);
+int xfs_filestream_new_ag(struct xfs_bmalloca *ap, xfs_agnumber_t *agp);
+
+
+/* filestreams for the inode? */
+STATIC_INLINE int
+xfs_inode_is_filestream(
+	struct xfs_inode	*ip)
+{
+	return (ip->i_mount->m_flags & XFS_MOUNT_FILESTREAMS) ||
+		xfs_iflags_test(ip, XFS_IFILESTREAM) ||
+		(ip->i_d.di_flags & XFS_DIFLAG_FILESTREAM);
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* __XFS_FILESTREAM_H__ */
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 1335449841c..ec3c9c27e0d 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -66,6 +66,7 @@ struct fsxattr {
 #define XFS_XFLAG_EXTSIZE	0x00000800	/* extent size allocator hint */
 #define XFS_XFLAG_EXTSZINHERIT	0x00001000	/* inherit inode extent size */
 #define XFS_XFLAG_NODEFRAG	0x00002000  	/* do not defragment */
+#define XFS_XFLAG_FILESTREAM	0x00004000	/* use filestream allocator */
 #define XFS_XFLAG_HASATTR	0x80000000	/* no DIFLAG for this	*/
 
 /*
@@ -238,6 +239,7 @@ typedef struct xfs_fsop_resblks {
 #define XFS_FSOP_GEOM_FLAGS_LOGV2	0x0100	/* log format version 2	*/
 #define XFS_FSOP_GEOM_FLAGS_SECTOR	0x0200	/* sector sizes >1BB	*/
 #define XFS_FSOP_GEOM_FLAGS_ATTR2	0x0400	/* inline attributes rework */
+#define XFS_FSOP_GEOM_FLAGS_LAZYSB	0x4000	/* lazy superblock counters */
 
 
 /*
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index b599e6be9ec..432e82347ed 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -44,6 +44,7 @@
 #include "xfs_trans_space.h"
 #include "xfs_rtalloc.h"
 #include "xfs_rw.h"
+#include "xfs_filestream.h"
 
 /*
  * File system operations
@@ -94,6 +95,8 @@ xfs_fs_geometry(
 				XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) |
 			(XFS_SB_VERSION_HASSECTOR(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_SECTOR : 0) |
+			(xfs_sb_version_haslazysbcount(&mp->m_sb) ?
+				XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) |
 			(XFS_SB_VERSION_HASATTR2(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_ATTR2 : 0);
 		geo->logsectsize = XFS_SB_VERSION_HASSECTOR(&mp->m_sb) ?
@@ -140,6 +143,8 @@ xfs_growfs_data_private(
 	pct = in->imaxpct;
 	if (nb < mp->m_sb.sb_dblocks || pct < 0 || pct > 100)
 		return XFS_ERROR(EINVAL);
+	if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb)))
+		return error;
 	dpct = pct - mp->m_sb.sb_imax_pct;
 	error = xfs_read_buf(mp, mp->m_ddev_targp,
 			XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
@@ -161,6 +166,7 @@ xfs_growfs_data_private(
 	new = nb - mp->m_sb.sb_dblocks;
 	oagcount = mp->m_sb.sb_agcount;
 	if (nagcount > oagcount) {
+		xfs_filestream_flush(mp);
 		down_write(&mp->m_peraglock);
 		mp->m_perag = kmem_realloc(mp->m_perag,
 			sizeof(xfs_perag_t) * nagcount,
@@ -173,6 +179,7 @@ xfs_growfs_data_private(
 		up_write(&mp->m_peraglock);
 	}
 	tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
+	tp->t_flags |= XFS_TRANS_RESERVE;
 	if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp),
 			XFS_GROWDATA_LOG_RES(mp), 0, 0, 0))) {
 		xfs_trans_cancel(tp, 0);
@@ -328,6 +335,7 @@ xfs_growfs_data_private(
 		be32_add(&agf->agf_length, new);
 		ASSERT(be32_to_cpu(agf->agf_length) ==
 		       be32_to_cpu(agi->agi_length));
+		xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH);
 		/*
 		 * Free the new space.
 		 */
@@ -494,8 +502,9 @@ xfs_reserve_blocks(
 	unsigned long		s;
 
 	/* If inval is null, report current values and return */
-
 	if (inval == (__uint64_t *)NULL) {
+		if (!outval)
+			return EINVAL;
 		outval->resblks = mp->m_resblks;
 		outval->resblks_avail = mp->m_resblks_avail;
 		return 0;
@@ -558,8 +567,10 @@ retry:
 		}
 	}
 out:
-	outval->resblks = mp->m_resblks;
-	outval->resblks_avail = mp->m_resblks_avail;
+	if (outval) {
+		outval->resblks = mp->m_resblks;
+		outval->resblks_avail = mp->m_resblks_avail;
+	}
 	XFS_SB_UNLOCK(mp, s);
 
 	if (fdblks_delta) {
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index b5feb3e7711..f943368c9b9 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -123,6 +123,7 @@ xfs_ialloc_ag_alloc(
 	int		blks_per_cluster;  /* fs blocks per inode cluster */
 	xfs_btree_cur_t	*cur;		/* inode btree cursor */
 	xfs_daddr_t	d;		/* disk addr of buffer */
+	xfs_agnumber_t	agno;
 	int		error;
 	xfs_buf_t	*fbuf;		/* new free inodes' buffer */
 	xfs_dinode_t	*free;		/* new free inode structure */
@@ -302,15 +303,15 @@ xfs_ialloc_ag_alloc(
 	}
 	be32_add(&agi->agi_count, newlen);
 	be32_add(&agi->agi_freecount, newlen);
+	agno = be32_to_cpu(agi->agi_seqno);
 	down_read(&args.mp->m_peraglock);
-	args.mp->m_perag[be32_to_cpu(agi->agi_seqno)].pagi_freecount += newlen;
+	args.mp->m_perag[agno].pagi_freecount += newlen;
 	up_read(&args.mp->m_peraglock);
 	agi->agi_newino = cpu_to_be32(newino);
 	/*
 	 * Insert records describing the new inode chunk into the btree.
 	 */
-	cur = xfs_btree_init_cursor(args.mp, tp, agbp,
-			be32_to_cpu(agi->agi_seqno),
+	cur = xfs_btree_init_cursor(args.mp, tp, agbp, agno,
 			XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
 	for (thisino = newino;
 	     thisino < newino + newlen;
@@ -1387,6 +1388,7 @@ xfs_ialloc_read_agi(
 	pag = &mp->m_perag[agno];
 	if (!pag->pagi_init) {
 		pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
+		pag->pagi_count = be32_to_cpu(agi->agi_count);
 		pag->pagi_init = 1;
 	} else {
 		/*
@@ -1410,3 +1412,23 @@ xfs_ialloc_read_agi(
 	*bpp = bp;
 	return 0;
 }
+
+/*
+ * Read in the agi to initialise the per-ag data in the mount structure
+ */
+int
+xfs_ialloc_pagi_init(
+	xfs_mount_t	*mp,		/* file system mount structure */
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_agnumber_t	agno)		/* allocation group number */
+{
+	xfs_buf_t	*bp = NULL;
+	int		error;
+
+	error = xfs_ialloc_read_agi(mp, tp, agno, &bp);
+	if (error)
+		return error;
+	if (bp)
+		xfs_trans_brelse(tp, bp);
+	return 0;
+}
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h
index 7f5debe1acb..97f4040931c 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/xfs_ialloc.h
@@ -149,6 +149,16 @@ xfs_ialloc_read_agi(
 	xfs_agnumber_t	agno,		/* allocation group number */
 	struct xfs_buf	**bpp);		/* allocation group hdr buf */
 
+/*
+ * Read in the allocation group header to initialise the per-ag data
+ * in the mount structure
+ */
+int
+xfs_ialloc_pagi_init(
+	struct xfs_mount *mp,		/* file system mount structure */
+	struct xfs_trans *tp,		/* transaction pointer */
+        xfs_agnumber_t  agno);		/* allocation group number */
+
 #endif	/* __KERNEL__ */
 
 #endif	/* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 3ca5d43b834..cdc4c28926d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -48,7 +48,9 @@
 #include "xfs_dir2_trace.h"
 #include "xfs_quota.h"
 #include "xfs_acl.h"
+#include "xfs_filestream.h"
 
+#include <linux/log2.h>
 
 kmem_zone_t *xfs_ifork_zone;
 kmem_zone_t *xfs_inode_zone;
@@ -643,8 +645,7 @@ xfs_iformat_extents(
 			ep->l1 = INT_GET(get_unaligned((__uint64_t*)&dp->l1),
 								ARCH_CONVERT);
 		}
-		xfs_bmap_trace_exlist("xfs_iformat_extents", ip, nex,
-			whichfork);
+		XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
 		if (whichfork != XFS_DATA_FORK ||
 			XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
 				if (unlikely(xfs_check_nostate_extents(
@@ -817,6 +818,8 @@ _xfs_dic2xflags(
 			flags |= XFS_XFLAG_EXTSZINHERIT;
 		if (di_flags & XFS_DIFLAG_NODEFRAG)
 			flags |= XFS_XFLAG_NODEFRAG;
+		if (di_flags & XFS_DIFLAG_FILESTREAM)
+			flags |= XFS_XFLAG_FILESTREAM;
 	}
 
 	return flags;
@@ -1074,6 +1077,11 @@ xfs_iread_extents(
  * also returns the [locked] bp pointing to the head of the freelist
  * as ialloc_context.  The caller should hold this buffer across
  * the commit and pass it back into this routine on the second call.
+ *
+ * If we are allocating quota inodes, we do not have a parent inode
+ * to attach to or associate with (i.e. pip == NULL) because they
+ * are not linked into the directory structure - they are attached
+ * directly to the superblock - and so have no parent.
  */
 int
 xfs_ialloc(
@@ -1099,7 +1107,7 @@ xfs_ialloc(
 	 * Call the space management code to pick
 	 * the on-disk inode to be allocated.
 	 */
-	error = xfs_dialloc(tp, pip->i_ino, mode, okalloc,
+	error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
 			    ialloc_context, call_again, &ino);
 	if (error != 0) {
 		return error;
@@ -1150,10 +1158,10 @@ xfs_ialloc(
 	/*
 	 * Project ids won't be stored on disk if we are using a version 1 inode.
 	 */
-	if ( (prid != 0) && (ip->i_d.di_version == XFS_DINODE_VERSION_1))
+	if ((prid != 0) && (ip->i_d.di_version == XFS_DINODE_VERSION_1))
 		xfs_bump_ino_vers2(tp, ip);
 
-	if (XFS_INHERIT_GID(pip, vp->v_vfsp)) {
+	if (pip && XFS_INHERIT_GID(pip, vp->v_vfsp)) {
 		ip->i_d.di_gid = pip->i_d.di_gid;
 		if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) {
 			ip->i_d.di_mode |= S_ISGID;
@@ -1195,8 +1203,16 @@ xfs_ialloc(
 		flags |= XFS_ILOG_DEV;
 		break;
 	case S_IFREG:
+		if (pip && xfs_inode_is_filestream(pip)) {
+			error = xfs_filestream_associate(pip, ip);
+			if (error < 0)
+				return -error;
+			if (!error)
+				xfs_iflags_set(ip, XFS_IFILESTREAM);
+		}
+		/* fall through */
 	case S_IFDIR:
-		if (unlikely(pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
+		if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
 			uint	di_flags = 0;
 
 			if ((mode & S_IFMT) == S_IFDIR) {
@@ -1233,6 +1249,8 @@ xfs_ialloc(
 			if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
 			    xfs_inherit_nodefrag)
 				di_flags |= XFS_DIFLAG_NODEFRAG;
+			if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
+				di_flags |= XFS_DIFLAG_FILESTREAM;
 			ip->i_d.di_flags |= di_flags;
 		}
 		/* FALLTHROUGH */
@@ -2875,9 +2893,6 @@ xfs_iextents_copy(
 	int			copied;
 	xfs_bmbt_rec_t		*dest_ep;
 	xfs_bmbt_rec_t		*ep;
-#ifdef XFS_BMAP_TRACE
-	static char		fname[] = "xfs_iextents_copy";
-#endif
 	int			i;
 	xfs_ifork_t		*ifp;
 	int			nrecs;
@@ -2888,7 +2903,7 @@ xfs_iextents_copy(
 	ASSERT(ifp->if_bytes > 0);
 
 	nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-	xfs_bmap_trace_exlist(fname, ip, nrecs, whichfork);
+	XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
 	ASSERT(nrecs > 0);
 
 	/*
@@ -4184,7 +4199,7 @@ xfs_iext_realloc_direct(
 			ifp->if_bytes = new_size;
 			return;
 		}
-		if ((new_size & (new_size - 1)) != 0) {
+		if (!is_power_of_2(new_size)){
 			rnew_size = xfs_iroundup(new_size);
 		}
 		if (rnew_size != ifp->if_real_bytes) {
@@ -4207,7 +4222,7 @@ xfs_iext_realloc_direct(
 	 */
 	else {
 		new_size += ifp->if_bytes;
-		if ((new_size & (new_size - 1)) != 0) {
+		if (!is_power_of_2(new_size)) {
 			rnew_size = xfs_iroundup(new_size);
 		}
 		xfs_iext_inline_to_direct(ifp, rnew_size);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index f75afecef8e..012dfd4a958 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -379,6 +379,7 @@ xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
 #define XFS_ISTALE	0x0010	/* inode has been staled */
 #define XFS_IRECLAIMABLE 0x0020 /* inode can be reclaimed */
 #define XFS_INEW	0x0040
+#define XFS_IFILESTREAM	0x0080	/* inode is in a filestream directory */
 
 /*
  * Flags for inode locking.
@@ -414,19 +415,22 @@ xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
  * gets a lockdep subclass of 1 and the second lock will have a lockdep
  * subclass of 0.
  *
- * XFS_I[O]LOCK_INUMORDER - for locking several inodes at the some time
+ * XFS_LOCK_INUMORDER - for locking several inodes at the some time
  * with xfs_lock_inodes().  This flag is used as the starting subclass
  * and each subsequent lock acquired will increment the subclass by one.
  * So the first lock acquired will have a lockdep subclass of 2, the
- * second lock will have a lockdep subclass of 3, and so on.
+ * second lock will have a lockdep subclass of 3, and so on. It is
+ * the responsibility of the class builder to shift this to the correct
+ * portion of the lock_mode lockdep mask.
  */
+#define XFS_LOCK_PARENT		1
+#define XFS_LOCK_INUMORDER	2
+
 #define XFS_IOLOCK_SHIFT	16
-#define	XFS_IOLOCK_PARENT	(1 << XFS_IOLOCK_SHIFT)
-#define	XFS_IOLOCK_INUMORDER	(2 << XFS_IOLOCK_SHIFT)
+#define	XFS_IOLOCK_PARENT	(XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
 
 #define XFS_ILOCK_SHIFT		24
-#define	XFS_ILOCK_PARENT	(1 << XFS_ILOCK_SHIFT)
-#define	XFS_ILOCK_INUMORDER	(2 << XFS_ILOCK_SHIFT)
+#define	XFS_ILOCK_PARENT	(XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
 
 #define XFS_IOLOCK_DEP_MASK	0x00ff0000
 #define XFS_ILOCK_DEP_MASK	0xff000000
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 3f2b9f2a7b9..bf57b75acb9 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -451,19 +451,14 @@ xfs_iomap_write_direct(
 		return XFS_ERROR(error);
 
 	rt = XFS_IS_REALTIME_INODE(ip);
-	if (unlikely(rt)) {
-		if (!(extsz = ip->i_d.di_extsize))
-			extsz = mp->m_sb.sb_rextsize;
-	} else {
-		extsz = ip->i_d.di_extsize;
-	}
+	extsz = xfs_get_extsz_hint(ip);
 
 	isize = ip->i_size;
 	if (io->io_new_size > isize)
 		isize = io->io_new_size;
 
-  	offset_fsb = XFS_B_TO_FSBT(mp, offset);
-  	last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
+	offset_fsb = XFS_B_TO_FSBT(mp, offset);
+	last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
 	if ((offset + count) > isize) {
 		error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
 							&last_fsb);
@@ -489,13 +484,13 @@ xfs_iomap_write_direct(
 	if (unlikely(rt)) {
 		resrtextents = qblocks = resaligned;
 		resrtextents /= mp->m_sb.sb_rextsize;
-  		resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
-  		quota_flag = XFS_QMOPT_RES_RTBLKS;
-  	} else {
-  		resrtextents = 0;
+		resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+		quota_flag = XFS_QMOPT_RES_RTBLKS;
+	} else {
+		resrtextents = 0;
 		resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
-  		quota_flag = XFS_QMOPT_RES_REGBLKS;
-  	}
+		quota_flag = XFS_QMOPT_RES_REGBLKS;
+	}
 
 	/*
 	 * Allocate and setup the transaction
@@ -666,13 +661,7 @@ xfs_iomap_write_delay(
 	if (error)
 		return XFS_ERROR(error);
 
-	if (XFS_IS_REALTIME_INODE(ip)) {
-		if (!(extsz = ip->i_d.di_extsize))
-			extsz = mp->m_sb.sb_rextsize;
-	} else {
-		extsz = ip->i_d.di_extsize;
-	}
-
+	extsz = xfs_get_extsz_hint(ip);
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 
 retry:
@@ -788,18 +777,12 @@ xfs_iomap_write_allocate(
 		nimaps = 0;
 		while (nimaps == 0) {
 			tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
+			tp->t_flags |= XFS_TRANS_RESERVE;
 			nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
 			error = xfs_trans_reserve(tp, nres,
 					XFS_WRITE_LOG_RES(mp),
 					0, XFS_TRANS_PERM_LOG_RES,
 					XFS_WRITE_LOG_COUNT);
-			if (error == ENOSPC) {
-				error = xfs_trans_reserve(tp, 0,
-						XFS_WRITE_LOG_RES(mp),
-						0,
-						XFS_TRANS_PERM_LOG_RES,
-						XFS_WRITE_LOG_COUNT);
-			}
 			if (error) {
 				xfs_trans_cancel(tp, 0);
 				return XFS_ERROR(error);
@@ -917,8 +900,8 @@ xfs_iomap_write_unwritten(
 		 * from unwritten to real. Do allocations in a loop until
 		 * we have covered the range passed in.
 		 */
-
 		tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
+		tp->t_flags |= XFS_TRANS_RESERVE;
 		error = xfs_trans_reserve(tp, resblks,
 				XFS_WRITE_LOG_RES(mp), 0,
 				XFS_TRANS_PERM_LOG_RES,
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index e725ddd3de5..4c2454bcc71 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -202,6 +202,16 @@ xfs_bulkstat_one_dinode(
 	return 0;
 }
 
+STATIC int
+xfs_bulkstat_one_fmt(
+	void			__user *ubuffer,
+	const xfs_bstat_t	*buffer)
+{
+	if (copy_to_user(ubuffer, buffer, sizeof(*buffer)))
+		return -EFAULT;
+	return sizeof(*buffer);
+}
+
 /*
  * Return stat information for one inode.
  * Return 0 if ok, else errno.
@@ -221,6 +231,7 @@ xfs_bulkstat_one(
 	xfs_bstat_t	*buf;		/* return buffer */
 	int		error = 0;	/* error value */
 	xfs_dinode_t	*dip;		/* dinode inode pointer */
+	bulkstat_one_fmt_pf formatter = private_data ? : xfs_bulkstat_one_fmt;
 
 	dip = (xfs_dinode_t *)dibuff;
 	*stat = BULKSTAT_RV_NOTHING;
@@ -243,14 +254,15 @@ xfs_bulkstat_one(
 		xfs_bulkstat_one_dinode(mp, ino, dip, buf);
 	}
 
-	if (copy_to_user(buffer, buf, sizeof(*buf)))  {
+	error = formatter(buffer, buf);
+	if (error < 0)  {
 		error = EFAULT;
 		goto out_free;
 	}
 
 	*stat = BULKSTAT_RV_DIDONE;
 	if (ubused)
-		*ubused = sizeof(*buf);
+		*ubused = error;
 
  out_free:
 	kmem_free(buf, sizeof(*buf));
@@ -748,6 +760,19 @@ xfs_bulkstat_single(
 	return 0;
 }
 
+int
+xfs_inumbers_fmt(
+	void			__user *ubuffer, /* buffer to write to */
+	const xfs_inogrp_t	*buffer,	/* buffer to read from */
+	long			count,		/* # of elements to read */
+	long			*written)	/* # of bytes written */
+{
+	if (copy_to_user(ubuffer, buffer, count * sizeof(*buffer)))
+		return -EFAULT;
+	*written = count * sizeof(*buffer);
+	return 0;
+}
+
 /*
  * Return inode number table for the filesystem.
  */
@@ -756,7 +781,8 @@ xfs_inumbers(
 	xfs_mount_t	*mp,		/* mount point for filesystem */
 	xfs_ino_t	*lastino,	/* last inode returned */
 	int		*count,		/* size of buffer/count returned */
-	xfs_inogrp_t	__user *ubuffer)/* buffer with inode descriptions */
+	void		__user *ubuffer,/* buffer with inode descriptions */
+	inumbers_fmt_pf	formatter)
 {
 	xfs_buf_t	*agbp;
 	xfs_agino_t	agino;
@@ -835,12 +861,12 @@ xfs_inumbers(
 		bufidx++;
 		left--;
 		if (bufidx == bcount) {
-			if (copy_to_user(ubuffer, buffer,
-					bufidx * sizeof(*buffer))) {
+			long written;
+			if (formatter(ubuffer, buffer, bufidx, &written)) {
 				error = XFS_ERROR(EFAULT);
 				break;
 			}
-			ubuffer += bufidx;
+			ubuffer += written;
 			*count += bufidx;
 			bufidx = 0;
 		}
@@ -862,8 +888,8 @@ xfs_inumbers(
 	}
 	if (!error) {
 		if (bufidx) {
-			if (copy_to_user(ubuffer, buffer,
-					bufidx * sizeof(*buffer)))
+			long written;
+			if (formatter(ubuffer, buffer, bufidx, &written))
 				error = XFS_ERROR(EFAULT);
 			else
 				*count += bufidx;
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index f25a28862a1..a1f18fce9b7 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -69,6 +69,10 @@ xfs_bulkstat_single(
 	char			__user *buffer,
 	int			*done);
 
+typedef int (*bulkstat_one_fmt_pf)(  /* used size in bytes or negative error */
+	void			__user *ubuffer, /* buffer to write to */
+	const xfs_bstat_t	*buffer);        /* buffer to read from */
+
 int
 xfs_bulkstat_one(
 	xfs_mount_t		*mp,
@@ -86,11 +90,25 @@ xfs_internal_inum(
 	xfs_mount_t		*mp,
 	xfs_ino_t		ino);
 
+typedef int (*inumbers_fmt_pf)(
+	void			__user *ubuffer, /* buffer to write to */
+	const xfs_inogrp_t	*buffer,	/* buffer to read from */
+	long			count,		/* # of elements to read */
+	long			*written);	/* # of bytes written */
+
+int
+xfs_inumbers_fmt(
+	void			__user *ubuffer, /* buffer to write to */
+	const xfs_inogrp_t	*buffer,	/* buffer to read from */
+	long			count,		/* # of elements to read */
+	long			*written);	/* # of bytes written */
+
 int					/* error status */
 xfs_inumbers(
 	xfs_mount_t		*mp,	/* mount point for filesystem */
 	xfs_ino_t		*last,	/* last inode returned */
 	int			*count,	/* size of buffer/count returned */
-	xfs_inogrp_t		__user *buffer);/* buffer with inode info */
+	void			__user *buffer, /* buffer with inode info */
+	inumbers_fmt_pf		formatter);
 
 #endif	/* __XFS_ITABLE_H__ */
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index c48bf61f17b..9d4c4fbeb3e 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -817,10 +817,8 @@ xfs_log_need_covered(xfs_mount_t *mp)
 	SPLDECL(s);
 	int		needed = 0, gen;
 	xlog_t		*log = mp->m_log;
-	bhv_vfs_t	*vfsp = XFS_MTOVFS(mp);
 
-	if (vfs_test_for_freeze(vfsp) || XFS_FORCED_SHUTDOWN(mp) ||
-	    (vfsp->vfs_flag & VFS_RDONLY))
+	if (!xfs_fs_writable(mp))
 		return 0;
 
 	s = LOG_LOCK(log);
@@ -967,14 +965,16 @@ xlog_iodone(xfs_buf_t *bp)
 	} else if (iclog->ic_state & XLOG_STATE_IOERROR) {
 		aborted = XFS_LI_ABORTED;
 	}
+
+	/* log I/O is always issued ASYNC */
+	ASSERT(XFS_BUF_ISASYNC(bp));
 	xlog_state_done_syncing(iclog, aborted);
-	if (!(XFS_BUF_ISASYNC(bp))) {
-		/*
-		 * Corresponding psema() will be done in bwrite().  If we don't
-		 * vsema() here, panic.
-		 */
-		XFS_BUF_V_IODONESEMA(bp);
-	}
+	/*
+	 * do not reference the buffer (bp) here as we could race
+	 * with it being freed after writing the unmount record to the
+	 * log.
+	 */
+
 }	/* xlog_iodone */
 
 /*
@@ -1199,11 +1199,18 @@ xlog_alloc_log(xfs_mount_t	*mp,
 		*iclogp = (xlog_in_core_t *)
 			  kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP);
 		iclog = *iclogp;
-		iclog->hic_data = (xlog_in_core_2_t *)
-			  kmem_zalloc(iclogsize, KM_SLEEP | KM_LARGE);
-
 		iclog->ic_prev = prev_iclog;
 		prev_iclog = iclog;
+
+		bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp);
+		if (!XFS_BUF_CPSEMA(bp))
+			ASSERT(0);
+		XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
+		XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
+		XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
+		iclog->ic_bp = bp;
+		iclog->hic_data = bp->b_addr;
+
 		log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header);
 
 		head = &iclog->ic_header;
@@ -1216,11 +1223,6 @@ xlog_alloc_log(xfs_mount_t	*mp,
 		INT_SET(head->h_fmt, ARCH_CONVERT, XLOG_FMT);
 		memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));
 
-		bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp);
-		XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
-		XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
-		XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
-		iclog->ic_bp = bp;
 
 		iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
 		iclog->ic_state = XLOG_STATE_ACTIVE;
@@ -1432,7 +1434,7 @@ xlog_sync(xlog_t		*log,
 	} else {
 		iclog->ic_bwritecnt = 1;
 	}
-	XFS_BUF_SET_PTR(bp, (xfs_caddr_t) &(iclog->ic_header), count);
+	XFS_BUF_SET_COUNT(bp, count);
 	XFS_BUF_SET_FSPRIVATE(bp, iclog);	/* save for later */
 	XFS_BUF_ZEROFLAGS(bp);
 	XFS_BUF_BUSY(bp);
@@ -1528,7 +1530,6 @@ xlog_dealloc_log(xlog_t *log)
 		}
 #endif
 		next_iclog = iclog->ic_next;
-		kmem_free(iclog->hic_data, log->l_iclog_size);
 		kmem_free(iclog, sizeof(xlog_in_core_t));
 		iclog = next_iclog;
 	}
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 080fabf61c9..fddbb091a86 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -927,6 +927,14 @@ xlog_find_tail(
 			ASSIGN_ANY_LSN_HOST(log->l_last_sync_lsn, log->l_curr_cycle,
 					after_umount_blk);
 			*tail_blk = after_umount_blk;
+
+			/*
+			 * Note that the unmount was clean. If the unmount
+			 * was not clean, we need to know this to rebuild the
+			 * superblock counters from the perag headers if we
+			 * have a filesystem using non-persistent counters.
+			 */
+			log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
 		}
 	}
 
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index a96bde6df96..a66b3980517 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -202,6 +202,27 @@ xfs_mount_free(
 	kmem_free(mp, sizeof(xfs_mount_t));
 }
 
+/*
+ * Check size of device based on the (data/realtime) block count.
+ * Note: this check is used by the growfs code as well as mount.
+ */
+int
+xfs_sb_validate_fsb_count(
+	xfs_sb_t	*sbp,
+	__uint64_t	nblocks)
+{
+	ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
+	ASSERT(sbp->sb_blocklog >= BBSHIFT);
+
+#if XFS_BIG_BLKNOS     /* Limited by ULONG_MAX of page cache index */
+	if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
+		return E2BIG;
+#else                  /* Limited by UINT_MAX of sectors */
+	if (nblocks << (sbp->sb_blocklog - BBSHIFT) > UINT_MAX)
+		return E2BIG;
+#endif
+	return 0;
+}
 
 /*
  * Check the validity of the SB found.
@@ -284,18 +305,8 @@ xfs_mount_validate_sb(
 		return XFS_ERROR(EFSCORRUPTED);
 	}
 
-	ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
-	ASSERT(sbp->sb_blocklog >= BBSHIFT);
-
-#if XFS_BIG_BLKNOS     /* Limited by ULONG_MAX of page cache index */
-	if (unlikely(
-	    (sbp->sb_dblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX ||
-	    (sbp->sb_rblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX)) {
-#else                  /* Limited by UINT_MAX of sectors */
-	if (unlikely(
-	    (sbp->sb_dblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX ||
-	    (sbp->sb_rblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX)) {
-#endif
+	if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
+	    xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
 		xfs_fs_mount_cmn_err(flags,
 			"file system too large to be mounted on this system.");
 		return XFS_ERROR(E2BIG);
@@ -632,6 +643,64 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
 					sbp->sb_inopblock);
 	mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
 }
+
+/*
+ * xfs_initialize_perag_data
+ *
+ * Read in each per-ag structure so we can count up the number of
+ * allocated inodes, free inodes and used filesystem blocks as this
+ * information is no longer persistent in the superblock. Once we have
+ * this information, write it into the in-core superblock structure.
+ */
+STATIC int
+xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount)
+{
+	xfs_agnumber_t	index;
+	xfs_perag_t	*pag;
+	xfs_sb_t	*sbp = &mp->m_sb;
+	uint64_t	ifree = 0;
+	uint64_t	ialloc = 0;
+	uint64_t	bfree = 0;
+	uint64_t	bfreelst = 0;
+	uint64_t	btree = 0;
+	int		error;
+	int		s;
+
+	for (index = 0; index < agcount; index++) {
+		/*
+		 * read the agf, then the agi. This gets us
+		 * all the inforamtion we need and populates the
+		 * per-ag structures for us.
+		 */
+		error = xfs_alloc_pagf_init(mp, NULL, index, 0);
+		if (error)
+			return error;
+
+		error = xfs_ialloc_pagi_init(mp, NULL, index);
+		if (error)
+			return error;
+		pag = &mp->m_perag[index];
+		ifree += pag->pagi_freecount;
+		ialloc += pag->pagi_count;
+		bfree += pag->pagf_freeblks;
+		bfreelst += pag->pagf_flcount;
+		btree += pag->pagf_btreeblks;
+	}
+	/*
+	 * Overwrite incore superblock counters with just-read data
+	 */
+	s = XFS_SB_LOCK(mp);
+	sbp->sb_ifree = ifree;
+	sbp->sb_icount = ialloc;
+	sbp->sb_fdblocks = bfree + bfreelst + btree;
+	XFS_SB_UNLOCK(mp, s);
+
+	/* Fixup the per-cpu counters as well. */
+	xfs_icsb_reinit_counters(mp);
+
+	return 0;
+}
+
 /*
  * xfs_mountfs
  *
@@ -656,7 +725,7 @@ xfs_mountfs(
 	bhv_vnode_t	*rvp = NULL;
 	int		readio_log, writeio_log;
 	xfs_daddr_t	d;
-	__uint64_t	ret64;
+	__uint64_t	resblks;
 	__int64_t	update_flags;
 	uint		quotamount, quotaflags;
 	int		agno;
@@ -773,6 +842,7 @@ xfs_mountfs(
 	 */
 	if ((mfsi_flags & XFS_MFSI_SECOND) == 0 &&
 	    (mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
+		__uint64_t	ret64;
 		if (xfs_uuid_mount(mp)) {
 			error = XFS_ERROR(EINVAL);
 			goto error1;
@@ -976,6 +1046,34 @@ xfs_mountfs(
 	}
 
 	/*
+	 * Now the log is mounted, we know if it was an unclean shutdown or
+	 * not. If it was, with the first phase of recovery has completed, we
+	 * have consistent AG blocks on disk. We have not recovered EFIs yet,
+	 * but they are recovered transactionally in the second recovery phase
+	 * later.
+	 *
+	 * Hence we can safely re-initialise incore superblock counters from
+	 * the per-ag data. These may not be correct if the filesystem was not
+	 * cleanly unmounted, so we need to wait for recovery to finish before
+	 * doing this.
+	 *
+	 * If the filesystem was cleanly unmounted, then we can trust the
+	 * values in the superblock to be correct and we don't need to do
+	 * anything here.
+	 *
+	 * If we are currently making the filesystem, the initialisation will
+	 * fail as the perag data is in an undefined state.
+	 */
+
+	if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
+	    !XFS_LAST_UNMOUNT_WAS_CLEAN(mp) &&
+	     !mp->m_sb.sb_inprogress) {
+		error = xfs_initialize_perag_data(mp, sbp->sb_agcount);
+		if (error) {
+			goto error2;
+		}
+	}
+	/*
 	 * Get and sanity-check the root inode.
 	 * Save the pointer to it in the mount structure.
 	 */
@@ -1044,6 +1142,23 @@ xfs_mountfs(
 	if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags)))
 		goto error4;
 
+	/*
+	 * Now we are mounted, reserve a small amount of unused space for
+	 * privileged transactions. This is needed so that transaction
+	 * space required for critical operations can dip into this pool
+	 * when at ENOSPC. This is needed for operations like create with
+	 * attr, unwritten extent conversion at ENOSPC, etc. Data allocations
+	 * are not allowed to use this reserved space.
+	 *
+	 * We default to 5% or 1024 fsbs of space reserved, whichever is smaller.
+	 * This may drive us straight to ENOSPC on mount, but that implies
+	 * we were already there on the last unmount.
+	 */
+	resblks = mp->m_sb.sb_dblocks;
+	do_div(resblks, 20);
+	resblks = min_t(__uint64_t, resblks, 1024);
+	xfs_reserve_blocks(mp, &resblks, NULL);
+
 	return 0;
 
  error4:
@@ -1083,7 +1198,19 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
 #if defined(DEBUG) || defined(INDUCE_IO_ERROR)
 	int64_t		fsid;
 #endif
+	__uint64_t	resblks;
 
+	/*
+	 * We can potentially deadlock here if we have an inode cluster
+	 * that has been freed has it's buffer still pinned in memory because
+	 * the transaction is still sitting in a iclog. The stale inodes
+	 * on that buffer will have their flush locks held until the
+	 * transaction hits the disk and the callbacks run. the inode
+	 * flush takes the flush lock unconditionally and with nothing to
+	 * push out the iclog we will never get that unlocked. hence we
+	 * need to force the log first.
+	 */
+	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
 	xfs_iflush_all(mp);
 
 	XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
@@ -1100,10 +1227,26 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
 		xfs_binval(mp->m_rtdev_targp);
 	}
 
-	xfs_unmountfs_writesb(mp);
+	/*
+	 * Unreserve any blocks we have so that when we unmount we don't account
+	 * the reserved free space as used. This is really only necessary for
+	 * lazy superblock counting because it trusts the incore superblock
+	 * counters to be aboslutely correct on clean unmount.
+	 *
+	 * We don't bother correcting this elsewhere for lazy superblock
+	 * counting because on mount of an unclean filesystem we reconstruct the
+	 * correct counter value and this is irrelevant.
+	 *
+	 * For non-lazy counter filesystems, this doesn't matter at all because
+	 * we only every apply deltas to the superblock and hence the incore
+	 * value does not matter....
+	 */
+	resblks = 0;
+	xfs_reserve_blocks(mp, &resblks, NULL);
 
+	xfs_log_sbcount(mp, 1);
+	xfs_unmountfs_writesb(mp);
 	xfs_unmountfs_wait(mp); 		/* wait for async bufs */
-
 	xfs_log_unmount(mp);			/* Done! No more fs ops. */
 
 	xfs_freesb(mp);
@@ -1150,6 +1293,62 @@ xfs_unmountfs_wait(xfs_mount_t *mp)
 }
 
 int
+xfs_fs_writable(xfs_mount_t *mp)
+{
+	bhv_vfs_t	*vfsp = XFS_MTOVFS(mp);
+
+	return !(vfs_test_for_freeze(vfsp) || XFS_FORCED_SHUTDOWN(mp) ||
+		(vfsp->vfs_flag & VFS_RDONLY));
+}
+
+/*
+ * xfs_log_sbcount
+ *
+ * Called either periodically to keep the on disk superblock values
+ * roughly up to date or from unmount to make sure the values are
+ * correct on a clean unmount.
+ *
+ * Note this code can be called during the process of freezing, so
+ * we may need to use the transaction allocator which does not not
+ * block when the transaction subsystem is in its frozen state.
+ */
+int
+xfs_log_sbcount(
+	xfs_mount_t	*mp,
+	uint		sync)
+{
+	xfs_trans_t	*tp;
+	int		error;
+
+	if (!xfs_fs_writable(mp))
+		return 0;
+
+	xfs_icsb_sync_counters(mp);
+
+	/*
+	 * we don't need to do this if we are updating the superblock
+	 * counters on every modification.
+	 */
+	if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
+		return 0;
+
+	tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT);
+	error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
+					XFS_DEFAULT_LOG_COUNT);
+	if (error) {
+		xfs_trans_cancel(tp, 0);
+		return error;
+	}
+
+	xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS);
+	if (sync)
+		xfs_trans_set_sync(tp);
+	xfs_trans_commit(tp, 0);
+
+	return 0;
+}
+
+int
 xfs_unmountfs_writesb(xfs_mount_t *mp)
 {
 	xfs_buf_t	*sbp;
@@ -1160,16 +1359,15 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
 	 * skip superblock write if fs is read-only, or
 	 * if we are doing a forced umount.
 	 */
-	sbp = xfs_getsb(mp, 0);
 	if (!(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY ||
 		XFS_FORCED_SHUTDOWN(mp))) {
 
-		xfs_icsb_sync_counters(mp);
+		sbp = xfs_getsb(mp, 0);
+ 		sb = XFS_BUF_TO_SBP(sbp);
 
 		/*
 		 * mark shared-readonly if desired
 		 */
-		sb = XFS_BUF_TO_SBP(sbp);
 		if (mp->m_mk_sharedro) {
 			if (!(sb->sb_flags & XFS_SBF_READONLY))
 				sb->sb_flags |= XFS_SBF_READONLY;
@@ -1178,6 +1376,7 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
 			xfs_fs_cmn_err(CE_NOTE, mp,
 				"Unmounting, marking shared read-only");
 		}
+
 		XFS_BUF_UNDONE(sbp);
 		XFS_BUF_UNREAD(sbp);
 		XFS_BUF_UNDELAYWRITE(sbp);
@@ -1192,8 +1391,8 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
 					  mp, sbp, XFS_BUF_ADDR(sbp));
 		if (error && mp->m_mk_sharedro)
 			xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting.  Filesystem may not be marked shared readonly");
+		xfs_buf_relse(sbp);
 	}
-	xfs_buf_relse(sbp);
 	return error;
 }
 
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 82304b94646..76ad7475869 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -66,6 +66,7 @@ struct xfs_bmbt_irec;
 struct xfs_bmap_free;
 struct xfs_extdelta;
 struct xfs_swapext;
+struct xfs_mru_cache;
 
 extern struct bhv_vfsops xfs_vfsops;
 extern struct bhv_vnodeops xfs_vnodeops;
@@ -424,17 +425,18 @@ typedef struct xfs_mount {
 	struct notifier_block	m_icsb_notifier; /* hotplug cpu notifier */
 	struct mutex		m_icsb_mutex;	/* balancer sync lock */
 #endif
+	struct xfs_mru_cache	*m_filestream;  /* per-mount filestream data */
 } xfs_mount_t;
 
 /*
  * Flags for m_flags.
  */
-#define	XFS_MOUNT_WSYNC		(1ULL << 0)	/* for nfs - all metadata ops
+#define XFS_MOUNT_WSYNC		(1ULL << 0)	/* for nfs - all metadata ops
 						   must be synchronous except
 						   for space allocations */
-#define	XFS_MOUNT_INO64		(1ULL << 1)
+#define XFS_MOUNT_INO64		(1ULL << 1)
 			     /* (1ULL << 2)	-- currently unused */
-			     /* (1ULL << 3)	-- currently unused */
+#define XFS_MOUNT_WAS_CLEAN	(1ULL << 3)
 #define XFS_MOUNT_FS_SHUTDOWN	(1ULL << 4)	/* atomic stop of all filesystem
 						   operations, typically for
 						   disk errors in metadata */
@@ -463,6 +465,8 @@ typedef struct xfs_mount {
 						 * I/O size in stat() */
 #define XFS_MOUNT_NO_PERCPU_SB	(1ULL << 23)	/* don't use per-cpu superblock
 						   counters */
+#define XFS_MOUNT_FILESTREAMS	(1ULL << 24)	/* enable the filestreams
+						   allocator */
 
 
 /*
@@ -511,6 +515,8 @@ xfs_preferred_iosize(xfs_mount_t *mp)
 
 #define XFS_MAXIOFFSET(mp)	((mp)->m_maxioffset)
 
+#define XFS_LAST_UNMOUNT_WAS_CLEAN(mp)	\
+				((mp)->m_flags & XFS_MOUNT_WAS_CLEAN)
 #define XFS_FORCED_SHUTDOWN(mp)	((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN)
 #define xfs_force_shutdown(m,f)	\
 	bhv_vfs_force_shutdown((XFS_MTOVFS(m)), f, __FILE__, __LINE__)
@@ -602,6 +608,7 @@ typedef struct xfs_mod_sb {
 
 extern xfs_mount_t *xfs_mount_init(void);
 extern void	xfs_mod_sb(xfs_trans_t *, __int64_t);
+extern int	xfs_log_sbcount(xfs_mount_t *, uint);
 extern void	xfs_mount_free(xfs_mount_t *mp, int remove_bhv);
 extern int	xfs_mountfs(struct bhv_vfs *, xfs_mount_t *mp, int);
 extern void	xfs_mountfs_check_barriers(xfs_mount_t *mp);
@@ -618,12 +625,14 @@ extern int	xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
 extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
 extern int	xfs_readsb(xfs_mount_t *, int);
 extern void	xfs_freesb(xfs_mount_t *);
+extern int	xfs_fs_writable(xfs_mount_t *);
 extern void	xfs_do_force_shutdown(bhv_desc_t *, int, char *, int);
 extern int	xfs_syncsub(xfs_mount_t *, int, int *);
 extern int	xfs_sync_inodes(xfs_mount_t *, int, int *);
 extern xfs_agnumber_t	xfs_initialize_perag(struct bhv_vfs *, xfs_mount_t *,
 						xfs_agnumber_t);
 extern void	xfs_xlatesb(void *, struct xfs_sb *, int, __int64_t);
+extern int	xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
 
 extern struct xfs_dmops xfs_dmcore_stub;
 extern struct xfs_qmops xfs_qmcore_stub;
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
new file mode 100644
index 00000000000..7deb9e3cbbd
--- /dev/null
+++ b/fs/xfs/xfs_mru_cache.c
@@ -0,0 +1,608 @@
+/*
+ * Copyright (c) 2006-2007 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_mru_cache.h"
+
+/*
+ * The MRU Cache data structure consists of a data store, an array of lists and
+ * a lock to protect its internal state.  At initialisation time, the client
+ * supplies an element lifetime in milliseconds and a group count, as well as a
+ * function pointer to call when deleting elements.  A data structure for
+ * queueing up work in the form of timed callbacks is also included.
+ *
+ * The group count controls how many lists are created, and thereby how finely
+ * the elements are grouped in time.  When reaping occurs, all the elements in
+ * all the lists whose time has expired are deleted.
+ *
+ * To give an example of how this works in practice, consider a client that
+ * initialises an MRU Cache with a lifetime of ten seconds and a group count of
+ * five.  Five internal lists will be created, each representing a two second
+ * period in time.  When the first element is added, time zero for the data
+ * structure is initialised to the current time.
+ *
+ * All the elements added in the first two seconds are appended to the first
+ * list.  Elements added in the third second go into the second list, and so on.
+ * If an element is accessed at any point, it is removed from its list and
+ * inserted at the head of the current most-recently-used list.
+ *
+ * The reaper function will have nothing to do until at least twelve seconds
+ * have elapsed since the first element was added.  The reason for this is that
+ * if it were called at t=11s, there could be elements in the first list that
+ * have only been inactive for nine seconds, so it still does nothing.  If it is
+ * called anywhere between t=12 and t=14 seconds, it will delete all the
+ * elements that remain in the first list.  It's therefore possible for elements
+ * to remain in the data store even after they've been inactive for up to
+ * (t + t/g) seconds, where t is the inactive element lifetime and g is the
+ * number of groups.
+ *
+ * The above example assumes that the reaper function gets called at least once
+ * every (t/g) seconds.  If it is called less frequently, unused elements will
+ * accumulate in the reap list until the reaper function is eventually called.
+ * The current implementation uses work queue callbacks to carefully time the
+ * reaper function calls, so this should happen rarely, if at all.
+ *
+ * From a design perspective, the primary reason for the choice of a list array
+ * representing discrete time intervals is that it's only practical to reap
+ * expired elements in groups of some appreciable size.  This automatically
+ * introduces a granularity to element lifetimes, so there's no point storing an
+ * individual timeout with each element that specifies a more precise reap time.
+ * The bonus is a saving of sizeof(long) bytes of memory per element stored.
+ *
+ * The elements could have been stored in just one list, but an array of
+ * counters or pointers would need to be maintained to allow them to be divided
+ * up into discrete time groups.  More critically, the process of touching or
+ * removing an element would involve walking large portions of the entire list,
+ * which would have a detrimental effect on performance.  The additional memory
+ * requirement for the array of list heads is minimal.
+ *
+ * When an element is touched or deleted, it needs to be removed from its
+ * current list.  Doubly linked lists are used to make the list maintenance
+ * portion of these operations O(1).  Since reaper timing can be imprecise,
+ * inserts and lookups can occur when there are no free lists available.  When
+ * this happens, all the elements on the LRU list need to be migrated to the end
+ * of the reap list.  To keep the list maintenance portion of these operations
+ * O(1) also, list tails need to be accessible without walking the entire list.
+ * This is the reason why doubly linked list heads are used.
+ */
+
+/*
+ * An MRU Cache is a dynamic data structure that stores its elements in a way
+ * that allows efficient lookups, but also groups them into discrete time
+ * intervals based on insertion time.  This allows elements to be efficiently
+ * and automatically reaped after a fixed period of inactivity.
+ *
+ * When a client data pointer is stored in the MRU Cache it needs to be added to
+ * both the data store and to one of the lists.  It must also be possible to
+ * access each of these entries via the other, i.e. to:
+ *
+ *    a) Walk a list, removing the corresponding data store entry for each item.
+ *    b) Look up a data store entry, then access its list entry directly.
+ *
+ * To achieve both of these goals, each entry must contain both a list entry and
+ * a key, in addition to the user's data pointer.  Note that it's not a good
+ * idea to have the client embed one of these structures at the top of their own
+ * data structure, because inserting the same item more than once would most
+ * likely result in a loop in one of the lists.  That's a sure-fire recipe for
+ * an infinite loop in the code.
+ */
+typedef struct xfs_mru_cache_elem
+{
+	struct list_head list_node;
+	unsigned long	key;
+	void		*value;
+} xfs_mru_cache_elem_t;
+
+static kmem_zone_t		*xfs_mru_elem_zone;
+static struct workqueue_struct	*xfs_mru_reap_wq;
+
+/*
+ * When inserting, destroying or reaping, it's first necessary to update the
+ * lists relative to a particular time.  In the case of destroying, that time
+ * will be well in the future to ensure that all items are moved to the reap
+ * list.  In all other cases though, the time will be the current time.
+ *
+ * This function enters a loop, moving the contents of the LRU list to the reap
+ * list again and again until either a) the lists are all empty, or b) time zero
+ * has been advanced sufficiently to be within the immediate element lifetime.
+ *
+ * Case a) above is detected by counting how many groups are migrated and
+ * stopping when they've all been moved.  Case b) is detected by monitoring the
+ * time_zero field, which is updated as each group is migrated.
+ *
+ * The return value is the earliest time that more migration could be needed, or
+ * zero if there's no need to schedule more work because the lists are empty.
+ */
+STATIC unsigned long
+_xfs_mru_cache_migrate(
+	xfs_mru_cache_t	*mru,
+	unsigned long	now)
+{
+	unsigned int	grp;
+	unsigned int	migrated = 0;
+	struct list_head *lru_list;
+
+	/* Nothing to do if the data store is empty. */
+	if (!mru->time_zero)
+		return 0;
+
+	/* While time zero is older than the time spanned by all the lists. */
+	while (mru->time_zero <= now - mru->grp_count * mru->grp_time) {
+
+		/*
+		 * If the LRU list isn't empty, migrate its elements to the tail
+		 * of the reap list.
+		 */
+		lru_list = mru->lists + mru->lru_grp;
+		if (!list_empty(lru_list))
+			list_splice_init(lru_list, mru->reap_list.prev);
+
+		/*
+		 * Advance the LRU group number, freeing the old LRU list to
+		 * become the new MRU list; advance time zero accordingly.
+		 */
+		mru->lru_grp = (mru->lru_grp + 1) % mru->grp_count;
+		mru->time_zero += mru->grp_time;
+
+		/*
+		 * If reaping is so far behind that all the elements on all the
+		 * lists have been migrated to the reap list, it's now empty.
+		 */
+		if (++migrated == mru->grp_count) {
+			mru->lru_grp = 0;
+			mru->time_zero = 0;
+			return 0;
+		}
+	}
+
+	/* Find the first non-empty list from the LRU end. */
+	for (grp = 0; grp < mru->grp_count; grp++) {
+
+		/* Check the grp'th list from the LRU end. */
+		lru_list = mru->lists + ((mru->lru_grp + grp) % mru->grp_count);
+		if (!list_empty(lru_list))
+			return mru->time_zero +
+			       (mru->grp_count + grp) * mru->grp_time;
+	}
+
+	/* All the lists must be empty. */
+	mru->lru_grp = 0;
+	mru->time_zero = 0;
+	return 0;
+}
+
+/*
+ * When inserting or doing a lookup, an element needs to be inserted into the
+ * MRU list.  The lists must be migrated first to ensure that they're
+ * up-to-date, otherwise the new element could be given a shorter lifetime in
+ * the cache than it should.
+ */
+STATIC void
+_xfs_mru_cache_list_insert(
+	xfs_mru_cache_t		*mru,
+	xfs_mru_cache_elem_t	*elem)
+{
+	unsigned int	grp = 0;
+	unsigned long	now = jiffies;
+
+	/*
+	 * If the data store is empty, initialise time zero, leave grp set to
+	 * zero and start the work queue timer if necessary.  Otherwise, set grp
+	 * to the number of group times that have elapsed since time zero.
+	 */
+	if (!_xfs_mru_cache_migrate(mru, now)) {
+		mru->time_zero = now;
+		if (!mru->next_reap)
+			mru->next_reap = mru->grp_count * mru->grp_time;
+	} else {
+		grp = (now - mru->time_zero) / mru->grp_time;
+		grp = (mru->lru_grp + grp) % mru->grp_count;
+	}
+
+	/* Insert the element at the tail of the corresponding list. */
+	list_add_tail(&elem->list_node, mru->lists + grp);
+}
+
+/*
+ * When destroying or reaping, all the elements that were migrated to the reap
+ * list need to be deleted.  For each element this involves removing it from the
+ * data store, removing it from the reap list, calling the client's free
+ * function and deleting the element from the element zone.
+ */
+STATIC void
+_xfs_mru_cache_clear_reap_list(
+	xfs_mru_cache_t		*mru)
+{
+	xfs_mru_cache_elem_t	*elem, *next;
+	struct list_head	tmp;
+
+	INIT_LIST_HEAD(&tmp);
+	list_for_each_entry_safe(elem, next, &mru->reap_list, list_node) {
+
+		/* Remove the element from the data store. */
+		radix_tree_delete(&mru->store, elem->key);
+
+		/*
+		 * remove to temp list so it can be freed without
+		 * needing to hold the lock
+		 */
+		list_move(&elem->list_node, &tmp);
+	}
+	mutex_spinunlock(&mru->lock, 0);
+
+	list_for_each_entry_safe(elem, next, &tmp, list_node) {
+
+		/* Remove the element from the reap list. */
+		list_del_init(&elem->list_node);
+
+		/* Call the client's free function with the key and value pointer. */
+		mru->free_func(elem->key, elem->value);
+
+		/* Free the element structure. */
+		kmem_zone_free(xfs_mru_elem_zone, elem);
+	}
+
+	mutex_spinlock(&mru->lock);
+}
+
+/*
+ * We fire the reap timer every group expiry interval so
+ * we always have a reaper ready to run. This makes shutdown
+ * and flushing of the reaper easy to do. Hence we need to
+ * keep when the next reap must occur so we can determine
+ * at each interval whether there is anything we need to do.
+ */
+STATIC void
+_xfs_mru_cache_reap(
+	struct work_struct	*work)
+{
+	xfs_mru_cache_t		*mru = container_of(work, xfs_mru_cache_t, work.work);
+	unsigned long		now;
+
+	ASSERT(mru && mru->lists);
+	if (!mru || !mru->lists)
+		return;
+
+	mutex_spinlock(&mru->lock);
+	now = jiffies;
+	if (mru->reap_all ||
+	    (mru->next_reap && time_after(now, mru->next_reap))) {
+		if (mru->reap_all)
+			now += mru->grp_count * mru->grp_time * 2;
+		mru->next_reap = _xfs_mru_cache_migrate(mru, now);
+		_xfs_mru_cache_clear_reap_list(mru);
+	}
+
+	/*
+	 * the process that triggered the reap_all is responsible
+	 * for restating the periodic reap if it is required.
+	 */
+	if (!mru->reap_all)
+		queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
+	mru->reap_all = 0;
+	mutex_spinunlock(&mru->lock, 0);
+}
+
+int
+xfs_mru_cache_init(void)
+{
+	xfs_mru_elem_zone = kmem_zone_init(sizeof(xfs_mru_cache_elem_t),
+	                                 "xfs_mru_cache_elem");
+	if (!xfs_mru_elem_zone)
+		return ENOMEM;
+
+	xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache");
+	if (!xfs_mru_reap_wq) {
+		kmem_zone_destroy(xfs_mru_elem_zone);
+		return ENOMEM;
+	}
+
+	return 0;
+}
+
+void
+xfs_mru_cache_uninit(void)
+{
+	destroy_workqueue(xfs_mru_reap_wq);
+	kmem_zone_destroy(xfs_mru_elem_zone);
+}
+
+/*
+ * To initialise a struct xfs_mru_cache pointer, call xfs_mru_cache_create()
+ * with the address of the pointer, a lifetime value in milliseconds, a group
+ * count and a free function to use when deleting elements.  This function
+ * returns 0 if the initialisation was successful.
+ */
+int
+xfs_mru_cache_create(
+	xfs_mru_cache_t		**mrup,
+	unsigned int		lifetime_ms,
+	unsigned int		grp_count,
+	xfs_mru_cache_free_func_t free_func)
+{
+	xfs_mru_cache_t	*mru = NULL;
+	int		err = 0, grp;
+	unsigned int	grp_time;
+
+	if (mrup)
+		*mrup = NULL;
+
+	if (!mrup || !grp_count || !lifetime_ms || !free_func)
+		return EINVAL;
+
+	if (!(grp_time = msecs_to_jiffies(lifetime_ms) / grp_count))
+		return EINVAL;
+
+	if (!(mru = kmem_zalloc(sizeof(*mru), KM_SLEEP)))
+		return ENOMEM;
+
+	/* An extra list is needed to avoid reaping up to a grp_time early. */
+	mru->grp_count = grp_count + 1;
+	mru->lists = kmem_alloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP);
+
+	if (!mru->lists) {
+		err = ENOMEM;
+		goto exit;
+	}
+
+	for (grp = 0; grp < mru->grp_count; grp++)
+		INIT_LIST_HEAD(mru->lists + grp);
+
+	/*
+	 * We use GFP_KERNEL radix tree preload and do inserts under a
+	 * spinlock so GFP_ATOMIC is appropriate for the radix tree itself.
+	 */
+	INIT_RADIX_TREE(&mru->store, GFP_ATOMIC);
+	INIT_LIST_HEAD(&mru->reap_list);
+	spinlock_init(&mru->lock, "xfs_mru_cache");
+	INIT_DELAYED_WORK(&mru->work, _xfs_mru_cache_reap);
+
+	mru->grp_time  = grp_time;
+	mru->free_func = free_func;
+
+	/* start up the reaper event */
+	mru->next_reap = 0;
+	mru->reap_all = 0;
+	queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
+
+	*mrup = mru;
+
+exit:
+	if (err && mru && mru->lists)
+		kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists));
+	if (err && mru)
+		kmem_free(mru, sizeof(*mru));
+
+	return err;
+}
+
+/*
+ * Call xfs_mru_cache_flush() to flush out all cached entries, calling their
+ * free functions as they're deleted.  When this function returns, the caller is
+ * guaranteed that all the free functions for all the elements have finished
+ * executing.
+ *
+ * While we are flushing, we stop the periodic reaper event from triggering.
+ * Normally, we want to restart this periodic event, but if we are shutting
+ * down the cache we do not want it restarted. hence the restart parameter
+ * where 0 = do not restart reaper and 1 = restart reaper.
+ */
+void
+xfs_mru_cache_flush(
+	xfs_mru_cache_t		*mru,
+	int			restart)
+{
+	if (!mru || !mru->lists)
+		return;
+
+	cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work);
+
+	mutex_spinlock(&mru->lock);
+	mru->reap_all = 1;
+	mutex_spinunlock(&mru->lock, 0);
+
+	queue_work(xfs_mru_reap_wq, &mru->work.work);
+	flush_workqueue(xfs_mru_reap_wq);
+
+	mutex_spinlock(&mru->lock);
+	WARN_ON_ONCE(mru->reap_all != 0);
+	mru->reap_all = 0;
+	if (restart)
+		queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
+	mutex_spinunlock(&mru->lock, 0);
+}
+
+void
+xfs_mru_cache_destroy(
+	xfs_mru_cache_t		*mru)
+{
+	if (!mru || !mru->lists)
+		return;
+
+	/* we don't want the reaper to restart here */
+	xfs_mru_cache_flush(mru, 0);
+
+	kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists));
+	kmem_free(mru, sizeof(*mru));
+}
+
+/*
+ * To insert an element, call xfs_mru_cache_insert() with the data store, the
+ * element's key and the client data pointer.  This function returns 0 on
+ * success or ENOMEM if memory for the data element couldn't be allocated.
+ */
+int
+xfs_mru_cache_insert(
+	xfs_mru_cache_t	*mru,
+	unsigned long	key,
+	void		*value)
+{
+	xfs_mru_cache_elem_t *elem;
+
+	ASSERT(mru && mru->lists);
+	if (!mru || !mru->lists)
+		return EINVAL;
+
+	elem = kmem_zone_zalloc(xfs_mru_elem_zone, KM_SLEEP);
+	if (!elem)
+		return ENOMEM;
+
+	if (radix_tree_preload(GFP_KERNEL)) {
+		kmem_zone_free(xfs_mru_elem_zone, elem);
+		return ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&elem->list_node);
+	elem->key = key;
+	elem->value = value;
+
+	mutex_spinlock(&mru->lock);
+
+	radix_tree_insert(&mru->store, key, elem);
+	radix_tree_preload_end();
+	_xfs_mru_cache_list_insert(mru, elem);
+
+	mutex_spinunlock(&mru->lock, 0);
+
+	return 0;
+}
+
+/*
+ * To remove an element without calling the free function, call
+ * xfs_mru_cache_remove() with the data store and the element's key.  On success
+ * the client data pointer for the removed element is returned, otherwise this
+ * function will return a NULL pointer.
+ */
+void *
+xfs_mru_cache_remove(
+	xfs_mru_cache_t	*mru,
+	unsigned long	key)
+{
+	xfs_mru_cache_elem_t *elem;
+	void		*value = NULL;
+
+	ASSERT(mru && mru->lists);
+	if (!mru || !mru->lists)
+		return NULL;
+
+	mutex_spinlock(&mru->lock);
+	elem = radix_tree_delete(&mru->store, key);
+	if (elem) {
+		value = elem->value;
+		list_del(&elem->list_node);
+	}
+
+	mutex_spinunlock(&mru->lock, 0);
+
+	if (elem)
+		kmem_zone_free(xfs_mru_elem_zone, elem);
+
+	return value;
+}
+
+/*
+ * To remove and element and call the free function, call xfs_mru_cache_delete()
+ * with the data store and the element's key.
+ */
+void
+xfs_mru_cache_delete(
+	xfs_mru_cache_t	*mru,
+	unsigned long	key)
+{
+	void		*value = xfs_mru_cache_remove(mru, key);
+
+	if (value)
+		mru->free_func(key, value);
+}
+
+/*
+ * To look up an element using its key, call xfs_mru_cache_lookup() with the
+ * data store and the element's key.  If found, the element will be moved to the
+ * head of the MRU list to indicate that it's been touched.
+ *
+ * The internal data structures are protected by a spinlock that is STILL HELD
+ * when this function returns.  Call xfs_mru_cache_done() to release it.  Note
+ * that it is not safe to call any function that might sleep in the interim.
+ *
+ * The implementation could have used reference counting to avoid this
+ * restriction, but since most clients simply want to get, set or test a member
+ * of the returned data structure, the extra per-element memory isn't warranted.
+ *
+ * If the element isn't found, this function returns NULL and the spinlock is
+ * released.  xfs_mru_cache_done() should NOT be called when this occurs.
+ */
+void *
+xfs_mru_cache_lookup(
+	xfs_mru_cache_t	*mru,
+	unsigned long	key)
+{
+	xfs_mru_cache_elem_t *elem;
+
+	ASSERT(mru && mru->lists);
+	if (!mru || !mru->lists)
+		return NULL;
+
+	mutex_spinlock(&mru->lock);
+	elem = radix_tree_lookup(&mru->store, key);
+	if (elem) {
+		list_del(&elem->list_node);
+		_xfs_mru_cache_list_insert(mru, elem);
+	}
+	else
+		mutex_spinunlock(&mru->lock, 0);
+
+	return elem ? elem->value : NULL;
+}
+
+/*
+ * To look up an element using its key, but leave its location in the internal
+ * lists alone, call xfs_mru_cache_peek().  If the element isn't found, this
+ * function returns NULL.
+ *
+ * See the comments above the declaration of the xfs_mru_cache_lookup() function
+ * for important locking information pertaining to this call.
+ */
+void *
+xfs_mru_cache_peek(
+	xfs_mru_cache_t	*mru,
+	unsigned long	key)
+{
+	xfs_mru_cache_elem_t *elem;
+
+	ASSERT(mru && mru->lists);
+	if (!mru || !mru->lists)
+		return NULL;
+
+	mutex_spinlock(&mru->lock);
+	elem = radix_tree_lookup(&mru->store, key);
+	if (!elem)
+		mutex_spinunlock(&mru->lock, 0);
+
+	return elem ? elem->value : NULL;
+}
+
+/*
+ * To release the internal data structure spinlock after having performed an
+ * xfs_mru_cache_lookup() or an xfs_mru_cache_peek(), call xfs_mru_cache_done()
+ * with the data store pointer.
+ */
+void
+xfs_mru_cache_done(
+	xfs_mru_cache_t	*mru)
+{
+	mutex_spinunlock(&mru->lock, 0);
+}
diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h
new file mode 100644
index 00000000000..624fd10ee8e
--- /dev/null
+++ b/fs/xfs/xfs_mru_cache.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2006-2007 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_MRU_CACHE_H__
+#define __XFS_MRU_CACHE_H__
+
+
+/* Function pointer type for callback to free a client's data pointer. */
+typedef void (*xfs_mru_cache_free_func_t)(unsigned long, void*);
+
+typedef struct xfs_mru_cache
+{
+	struct radix_tree_root	store;     /* Core storage data structure.  */
+	struct list_head	*lists;    /* Array of lists, one per grp.  */
+	struct list_head	reap_list; /* Elements overdue for reaping. */
+	spinlock_t		lock;      /* Lock to protect this struct.  */
+	unsigned int		grp_count; /* Number of discrete groups.    */
+	unsigned int		grp_time;  /* Time period spanned by grps.  */
+	unsigned int		lru_grp;   /* Group containing time zero.   */
+	unsigned long		time_zero; /* Time first element was added. */
+	unsigned long		next_reap; /* Time that the reaper should
+					      next do something. */
+	unsigned int		reap_all;  /* if set, reap all lists */
+	xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */
+	struct delayed_work	work;      /* Workqueue data for reaping.   */
+} xfs_mru_cache_t;
+
+int xfs_mru_cache_init(void);
+void xfs_mru_cache_uninit(void);
+int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms,
+			     unsigned int grp_count,
+			     xfs_mru_cache_free_func_t free_func);
+void xfs_mru_cache_flush(xfs_mru_cache_t *mru, int restart);
+void xfs_mru_cache_destroy(struct xfs_mru_cache *mru);
+int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key,
+				void *value);
+void * xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key);
+void xfs_mru_cache_delete(struct xfs_mru_cache *mru, unsigned long key);
+void *xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key);
+void *xfs_mru_cache_peek(struct xfs_mru_cache *mru, unsigned long key);
+void xfs_mru_cache_done(struct xfs_mru_cache *mru);
+
+#endif /* __XFS_MRU_CACHE_H__ */
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index b3a5f07bd07..47082c01872 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1882,11 +1882,13 @@ xfs_growfs_rt(
 	    (nrblocks = in->newblocks) <= sbp->sb_rblocks ||
 	    (sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize)))
 		return XFS_ERROR(EINVAL);
+	if ((error = xfs_sb_validate_fsb_count(sbp, nrblocks)))
+		return error;
 	/*
 	 * Read in the last block of the device, make sure it exists.
 	 */
 	error = xfs_read_buf(mp, mp->m_rtdev_targp,
-			XFS_FSB_TO_BB(mp, in->newblocks - 1),
+			XFS_FSB_TO_BB(mp, nrblocks - 1),
 			XFS_FSB_TO_BB(mp, 1), 0, &bp);
 	if (error)
 		return error;
diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h
index 188b296ff50..fcf28dbded7 100644
--- a/fs/xfs/xfs_rw.h
+++ b/fs/xfs/xfs_rw.h
@@ -72,6 +72,34 @@ xfs_fsb_to_db_io(struct xfs_iocore *io, xfs_fsblock_t fsb)
 }
 
 /*
+ * Flags for xfs_free_eofblocks
+ */
+#define XFS_FREE_EOF_LOCK	(1<<0)
+#define XFS_FREE_EOF_NOLOCK	(1<<1)
+
+
+/*
+ * helper function to extract extent size hint from inode
+ */
+STATIC_INLINE xfs_extlen_t
+xfs_get_extsz_hint(
+	xfs_inode_t	*ip)
+{
+	xfs_extlen_t	extsz;
+
+	if (unlikely(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
+		extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
+				? ip->i_d.di_extsize
+				: ip->i_mount->m_sb.sb_rextsize;
+		ASSERT(extsz);
+	} else {
+		extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
+				? ip->i_d.di_extsize : 0;
+	}
+	return extsz;
+}
+
+/*
  * Prototypes for functions in xfs_rw.c.
  */
 extern int xfs_write_clear_setuid(struct xfs_inode *ip);
@@ -91,10 +119,12 @@ extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp,
 extern int xfs_rwlock(bhv_desc_t *bdp, bhv_vrwlock_t write_lock);
 extern void xfs_rwunlock(bhv_desc_t *bdp, bhv_vrwlock_t write_lock);
 extern int xfs_setattr(bhv_desc_t *, bhv_vattr_t *vap, int flags,
-		       cred_t *credp);
+			cred_t *credp);
 extern int xfs_change_file_space(bhv_desc_t *bdp, int cmd, xfs_flock64_t *bf,
-				 xfs_off_t offset, cred_t *credp, int flags);
+			xfs_off_t offset, cred_t *credp, int flags);
 extern int xfs_set_dmattrs(bhv_desc_t *bdp, u_int evmask, u_int16_t state,
-			   cred_t *credp);
+			cred_t *credp);
+extern int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip,
+			int flags);
 
 #endif /* __XFS_RW_H__ */
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index 467854b45c8..ef42537a607 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -74,12 +74,13 @@ struct xfs_mount;
  */
 #define XFS_SB_VERSION2_REALFBITS	0x00ffffff	/* Mask: features */
 #define XFS_SB_VERSION2_RESERVED1BIT	0x00000001
-#define XFS_SB_VERSION2_RESERVED2BIT	0x00000002
+#define XFS_SB_VERSION2_LAZYSBCOUNTBIT	0x00000002	/* Superblk counters */
 #define XFS_SB_VERSION2_RESERVED4BIT	0x00000004
 #define XFS_SB_VERSION2_ATTR2BIT	0x00000008	/* Inline attr rework */
 
 #define	XFS_SB_VERSION2_OKREALFBITS	\
-	(XFS_SB_VERSION2_ATTR2BIT)
+	(XFS_SB_VERSION2_LAZYSBCOUNTBIT	| \
+	 XFS_SB_VERSION2_ATTR2BIT)
 #define	XFS_SB_VERSION2_OKSASHFBITS	\
 	(0)
 #define XFS_SB_VERSION2_OKREALBITS	\
@@ -181,6 +182,9 @@ typedef enum {
 #define XFS_SB_SHARED_VN	XFS_SB_MVAL(SHARED_VN)
 #define XFS_SB_UNIT		XFS_SB_MVAL(UNIT)
 #define XFS_SB_WIDTH		XFS_SB_MVAL(WIDTH)
+#define XFS_SB_ICOUNT		XFS_SB_MVAL(ICOUNT)
+#define XFS_SB_IFREE		XFS_SB_MVAL(IFREE)
+#define XFS_SB_FDBLOCKS		XFS_SB_MVAL(FDBLOCKS)
 #define XFS_SB_FEATURES2	XFS_SB_MVAL(FEATURES2)
 #define	XFS_SB_NUM_BITS		((int)XFS_SBS_FIELDCOUNT)
 #define	XFS_SB_ALL_BITS		((1LL << XFS_SB_NUM_BITS) - 1)
@@ -188,7 +192,7 @@ typedef enum {
 	(XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \
 	 XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \
 	 XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \
-	 XFS_SB_FEATURES2)
+	 XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2)
 
 
 /*
@@ -414,6 +418,12 @@ static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
  *	 ((sbp)->sb_features2 & XFS_SB_VERSION2_FUNBIT)
  */
 
+static inline int xfs_sb_version_haslazysbcount(xfs_sb_t *sbp)
+{
+	return (XFS_SB_VERSION_HASMOREBITS(sbp) &&	\
+		((sbp)->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT));
+}
+
 #define XFS_SB_VERSION_HASATTR2(sbp)	xfs_sb_version_hasattr2(sbp)
 static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp)
 {
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index cc2d60951e2..356d6627f58 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -427,6 +427,14 @@ undo_blocks:
  *
  * Mark the transaction structure to indicate that the superblock
  * needs to be updated before committing.
+ *
+ * Because we may not be keeping track of allocated/free inodes and
+ * used filesystem blocks in the superblock, we do not mark the
+ * superblock dirty in this transaction if we modify these fields.
+ * We still need to update the transaction deltas so that they get
+ * applied to the incore superblock, but we don't want them to
+ * cause the superblock to get locked and logged if these are the
+ * only fields in the superblock that the transaction modifies.
  */
 void
 xfs_trans_mod_sb(
@@ -434,13 +442,19 @@ xfs_trans_mod_sb(
 	uint		field,
 	int64_t		delta)
 {
+	uint32_t	flags = (XFS_TRANS_DIRTY|XFS_TRANS_SB_DIRTY);
+	xfs_mount_t	*mp = tp->t_mountp;
 
 	switch (field) {
 	case XFS_TRANS_SB_ICOUNT:
 		tp->t_icount_delta += delta;
+		if (xfs_sb_version_haslazysbcount(&mp->m_sb))
+			flags &= ~XFS_TRANS_SB_DIRTY;
 		break;
 	case XFS_TRANS_SB_IFREE:
 		tp->t_ifree_delta += delta;
+		if (xfs_sb_version_haslazysbcount(&mp->m_sb))
+			flags &= ~XFS_TRANS_SB_DIRTY;
 		break;
 	case XFS_TRANS_SB_FDBLOCKS:
 		/*
@@ -453,6 +467,8 @@ xfs_trans_mod_sb(
 			ASSERT(tp->t_blk_res_used <= tp->t_blk_res);
 		}
 		tp->t_fdblocks_delta += delta;
+		if (xfs_sb_version_haslazysbcount(&mp->m_sb))
+			flags &= ~XFS_TRANS_SB_DIRTY;
 		break;
 	case XFS_TRANS_SB_RES_FDBLOCKS:
 		/*
@@ -462,6 +478,8 @@ xfs_trans_mod_sb(
 		 */
 		ASSERT(delta < 0);
 		tp->t_res_fdblocks_delta += delta;
+		if (xfs_sb_version_haslazysbcount(&mp->m_sb))
+			flags &= ~XFS_TRANS_SB_DIRTY;
 		break;
 	case XFS_TRANS_SB_FREXTENTS:
 		/*
@@ -515,7 +533,7 @@ xfs_trans_mod_sb(
 		return;
 	}
 
-	tp->t_flags |= (XFS_TRANS_SB_DIRTY | XFS_TRANS_DIRTY);
+	tp->t_flags |= flags;
 }
 
 /*
@@ -544,18 +562,23 @@ xfs_trans_apply_sb_deltas(
 	       (tp->t_ag_freeblks_delta + tp->t_ag_flist_delta +
 		tp->t_ag_btree_delta));
 
-	if (tp->t_icount_delta != 0) {
-		INT_MOD(sbp->sb_icount, ARCH_CONVERT, tp->t_icount_delta);
-	}
-	if (tp->t_ifree_delta != 0) {
-		INT_MOD(sbp->sb_ifree, ARCH_CONVERT, tp->t_ifree_delta);
-	}
+	/*
+	 * Only update the superblock counters if we are logging them
+	 */
+	if (!xfs_sb_version_haslazysbcount(&(tp->t_mountp->m_sb))) {
+		if (tp->t_icount_delta != 0) {
+			INT_MOD(sbp->sb_icount, ARCH_CONVERT, tp->t_icount_delta);
+		}
+		if (tp->t_ifree_delta != 0) {
+			INT_MOD(sbp->sb_ifree, ARCH_CONVERT, tp->t_ifree_delta);
+		}
 
-	if (tp->t_fdblocks_delta != 0) {
-		INT_MOD(sbp->sb_fdblocks, ARCH_CONVERT, tp->t_fdblocks_delta);
-	}
-	if (tp->t_res_fdblocks_delta != 0) {
-		INT_MOD(sbp->sb_fdblocks, ARCH_CONVERT, tp->t_res_fdblocks_delta);
+		if (tp->t_fdblocks_delta != 0) {
+			INT_MOD(sbp->sb_fdblocks, ARCH_CONVERT, tp->t_fdblocks_delta);
+		}
+		if (tp->t_res_fdblocks_delta != 0) {
+			INT_MOD(sbp->sb_fdblocks, ARCH_CONVERT, tp->t_res_fdblocks_delta);
+		}
 	}
 
 	if (tp->t_frextents_delta != 0) {
@@ -615,11 +638,23 @@ xfs_trans_apply_sb_deltas(
 }
 
 /*
- * xfs_trans_unreserve_and_mod_sb() is called to release unused
- * reservations and apply superblock counter changes to the in-core
- * superblock.
+ * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations
+ * and apply superblock counter changes to the in-core superblock.  The
+ * t_res_fdblocks_delta and t_res_frextents_delta fields are explicitly NOT
+ * applied to the in-core superblock.  The idea is that that has already been
+ * done.
  *
  * This is done efficiently with a single call to xfs_mod_incore_sb_batch().
+ * However, we have to ensure that we only modify each superblock field only
+ * once because the application of the delta values may not be atomic. That can
+ * lead to ENOSPC races occurring if we have two separate modifcations of the
+ * free space counter to put back the entire reservation and then take away
+ * what we used.
+ *
+ * If we are not logging superblock counters, then the inode allocated/free and
+ * used block counts are not updated in the on disk superblock. In this case,
+ * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we
+ * still need to update the incore superblock with the changes.
  */
 STATIC void
 xfs_trans_unreserve_and_mod_sb(
@@ -627,40 +662,49 @@ xfs_trans_unreserve_and_mod_sb(
 {
 	xfs_mod_sb_t	msb[14];	/* If you add cases, add entries */
 	xfs_mod_sb_t	*msbp;
+	xfs_mount_t	*mp = tp->t_mountp;
 	/* REFERENCED */
 	int		error;
 	int		rsvd;
+	int64_t		blkdelta = 0;
+	int64_t		rtxdelta = 0;
 
 	msbp = msb;
 	rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
 
-	/*
-	 * Release any reserved blocks.  Any that were allocated
-	 * will be taken back again by fdblocks_delta below.
-	 */
-	if (tp->t_blk_res > 0) {
+	/* calculate free blocks delta */
+	if (tp->t_blk_res > 0)
+		blkdelta = tp->t_blk_res;
+
+	if ((tp->t_fdblocks_delta != 0) &&
+	    (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
+	     (tp->t_flags & XFS_TRANS_SB_DIRTY)))
+	        blkdelta += tp->t_fdblocks_delta;
+
+	if (blkdelta != 0) {
 		msbp->msb_field = XFS_SBS_FDBLOCKS;
-		msbp->msb_delta = tp->t_blk_res;
+		msbp->msb_delta = blkdelta;
 		msbp++;
 	}
 
-	/*
-	 * Release any reserved real time extents .  Any that were
-	 * allocated will be taken back again by frextents_delta below.
-	 */
-	if (tp->t_rtx_res > 0) {
+	/* calculate free realtime extents delta */
+	if (tp->t_rtx_res > 0)
+		rtxdelta = tp->t_rtx_res;
+
+	if ((tp->t_frextents_delta != 0) &&
+	    (tp->t_flags & XFS_TRANS_SB_DIRTY))
+		rtxdelta += tp->t_frextents_delta;
+
+	if (rtxdelta != 0) {
 		msbp->msb_field = XFS_SBS_FREXTENTS;
-		msbp->msb_delta = tp->t_rtx_res;
+		msbp->msb_delta = rtxdelta;
 		msbp++;
 	}
 
-	/*
-	 * Apply any superblock modifications to the in-core version.
-	 * The t_res_fdblocks_delta and t_res_frextents_delta fields are
-	 * explicitly NOT applied to the in-core superblock.
-	 * The idea is that that has already been done.
-	 */
-	if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
+	/* apply remaining deltas */
+
+	if (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
+	     (tp->t_flags & XFS_TRANS_SB_DIRTY)) {
 		if (tp->t_icount_delta != 0) {
 			msbp->msb_field = XFS_SBS_ICOUNT;
 			msbp->msb_delta = tp->t_icount_delta;
@@ -671,16 +715,9 @@ xfs_trans_unreserve_and_mod_sb(
 			msbp->msb_delta = tp->t_ifree_delta;
 			msbp++;
 		}
-		if (tp->t_fdblocks_delta != 0) {
-			msbp->msb_field = XFS_SBS_FDBLOCKS;
-			msbp->msb_delta = tp->t_fdblocks_delta;
-			msbp++;
-		}
-		if (tp->t_frextents_delta != 0) {
-			msbp->msb_field = XFS_SBS_FREXTENTS;
-			msbp->msb_delta = tp->t_frextents_delta;
-			msbp++;
-		}
+	}
+
+	if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
 		if (tp->t_dblocks_delta != 0) {
 			msbp->msb_field = XFS_SBS_DBLOCKS;
 			msbp->msb_delta = tp->t_dblocks_delta;
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 7dfcc450366..0e26e729023 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -94,7 +94,8 @@ typedef struct xfs_trans_header {
 #define	XFS_TRANS_GROWFSRT_ZERO		38
 #define	XFS_TRANS_GROWFSRT_FREE		39
 #define	XFS_TRANS_SWAPEXT		40
-#define	XFS_TRANS_TYPE_MAX		40
+#define	XFS_TRANS_SB_COUNT		41
+#define	XFS_TRANS_TYPE_MAX		41
 /* new transaction types need to be reflected in xfs_logprint(8) */
 
 
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 65c561201cb..11f5ea29a03 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -51,6 +51,8 @@
 #include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_clnt.h"
+#include "xfs_mru_cache.h"
+#include "xfs_filestream.h"
 #include "xfs_fsops.h"
 
 STATIC int	xfs_sync(bhv_desc_t *, int, cred_t *);
@@ -81,6 +83,8 @@ xfs_init(void)
 	xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
 	xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
 	xfs_acl_zone_init(xfs_acl_zone, "xfs_acl");
+	xfs_mru_cache_init();
+	xfs_filestream_init();
 
 	/*
 	 * The size of the zone allocated buf log item is the maximum
@@ -164,6 +168,8 @@ xfs_cleanup(void)
 	xfs_cleanup_procfs();
 	xfs_sysctl_unregister();
 	xfs_refcache_destroy();
+	xfs_filestream_uninit();
+	xfs_mru_cache_uninit();
 	xfs_acl_zone_destroy(xfs_acl_zone);
 
 #ifdef XFS_DIR2_TRACE
@@ -320,6 +326,9 @@ xfs_start_flags(
 	else
 		mp->m_flags &= ~XFS_MOUNT_BARRIER;
 
+	if (ap->flags2 & XFSMNT2_FILESTREAMS)
+		mp->m_flags |= XFS_MOUNT_FILESTREAMS;
+
 	return 0;
 }
 
@@ -518,6 +527,9 @@ xfs_mount(
 	if (mp->m_flags & XFS_MOUNT_BARRIER)
 		xfs_mountfs_check_barriers(mp);
 
+	if ((error = xfs_filestream_mount(mp)))
+		goto error2;
+
 	error = XFS_IOINIT(vfsp, args, flags);
 	if (error)
 		goto error2;
@@ -575,6 +587,13 @@ xfs_unmount(
 	 */
 	xfs_refcache_purge_mp(mp);
 
+	/*
+	 * Blow away any referenced inode in the filestreams cache.
+	 * This can and will cause log traffic as inodes go inactive
+	 * here.
+	 */
+	xfs_filestream_unmount(mp);
+
 	XFS_bflush(mp->m_ddev_targp);
 	error = xfs_unmount_flush(mp, 0);
 	if (error)
@@ -640,7 +659,7 @@ xfs_quiesce_fs(
 	 * we can write the unmount record.
 	 */
 	do {
-		xfs_syncsub(mp, SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT, NULL);
+		xfs_syncsub(mp, SYNC_INODE_QUIESCE, NULL);
 		pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
 		if (!pincount) {
 			delay(50);
@@ -651,6 +670,30 @@ xfs_quiesce_fs(
 	return 0;
 }
 
+/*
+ * Second stage of a quiesce. The data is already synced, now we have to take
+ * care of the metadata. New transactions are already blocked, so we need to
+ * wait for any remaining transactions to drain out before proceding.
+ */
+STATIC void
+xfs_attr_quiesce(
+	xfs_mount_t	*mp)
+{
+	/* wait for all modifications to complete */
+	while (atomic_read(&mp->m_active_trans) > 0)
+		delay(100);
+
+	/* flush inodes and push all remaining buffers out to disk */
+	xfs_quiesce_fs(mp);
+
+	ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
+
+	/* Push the superblock and write an unmount record */
+	xfs_log_sbcount(mp, 1);
+	xfs_log_unmount_write(mp);
+	xfs_unmountfs_writesb(mp);
+}
+
 STATIC int
 xfs_mntupdate(
 	bhv_desc_t			*bdp,
@@ -670,10 +713,9 @@ xfs_mntupdate(
 			mp->m_flags &= ~XFS_MOUNT_BARRIER;
 		}
 	} else if (!(vfsp->vfs_flag & VFS_RDONLY)) {	/* rw -> ro */
-		bhv_vfs_sync(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL);
-		xfs_quiesce_fs(mp);
-		xfs_log_unmount_write(mp);
-		xfs_unmountfs_writesb(mp);
+		xfs_filestream_flush(mp);
+		bhv_vfs_sync(vfsp, SYNC_DATA_QUIESCE, NULL);
+		xfs_attr_quiesce(mp);
 		vfsp->vfs_flag |= VFS_RDONLY;
 	}
 	return 0;
@@ -887,6 +929,9 @@ xfs_sync(
 {
 	xfs_mount_t	*mp = XFS_BHVTOM(bdp);
 
+	if (flags & SYNC_IOWAIT)
+		xfs_filestream_flush(mp);
+
 	return xfs_syncsub(mp, flags, NULL);
 }
 
@@ -1128,58 +1173,41 @@ xfs_sync_inodes(
 		 * in the inode list.
 		 */
 
-		if ((flags & SYNC_CLOSE)  && (vp != NULL)) {
-			/*
-			 * This is the shutdown case.  We just need to
-			 * flush and invalidate all the pages associated
-			 * with the inode.  Drop the inode lock since
-			 * we can't hold it across calls to the buffer
-			 * cache.
-			 *
-			 * We don't set the VREMAPPING bit in the vnode
-			 * here, because we don't hold the vnode lock
-			 * exclusively.  It doesn't really matter, though,
-			 * because we only come here when we're shutting
-			 * down anyway.
-			 */
-			xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
-			if (XFS_FORCED_SHUTDOWN(mp)) {
-				bhv_vop_toss_pages(vp, 0, -1, FI_REMAPF);
-			} else {
-				error = bhv_vop_flushinval_pages(vp, 0, -1, FI_REMAPF);
+		/*
+		 * If we have to flush data or wait for I/O completion
+		 * we need to drop the ilock that we currently hold.
+		 * If we need to drop the lock, insert a marker if we
+		 * have not already done so.
+		 */
+		if ((flags & (SYNC_CLOSE|SYNC_IOWAIT)) ||
+		    ((flags & SYNC_DELWRI) && VN_DIRTY(vp))) {
+			if (mount_locked) {
+				IPOINTER_INSERT(ip, mp);
 			}
+			xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
-			xfs_ilock(ip, XFS_ILOCK_SHARED);
-
-		} else if ((flags & SYNC_DELWRI) && (vp != NULL)) {
-			if (VN_DIRTY(vp)) {
-				/* We need to have dropped the lock here,
-				 * so insert a marker if we have not already
-				 * done so.
-				 */
-				if (mount_locked) {
-					IPOINTER_INSERT(ip, mp);
-				}
-
-				/*
-				 * Drop the inode lock since we can't hold it
-				 * across calls to the buffer cache.
-				 */
-				xfs_iunlock(ip, XFS_ILOCK_SHARED);
+			if (flags & SYNC_CLOSE) {
+				/* Shutdown case. Flush and invalidate. */
+				if (XFS_FORCED_SHUTDOWN(mp))
+					bhv_vop_toss_pages(vp, 0, -1, FI_REMAPF);
+				else
+					error = bhv_vop_flushinval_pages(vp, 0,
+								-1, FI_REMAPF);
+			} else if ((flags & SYNC_DELWRI) && VN_DIRTY(vp)) {
 				error = bhv_vop_flush_pages(vp, (xfs_off_t)0,
 							-1, fflag, FI_NONE);
-				xfs_ilock(ip, XFS_ILOCK_SHARED);
 			}
 
+			/*
+			 * When freezing, we need to wait ensure all I/O (including direct
+			 * I/O) is complete to ensure no further data modification can take
+			 * place after this point
+			 */
+			if (flags & SYNC_IOWAIT)
+				vn_iowait(vp);
+
+			xfs_ilock(ip, XFS_ILOCK_SHARED);
 		}
-		/*
-		 * When freezing, we need to wait ensure all I/O (including direct
-		 * I/O) is complete to ensure no further data modification can take
-		 * place after this point
-		 */
-		if (flags & SYNC_IOWAIT)
-			vn_iowait(vp);
 
 		if (flags & SYNC_BDFLUSH) {
 			if ((flags & SYNC_ATTR) &&
@@ -1514,6 +1542,15 @@ xfs_syncsub(
 	}
 
 	/*
+	 * If asked, update the disk superblock with incore counter values if we
+	 * are using non-persistent counters so that they don't get too far out
+	 * of sync if we crash or get a forced shutdown. We don't want to force
+	 * this to disk, just get a transaction into the iclogs....
+	 */
+	if (flags & SYNC_SUPER)
+		xfs_log_sbcount(mp, 0);
+
+	/*
 	 * Now check to see if the log needs a "dummy" transaction.
 	 */
 
@@ -1645,6 +1682,7 @@ xfs_vget(
 					 * in stat(). */
 #define MNTOPT_ATTR2	"attr2"		/* do use attr2 attribute format */
 #define MNTOPT_NOATTR2	"noattr2"	/* do not use attr2 attribute format */
+#define MNTOPT_FILESTREAM  "filestreams" /* use filestreams allocator */
 
 STATIC unsigned long
 suffix_strtoul(char *s, char **endp, unsigned int base)
@@ -1831,6 +1869,8 @@ xfs_parseargs(
 			args->flags |= XFSMNT_ATTR2;
 		} else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
 			args->flags &= ~XFSMNT_ATTR2;
+		} else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
+			args->flags2 |= XFSMNT2_FILESTREAMS;
 		} else if (!strcmp(this_char, "osyncisdsync")) {
 			/* no-op, this is now the default */
 			cmn_err(CE_WARN,
@@ -1959,9 +1999,9 @@ xfs_showargs(
 }
 
 /*
- * Second stage of a freeze. The data is already frozen, now we have to take
- * care of the metadata. New transactions are already blocked, so we need to
- * wait for any remaining transactions to drain out before proceding.
+ * Second stage of a freeze. The data is already frozen so we only
+ * need to take care of themetadata. Once that's done write a dummy
+ * record to dirty the log in case of a crash while frozen.
  */
 STATIC void
 xfs_freeze(
@@ -1969,18 +2009,7 @@ xfs_freeze(
 {
 	xfs_mount_t	*mp = XFS_BHVTOM(bdp);
 
-	/* wait for all modifications to complete */
-	while (atomic_read(&mp->m_active_trans) > 0)
-		delay(100);
-
-	/* flush inodes and push all remaining buffers out to disk */
-	xfs_quiesce_fs(mp);
-
-	ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
-
-	/* Push the superblock and write an unmount record */
-	xfs_log_unmount_write(mp);
-	xfs_unmountfs_writesb(mp);
+	xfs_attr_quiesce(mp);
 	xfs_fs_log_dummy(mp);
 }
 
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index de17aed578f..79b522779aa 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -51,6 +51,7 @@
 #include "xfs_refcache.h"
 #include "xfs_trans_space.h"
 #include "xfs_log_priv.h"
+#include "xfs_filestream.h"
 
 STATIC int
 xfs_open(
@@ -77,36 +78,6 @@ xfs_open(
 	return 0;
 }
 
-STATIC int
-xfs_close(
-	bhv_desc_t	*bdp,
-	int		flags,
-	lastclose_t	lastclose,
-	cred_t		*credp)
-{
-	bhv_vnode_t	*vp = BHV_TO_VNODE(bdp);
-	xfs_inode_t	*ip = XFS_BHVTOI(bdp);
-
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-		return XFS_ERROR(EIO);
-
-	if (lastclose != L_TRUE || !VN_ISREG(vp))
-		return 0;
-
-	/*
-	 * If we previously truncated this file and removed old data in
-	 * the process, we want to initiate "early" writeout on the last
-	 * close.  This is an attempt to combat the notorious NULL files
-	 * problem which is particularly noticable from a truncate down,
-	 * buffered (re-)write (delalloc), followed by a crash.  What we
-	 * are effectively doing here is significantly reducing the time
-	 * window where we'd otherwise be exposed to that problem.
-	 */
-	if (VUNTRUNCATE(vp) && VN_DIRTY(vp) && ip->i_delayed_blks > 0)
-		return bhv_vop_flush_pages(vp, 0, -1, XFS_B_ASYNC, FI_NONE);
-	return 0;
-}
-
 /*
  * xfs_getattr
  */
@@ -183,9 +154,8 @@ xfs_getattr(
 			 * realtime extent size or the realtime volume's
 			 * extent size.
 			 */
-			vap->va_blocksize = ip->i_d.di_extsize ?
-				(ip->i_d.di_extsize << mp->m_sb.sb_blocklog) :
-				(mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog);
+			vap->va_blocksize =
+				xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
 		}
 		break;
 	}
@@ -814,6 +784,8 @@ xfs_setattr(
 				di_flags |= XFS_DIFLAG_PROJINHERIT;
 			if (vap->va_xflags & XFS_XFLAG_NODEFRAG)
 				di_flags |= XFS_DIFLAG_NODEFRAG;
+			if (vap->va_xflags & XFS_XFLAG_FILESTREAM)
+				di_flags |= XFS_DIFLAG_FILESTREAM;
 			if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
 				if (vap->va_xflags & XFS_XFLAG_RTINHERIT)
 					di_flags |= XFS_DIFLAG_RTINHERIT;
@@ -1201,13 +1173,15 @@ xfs_fsync(
 }
 
 /*
- * This is called by xfs_inactive to free any blocks beyond eof,
- * when the link count isn't zero.
+ * This is called by xfs_inactive to free any blocks beyond eof
+ * when the link count isn't zero and by xfs_dm_punch_hole() when
+ * punching a hole to EOF.
  */
-STATIC int
-xfs_inactive_free_eofblocks(
+int
+xfs_free_eofblocks(
 	xfs_mount_t	*mp,
-	xfs_inode_t	*ip)
+	xfs_inode_t	*ip,
+	int		flags)
 {
 	xfs_trans_t	*tp;
 	int		error;
@@ -1216,6 +1190,7 @@ xfs_inactive_free_eofblocks(
 	xfs_filblks_t	map_len;
 	int		nimaps;
 	xfs_bmbt_irec_t	imap;
+	int		use_iolock = (flags & XFS_FREE_EOF_LOCK);
 
 	/*
 	 * Figure out if there are any blocks beyond the end
@@ -1256,11 +1231,14 @@ xfs_inactive_free_eofblocks(
 		 * cache and we can't
 		 * do that within a transaction.
 		 */
-		xfs_ilock(ip, XFS_IOLOCK_EXCL);
+		if (use_iolock)
+			xfs_ilock(ip, XFS_IOLOCK_EXCL);
 		error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
 				    ip->i_size);
 		if (error) {
-			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+			xfs_trans_cancel(tp, 0);
+			if (use_iolock)
+				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
 			return error;
 		}
 
@@ -1297,7 +1275,8 @@ xfs_inactive_free_eofblocks(
 			error = xfs_trans_commit(tp,
 						XFS_TRANS_RELEASE_LOG_RES);
 		}
-		xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
+		xfs_iunlock(ip, (use_iolock ? (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)
+					    : XFS_ILOCK_EXCL));
 	}
 	return error;
 }
@@ -1560,6 +1539,31 @@ xfs_release(
 	if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
 		return 0;
 
+	if (!XFS_FORCED_SHUTDOWN(mp)) {
+		/*
+		 * If we are using filestreams, and we have an unlinked
+		 * file that we are processing the last close on, then nothing
+		 * will be able to reopen and write to this file. Purge this
+		 * inode from the filestreams cache so that it doesn't delay
+		 * teardown of the inode.
+		 */
+		if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip))
+			xfs_filestream_deassociate(ip);
+
+		/*
+		 * If we previously truncated this file and removed old data
+		 * in the process, we want to initiate "early" writeout on
+		 * the last close.  This is an attempt to combat the notorious
+		 * NULL files problem which is particularly noticable from a
+		 * truncate down, buffered (re-)write (delalloc), followed by
+		 * a crash.  What we are effectively doing here is
+		 * significantly reducing the time window where we'd otherwise
+		 * be exposed to that problem.
+		 */
+		if (VUNTRUNCATE(vp) && VN_DIRTY(vp) && ip->i_delayed_blks > 0)
+			bhv_vop_flush_pages(vp, 0, -1, XFS_B_ASYNC, FI_NONE);
+	}
+
 #ifdef HAVE_REFCACHE
 	/* If we are in the NFS reference cache then don't do this now */
 	if (ip->i_refcache)
@@ -1573,7 +1577,8 @@ xfs_release(
 		     (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
 		    (!(ip->i_d.di_flags &
 				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
-			if ((error = xfs_inactive_free_eofblocks(mp, ip)))
+			error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
+			if (error)
 				return error;
 			/* Update linux inode block count after free above */
 			vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp,
@@ -1654,7 +1659,8 @@ xfs_inactive(
 		     (!(ip->i_d.di_flags &
 				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
 		      (ip->i_delayed_blks != 0)))) {
-			if ((error = xfs_inactive_free_eofblocks(mp, ip)))
+			error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
+			if (error)
 				return VN_INACTIVE_CACHE;
 			/* Update linux inode block count after free above */
 			vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp,
@@ -1680,6 +1686,7 @@ xfs_inactive(
 
 		error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0);
 		if (error) {
+			xfs_trans_cancel(tp, 0);
 			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
 			return VN_INACTIVE_CACHE;
 		}
@@ -2217,9 +2224,9 @@ static inline int
 xfs_lock_inumorder(int lock_mode, int subclass)
 {
 	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
-		lock_mode |= (subclass + XFS_IOLOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
+		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
 	if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
-		lock_mode |= (subclass + XFS_ILOCK_INUMORDER) << XFS_ILOCK_SHIFT;
+		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
 
 	return lock_mode;
 }
@@ -2546,6 +2553,15 @@ xfs_remove(
 	 */
 	xfs_refcache_purge_ip(ip);
 
+	/*
+	 * If we are using filestreams, kill the stream association.
+	 * If the file is still open it may get a new one but that
+	 * will get killed on last close in xfs_close() so we don't
+	 * have to worry about that.
+	 */
+	if (link_zero && xfs_inode_is_filestream(ip))
+		xfs_filestream_deassociate(ip);
+
 	vn_trace_exit(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address);
 
 	/*
@@ -4047,22 +4063,16 @@ xfs_alloc_file_space(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
 
-	rt = XFS_IS_REALTIME_INODE(ip);
-	if (unlikely(rt)) {
-		if (!(extsz = ip->i_d.di_extsize))
-			extsz = mp->m_sb.sb_rextsize;
-	} else {
-		extsz = ip->i_d.di_extsize;
-	}
-
 	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
 		return error;
 
 	if (len <= 0)
 		return XFS_ERROR(EINVAL);
 
+	rt = XFS_IS_REALTIME_INODE(ip);
+	extsz = xfs_get_extsz_hint(ip);
+
 	count = len;
-	error = 0;
 	imapp = &imaps[0];
 	nimaps = 1;
 	bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0);
@@ -4678,11 +4688,7 @@ xfs_change_file_space(
 bhv_vnodeops_t xfs_vnodeops = {
 	BHV_IDENTITY_INIT(VN_BHV_XFS,VNODE_POSITION_XFS),
 	.vop_open		= xfs_open,
-	.vop_close		= xfs_close,
 	.vop_read		= xfs_read,
-#ifdef HAVE_SENDFILE
-	.vop_sendfile		= xfs_sendfile,
-#endif
 #ifdef HAVE_SPLICE
 	.vop_splice_read	= xfs_splice_read,
 	.vop_splice_write	= xfs_splice_write,