Merge branch 'master' into upstream

author: Jeff Garzik <jeff@garzik.org> 2006-09-24 01:52:47 -0400
committer: Jeff Garzik <jeff@garzik.org> 2006-09-24 01:52:47 -0400
commit: 23930fa1cebfea6f79881c588ccd1b0781e49e3f (patch)
tree: 36d29e3f83661c4f5f45b6f74ac0d5f9886867a8 /fs
parent: 36b35a5be0e4b406acd816e2122d153e875105be (diff)
parent: 4f5537de7c1531398e84e18a24f667e49cc94208 (diff)
71 files changed, 3867 insertions, 1861 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 3f00a9faabc..a27002668bd 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -325,8 +325,8 @@ config FS_POSIX_ACL
 source "fs/xfs/Kconfig"
 
 config OCFS2_FS
-	tristate "OCFS2 file system support (EXPERIMENTAL)"
-	depends on NET && SYSFS && EXPERIMENTAL
+	tristate "OCFS2 file system support"
+	depends on NET && SYSFS
 	select CONFIGFS_FS
 	select JBD
 	select CRC32
@@ -1471,8 +1471,8 @@ config NFS_V4
 	  If unsure, say N.
 
 config NFS_DIRECTIO
-	bool "Allow direct I/O on NFS files (EXPERIMENTAL)"
-	depends on NFS_FS && EXPERIMENTAL
+	bool "Allow direct I/O on NFS files"
+	depends on NFS_FS
 	help
 	  This option enables applications to perform uncached I/O on files
 	  in NFS file systems using the O_DIRECT open() flag.  When O_DIRECT
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 0feb3bd49cb..1eb9a2ec0a3 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,7 @@
+Version 1.46
+------------
+Support deep tree mounts.  Better support OS/2, Win9x (DOS) time stamps.
+
 Version 1.45
 ------------
 Do not time out lockw calls when using posix extensions. Do not
@@ -6,7 +10,8 @@ on requests on other threads.  Improve POSIX locking emulation,
 (lock cancel now works, and unlock of merged range works even
 to Windows servers now).  Fix oops on mount to lanman servers
 (win9x, os/2 etc.) when null password.  Do not send listxattr
-(SMB to query all EAs) if nouser_xattr specified.
+(SMB to query all EAs) if nouser_xattr specified.  Fix SE Linux
+problem (instantiate inodes/dentries in right order for readdir).
 
 Version 1.44
 ------------
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index ad58eb0c4d6..fd1e52ebcee 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -40,5 +40,7 @@ struct cifs_sb_info {
 	mode_t	mnt_file_mode;
 	mode_t	mnt_dir_mode;
 	int     mnt_cifs_flags;
+	int	prepathlen;
+	char *  prepath;
 };
 #endif				/* _CIFS_FS_SB_H */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 3cd750029be..c3ef1c0d0e6 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -189,7 +189,6 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	buf->f_files = 0;	/* undefined */
 	buf->f_ffree = 0;	/* unlimited */
 
-#ifdef CONFIG_CIFS_EXPERIMENTAL
 /* BB we could add a second check for a QFS Unix capability bit */
 /* BB FIXME check CIFS_POSIX_EXTENSIONS Unix cap first FIXME BB */
     if ((pTcon->ses->capabilities & CAP_UNIX) && (CIFS_POSIX_EXTENSIONS &
@@ -199,7 +198,6 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
     /* Only need to call the old QFSInfo if failed
     on newer one */
     if(rc)
-#endif /* CIFS_EXPERIMENTAL */
 	rc = CIFSSMBQFSInfo(xid, pTcon, buf);
 
 	/* Old Windows servers do not support level 103, retry with level 
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 39ee8ef3bde..bea875d9a46 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -100,5 +100,5 @@ extern ssize_t	cifs_getxattr(struct dentry *, const char *, void *, size_t);
 extern ssize_t	cifs_listxattr(struct dentry *, char *, size_t);
 extern int cifs_ioctl (struct inode * inode, struct file * filep,
 		       unsigned int command, unsigned long arg);
-#define CIFS_VERSION   "1.45"
+#define CIFS_VERSION   "1.46"
 #endif				/* _CIFSFS_H */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 86239023545..81df2bf8e75 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -1344,6 +1344,7 @@ struct smb_t2_rsp {
 #define SMB_QUERY_ATTR_FLAGS            0x206  /* append,immutable etc. */
 #define SMB_QUERY_POSIX_PERMISSION      0x207
 #define SMB_QUERY_POSIX_LOCK            0x208
+/* #define SMB_POSIX_OPEN  		0x209 */
 #define SMB_QUERY_FILE_INTERNAL_INFO    0x3ee
 #define SMB_QUERY_FILE_ACCESS_INFO      0x3f0
 #define SMB_QUERY_FILE_NAME_INFO2       0x3f1 /* 0x30 bytes */
@@ -1363,6 +1364,7 @@ struct smb_t2_rsp {
 #define SMB_SET_XATTR                   0x205
 #define SMB_SET_ATTR_FLAGS              0x206  /* append, immutable etc. */
 #define SMB_SET_POSIX_LOCK              0x208
+#define SMB_POSIX_OPEN                  0x209
 #define SMB_SET_FILE_BASIC_INFO2        0x3ec
 #define SMB_SET_FILE_RENAME_INFORMATION 0x3f2 /* BB check if qpathinfo level too */
 #define SMB_FILE_ALL_INFO2              0x3fa
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 5d394c72686..0e9ba0b9d71 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -89,6 +89,7 @@ struct smb_vol {
 	unsigned int wsize;
 	unsigned int sockopt;
 	unsigned short int port;
+	char * prepath;
 };
 
 static int ipv4_connect(struct sockaddr_in *psin_server, 
@@ -993,6 +994,28 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
 				printk(KERN_WARNING "CIFS: domain name too long\n");
 				return 1;
 			}
+                } else if (strnicmp(data, "prefixpath", 10) == 0) {
+                        if (!value || !*value) {
+                                printk(KERN_WARNING
+                                       "CIFS: invalid path prefix\n");
+                                return 1;       /* needs_arg; */
+                        }
+                        if ((temp_len = strnlen(value, 1024)) < 1024) {
+				if(value[0] != '/')
+					temp_len++;  /* missing leading slash */
+                                vol->prepath = kmalloc(temp_len+1,GFP_KERNEL);
+                                if(vol->prepath == NULL)
+                                        return 1;
+				if(value[0] != '/') {
+					vol->prepath[0] = '/';
+	                                strcpy(vol->prepath+1,value);
+				} else
+					strcpy(vol->prepath,value);
+				cFYI(1,("prefix path %s",vol->prepath));
+                        } else {
+                                printk(KERN_WARNING "CIFS: prefix too long\n");
+                                return 1;
+                        }
 		} else if (strnicmp(data, "iocharset", 9) == 0) {
 			if (!value || !*value) {
 				printk(KERN_WARNING "CIFS: invalid iocharset specified\n");
@@ -1605,6 +1628,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 	if (cifs_parse_mount_options(mount_data, devname, &volume_info)) {
 		kfree(volume_info.UNC);
 		kfree(volume_info.password);
+		kfree(volume_info.prepath);
 		FreeXid(xid);
 		return -EINVAL;
 	}
@@ -1619,6 +1643,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
            locations such as env variables and files on disk */
 		kfree(volume_info.UNC);
 		kfree(volume_info.password);
+		kfree(volume_info.prepath);
 		FreeXid(xid);
 		return -EINVAL;
 	}
@@ -1639,6 +1664,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 			/* we failed translating address */
 			kfree(volume_info.UNC);
 			kfree(volume_info.password);
+			kfree(volume_info.prepath);
 			FreeXid(xid);
 			return -EINVAL;
 		}
@@ -1651,6 +1677,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 		cERROR(1,("Connecting to DFS root not implemented yet"));
 		kfree(volume_info.UNC);
 		kfree(volume_info.password);
+		kfree(volume_info.prepath);
 		FreeXid(xid);
 		return -EINVAL;
 	} else /* which servers DFS root would we conect to */ {
@@ -1658,6 +1685,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 		       ("CIFS mount error: No UNC path (e.g. -o unc=//192.168.1.100/public) specified"));
 		kfree(volume_info.UNC);
 		kfree(volume_info.password);
+		kfree(volume_info.prepath);
 		FreeXid(xid);
 		return -EINVAL;
 	}
@@ -1672,6 +1700,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 			cERROR(1,("CIFS mount error: iocharset %s not found",volume_info.iocharset));
 			kfree(volume_info.UNC);
 			kfree(volume_info.password);
+			kfree(volume_info.prepath);
 			FreeXid(xid);
 			return -ELIBACC;
 		}
@@ -1688,6 +1717,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 	else {
 		kfree(volume_info.UNC);
 		kfree(volume_info.password);
+		kfree(volume_info.prepath);
 		FreeXid(xid);
 		return -EINVAL;
 	}
@@ -1710,6 +1740,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 				sock_release(csocket);
 			kfree(volume_info.UNC);
 			kfree(volume_info.password);
+			kfree(volume_info.prepath);
 			FreeXid(xid);
 			return rc;
 		}
@@ -1720,6 +1751,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 			sock_release(csocket);
 			kfree(volume_info.UNC);
 			kfree(volume_info.password);
+			kfree(volume_info.prepath);
 			FreeXid(xid);
 			return rc;
 		} else {
@@ -1744,6 +1776,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 				sock_release(csocket);
 				kfree(volume_info.UNC);
 				kfree(volume_info.password);
+				kfree(volume_info.prepath);
 				FreeXid(xid);
 				return rc;
 			}
@@ -1831,6 +1864,14 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 			/* Windows ME may prefer this */
 			cFYI(1,("readsize set to minimum 2048"));
 		}
+		/* calculate prepath */
+		cifs_sb->prepath = volume_info.prepath;
+		if(cifs_sb->prepath) {
+			cifs_sb->prepathlen = strlen(cifs_sb->prepath);
+			cifs_sb->prepath[0] = CIFS_DIR_SEP(cifs_sb);
+			volume_info.prepath = NULL;
+		} else 
+			cifs_sb->prepathlen = 0;
 		cifs_sb->mnt_uid = volume_info.linux_uid;
 		cifs_sb->mnt_gid = volume_info.linux_gid;
 		cifs_sb->mnt_file_mode = volume_info.file_mode;
@@ -2008,6 +2049,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 	the password ptr is put in the new session structure (in which case the
 	password will be freed at unmount time) */
 	kfree(volume_info.UNC);
+	kfree(volume_info.prepath);
 	FreeXid(xid);
 	return rc;
 }
@@ -3195,6 +3237,7 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
 	int xid;
 	struct cifsSesInfo *ses = NULL;
 	struct task_struct *cifsd_task;
+	char * tmp;
 
 	xid = GetXid();
 
@@ -3228,6 +3271,10 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
 	}
 	
 	cifs_sb->tcon = NULL;
+	tmp = cifs_sb->prepath;
+	cifs_sb->prepathlen = 0;
+	cifs_sb->prepath = NULL;
+	kfree(tmp);
 	if (ses)
 		schedule_timeout_interruptible(msecs_to_jiffies(500));
 	if (ses)
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 914239d5363..66b825ade3e 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -46,7 +46,8 @@ char *
 build_path_from_dentry(struct dentry *direntry)
 {
 	struct dentry *temp;
-	int namelen = 0;
+	int namelen;
+	int pplen;
 	char *full_path;
 	char dirsep;
 
@@ -56,7 +57,9 @@ build_path_from_dentry(struct dentry *direntry)
 		when the server crashed */
 
 	dirsep = CIFS_DIR_SEP(CIFS_SB(direntry->d_sb));
+	pplen = CIFS_SB(direntry->d_sb)->prepathlen;
 cifs_bp_rename_retry:
+	namelen = pplen; 
 	for (temp = direntry; !IS_ROOT(temp);) {
 		namelen += (1 + temp->d_name.len);
 		temp = temp->d_parent;
@@ -70,7 +73,6 @@ cifs_bp_rename_retry:
 	if(full_path == NULL)
 		return full_path;
 	full_path[namelen] = 0;	/* trailing null */
-
 	for (temp = direntry; !IS_ROOT(temp);) {
 		namelen -= 1 + temp->d_name.len;
 		if (namelen < 0) {
@@ -79,7 +81,7 @@ cifs_bp_rename_retry:
 			full_path[namelen] = dirsep;
 			strncpy(full_path + namelen + 1, temp->d_name.name,
 				temp->d_name.len);
-			cFYI(0, (" name: %s ", full_path + namelen));
+			cFYI(0, ("name: %s", full_path + namelen));
 		}
 		temp = temp->d_parent;
 		if(temp == NULL) {
@@ -88,18 +90,23 @@ cifs_bp_rename_retry:
 			return NULL;
 		}
 	}
-	if (namelen != 0) {
+	if (namelen != pplen) {
 		cERROR(1,
-		       ("We did not end path lookup where we expected namelen is %d",
+		       ("did not end path lookup where expected namelen is %d",
 			namelen));
-		/* presumably this is only possible if we were racing with a rename 
+		/* presumably this is only possible if racing with a rename 
 		of one of the parent directories  (we can not lock the dentries
 		above us to prevent this, but retrying should be harmless) */
 		kfree(full_path);
-		namelen = 0;
 		goto cifs_bp_rename_retry;
 	}
-
+	/* DIR_SEP already set for byte  0 / vs \ but not for
+	   subsequent slashes in prepath which currently must
+	   be entered the right way - not sure if there is an alternative
+	   since the '\' is a valid posix character so we can not switch
+	   those safely to '/' if any are found in the middle of the prepath */
+	/* BB test paths to Windows with '/' in the midst of prepath */
+	strncpy(full_path,CIFS_SB(direntry->d_sb)->prepath,pplen);
 	return full_path;
 }
 
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e9c5ba9084f..ddb012a6802 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -752,6 +752,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
 			int stored_rc = 0;
 			struct cifsLockInfo *li, *tmp;
 
+			rc = 0;
 			down(&fid->lock_sem);
 			list_for_each_entry_safe(li, tmp, &fid->llist, llist) {
 				if (pfLock->fl_start <= li->offset &&
@@ -766,7 +767,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
 					kfree(li);
 				}
 			}
-		up(&fid->lock_sem);
+			up(&fid->lock_sem);
 		}
 	}
 
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 067648b7179..18fcec190f8 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -269,7 +269,7 @@ ssize_t cifs_getxattr(struct dentry * direntry, const char * ea_name,
 				rc = CIFSSMBGetCIFSACL(xid, pTcon, fid,
 					ea_value, buf_size,
 					ACL_TYPE_ACCESS);
-				CIFSSMBClose(xid, pTcon, fid)
+				CIFSSMBClose(xid, pTcon, fid);
 			}
 		} */  /* BB enable after fixing up return data */
                   		
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index df025453dd9..816e8ef6456 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -86,6 +86,32 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare
 	return sd;
 }
 
+/*
+ *
+ * Return -EEXIST if there is already a configfs element with the same
+ * name for the same parent.
+ *
+ * called with parent inode's i_mutex held
+ */
+int configfs_dirent_exists(struct configfs_dirent *parent_sd,
+			   const unsigned char *new)
+{
+	struct configfs_dirent * sd;
+
+	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
+		if (sd->s_element) {
+			const unsigned char *existing = configfs_get_name(sd);
+			if (strcmp(existing, new))
+				continue;
+			else
+				return -EEXIST;
+		}
+	}
+
+	return 0;
+}
+
+
 int configfs_make_dirent(struct configfs_dirent * parent_sd,
 			 struct dentry * dentry, void * element,
 			 umode_t mode, int type)
@@ -136,8 +162,10 @@ static int create_dir(struct config_item * k, struct dentry * p,
 	int error;
 	umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
 
-	error = configfs_make_dirent(p->d_fsdata, d, k, mode,
-				     CONFIGFS_DIR);
+	error = configfs_dirent_exists(p->d_fsdata, d->d_name.name);
+	if (!error)
+		error = configfs_make_dirent(p->d_fsdata, d, k, mode,
+					     CONFIGFS_DIR);
 	if (!error) {
 		error = configfs_create(d, mode, init_dir);
 		if (!error) {
diff --git a/fs/dcache.c b/fs/dcache.c
index 1b4a3a34ec5..17b392a2049 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -828,17 +828,19 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
  * (or otherwise set) by the caller to indicate that it is now
  * in use by the dcache.
  */
-struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
+static struct dentry *__d_instantiate_unique(struct dentry *entry,
+					     struct inode *inode)
 {
 	struct dentry *alias;
 	int len = entry->d_name.len;
 	const char *name = entry->d_name.name;
 	unsigned int hash = entry->d_name.hash;
 
-	BUG_ON(!list_empty(&entry->d_alias));
-	spin_lock(&dcache_lock);
-	if (!inode)
-		goto do_negative;
+	if (!inode) {
+		entry->d_inode = NULL;
+		return NULL;
+	}
+
 	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
 		struct qstr *qstr = &alias->d_name;
 
@@ -851,19 +853,35 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
 		if (memcmp(qstr->name, name, len))
 			continue;
 		dget_locked(alias);
-		spin_unlock(&dcache_lock);
-		BUG_ON(!d_unhashed(alias));
-		iput(inode);
 		return alias;
 	}
+
 	list_add(&entry->d_alias, &inode->i_dentry);
-do_negative:
 	entry->d_inode = inode;
 	fsnotify_d_instantiate(entry, inode);
-	spin_unlock(&dcache_lock);
-	security_d_instantiate(entry, inode);
 	return NULL;
 }
+
+struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
+{
+	struct dentry *result;
+
+	BUG_ON(!list_empty(&entry->d_alias));
+
+	spin_lock(&dcache_lock);
+	result = __d_instantiate_unique(entry, inode);
+	spin_unlock(&dcache_lock);
+
+	if (!result) {
+		security_d_instantiate(entry, inode);
+		return NULL;
+	}
+
+	BUG_ON(!d_unhashed(result));
+	iput(inode);
+	return result;
+}
+
 EXPORT_SYMBOL(d_instantiate_unique);
 
 /**
@@ -1235,6 +1253,11 @@ static void __d_rehash(struct dentry * entry, struct hlist_head *list)
  	hlist_add_head_rcu(&entry->d_hash, list);
 }
 
+static void _d_rehash(struct dentry * entry)
+{
+	__d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
+}
+
 /**
  * d_rehash	- add an entry back to the hash
  * @entry: dentry to add to the hash
@@ -1244,11 +1267,9 @@ static void __d_rehash(struct dentry * entry, struct hlist_head *list)
  
 void d_rehash(struct dentry * entry)
 {
-	struct hlist_head *list = d_hash(entry->d_parent, entry->d_name.hash);
-
 	spin_lock(&dcache_lock);
 	spin_lock(&entry->d_lock);
-	__d_rehash(entry, list);
+	_d_rehash(entry);
 	spin_unlock(&entry->d_lock);
 	spin_unlock(&dcache_lock);
 }
@@ -1386,6 +1407,120 @@ already_unhashed:
 	spin_unlock(&dcache_lock);
 }
 
+/*
+ * Prepare an anonymous dentry for life in the superblock's dentry tree as a
+ * named dentry in place of the dentry to be replaced.
+ */
+static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
+{
+	struct dentry *dparent, *aparent;
+
+	switch_names(dentry, anon);
+	do_switch(dentry->d_name.len, anon->d_name.len);
+	do_switch(dentry->d_name.hash, anon->d_name.hash);
+
+	dparent = dentry->d_parent;
+	aparent = anon->d_parent;
+
+	dentry->d_parent = (aparent == anon) ? dentry : aparent;
+	list_del(&dentry->d_u.d_child);
+	if (!IS_ROOT(dentry))
+		list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
+	else
+		INIT_LIST_HEAD(&dentry->d_u.d_child);
+
+	anon->d_parent = (dparent == dentry) ? anon : dparent;
+	list_del(&anon->d_u.d_child);
+	if (!IS_ROOT(anon))
+		list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
+	else
+		INIT_LIST_HEAD(&anon->d_u.d_child);
+
+	anon->d_flags &= ~DCACHE_DISCONNECTED;
+}
+
+/**
+ * d_materialise_unique - introduce an inode into the tree
+ * @dentry: candidate dentry
+ * @inode: inode to bind to the dentry, to which aliases may be attached
+ *
+ * Introduces an dentry into the tree, substituting an extant disconnected
+ * root directory alias in its place if there is one
+ */
+struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
+{
+	struct dentry *alias, *actual;
+
+	BUG_ON(!d_unhashed(dentry));
+
+	spin_lock(&dcache_lock);
+
+	if (!inode) {
+		actual = dentry;
+		dentry->d_inode = NULL;
+		goto found_lock;
+	}
+
+	/* See if a disconnected directory already exists as an anonymous root
+	 * that we should splice into the tree instead */
+	if (S_ISDIR(inode->i_mode) && (alias = __d_find_alias(inode, 1))) {
+		spin_lock(&alias->d_lock);
+
+		/* Is this a mountpoint that we could splice into our tree? */
+		if (IS_ROOT(alias))
+			goto connect_mountpoint;
+
+		if (alias->d_name.len == dentry->d_name.len &&
+		    alias->d_parent == dentry->d_parent &&
+		    memcmp(alias->d_name.name,
+			   dentry->d_name.name,
+			   dentry->d_name.len) == 0)
+			goto replace_with_alias;
+
+		spin_unlock(&alias->d_lock);
+
+		/* Doh! Seem to be aliasing directories for some reason... */
+		dput(alias);
+	}
+
+	/* Add a unique reference */
+	actual = __d_instantiate_unique(dentry, inode);
+	if (!actual)
+		actual = dentry;
+	else if (unlikely(!d_unhashed(actual)))
+		goto shouldnt_be_hashed;
+
+found_lock:
+	spin_lock(&actual->d_lock);
+found:
+	_d_rehash(actual);
+	spin_unlock(&actual->d_lock);
+	spin_unlock(&dcache_lock);
+
+	if (actual == dentry) {
+		security_d_instantiate(dentry, inode);
+		return NULL;
+	}
+
+	iput(inode);
+	return actual;
+
+	/* Convert the anonymous/root alias into an ordinary dentry */
+connect_mountpoint:
+	__d_materialise_dentry(dentry, alias);
+
+	/* Replace the candidate dentry with the alias in the tree */
+replace_with_alias:
+	__d_drop(alias);
+	actual = alias;
+	goto found;
+
+shouldnt_be_hashed:
+	spin_unlock(&dcache_lock);
+	BUG();
+	goto shouldnt_be_hashed;
+}
+
 /**
  * d_path - return the path of a dentry
  * @dentry: dentry to report
@@ -1784,6 +1919,7 @@ EXPORT_SYMBOL(d_instantiate);
 EXPORT_SYMBOL(d_invalidate);
 EXPORT_SYMBOL(d_lookup);
 EXPORT_SYMBOL(d_move);
+EXPORT_SYMBOL_GPL(d_materialise_unique);
 EXPORT_SYMBOL(d_path);
 EXPORT_SYMBOL(d_prune_aliases);
 EXPORT_SYMBOL(d_rehash);
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index d4870432ecf..b1981d0e95a 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -539,7 +539,6 @@ unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars)
 
 #endif  /*  EXT2FS_DEBUG  */
 
-/* Superblock must be locked */
 unsigned long ext2_count_free_blocks (struct super_block * sb)
 {
 	struct ext2_group_desc * desc;
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index de85c61c58c..695f69ccf90 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -637,7 +637,6 @@ fail:
 	return ERR_PTR(err);
 }
 
-/* Superblock must be locked */
 unsigned long ext2_count_free_inodes (struct super_block * sb)
 {
 	struct ext2_group_desc *desc;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index ca5bfb6914d..4286ff6330b 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1083,7 +1083,6 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
 	unsigned long overhead;
 	int i;
 
-	lock_super(sb);
 	if (test_opt (sb, MINIX_DF))
 		overhead = 0;
 	else {
@@ -1124,7 +1123,6 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
 	buf->f_files = le32_to_cpu(sbi->s_es->s_inodes_count);
 	buf->f_ffree = ext2_count_free_inodes (sb);
 	buf->f_namelen = EXT2_NAME_LEN;
-	unlock_super(sb);
 	return 0;
 }
 
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h
index 2e0cc8e00b8..3a566077ac9 100644
--- a/fs/jffs2/jffs2_fs_i.h
+++ b/fs/jffs2/jffs2_fs_i.h
@@ -41,11 +41,7 @@ struct jffs2_inode_info {
 
 	uint16_t flags;
 	uint8_t usercompr;
-#if !defined (__ECOS)
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,2)
 	struct inode vfs_inode;
-#endif
-#endif
 #ifdef CONFIG_JFFS2_FS_POSIX_ACL
 	struct posix_acl *i_acl_access;
 	struct posix_acl *i_acl_default;
diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index 7675b33396c..5a6b4d64206 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c
@@ -21,6 +21,9 @@
 #include <linux/pagemap.h>
 #include "nodelist.h"
 
+static void jffs2_obsolete_node_frag(struct jffs2_sb_info *c,
+				     struct jffs2_node_frag *this);
+
 void jffs2_add_fd_to_list(struct jffs2_sb_info *c, struct jffs2_full_dirent *new, struct jffs2_full_dirent **list)
 {
 	struct jffs2_full_dirent **prev = list;
@@ -87,7 +90,8 @@ void jffs2_truncate_fragtree(struct jffs2_sb_info *c, struct rb_root *list, uint
 	}
 }
 
-void jffs2_obsolete_node_frag(struct jffs2_sb_info *c, struct jffs2_node_frag *this)
+static void jffs2_obsolete_node_frag(struct jffs2_sb_info *c,
+				     struct jffs2_node_frag *this)
 {
 	if (this->node) {
 		this->node->frags--;
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index cae92c14116..0ddfd70307f 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -334,7 +334,6 @@ void jffs2_kill_fragtree(struct rb_root *root, struct jffs2_sb_info *c_delete);
 struct rb_node *rb_next(struct rb_node *);
 struct rb_node *rb_prev(struct rb_node *);
 void rb_replace_node(struct rb_node *victim, struct rb_node *new, struct rb_root *root);
-void jffs2_obsolete_node_frag(struct jffs2_sb_info *c, struct jffs2_node_frag *this);
 int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_full_dnode *fn);
 void jffs2_truncate_fragtree (struct jffs2_sb_info *c, struct rb_root *list, uint32_t size);
 int jffs2_add_older_frag_to_fragtree(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_tmp_dnode_info *tn);
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 25bc1ae0864..4da09ce1d1f 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -1215,7 +1215,6 @@ int jffs2_garbage_collect_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xatt
 	rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XATTR_SIZE);
 	if (rc) {
 		JFFS2_WARNING("jffs2_reserve_space_gc()=%d, request=%u\n", rc, totlen);
-		rc = rc ? rc : -EBADFD;
 		goto out;
 	}
 	rc = save_xattr_datum(c, xd);
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 89ba0df14c2..50dbb67ae0c 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -151,11 +151,13 @@ static void nlmclnt_release_lockargs(struct nlm_rqst *req)
 int
 nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
 {
+	struct rpc_clnt		*client = NFS_CLIENT(inode);
+	struct sockaddr_in	addr;
 	struct nlm_host		*host;
 	struct nlm_rqst		*call;
 	sigset_t		oldset;
 	unsigned long		flags;
-	int			status, proto, vers;
+	int			status, vers;
 
 	vers = (NFS_PROTO(inode)->version == 3) ? 4 : 1;
 	if (NFS_PROTO(inode)->version > 3) {
@@ -163,10 +165,8 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
 		return -ENOLCK;
 	}
 
-	/* Retrieve transport protocol from NFS client */
-	proto = NFS_CLIENT(inode)->cl_xprt->prot;
-
-	host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers);
+	rpc_peeraddr(client, (struct sockaddr *) &addr, sizeof(addr));
+	host = nlmclnt_lookup_host(&addr, client->cl_xprt->prot, vers);
 	if (host == NULL)
 		return -ENOLCK;
 
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 38b0e8a1aec..703fb038c81 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -26,7 +26,6 @@
 #define NLM_HOST_REBIND		(60 * HZ)
 #define NLM_HOST_EXPIRE		((nrhosts > NLM_HOST_MAX)? 300 * HZ : 120 * HZ)
 #define NLM_HOST_COLLECT	((nrhosts > NLM_HOST_MAX)? 120 * HZ :  60 * HZ)
-#define NLM_HOST_ADDR(sv)	(&(sv)->s_nlmclnt->cl_xprt->addr)
 
 static struct nlm_host *	nlm_hosts[NLM_HOST_NRHASH];
 static unsigned long		next_gc;
@@ -167,7 +166,6 @@ struct rpc_clnt *
 nlm_bind_host(struct nlm_host *host)
 {
 	struct rpc_clnt	*clnt;
-	struct rpc_xprt	*xprt;
 
 	dprintk("lockd: nlm_bind_host(%08x)\n",
 			(unsigned)ntohl(host->h_addr.sin_addr.s_addr));
@@ -179,7 +177,6 @@ nlm_bind_host(struct nlm_host *host)
 	 * RPC rebind is required
 	 */
 	if ((clnt = host->h_rpcclnt) != NULL) {
-		xprt = clnt->cl_xprt;
 		if (time_after_eq(jiffies, host->h_nextrebind)) {
 			rpc_force_rebind(clnt);
 			host->h_nextrebind = jiffies + NLM_HOST_REBIND;
@@ -187,31 +184,37 @@ nlm_bind_host(struct nlm_host *host)
 					host->h_nextrebind - jiffies);
 		}
 	} else {
-		xprt = xprt_create_proto(host->h_proto, &host->h_addr, NULL);
-		if (IS_ERR(xprt))
-			goto forgetit;
-
-		xprt_set_timeout(&xprt->timeout, 5, nlmsvc_timeout);
-		xprt->resvport = 1;	/* NLM requires a reserved port */
-
-		/* Existing NLM servers accept AUTH_UNIX only */
-		clnt = rpc_new_client(xprt, host->h_name, &nlm_program,
-					host->h_version, RPC_AUTH_UNIX);
-		if (IS_ERR(clnt))
-			goto forgetit;
-		clnt->cl_autobind = 1;	/* turn on pmap queries */
-		clnt->cl_softrtry = 1; /* All queries are soft */
-
-		host->h_rpcclnt = clnt;
+		unsigned long increment = nlmsvc_timeout * HZ;
+		struct rpc_timeout timeparms = {
+			.to_initval	= increment,
+			.to_increment	= increment,
+			.to_maxval	= increment * 6UL,
+			.to_retries	= 5U,
+		};
+		struct rpc_create_args args = {
+			.protocol	= host->h_proto,
+			.address	= (struct sockaddr *)&host->h_addr,
+			.addrsize	= sizeof(host->h_addr),
+			.timeout	= &timeparms,
+			.servername	= host->h_name,
+			.program	= &nlm_program,
+			.version	= host->h_version,
+			.authflavor	= RPC_AUTH_UNIX,
+			.flags		= (RPC_CLNT_CREATE_HARDRTRY |
+					   RPC_CLNT_CREATE_AUTOBIND),
+		};
+
+		clnt = rpc_create(&args);
+		if (!IS_ERR(clnt))
+			host->h_rpcclnt = clnt;
+		else {
+			printk("lockd: couldn't create RPC handle for %s\n", host->h_name);
+			clnt = NULL;
+		}
 	}
 
 	mutex_unlock(&host->h_mutex);
 	return clnt;
-
-forgetit:
-	printk("lockd: couldn't create RPC handle for %s\n", host->h_name);
-	mutex_unlock(&host->h_mutex);
-	return NULL;
 }
 
 /*
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 3fc683f46b3..5954dcb497e 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -109,30 +109,23 @@ nsm_unmonitor(struct nlm_host *host)
 static struct rpc_clnt *
 nsm_create(void)
 {
-	struct rpc_xprt		*xprt;
-	struct rpc_clnt		*clnt;
-	struct sockaddr_in	sin;
-
-	sin.sin_family = AF_INET;
-	sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
-	sin.sin_port = 0;
-
-	xprt = xprt_create_proto(IPPROTO_UDP, &sin, NULL);
-	if (IS_ERR(xprt))
-		return (struct rpc_clnt *)xprt;
-	xprt->resvport = 1;	/* NSM requires a reserved port */
-
-	clnt = rpc_create_client(xprt, "localhost",
-				&nsm_program, SM_VERSION,
-				RPC_AUTH_NULL);
-	if (IS_ERR(clnt))
-		goto out_err;
-	clnt->cl_softrtry = 1;
-	clnt->cl_oneshot  = 1;
-	return clnt;
-
-out_err:
-	return clnt;
+	struct sockaddr_in	sin = {
+		.sin_family	= AF_INET,
+		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+		.sin_port	= 0,
+	};
+	struct rpc_create_args args = {
+		.protocol	= IPPROTO_UDP,
+		.address	= (struct sockaddr *)&sin,
+		.addrsize	= sizeof(sin),
+		.servername	= "localhost",
+		.program	= &nsm_program,
+		.version	= SM_VERSION,
+		.authflavor	= RPC_AUTH_NULL,
+		.flags		= (RPC_CLNT_CREATE_ONESHOT),
+	};
+
+	return rpc_create(&args);
 }
 
 /*
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 0b572a0c196..f4580b44eef 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -4,9 +4,9 @@
 
 obj-$(CONFIG_NFS_FS) += nfs.o
 
-nfs-y 			:= dir.o file.o inode.o super.o nfs2xdr.o pagelist.o \
-			   proc.o read.o symlink.o unlink.o write.o \
-			   namespace.o
+nfs-y 			:= client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \
+			   pagelist.o proc.o read.o symlink.o unlink.o \
+			   write.o namespace.o
 nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o mount_clnt.o      
 nfs-$(CONFIG_NFS_V3)	+= nfs3proc.o nfs3xdr.o
 nfs-$(CONFIG_NFS_V3_ACL)	+= nfs3acl.o
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index fe0a6b8ac14..a3ee11364db 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -19,6 +19,7 @@
 
 #include "nfs4_fs.h"
 #include "callback.h"
+#include "internal.h"
 
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
 
@@ -36,6 +37,21 @@ static struct svc_program nfs4_callback_program;
 
 unsigned int nfs_callback_set_tcpport;
 unsigned short nfs_callback_tcpport;
+static const int nfs_set_port_min = 0;
+static const int nfs_set_port_max = 65535;
+
+static int param_set_port(const char *val, struct kernel_param *kp)
+{
+	char *endp;
+	int num = simple_strtol(val, &endp, 0);
+	if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max)
+		return -EINVAL;
+	*((int *)kp->arg) = num;
+	return 0;
+}
+
+module_param_call(callback_tcpport, param_set_port, param_get_int,
+		 &nfs_callback_set_tcpport, 0644);
 
 /*
  * This is the callback kernel thread.
@@ -134,10 +150,8 @@ out_err:
 /*
  * Kill the server process if it is not already up.
  */
-int nfs_callback_down(void)
+void nfs_callback_down(void)
 {
-	int ret = 0;
-
 	lock_kernel();
 	mutex_lock(&nfs_callback_mutex);
 	nfs_callback_info.users--;
@@ -149,20 +163,19 @@ int nfs_callback_down(void)
 	} while (wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0);
 	mutex_unlock(&nfs_callback_mutex);
 	unlock_kernel();
-	return ret;
 }
 
 static int nfs_callback_authenticate(struct svc_rqst *rqstp)
 {
-	struct in_addr *addr = &rqstp->rq_addr.sin_addr;
-	struct nfs4_client *clp;
+	struct sockaddr_in *addr = &rqstp->rq_addr;
+	struct nfs_client *clp;
 
 	/* Don't talk to strangers */
-	clp = nfs4_find_client(addr);
+	clp = nfs_find_client(addr, 4);
 	if (clp == NULL)
 		return SVC_DROP;
-	dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr));
-	nfs4_put_client(clp);
+	dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr->sin_addr));
+	nfs_put_client(clp);
 	switch (rqstp->rq_authop->flavour) {
 		case RPC_AUTH_NULL:
 			if (rqstp->rq_proc != CB_NULL)
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index b252e7fe53a..5676163d26e 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -62,8 +62,13 @@ struct cb_recallargs {
 extern unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
 extern unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy);
 
+#ifdef CONFIG_NFS_V4
 extern int nfs_callback_up(void);
-extern int nfs_callback_down(void);
+extern void nfs_callback_down(void);
+#else
+#define nfs_callback_up()	(0)
+#define nfs_callback_down()	do {} while(0)
+#endif
 
 extern unsigned int nfs_callback_set_tcpport;
 extern unsigned short nfs_callback_tcpport;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 7719483ecdf..97cf8f71451 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -10,19 +10,20 @@
 #include "nfs4_fs.h"
 #include "callback.h"
 #include "delegation.h"
+#include "internal.h"
 
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
  
 unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res)
 {
-	struct nfs4_client *clp;
+	struct nfs_client *clp;
 	struct nfs_delegation *delegation;
 	struct nfs_inode *nfsi;
 	struct inode *inode;
 	
 	res->bitmap[0] = res->bitmap[1] = 0;
 	res->status = htonl(NFS4ERR_BADHANDLE);
-	clp = nfs4_find_client(&args->addr->sin_addr);
+	clp = nfs_find_client(args->addr, 4);
 	if (clp == NULL)
 		goto out;
 	inode = nfs_delegation_find_inode(clp, &args->fh);
@@ -48,7 +49,7 @@ out_iput:
 	up_read(&nfsi->rwsem);
 	iput(inode);
 out_putclient:
-	nfs4_put_client(clp);
+	nfs_put_client(clp);
 out:
 	dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res->status));
 	return res->status;
@@ -56,12 +57,12 @@ out:
 
 unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
 {
-	struct nfs4_client *clp;
+	struct nfs_client *clp;
 	struct inode *inode;
 	unsigned res;
 	
 	res = htonl(NFS4ERR_BADHANDLE);
-	clp = nfs4_find_client(&args->addr->sin_addr);
+	clp = nfs_find_client(args->addr, 4);
 	if (clp == NULL)
 		goto out;
 	inode = nfs_delegation_find_inode(clp, &args->fh);
@@ -80,7 +81,7 @@ unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
 	}
 	iput(inode);
 out_putclient:
-	nfs4_put_client(clp);
+	nfs_put_client(clp);
 out:
 	dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res));
 	return res;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
new file mode 100644
index 00000000000..ec1938d4b81
--- /dev/null
+++ b/fs/nfs/client.c
@@ -0,0 +1,1448 @@
+/* client.c: NFS client sharing and management code
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/time.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/sunrpc/metrics.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/nfs4_mount.h>
+#include <linux/lockd/bind.h>
+#include <linux/smp_lock.h>
+#include <linux/seq_file.h>
+#include <linux/mount.h>
+#include <linux/nfs_idmap.h>
+#include <linux/vfs.h>
+#include <linux/inet.h>
+#include <linux/nfs_xdr.h>
+
+#include <asm/system.h>
+
+#include "nfs4_fs.h"
+#include "callback.h"
+#include "delegation.h"
+#include "iostat.h"
+#include "internal.h"
+
+#define NFSDBG_FACILITY		NFSDBG_CLIENT
+
+static DEFINE_SPINLOCK(nfs_client_lock);
+static LIST_HEAD(nfs_client_list);
+static LIST_HEAD(nfs_volume_list);
+static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
+
+/*
+ * RPC cruft for NFS
+ */
+static struct rpc_version *nfs_version[5] = {
+	[2]			= &nfs_version2,
+#ifdef CONFIG_NFS_V3
+	[3]			= &nfs_version3,
+#endif
+#ifdef CONFIG_NFS_V4
+	[4]			= &nfs_version4,
+#endif
+};
+
+struct rpc_program nfs_program = {
+	.name			= "nfs",
+	.number			= NFS_PROGRAM,
+	.nrvers			= ARRAY_SIZE(nfs_version),
+	.version		= nfs_version,
+	.stats			= &nfs_rpcstat,
+	.pipe_dir_name		= "/nfs",
+};
+
+struct rpc_stat nfs_rpcstat = {
+	.program		= &nfs_program
+};
+
+
+#ifdef CONFIG_NFS_V3_ACL
+static struct rpc_stat		nfsacl_rpcstat = { &nfsacl_program };
+static struct rpc_version *	nfsacl_version[] = {
+	[3]			= &nfsacl_version3,
+};
+
+struct rpc_program		nfsacl_program = {
+	.name			= "nfsacl",
+	.number			= NFS_ACL_PROGRAM,
+	.nrvers			= ARRAY_SIZE(nfsacl_version),
+	.version		= nfsacl_version,
+	.stats			= &nfsacl_rpcstat,
+};
+#endif  /* CONFIG_NFS_V3_ACL */
+
+/*
+ * Allocate a shared client record
+ *
+ * Since these are allocated/deallocated very rarely, we don't
+ * bother putting them in a slab cache...
+ */
+static struct nfs_client *nfs_alloc_client(const char *hostname,
+					   const struct sockaddr_in *addr,
+					   int nfsversion)
+{
+	struct nfs_client *clp;
+	int error;
+
+	if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
+		goto error_0;
+
+	error = rpciod_up();
+	if (error < 0) {
+		dprintk("%s: couldn't start rpciod! Error = %d\n",
+				__FUNCTION__, error);
+		goto error_1;
+	}
+	__set_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
+
+	if (nfsversion == 4) {
+		if (nfs_callback_up() < 0)
+			goto error_2;
+		__set_bit(NFS_CS_CALLBACK, &clp->cl_res_state);
+	}
+
+	atomic_set(&clp->cl_count, 1);
+	clp->cl_cons_state = NFS_CS_INITING;
+
+	clp->cl_nfsversion = nfsversion;
+	memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr));
+
+	if (hostname) {
+		clp->cl_hostname = kstrdup(hostname, GFP_KERNEL);
+		if (!clp->cl_hostname)
+			goto error_3;
+	}
+
+	INIT_LIST_HEAD(&clp->cl_superblocks);
+	clp->cl_rpcclient = ERR_PTR(-EINVAL);
+
+#ifdef CONFIG_NFS_V4
+	init_rwsem(&clp->cl_sem);
+	INIT_LIST_HEAD(&clp->cl_delegations);
+	INIT_LIST_HEAD(&clp->cl_state_owners);
+	INIT_LIST_HEAD(&clp->cl_unused);
+	spin_lock_init(&clp->cl_lock);
+	INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp);
+	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
+	clp->cl_boot_time = CURRENT_TIME;
+	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
+#endif
+
+	return clp;
+
+error_3:
+	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
+		nfs_callback_down();
+error_2:
+	rpciod_down();
+	__clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
+error_1:
+	kfree(clp);
+error_0:
+	return NULL;
+}
+
+static void nfs4_shutdown_client(struct nfs_client *clp)
+{
+#ifdef CONFIG_NFS_V4
+	if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
+		nfs4_kill_renewd(clp);
+	while (!list_empty(&clp->cl_unused)) {
+		struct nfs4_state_owner *sp;
+
+		sp = list_entry(clp->cl_unused.next,
+				struct nfs4_state_owner,
+				so_list);
+		list_del(&sp->so_list);
+		kfree(sp);
+	}
+	BUG_ON(!list_empty(&clp->cl_state_owners));
+	if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
+		nfs_idmap_delete(clp);
+#endif
+}
+
+/*
+ * Destroy a shared client record
+ */
+static void nfs_free_client(struct nfs_client *clp)
+{
+	dprintk("--> nfs_free_client(%d)\n", clp->cl_nfsversion);
+
+	nfs4_shutdown_client(clp);
+
+	/* -EIO all pending I/O */
+	if (!IS_ERR(clp->cl_rpcclient))
+		rpc_shutdown_client(clp->cl_rpcclient);
+
+	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
+		nfs_callback_down();
+
+	if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state))
+		rpciod_down();
+
+	kfree(clp->cl_hostname);
+	kfree(clp);
+
+	dprintk("<-- nfs_free_client()\n");
+}
+
+/*
+ * Release a reference to a shared client record
+ */
+void nfs_put_client(struct nfs_client *clp)
+{
+	if (!clp)
+		return;
+
+	dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count));
+
+	if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) {
+		list_del(&clp->cl_share_link);
+		spin_unlock(&nfs_client_lock);
+
+		BUG_ON(!list_empty(&clp->cl_superblocks));
+
+		nfs_free_client(clp);
+	}
+}
+
+/*
+ * Find a client by address
+ * - caller must hold nfs_client_lock
+ */
+static struct nfs_client *__nfs_find_client(const struct sockaddr_in *addr, int nfsversion)
+{
+	struct nfs_client *clp;
+
+	list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
+		/* Different NFS versions cannot share the same nfs_client */
+		if (clp->cl_nfsversion != nfsversion)
+			continue;
+
+		if (memcmp(&clp->cl_addr.sin_addr, &addr->sin_addr,
+			   sizeof(clp->cl_addr.sin_addr)) != 0)
+			continue;
+
+		if (clp->cl_addr.sin_port == addr->sin_port)
+			goto found;
+	}
+
+	return NULL;
+
+found:
+	atomic_inc(&clp->cl_count);
+	return clp;
+}
+
+/*
+ * Find a client by IP address and protocol version
+ * - returns NULL if no such client
+ */
+struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversion)
+{
+	struct nfs_client *clp;
+
+	spin_lock(&nfs_client_lock);
+	clp = __nfs_find_client(addr, nfsversion);
+	spin_unlock(&nfs_client_lock);
+
+	BUG_ON(clp && clp->cl_cons_state == 0);
+
+	return clp;
+}
+
+/*
+ * Look up a client by IP address and protocol version
+ * - creates a new record if one doesn't yet exist
+ */
+static struct nfs_client *nfs_get_client(const char *hostname,
+					 const struct sockaddr_in *addr,
+					 int nfsversion)
+{
+	struct nfs_client *clp, *new = NULL;
+	int error;
+
+	dprintk("--> nfs_get_client(%s,"NIPQUAD_FMT":%d,%d)\n",
+		hostname ?: "", NIPQUAD(addr->sin_addr),
+		addr->sin_port, nfsversion);
+
+	/* see if the client already exists */
+	do {
+		spin_lock(&nfs_client_lock);
+
+		clp = __nfs_find_client(addr, nfsversion);
+		if (clp)
+			goto found_client;
+		if (new)
+			goto install_client;
+
+		spin_unlock(&nfs_client_lock);
+
+		new = nfs_alloc_client(hostname, addr, nfsversion);
+	} while (new);
+
+	return ERR_PTR(-ENOMEM);
+
+	/* install a new client and return with it unready */
+install_client:
+	clp = new;
+	list_add(&clp->cl_share_link, &nfs_client_list);
+	spin_unlock(&nfs_client_lock);
+	dprintk("--> nfs_get_client() = %p [new]\n", clp);
+	return clp;
+
+	/* found an existing client
+	 * - make sure it's ready before returning
+	 */
+found_client:
+	spin_unlock(&nfs_client_lock);
+
+	if (new)
+		nfs_free_client(new);
+
+	if (clp->cl_cons_state == NFS_CS_INITING) {
+		DECLARE_WAITQUEUE(myself, current);
+
+		add_wait_queue(&nfs_client_active_wq, &myself);
+
+		for (;;) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			if (signal_pending(current) ||
+			    clp->cl_cons_state > NFS_CS_READY)
+				break;
+			schedule();
+		}
+
+		remove_wait_queue(&nfs_client_active_wq, &myself);
+
+		if (signal_pending(current)) {
+			nfs_put_client(clp);
+			return ERR_PTR(-ERESTARTSYS);
+		}
+	}
+
+	if (clp->cl_cons_state < NFS_CS_READY) {
+		error = clp->cl_cons_state;
+		nfs_put_client(clp);
+		return ERR_PTR(error);
+	}
+
+	BUG_ON(clp->cl_cons_state != NFS_CS_READY);
+
+	dprintk("--> nfs_get_client() = %p [share]\n", clp);
+	return clp;
+}
+
+/*
+ * Mark a server as ready or failed
+ */
+static void nfs_mark_client_ready(struct nfs_client *clp, int state)
+{
+	clp->cl_cons_state = state;
+	wake_up_all(&nfs_client_active_wq);
+}
+
+/*
+ * Initialise the timeout values for a connection
+ */
+static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
+				    unsigned int timeo, unsigned int retrans)
+{
+	to->to_initval = timeo * HZ / 10;
+	to->to_retries = retrans;
+	if (!to->to_retries)
+		to->to_retries = 2;
+
+	switch (proto) {
+	case IPPROTO_TCP:
+		if (!to->to_initval)
+			to->to_initval = 60 * HZ;
+		if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
+			to->to_initval = NFS_MAX_TCP_TIMEOUT;
+		to->to_increment = to->to_initval;
+		to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
+		to->to_exponential = 0;
+		break;
+	case IPPROTO_UDP:
+	default:
+		if (!to->to_initval)
+			to->to_initval = 11 * HZ / 10;
+		if (to->to_initval > NFS_MAX_UDP_TIMEOUT)
+			to->to_initval = NFS_MAX_UDP_TIMEOUT;
+		to->to_maxval = NFS_MAX_UDP_TIMEOUT;
+		to->to_exponential = 1;
+		break;
+	}
+}
+
+/*
+ * Create an RPC client handle
+ */
+static int nfs_create_rpc_client(struct nfs_client *clp, int proto,
+						unsigned int timeo,
+						unsigned int retrans,
+						rpc_authflavor_t flavor)
+{
+	struct rpc_timeout	timeparms;
+	struct rpc_clnt		*clnt = NULL;
+	struct rpc_create_args args = {
+		.protocol	= proto,
+		.address	= (struct sockaddr *)&clp->cl_addr,
+		.addrsize	= sizeof(clp->cl_addr),
+		.timeout	= &timeparms,
+		.servername	= clp->cl_hostname,
+		.program	= &nfs_program,
+		.version	= clp->rpc_ops->version,
+		.authflavor	= flavor,
+	};
+
+	if (!IS_ERR(clp->cl_rpcclient))
+		return 0;
+
+	nfs_init_timeout_values(&timeparms, proto, timeo, retrans);
+	clp->retrans_timeo = timeparms.to_initval;
+	clp->retrans_count = timeparms.to_retries;
+
+	clnt = rpc_create(&args);
+	if (IS_ERR(clnt)) {
+		dprintk("%s: cannot create RPC client. Error = %ld\n",
+				__FUNCTION__, PTR_ERR(clnt));
+		return PTR_ERR(clnt);
+	}
+
+	clp->cl_rpcclient = clnt;
+	return 0;
+}
+
+/*
+ * Version 2 or 3 client destruction
+ */
+static void nfs_destroy_server(struct nfs_server *server)
+{
+	if (!IS_ERR(server->client_acl))
+		rpc_shutdown_client(server->client_acl);
+
+	if (!(server->flags & NFS_MOUNT_NONLM))
+		lockd_down();	/* release rpc.lockd */
+}
+
+/*
+ * Version 2 or 3 lockd setup
+ */
+static int nfs_start_lockd(struct nfs_server *server)
+{
+	int error = 0;
+
+	if (server->nfs_client->cl_nfsversion > 3)
+		goto out;
+	if (server->flags & NFS_MOUNT_NONLM)
+		goto out;
+	error = lockd_up();
+	if (error < 0)
+		server->flags |= NFS_MOUNT_NONLM;
+	else
+		server->destroy = nfs_destroy_server;
+out:
+	return error;
+}
+
+/*
+ * Initialise an NFSv3 ACL client connection
+ */
+#ifdef CONFIG_NFS_V3_ACL
+static void nfs_init_server_aclclient(struct nfs_server *server)
+{
+	if (server->nfs_client->cl_nfsversion != 3)
+		goto out_noacl;
+	if (server->flags & NFS_MOUNT_NOACL)
+		goto out_noacl;
+
+	server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3);
+	if (IS_ERR(server->client_acl))
+		goto out_noacl;
+
+	/* No errors! Assume that Sun nfsacls are supported */
+	server->caps |= NFS_CAP_ACLS;
+	return;
+
+out_noacl:
+	server->caps &= ~NFS_CAP_ACLS;
+}
+#else
+static inline void nfs_init_server_aclclient(struct nfs_server *server)
+{
+	server->flags &= ~NFS_MOUNT_NOACL;
+	server->caps &= ~NFS_CAP_ACLS;
+}
+#endif
+
+/*
+ * Create a general RPC client
+ */
+static int nfs_init_server_rpcclient(struct nfs_server *server, rpc_authflavor_t pseudoflavour)
+{
+	struct nfs_client *clp = server->nfs_client;
+
+	server->client = rpc_clone_client(clp->cl_rpcclient);
+	if (IS_ERR(server->client)) {
+		dprintk("%s: couldn't create rpc_client!\n", __FUNCTION__);
+		return PTR_ERR(server->client);
+	}
+
+	if (pseudoflavour != clp->cl_rpcclient->cl_auth->au_flavor) {
+		struct rpc_auth *auth;
+
+		auth = rpcauth_create(pseudoflavour, server->client);
+		if (IS_ERR(auth)) {
+			dprintk("%s: couldn't create credcache!\n", __FUNCTION__);
+			return PTR_ERR(auth);
+		}
+	}
+	server->client->cl_softrtry = 0;
+	if (server->flags & NFS_MOUNT_SOFT)
+		server->client->cl_softrtry = 1;
+
+	server->client->cl_intr = 0;
+	if (server->flags & NFS4_MOUNT_INTR)
+		server->client->cl_intr = 1;
+
+	return 0;
+}
+
+/*
+ * Initialise an NFS2 or NFS3 client
+ */
+static int nfs_init_client(struct nfs_client *clp, const struct nfs_mount_data *data)
+{
+	int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
+	int error;
+
+	if (clp->cl_cons_state == NFS_CS_READY) {
+		/* the client is already initialised */
+		dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp);
+		return 0;
+	}
+
+	/* Check NFS protocol revision and initialize RPC op vector */
+	clp->rpc_ops = &nfs_v2_clientops;
+#ifdef CONFIG_NFS_V3
+	if (clp->cl_nfsversion == 3)
+		clp->rpc_ops = &nfs_v3_clientops;
+#endif
+	/*
+	 * Create a client RPC handle for doing FSSTAT with UNIX auth only
+	 * - RFC 2623, sec 2.3.2
+	 */
+	error = nfs_create_rpc_client(clp, proto, data->timeo, data->retrans,
+			RPC_AUTH_UNIX);
+	if (error < 0)
+		goto error;
+	nfs_mark_client_ready(clp, NFS_CS_READY);
+	return 0;
+
+error:
+	nfs_mark_client_ready(clp, error);
+	dprintk("<-- nfs_init_client() = xerror %d\n", error);
+	return error;
+}
+
+/*
+ * Create a version 2 or 3 client
+ */
+static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_data *data)
+{
+	struct nfs_client *clp;
+	int error, nfsvers = 2;
+
+	dprintk("--> nfs_init_server()\n");
+
+#ifdef CONFIG_NFS_V3
+	if (data->flags & NFS_MOUNT_VER3)
+		nfsvers = 3;
+#endif
+
+	/* Allocate or find a client reference we can use */
+	clp = nfs_get_client(data->hostname, &data->addr, nfsvers);
+	if (IS_ERR(clp)) {
+		dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
+		return PTR_ERR(clp);
+	}
+
+	error = nfs_init_client(clp, data);
+	if (error < 0)
+		goto error;
+
+	server->nfs_client = clp;
+
+	/* Initialise the client representation from the mount data */
+	server->flags = data->flags & NFS_MOUNT_FLAGMASK;
+
+	if (data->rsize)
+		server->rsize = nfs_block_size(data->rsize, NULL);
+	if (data->wsize)
+		server->wsize = nfs_block_size(data->wsize, NULL);
+
+	server->acregmin = data->acregmin * HZ;
+	server->acregmax = data->acregmax * HZ;
+	server->acdirmin = data->acdirmin * HZ;
+	server->acdirmax = data->acdirmax * HZ;
+
+	/* Start lockd here, before we might error out */
+	error = nfs_start_lockd(server);
+	if (error < 0)
+		goto error;
+
+	error = nfs_init_server_rpcclient(server, data->pseudoflavor);
+	if (error < 0)
+		goto error;
+
+	server->namelen  = data->namlen;
+	/* Create a client RPC handle for the NFSv3 ACL management interface */
+	nfs_init_server_aclclient(server);
+	if (clp->cl_nfsversion == 3) {
+		if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
+			server->namelen = NFS3_MAXNAMLEN;
+		server->caps |= NFS_CAP_READDIRPLUS;
+	} else {
+		if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
+			server->namelen = NFS2_MAXNAMLEN;
+	}
+
+	dprintk("<-- nfs_init_server() = 0 [new %p]\n", clp);
+	return 0;
+
+error:
+	server->nfs_client = NULL;
+	nfs_put_client(clp);
+	dprintk("<-- nfs_init_server() = xerror %d\n", error);
+	return error;
+}
+
+/*
+ * Load up the server record from information gained in an fsinfo record
+ */
+static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *fsinfo)
+{
+	unsigned long max_rpc_payload;
+
+	/* Work out a lot of parameters */
+	if (server->rsize == 0)
+		server->rsize = nfs_block_size(fsinfo->rtpref, NULL);
+	if (server->wsize == 0)
+		server->wsize = nfs_block_size(fsinfo->wtpref, NULL);
+
+	if (fsinfo->rtmax >= 512 && server->rsize > fsinfo->rtmax)
+		server->rsize = nfs_block_size(fsinfo->rtmax, NULL);
+	if (fsinfo->wtmax >= 512 && server->wsize > fsinfo->wtmax)
+		server->wsize = nfs_block_size(fsinfo->wtmax, NULL);
+
+	max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL);
+	if (server->rsize > max_rpc_payload)
+		server->rsize = max_rpc_payload;
+	if (server->rsize > NFS_MAX_FILE_IO_SIZE)
+		server->rsize = NFS_MAX_FILE_IO_SIZE;
+	server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
+
+	if (server->wsize > max_rpc_payload)
+		server->wsize = max_rpc_payload;
+	if (server->wsize > NFS_MAX_FILE_IO_SIZE)
+		server->wsize = NFS_MAX_FILE_IO_SIZE;
+	server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
+
+	server->dtsize = nfs_block_size(fsinfo->dtpref, NULL);
+	if (server->dtsize > PAGE_CACHE_SIZE)
+		server->dtsize = PAGE_CACHE_SIZE;
+	if (server->dtsize > server->rsize)
+		server->dtsize = server->rsize;
+
+	if (server->flags & NFS_MOUNT_NOAC) {
+		server->acregmin = server->acregmax = 0;
+		server->acdirmin = server->acdirmax = 0;
+	}
+
+	server->maxfilesize = fsinfo->maxfilesize;
+
+	/* We're airborne Set socket buffersize */
+	rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
+}
+
+/*
+ * Probe filesystem information, including the FSID on v2/v3
+ */
+static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fattr *fattr)
+{
+	struct nfs_fsinfo fsinfo;
+	struct nfs_client *clp = server->nfs_client;
+	int error;
+
+	dprintk("--> nfs_probe_fsinfo()\n");
+
+	if (clp->rpc_ops->set_capabilities != NULL) {
+		error = clp->rpc_ops->set_capabilities(server, mntfh);
+		if (error < 0)
+			goto out_error;
+	}
+
+	fsinfo.fattr = fattr;
+	nfs_fattr_init(fattr);
+	error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
+	if (error < 0)
+		goto out_error;
+
+	nfs_server_set_fsinfo(server, &fsinfo);
+
+	/* Get some general file system info */
+	if (server->namelen == 0) {
+		struct nfs_pathconf pathinfo;
+
+		pathinfo.fattr = fattr;
+		nfs_fattr_init(fattr);
+
+		if (clp->rpc_ops->pathconf(server, mntfh, &pathinfo) >= 0)
+			server->namelen = pathinfo.max_namelen;
+	}
+
+	dprintk("<-- nfs_probe_fsinfo() = 0\n");
+	return 0;
+
+out_error:
+	dprintk("nfs_probe_fsinfo: error = %d\n", -error);
+	return error;
+}
+
+/*
+ * Copy useful information when duplicating a server record
+ */
+static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source)
+{
+	target->flags = source->flags;
+	target->acregmin = source->acregmin;
+	target->acregmax = source->acregmax;
+	target->acdirmin = source->acdirmin;
+	target->acdirmax = source->acdirmax;
+	target->caps = source->caps;
+}
+
+/*
+ * Allocate and initialise a server record
+ */
+static struct nfs_server *nfs_alloc_server(void)
+{
+	struct nfs_server *server;
+
+	server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
+	if (!server)
+		return NULL;
+
+	server->client = server->client_acl = ERR_PTR(-EINVAL);
+
+	/* Zero out the NFS state stuff */
+	INIT_LIST_HEAD(&server->client_link);
+	INIT_LIST_HEAD(&server->master_link);
+
+	server->io_stats = nfs_alloc_iostats();
+	if (!server->io_stats) {
+		kfree(server);
+		return NULL;
+	}
+
+	return server;
+}
+
+/*
+ * Free up a server record
+ */
+void nfs_free_server(struct nfs_server *server)
+{
+	dprintk("--> nfs_free_server()\n");
+
+	spin_lock(&nfs_client_lock);
+	list_del(&server->client_link);
+	list_del(&server->master_link);
+	spin_unlock(&nfs_client_lock);
+
+	if (server->destroy != NULL)
+		server->destroy(server);
+	if (!IS_ERR(server->client))
+		rpc_shutdown_client(server->client);
+
+	nfs_put_client(server->nfs_client);
+
+	nfs_free_iostats(server->io_stats);
+	kfree(server);
+	nfs_release_automount_timer();
+	dprintk("<-- nfs_free_server()\n");
+}
+
+/*
+ * Create a version 2 or 3 volume record
+ * - keyed on server and FSID
+ */
+struct nfs_server *nfs_create_server(const struct nfs_mount_data *data,
+				     struct nfs_fh *mntfh)
+{
+	struct nfs_server *server;
+	struct nfs_fattr fattr;
+	int error;
+
+	server = nfs_alloc_server();
+	if (!server)
+		return ERR_PTR(-ENOMEM);
+
+	/* Get a client representation */
+	error = nfs_init_server(server, data);
+	if (error < 0)
+		goto error;
+
+	BUG_ON(!server->nfs_client);
+	BUG_ON(!server->nfs_client->rpc_ops);
+	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+	/* Probe the root fh to retrieve its FSID */
+	error = nfs_probe_fsinfo(server, mntfh, &fattr);
+	if (error < 0)
+		goto error;
+	if (!(fattr.valid & NFS_ATTR_FATTR)) {
+		error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr);
+		if (error < 0) {
+			dprintk("nfs_create_server: getattr error = %d\n", -error);
+			goto error;
+		}
+	}
+	memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid));
+
+	dprintk("Server FSID: %llx:%llx\n",
+		(unsigned long long) server->fsid.major,
+		(unsigned long long) server->fsid.minor);
+
+	BUG_ON(!server->nfs_client);
+	BUG_ON(!server->nfs_client->rpc_ops);
+	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+	spin_lock(&nfs_client_lock);
+	list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
+	list_add_tail(&server->master_link, &nfs_volume_list);
+	spin_unlock(&nfs_client_lock);
+
+	server->mount_time = jiffies;
+	return server;
+
+error:
+	nfs_free_server(server);
+	return ERR_PTR(error);
+}
+
+#ifdef CONFIG_NFS_V4
+/*
+ * Initialise an NFS4 client record
+ */
+static int nfs4_init_client(struct nfs_client *clp,
+		int proto, int timeo, int retrans,
+		rpc_authflavor_t authflavour)
+{
+	int error;
+
+	if (clp->cl_cons_state == NFS_CS_READY) {
+		/* the client is initialised already */
+		dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp);
+		return 0;
+	}
+
+	/* Check NFS protocol revision and initialize RPC op vector */
+	clp->rpc_ops = &nfs_v4_clientops;
+
+	error = nfs_create_rpc_client(clp, proto, timeo, retrans, authflavour);
+	if (error < 0)
+		goto error;
+
+	error = nfs_idmap_new(clp);
+	if (error < 0) {
+		dprintk("%s: failed to create idmapper. Error = %d\n",
+			__FUNCTION__, error);
+		goto error;
+	}
+	__set_bit(NFS_CS_IDMAP, &clp->cl_res_state);
+
+	nfs_mark_client_ready(clp, NFS_CS_READY);
+	return 0;
+
+error:
+	nfs_mark_client_ready(clp, error);
+	dprintk("<-- nfs4_init_client() = xerror %d\n", error);
+	return error;
+}
+
+/*
+ * Set up an NFS4 client
+ */
+static int nfs4_set_client(struct nfs_server *server,
+		const char *hostname, const struct sockaddr_in *addr,
+		rpc_authflavor_t authflavour,
+		int proto, int timeo, int retrans)
+{
+	struct nfs_client *clp;
+	int error;
+
+	dprintk("--> nfs4_set_client()\n");
+
+	/* Allocate or find a client reference we can use */
+	clp = nfs_get_client(hostname, addr, 4);
+	if (IS_ERR(clp)) {
+		error = PTR_ERR(clp);
+		goto error;
+	}
+	error = nfs4_init_client(clp, proto, timeo, retrans, authflavour);
+	if (error < 0)
+		goto error_put;
+
+	server->nfs_client = clp;
+	dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp);
+	return 0;
+
+error_put:
+	nfs_put_client(clp);
+error:
+	dprintk("<-- nfs4_set_client() = xerror %d\n", error);
+	return error;
+}
+
+/*
+ * Create a version 4 volume record
+ */
+static int nfs4_init_server(struct nfs_server *server,
+		const struct nfs4_mount_data *data, rpc_authflavor_t authflavour)
+{
+	int error;
+
+	dprintk("--> nfs4_init_server()\n");
+
+	/* Initialise the client representation from the mount data */
+	server->flags = data->flags & NFS_MOUNT_FLAGMASK;
+	server->caps |= NFS_CAP_ATOMIC_OPEN;
+
+	if (data->rsize)
+		server->rsize = nfs_block_size(data->rsize, NULL);
+	if (data->wsize)
+		server->wsize = nfs_block_size(data->wsize, NULL);
+
+	server->acregmin = data->acregmin * HZ;
+	server->acregmax = data->acregmax * HZ;
+	server->acdirmin = data->acdirmin * HZ;
+	server->acdirmax = data->acdirmax * HZ;
+
+	error = nfs_init_server_rpcclient(server, authflavour);
+
+	/* Done */
+	dprintk("<-- nfs4_init_server() = %d\n", error);
+	return error;
+}
+
+/*
+ * Create a version 4 volume record
+ * - keyed on server and FSID
+ */
+struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data,
+				      const char *hostname,
+				      const struct sockaddr_in *addr,
+				      const char *mntpath,
+				      const char *ip_addr,
+				      rpc_authflavor_t authflavour,
+				      struct nfs_fh *mntfh)
+{
+	struct nfs_fattr fattr;
+	struct nfs_server *server;
+	int error;
+
+	dprintk("--> nfs4_create_server()\n");
+
+	server = nfs_alloc_server();
+	if (!server)
+		return ERR_PTR(-ENOMEM);
+
+	/* Get a client record */
+	error = nfs4_set_client(server, hostname, addr, authflavour,
+			data->proto, data->timeo, data->retrans);
+	if (error < 0)
+		goto error;
+
+	/* set up the general RPC client */
+	error = nfs4_init_server(server, data, authflavour);
+	if (error < 0)
+		goto error;
+
+	BUG_ON(!server->nfs_client);
+	BUG_ON(!server->nfs_client->rpc_ops);
+	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+	/* Probe the root fh to retrieve its FSID */
+	error = nfs4_path_walk(server, mntfh, mntpath);
+	if (error < 0)
+		goto error;
+
+	dprintk("Server FSID: %llx:%llx\n",
+		(unsigned long long) server->fsid.major,
+		(unsigned long long) server->fsid.minor);
+	dprintk("Mount FH: %d\n", mntfh->size);
+
+	error = nfs_probe_fsinfo(server, mntfh, &fattr);
+	if (error < 0)
+		goto error;
+
+	BUG_ON(!server->nfs_client);
+	BUG_ON(!server->nfs_client->rpc_ops);
+	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+	spin_lock(&nfs_client_lock);
+	list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
+	list_add_tail(&server->master_link, &nfs_volume_list);
+	spin_unlock(&nfs_client_lock);
+
+	server->mount_time = jiffies;
+	dprintk("<-- nfs4_create_server() = %p\n", server);
+	return server;
+
+error:
+	nfs_free_server(server);
+	dprintk("<-- nfs4_create_server() = error %d\n", error);
+	return ERR_PTR(error);
+}
+
+/*
+ * Create an NFS4 referral server record
+ */
+struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
+					       struct nfs_fh *fh)
+{
+	struct nfs_client *parent_client;
+	struct nfs_server *server, *parent_server;
+	struct nfs_fattr fattr;
+	int error;
+
+	dprintk("--> nfs4_create_referral_server()\n");
+
+	server = nfs_alloc_server();
+	if (!server)
+		return ERR_PTR(-ENOMEM);
+
+	parent_server = NFS_SB(data->sb);
+	parent_client = parent_server->nfs_client;
+
+	/* Get a client representation.
+	 * Note: NFSv4 always uses TCP, */
+	error = nfs4_set_client(server, data->hostname, data->addr,
+			data->authflavor,
+			parent_server->client->cl_xprt->prot,
+			parent_client->retrans_timeo,
+			parent_client->retrans_count);
+	if (error < 0)
+		goto error;
+
+	/* Initialise the client representation from the parent server */
+	nfs_server_copy_userdata(server, parent_server);
+	server->caps |= NFS_CAP_ATOMIC_OPEN;
+
+	error = nfs_init_server_rpcclient(server, data->authflavor);
+	if (error < 0)
+		goto error;
+
+	BUG_ON(!server->nfs_client);
+	BUG_ON(!server->nfs_client->rpc_ops);
+	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+	/* probe the filesystem info for this server filesystem */
+	error = nfs_probe_fsinfo(server, fh, &fattr);
+	if (error < 0)
+		goto error;
+
+	dprintk("Referral FSID: %llx:%llx\n",
+		(unsigned long long) server->fsid.major,
+		(unsigned long long) server->fsid.minor);
+
+	spin_lock(&nfs_client_lock);
+	list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
+	list_add_tail(&server->master_link, &nfs_volume_list);
+	spin_unlock(&nfs_client_lock);
+
+	server->mount_time = jiffies;
+
+	dprintk("<-- nfs_create_referral_server() = %p\n", server);
+	return server;
+
+error:
+	nfs_free_server(server);
+	dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
+	return ERR_PTR(error);
+}
+
+#endif /* CONFIG_NFS_V4 */
+
+/*
+ * Clone an NFS2, NFS3 or NFS4 server record
+ */
+struct nfs_server *nfs_clone_server(struct nfs_server *source,
+				    struct nfs_fh *fh,
+				    struct nfs_fattr *fattr)
+{
+	struct nfs_server *server;
+	struct nfs_fattr fattr_fsinfo;
+	int error;
+
+	dprintk("--> nfs_clone_server(,%llx:%llx,)\n",
+		(unsigned long long) fattr->fsid.major,
+		(unsigned long long) fattr->fsid.minor);
+
+	server = nfs_alloc_server();
+	if (!server)
+		return ERR_PTR(-ENOMEM);
+
+	/* Copy data from the source */
+	server->nfs_client = source->nfs_client;
+	atomic_inc(&server->nfs_client->cl_count);
+	nfs_server_copy_userdata(server, source);
+
+	server->fsid = fattr->fsid;
+
+	error = nfs_init_server_rpcclient(server, source->client->cl_auth->au_flavor);
+	if (error < 0)
+		goto out_free_server;
+	if (!IS_ERR(source->client_acl))
+		nfs_init_server_aclclient(server);
+
+	/* probe the filesystem info for this server filesystem */
+	error = nfs_probe_fsinfo(server, fh, &fattr_fsinfo);
+	if (error < 0)
+		goto out_free_server;
+
+	dprintk("Cloned FSID: %llx:%llx\n",
+		(unsigned long long) server->fsid.major,
+		(unsigned long long) server->fsid.minor);
+
+	error = nfs_start_lockd(server);
+	if (error < 0)
+		goto out_free_server;
+
+	spin_lock(&nfs_client_lock);
+	list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
+	list_add_tail(&server->master_link, &nfs_volume_list);
+	spin_unlock(&nfs_client_lock);
+
+	server->mount_time = jiffies;
+
+	dprintk("<-- nfs_clone_server() = %p\n", server);
+	return server;
+
+out_free_server:
+	nfs_free_server(server);
+	dprintk("<-- nfs_clone_server() = error %d\n", error);
+	return ERR_PTR(error);
+}
+
+#ifdef CONFIG_PROC_FS
+static struct proc_dir_entry *proc_fs_nfs;
+
+static int nfs_server_list_open(struct inode *inode, struct file *file);
+static void *nfs_server_list_start(struct seq_file *p, loff_t *pos);
+static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos);
+static void nfs_server_list_stop(struct seq_file *p, void *v);
+static int nfs_server_list_show(struct seq_file *m, void *v);
+
+static struct seq_operations nfs_server_list_ops = {
+	.start	= nfs_server_list_start,
+	.next	= nfs_server_list_next,
+	.stop	= nfs_server_list_stop,
+	.show	= nfs_server_list_show,
+};
+
+static struct file_operations nfs_server_list_fops = {
+	.open		= nfs_server_list_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static int nfs_volume_list_open(struct inode *inode, struct file *file);
+static void *nfs_volume_list_start(struct seq_file *p, loff_t *pos);
+static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos);
+static void nfs_volume_list_stop(struct seq_file *p, void *v);
+static int nfs_volume_list_show(struct seq_file *m, void *v);
+
+static struct seq_operations nfs_volume_list_ops = {
+	.start	= nfs_volume_list_start,
+	.next	= nfs_volume_list_next,
+	.stop	= nfs_volume_list_stop,
+	.show	= nfs_volume_list_show,
+};
+
+static struct file_operations nfs_volume_list_fops = {
+	.open		= nfs_volume_list_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+/*
+ * open "/proc/fs/nfsfs/servers" which provides a summary of servers with which
+ * we're dealing
+ */
+static int nfs_server_list_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *m;
+	int ret;
+
+	ret = seq_open(file, &nfs_server_list_ops);
+	if (ret < 0)
+		return ret;
+
+	m = file->private_data;
+	m->private = PDE(inode)->data;
+
+	return 0;
+}
+
+/*
+ * set up the iterator to start reading from the server list and return the first item
+ */
+static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
+{
+	struct list_head *_p;
+	loff_t pos = *_pos;
+
+	/* lock the list against modification */
+	spin_lock(&nfs_client_lock);
+
+	/* allow for the header line */
+	if (!pos)
+		return SEQ_START_TOKEN;
+	pos--;
+
+	/* find the n'th element in the list */
+	list_for_each(_p, &nfs_client_list)
+		if (!pos--)
+			break;
+
+	return _p != &nfs_client_list ? _p : NULL;
+}
+
+/*
+ * move to next server
+ */
+static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos)
+{
+	struct list_head *_p;
+
+	(*pos)++;
+
+	_p = v;
+	_p = (v == SEQ_START_TOKEN) ? nfs_client_list.next : _p->next;
+
+	return _p != &nfs_client_list ? _p : NULL;
+}
+
+/*
+ * clean up after reading from the transports list
+ */
+static void nfs_server_list_stop(struct seq_file *p, void *v)
+{
+	spin_unlock(&nfs_client_lock);
+}
+
+/*
+ * display a header line followed by a load of call lines
+ */
+static int nfs_server_list_show(struct seq_file *m, void *v)
+{
+	struct nfs_client *clp;
+
+	/* display header on line 1 */
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(m, "NV SERVER   PORT USE HOSTNAME\n");
+		return 0;
+	}
+
+	/* display one transport per line on subsequent lines */
+	clp = list_entry(v, struct nfs_client, cl_share_link);
+
+	seq_printf(m, "v%d %02x%02x%02x%02x %4hx %3d %s\n",
+		   clp->cl_nfsversion,
+		   NIPQUAD(clp->cl_addr.sin_addr),
+		   ntohs(clp->cl_addr.sin_port),
+		   atomic_read(&clp->cl_count),
+		   clp->cl_hostname);
+
+	return 0;
+}
+
+/*
+ * open "/proc/fs/nfsfs/volumes" which provides a summary of extant volumes
+ */
+static int nfs_volume_list_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *m;
+	int ret;
+
+	ret = seq_open(file, &nfs_volume_list_ops);
+	if (ret < 0)
+		return ret;
+
+	m = file->private_data;
+	m->private = PDE(inode)->data;
+
+	return 0;
+}
+
+/*
+ * set up the iterator to start reading from the volume list and return the first item
+ */
+static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
+{
+	struct list_head *_p;
+	loff_t pos = *_pos;
+
+	/* lock the list against modification */
+	spin_lock(&nfs_client_lock);
+
+	/* allow for the header line */
+	if (!pos)
+		return SEQ_START_TOKEN;
+	pos--;
+
+	/* find the n'th element in the list */
+	list_for_each(_p, &nfs_volume_list)
+		if (!pos--)
+			break;
+
+	return _p != &nfs_volume_list ? _p : NULL;
+}
+
+/*
+ * move to next volume
+ */
+static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos)
+{
+	struct list_head *_p;
+
+	(*pos)++;
+
+	_p = v;
+	_p = (v == SEQ_START_TOKEN) ? nfs_volume_list.next : _p->next;
+
+	return _p != &nfs_volume_list ? _p : NULL;
+}
+
+/*
+ * clean up after reading from the transports list
+ */
+static void nfs_volume_list_stop(struct seq_file *p, void *v)
+{
+	spin_unlock(&nfs_client_lock);
+}
+
+/*
+ * display a header line followed by a load of call lines
+ */
+static int nfs_volume_list_show(struct seq_file *m, void *v)
+{
+	struct nfs_server *server;
+	struct nfs_client *clp;
+	char dev[8], fsid[17];
+
+	/* display header on line 1 */
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(m, "NV SERVER   PORT DEV     FSID\n");
+		return 0;
+	}
+	/* display one transport per line on subsequent lines */
+	server = list_entry(v, struct nfs_server, master_link);
+	clp = server->nfs_client;
+
+	snprintf(dev, 8, "%u:%u",
+		 MAJOR(server->s_dev), MINOR(server->s_dev));
+
+	snprintf(fsid, 17, "%llx:%llx",
+		 (unsigned long long) server->fsid.major,
+		 (unsigned long long) server->fsid.minor);
+
+	seq_printf(m, "v%d %02x%02x%02x%02x %4hx %-7s %-17s\n",
+		   clp->cl_nfsversion,
+		   NIPQUAD(clp->cl_addr.sin_addr),
+		   ntohs(clp->cl_addr.sin_port),
+		   dev,
+		   fsid);
+
+	return 0;
+}
+
+/*
+ * initialise the /proc/fs/nfsfs/ directory
+ */
+int __init nfs_fs_proc_init(void)
+{
+	struct proc_dir_entry *p;
+
+	proc_fs_nfs = proc_mkdir("nfsfs", proc_root_fs);
+	if (!proc_fs_nfs)
+		goto error_0;
+
+	proc_fs_nfs->owner = THIS_MODULE;
+
+	/* a file of servers with which we're dealing */
+	p = create_proc_entry("servers", S_IFREG|S_IRUGO, proc_fs_nfs);
+	if (!p)
+		goto error_1;
+
+	p->proc_fops = &nfs_server_list_fops;
+	p->owner = THIS_MODULE;
+
+	/* a file of volumes that we have mounted */
+	p = create_proc_entry("volumes", S_IFREG|S_IRUGO, proc_fs_nfs);
+	if (!p)
+		goto error_2;
+
+	p->proc_fops = &nfs_volume_list_fops;
+	p->owner = THIS_MODULE;
+	return 0;
+
+error_2:
+	remove_proc_entry("servers", proc_fs_nfs);
+error_1:
+	remove_proc_entry("nfsfs", proc_root_fs);
+error_0:
+	return -ENOMEM;
+}
+
+/*
+ * clean up the /proc/fs/nfsfs/ directory
+ */
+void nfs_fs_proc_exit(void)
+{
+	remove_proc_entry("volumes", proc_fs_nfs);
+	remove_proc_entry("servers", proc_fs_nfs);
+	remove_proc_entry("nfsfs", proc_root_fs);
+}
+
+#endif /* CONFIG_PROC_FS */
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 9540a316c05..57133678db1 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -18,6 +18,7 @@
 
 #include "nfs4_fs.h"
 #include "delegation.h"
+#include "internal.h"
 
 static struct nfs_delegation *nfs_alloc_delegation(void)
 {
@@ -52,7 +53,7 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
 			case -NFS4ERR_EXPIRED:
 				/* kill_proc(fl->fl_pid, SIGLOST, 1); */
 			case -NFS4ERR_STALE_CLIENTID:
-				nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs4_state);
+				nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs_client);
 				goto out_err;
 		}
 	}
@@ -114,7 +115,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st
  */
 int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
 {
-	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 	int status = 0;
@@ -145,7 +146,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 					sizeof(delegation->stateid)) != 0 ||
 				delegation->type != nfsi->delegation->type) {
 			printk("%s: server %u.%u.%u.%u, handed out a duplicate delegation!\n",
-					__FUNCTION__, NIPQUAD(clp->cl_addr));
+					__FUNCTION__, NIPQUAD(clp->cl_addr.sin_addr));
 			status = -EIO;
 		}
 	}
@@ -176,7 +177,7 @@ static void nfs_msync_inode(struct inode *inode)
  */
 int __nfs_inode_return_delegation(struct inode *inode)
 {
-	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 	int res = 0;
@@ -208,7 +209,7 @@ int __nfs_inode_return_delegation(struct inode *inode)
  */
 void nfs_return_all_delegations(struct super_block *sb)
 {
-	struct nfs4_client *clp = NFS_SB(sb)->nfs4_state;
+	struct nfs_client *clp = NFS_SB(sb)->nfs_client;
 	struct nfs_delegation *delegation;
 	struct inode *inode;
 
@@ -232,7 +233,7 @@ restart:
 
 int nfs_do_expire_all_delegations(void *ptr)
 {
-	struct nfs4_client *clp = ptr;
+	struct nfs_client *clp = ptr;
 	struct nfs_delegation *delegation;
 	struct inode *inode;
 
@@ -254,11 +255,11 @@ restart:
 	}
 out:
 	spin_unlock(&clp->cl_lock);
-	nfs4_put_client(clp);
+	nfs_put_client(clp);
 	module_put_and_exit(0);
 }
 
-void nfs_expire_all_delegations(struct nfs4_client *clp)
+void nfs_expire_all_delegations(struct nfs_client *clp)
 {
 	struct task_struct *task;
 
@@ -266,17 +267,17 @@ void nfs_expire_all_delegations(struct nfs4_client *clp)
 	atomic_inc(&clp->cl_count);
 	task = kthread_run(nfs_do_expire_all_delegations, clp,
 			"%u.%u.%u.%u-delegreturn",
-			NIPQUAD(clp->cl_addr));
+			NIPQUAD(clp->cl_addr.sin_addr));
 	if (!IS_ERR(task))
 		return;
-	nfs4_put_client(clp);
+	nfs_put_client(clp);
 	module_put(THIS_MODULE);
 }
 
 /*
  * Return all delegations following an NFS4ERR_CB_PATH_DOWN error.
  */
-void nfs_handle_cb_pathdown(struct nfs4_client *clp)
+void nfs_handle_cb_pathdown(struct nfs_client *clp)
 {
 	struct nfs_delegation *delegation;
 	struct inode *inode;
@@ -299,7 +300,7 @@ restart:
 
 struct recall_threadargs {
 	struct inode *inode;
-	struct nfs4_client *clp;
+	struct nfs_client *clp;
 	const nfs4_stateid *stateid;
 
 	struct completion started;
@@ -310,7 +311,7 @@ static int recall_thread(void *data)
 {
 	struct recall_threadargs *args = (struct recall_threadargs *)data;
 	struct inode *inode = igrab(args->inode);
-	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 
@@ -371,7 +372,7 @@ out_module_put:
 /*
  * Retrieve the inode associated with a delegation
  */
-struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle)
+struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle)
 {
 	struct nfs_delegation *delegation;
 	struct inode *res = NULL;
@@ -389,7 +390,7 @@ struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nf
 /*
  * Mark all delegations as needing to be reclaimed
  */
-void nfs_delegation_mark_reclaim(struct nfs4_client *clp)
+void nfs_delegation_mark_reclaim(struct nfs_client *clp)
 {
 	struct nfs_delegation *delegation;
 	spin_lock(&clp->cl_lock);
@@ -401,7 +402,7 @@ void nfs_delegation_mark_reclaim(struct nfs4_client *clp)
 /*
  * Reap all unclaimed delegations after reboot recovery is done
  */
-void nfs_delegation_reap_unclaimed(struct nfs4_client *clp)
+void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
 {
 	struct nfs_delegation *delegation, *n;
 	LIST_HEAD(head);
@@ -423,7 +424,7 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp)
 
 int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
 {
-	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 	int res = 0;
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 3858694652f..2cfd4b24c7f 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -29,13 +29,13 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st
 int __nfs_inode_return_delegation(struct inode *inode);
 int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
 
-struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle);
+struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
 void nfs_return_all_delegations(struct super_block *sb);
-void nfs_expire_all_delegations(struct nfs4_client *clp);
-void nfs_handle_cb_pathdown(struct nfs4_client *clp);
+void nfs_expire_all_delegations(struct nfs_client *clp);
+void nfs_handle_cb_pathdown(struct nfs_client *clp);
 
-void nfs_delegation_mark_reclaim(struct nfs4_client *clp);
-void nfs_delegation_reap_unclaimed(struct nfs4_client *clp);
+void nfs_delegation_mark_reclaim(struct nfs_client *clp);
+void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
 
 /* NFSv4 delegation-related procedures */
 int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e7ffb4deb3e..3419c2da9ba 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -30,7 +30,9 @@
 #include <linux/nfs_mount.h>
 #include <linux/pagemap.h>
 #include <linux/smp_lock.h>
+#include <linux/pagevec.h>
 #include <linux/namei.h>
+#include <linux/mount.h>
 
 #include "nfs4_fs.h"
 #include "delegation.h"
@@ -870,14 +872,14 @@ int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
 	return (nd->intent.open.flags & O_EXCL) != 0;
 }
 
-static inline int nfs_reval_fsid(struct inode *dir,
-		struct nfs_fh *fh, struct nfs_fattr *fattr)
+static inline int nfs_reval_fsid(struct vfsmount *mnt, struct inode *dir,
+				 struct nfs_fh *fh, struct nfs_fattr *fattr)
 {
 	struct nfs_server *server = NFS_SERVER(dir);
 
 	if (!nfs_fsid_equal(&server->fsid, &fattr->fsid))
 		/* Revalidate fsid on root dir */
-		return __nfs_revalidate_inode(server, dir->i_sb->s_root->d_inode);
+		return __nfs_revalidate_inode(server, mnt->mnt_root->d_inode);
 	return 0;
 }
 
@@ -902,9 +904,15 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 
 	lock_kernel();
 
-	/* If we're doing an exclusive create, optimize away the lookup */
-	if (nfs_is_exclusive_create(dir, nd))
-		goto no_entry;
+	/*
+	 * If we're doing an exclusive create, optimize away the lookup
+	 * but don't hash the dentry.
+	 */
+	if (nfs_is_exclusive_create(dir, nd)) {
+		d_instantiate(dentry, NULL);
+		res = NULL;
+		goto out_unlock;
+	}
 
 	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
 	if (error == -ENOENT)
@@ -913,7 +921,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 		res = ERR_PTR(error);
 		goto out_unlock;
 	}
-	error = nfs_reval_fsid(dir, &fhandle, &fattr);
+	error = nfs_reval_fsid(nd->mnt, dir, &fhandle, &fattr);
 	if (error < 0) {
 		res = ERR_PTR(error);
 		goto out_unlock;
@@ -922,8 +930,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 	res = (struct dentry *)inode;
 	if (IS_ERR(res))
 		goto out_unlock;
+
 no_entry:
-	res = d_add_unique(dentry, inode);
+	res = d_materialise_unique(dentry, inode);
 	if (res != NULL)
 		dentry = res;
 	nfs_renew_times(dentry);
@@ -1117,11 +1126,13 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
 		dput(dentry);
 		return NULL;
 	}
-	alias = d_add_unique(dentry, inode);
+
+	alias = d_materialise_unique(dentry, inode);
 	if (alias != NULL) {
 		dput(dentry);
 		dentry = alias;
 	}
+
 	nfs_renew_times(dentry);
 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 	return dentry;
@@ -1143,23 +1154,22 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
 		struct inode *dir = dentry->d_parent->d_inode;
 		error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
 		if (error)
-			goto out_err;
+			return error;
 	}
 	if (!(fattr->valid & NFS_ATTR_FATTR)) {
 		struct nfs_server *server = NFS_SB(dentry->d_sb);
-		error = server->rpc_ops->getattr(server, fhandle, fattr);
+		error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr);
 		if (error < 0)
-			goto out_err;
+			return error;
 	}
 	inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
 	error = PTR_ERR(inode);
 	if (IS_ERR(inode))
-		goto out_err;
+		return error;
 	d_instantiate(dentry, inode);
+	if (d_unhashed(dentry))
+		d_rehash(dentry);
 	return 0;
-out_err:
-	d_drop(dentry);
-	return error;
 }
 
 /*
@@ -1440,48 +1450,82 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
 	return error;
 }
 
-static int
-nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+/*
+ * To create a symbolic link, most file systems instantiate a new inode,
+ * add a page to it containing the path, then write it out to the disk
+ * using prepare_write/commit_write.
+ *
+ * Unfortunately the NFS client can't create the in-core inode first
+ * because it needs a file handle to create an in-core inode (see
+ * fs/nfs/inode.c:nfs_fhget).  We only have a file handle *after* the
+ * symlink request has completed on the server.
+ *
+ * So instead we allocate a raw page, copy the symname into it, then do
+ * the SYMLINK request with the page as the buffer.  If it succeeds, we
+ * now have a new file handle and can instantiate an in-core NFS inode
+ * and move the raw page into its mapping.
+ */
+static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
 {
+	struct pagevec lru_pvec;
+	struct page *page;
+	char *kaddr;
 	struct iattr attr;
-	struct nfs_fattr sym_attr;
-	struct nfs_fh sym_fh;
-	struct qstr qsymname;
+	unsigned int pathlen = strlen(symname);
 	int error;
 
 	dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id,
 		dir->i_ino, dentry->d_name.name, symname);
 
-#ifdef NFS_PARANOIA
-if (dentry->d_inode)
-printk("nfs_proc_symlink: %s/%s not negative!\n",
-dentry->d_parent->d_name.name, dentry->d_name.name);
-#endif
-	/*
-	 * Fill in the sattr for the call.
- 	 * Note: SunOS 4.1.2 crashes if the mode isn't initialized!
-	 */
-	attr.ia_valid = ATTR_MODE;
-	attr.ia_mode = S_IFLNK | S_IRWXUGO;
+	if (pathlen > PAGE_SIZE)
+		return -ENAMETOOLONG;
 
-	qsymname.name = symname;
-	qsymname.len  = strlen(symname);
+	attr.ia_mode = S_IFLNK | S_IRWXUGO;
+	attr.ia_valid = ATTR_MODE;
 
 	lock_kernel();
+
+	page = alloc_page(GFP_KERNEL);
+	if (!page) {
+		unlock_kernel();
+		return -ENOMEM;
+	}
+
+	kaddr = kmap_atomic(page, KM_USER0);
+	memcpy(kaddr, symname, pathlen);
+	if (pathlen < PAGE_SIZE)
+		memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
+	kunmap_atomic(kaddr, KM_USER0);
+
 	nfs_begin_data_update(dir);
-	error = NFS_PROTO(dir)->symlink(dir, &dentry->d_name, &qsymname,
-					  &attr, &sym_fh, &sym_attr);
+	error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
 	nfs_end_data_update(dir);
-	if (!error) {
-		error = nfs_instantiate(dentry, &sym_fh, &sym_attr);
-	} else {
-		if (error == -EEXIST)
-			printk("nfs_proc_symlink: %s/%s already exists??\n",
-			       dentry->d_parent->d_name.name, dentry->d_name.name);
+	if (error != 0) {
+		dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n",
+			dir->i_sb->s_id, dir->i_ino,
+			dentry->d_name.name, symname, error);
 		d_drop(dentry);
+		__free_page(page);
+		unlock_kernel();
+		return error;
 	}
+
+	/*
+	 * No big deal if we can't add this page to the page cache here.
+	 * READLINK will get the missing page from the server if needed.
+	 */
+	pagevec_init(&lru_pvec, 0);
+	if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0,
+							GFP_KERNEL)) {
+		if (!pagevec_add(&lru_pvec, page))
+			__pagevec_lru_add(&lru_pvec);
+		SetPageUptodate(page);
+		unlock_page(page);
+	} else
+		__free_page(page);
+
 	unlock_kernel();
-	return error;
+	return 0;
 }
 
 static int 
@@ -1638,35 +1682,211 @@ out:
 	return error;
 }
 
+static DEFINE_SPINLOCK(nfs_access_lru_lock);
+static LIST_HEAD(nfs_access_lru_list);
+static atomic_long_t nfs_access_nr_entries;
+
+static void nfs_access_free_entry(struct nfs_access_entry *entry)
+{
+	put_rpccred(entry->cred);
+	kfree(entry);
+	smp_mb__before_atomic_dec();
+	atomic_long_dec(&nfs_access_nr_entries);
+	smp_mb__after_atomic_dec();
+}
+
+int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
+{
+	LIST_HEAD(head);
+	struct nfs_inode *nfsi;
+	struct nfs_access_entry *cache;
+
+	spin_lock(&nfs_access_lru_lock);
+restart:
+	list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
+		struct inode *inode;
+
+		if (nr_to_scan-- == 0)
+			break;
+		inode = igrab(&nfsi->vfs_inode);
+		if (inode == NULL)
+			continue;
+		spin_lock(&inode->i_lock);
+		if (list_empty(&nfsi->access_cache_entry_lru))
+			goto remove_lru_entry;
+		cache = list_entry(nfsi->access_cache_entry_lru.next,
+				struct nfs_access_entry, lru);
+		list_move(&cache->lru, &head);
+		rb_erase(&cache->rb_node, &nfsi->access_cache);
+		if (!list_empty(&nfsi->access_cache_entry_lru))
+			list_move_tail(&nfsi->access_cache_inode_lru,
+					&nfs_access_lru_list);
+		else {
+remove_lru_entry:
+			list_del_init(&nfsi->access_cache_inode_lru);
+			clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
+		}
+		spin_unlock(&inode->i_lock);
+		iput(inode);
+		goto restart;
+	}
+	spin_unlock(&nfs_access_lru_lock);
+	while (!list_empty(&head)) {
+		cache = list_entry(head.next, struct nfs_access_entry, lru);
+		list_del(&cache->lru);
+		nfs_access_free_entry(cache);
+	}
+	return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure;
+}
+
+static void __nfs_access_zap_cache(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct rb_root *root_node = &nfsi->access_cache;
+	struct rb_node *n, *dispose = NULL;
+	struct nfs_access_entry *entry;
+
+	/* Unhook entries from the cache */
+	while ((n = rb_first(root_node)) != NULL) {
+		entry = rb_entry(n, struct nfs_access_entry, rb_node);
+		rb_erase(n, root_node);
+		list_del(&entry->lru);
+		n->rb_left = dispose;
+		dispose = n;
+	}
+	nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
+	spin_unlock(&inode->i_lock);
+
+	/* Now kill them all! */
+	while (dispose != NULL) {
+		n = dispose;
+		dispose = n->rb_left;
+		nfs_access_free_entry(rb_entry(n, struct nfs_access_entry, rb_node));
+	}
+}
+
+void nfs_access_zap_cache(struct inode *inode)
+{
+	/* Remove from global LRU init */
+	if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_FLAGS(inode))) {
+		spin_lock(&nfs_access_lru_lock);
+		list_del_init(&NFS_I(inode)->access_cache_inode_lru);
+		spin_unlock(&nfs_access_lru_lock);
+	}
+
+	spin_lock(&inode->i_lock);
+	/* This will release the spinlock */
+	__nfs_access_zap_cache(inode);
+}
+
+static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred)
+{
+	struct rb_node *n = NFS_I(inode)->access_cache.rb_node;
+	struct nfs_access_entry *entry;
+
+	while (n != NULL) {
+		entry = rb_entry(n, struct nfs_access_entry, rb_node);
+
+		if (cred < entry->cred)
+			n = n->rb_left;
+		else if (cred > entry->cred)
+			n = n->rb_right;
+		else
+			return entry;
+	}
+	return NULL;
+}
+
 int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
-	struct nfs_access_entry *cache = &nfsi->cache_access;
+	struct nfs_access_entry *cache;
+	int err = -ENOENT;
 
-	if (cache->cred != cred
-			|| time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))
-			|| (nfsi->cache_validity & NFS_INO_INVALID_ACCESS))
-		return -ENOENT;
-	memcpy(res, cache, sizeof(*res));
-	return 0;
+	spin_lock(&inode->i_lock);
+	if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
+		goto out_zap;
+	cache = nfs_access_search_rbtree(inode, cred);
+	if (cache == NULL)
+		goto out;
+	if (time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)))
+		goto out_stale;
+	res->jiffies = cache->jiffies;
+	res->cred = cache->cred;
+	res->mask = cache->mask;
+	list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru);
+	err = 0;
+out:
+	spin_unlock(&inode->i_lock);
+	return err;
+out_stale:
+	rb_erase(&cache->rb_node, &nfsi->access_cache);
+	list_del(&cache->lru);
+	spin_unlock(&inode->i_lock);
+	nfs_access_free_entry(cache);
+	return -ENOENT;
+out_zap:
+	/* This will release the spinlock */
+	__nfs_access_zap_cache(inode);
+	return -ENOENT;
 }
 
-void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
+static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
-	struct nfs_access_entry *cache = &nfsi->cache_access;
+	struct rb_root *root_node = &nfsi->access_cache;
+	struct rb_node **p = &root_node->rb_node;
+	struct rb_node *parent = NULL;
+	struct nfs_access_entry *entry;
 
-	if (cache->cred != set->cred) {
-		if (cache->cred)
-			put_rpccred(cache->cred);
-		cache->cred = get_rpccred(set->cred);
-	}
-	/* FIXME: replace current access_cache BKL reliance with inode->i_lock */
 	spin_lock(&inode->i_lock);
-	nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
+	while (*p != NULL) {
+		parent = *p;
+		entry = rb_entry(parent, struct nfs_access_entry, rb_node);
+
+		if (set->cred < entry->cred)
+			p = &parent->rb_left;
+		else if (set->cred > entry->cred)
+			p = &parent->rb_right;
+		else
+			goto found;
+	}
+	rb_link_node(&set->rb_node, parent, p);
+	rb_insert_color(&set->rb_node, root_node);
+	list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
 	spin_unlock(&inode->i_lock);
+	return;
+found:
+	rb_replace_node(parent, &set->rb_node, root_node);
+	list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
+	list_del(&entry->lru);
+	spin_unlock(&inode->i_lock);
+	nfs_access_free_entry(entry);
+}
+
+void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
+{
+	struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
+	if (cache == NULL)
+		return;
+	RB_CLEAR_NODE(&cache->rb_node);
 	cache->jiffies = set->jiffies;
+	cache->cred = get_rpccred(set->cred);
 	cache->mask = set->mask;
+
+	nfs_access_add_rbtree(inode, cache);
+
+	/* Update accounting */
+	smp_mb__before_atomic_inc();
+	atomic_long_inc(&nfs_access_nr_entries);
+	smp_mb__after_atomic_inc();
+
+	/* Add inode to global LRU list */
+	if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_FLAGS(inode))) {
+		spin_lock(&nfs_access_lru_lock);
+		list_add_tail(&NFS_I(inode)->access_cache_inode_lru, &nfs_access_lru_list);
+		spin_unlock(&nfs_access_lru_lock);
+	}
 }
 
 static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 48e892880d5..be997d64912 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -111,7 +111,7 @@ nfs_file_open(struct inode *inode, struct file *filp)
 
 	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
 	lock_kernel();
-	res = NFS_SERVER(inode)->rpc_ops->file_open(inode, filp);
+	res = NFS_PROTO(inode)->file_open(inode, filp);
 	unlock_kernel();
 	return res;
 }
@@ -157,7 +157,7 @@ force_reval:
 static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
 {
 	/* origin == SEEK_END => we must revalidate the cached file length */
-	if (origin == 2) {
+	if (origin == SEEK_END) {
 		struct inode *inode = filp->f_mapping->host;
 		int retval = nfs_revalidate_file_size(inode, filp);
 		if (retval < 0)
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
new file mode 100644
index 00000000000..76b08ae9ed8
--- /dev/null
+++ b/fs/nfs/getroot.c
@@ -0,0 +1,311 @@
+/* getroot.c: get the root dentry for an NFS mount
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/time.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/nfs4_mount.h>
+#include <linux/lockd/bind.h>
+#include <linux/smp_lock.h>
+#include <linux/seq_file.h>
+#include <linux/mount.h>
+#include <linux/nfs_idmap.h>
+#include <linux/vfs.h>
+#include <linux/namei.h>
+#include <linux/namespace.h>
+#include <linux/security.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include "nfs4_fs.h"
+#include "delegation.h"
+#include "internal.h"
+
+#define NFSDBG_FACILITY		NFSDBG_CLIENT
+#define NFS_PARANOIA 1
+
+/*
+ * get an NFS2/NFS3 root dentry from the root filehandle
+ */
+struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
+{
+	struct nfs_server *server = NFS_SB(sb);
+	struct nfs_fsinfo fsinfo;
+	struct nfs_fattr fattr;
+	struct dentry *mntroot;
+	struct inode *inode;
+	int error;
+
+	/* create a dummy root dentry with dummy inode for this superblock */
+	if (!sb->s_root) {
+		struct nfs_fh dummyfh;
+		struct dentry *root;
+		struct inode *iroot;
+
+		memset(&dummyfh, 0, sizeof(dummyfh));
+		memset(&fattr, 0, sizeof(fattr));
+		nfs_fattr_init(&fattr);
+		fattr.valid = NFS_ATTR_FATTR;
+		fattr.type = NFDIR;
+		fattr.mode = S_IFDIR | S_IRUSR | S_IWUSR;
+		fattr.nlink = 2;
+
+		iroot = nfs_fhget(sb, &dummyfh, &fattr);
+		if (IS_ERR(iroot))
+			return ERR_PTR(PTR_ERR(iroot));
+
+		root = d_alloc_root(iroot);
+		if (!root) {
+			iput(iroot);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		sb->s_root = root;
+	}
+
+	/* get the actual root for this mount */
+	fsinfo.fattr = &fattr;
+
+	error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
+	if (error < 0) {
+		dprintk("nfs_get_root: getattr error = %d\n", -error);
+		return ERR_PTR(error);
+	}
+
+	inode = nfs_fhget(sb, mntfh, fsinfo.fattr);
+	if (IS_ERR(inode)) {
+		dprintk("nfs_get_root: get root inode failed\n");
+		return ERR_PTR(PTR_ERR(inode));
+	}
+
+	/* root dentries normally start off anonymous and get spliced in later
+	 * if the dentry tree reaches them; however if the dentry already
+	 * exists, we'll pick it up at this point and use it as the root
+	 */
+	mntroot = d_alloc_anon(inode);
+	if (!mntroot) {
+		iput(inode);
+		dprintk("nfs_get_root: get root dentry failed\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	security_d_instantiate(mntroot, inode);
+
+	if (!mntroot->d_op)
+		mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops;
+
+	return mntroot;
+}
+
+#ifdef CONFIG_NFS_V4
+
+/*
+ * Do a simple pathwalk from the root FH of the server to the nominated target
+ * of the mountpoint
+ * - give error on symlinks
+ * - give error on ".." occurring in the path
+ * - follow traversals
+ */
+int nfs4_path_walk(struct nfs_server *server,
+		   struct nfs_fh *mntfh,
+		   const char *path)
+{
+	struct nfs_fsinfo fsinfo;
+	struct nfs_fattr fattr;
+	struct nfs_fh lastfh;
+	struct qstr name;
+	int ret;
+	//int referral_count = 0;
+
+	dprintk("--> nfs4_path_walk(,,%s)\n", path);
+
+	fsinfo.fattr = &fattr;
+	nfs_fattr_init(&fattr);
+
+	if (*path++ != '/') {
+		dprintk("nfs4_get_root: Path does not begin with a slash\n");
+		return -EINVAL;
+	}
+
+	/* Start by getting the root filehandle from the server */
+	ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
+	if (ret < 0) {
+		dprintk("nfs4_get_root: getroot error = %d\n", -ret);
+		return ret;
+	}
+
+	if (fattr.type != NFDIR) {
+		printk(KERN_ERR "nfs4_get_root:"
+		       " getroot encountered non-directory\n");
+		return -ENOTDIR;
+	}
+
+	if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) {
+		printk(KERN_ERR "nfs4_get_root:"
+		       " getroot obtained referral\n");
+		return -EREMOTE;
+	}
+
+next_component:
+	dprintk("Next: %s\n", path);
+
+	/* extract the next bit of the path */
+	if (!*path)
+		goto path_walk_complete;
+
+	name.name = path;
+	while (*path && *path != '/')
+		path++;
+	name.len = path - (const char *) name.name;
+
+eat_dot_dir:
+	while (*path == '/')
+		path++;
+
+	if (path[0] == '.' && (path[1] == '/' || !path[1])) {
+		path += 2;
+		goto eat_dot_dir;
+	}
+
+	if (path[0] == '.' && path[1] == '.' && (path[2] == '/' || !path[2])
+	    ) {
+		printk(KERN_ERR "nfs4_get_root:"
+		       " Mount path contains reference to \"..\"\n");
+		return -EINVAL;
+	}
+
+	/* lookup the next FH in the sequence */
+	memcpy(&lastfh, mntfh, sizeof(lastfh));
+
+	dprintk("LookupFH: %*.*s [%s]\n", name.len, name.len, name.name, path);
+
+	ret = server->nfs_client->rpc_ops->lookupfh(server, &lastfh, &name,
+						    mntfh, &fattr);
+	if (ret < 0) {
+		dprintk("nfs4_get_root: getroot error = %d\n", -ret);
+		return ret;
+	}
+
+	if (fattr.type != NFDIR) {
+		printk(KERN_ERR "nfs4_get_root:"
+		       " lookupfh encountered non-directory\n");
+		return -ENOTDIR;
+	}
+
+	if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) {
+		printk(KERN_ERR "nfs4_get_root:"
+		       " lookupfh obtained referral\n");
+		return -EREMOTE;
+	}
+
+	goto next_component;
+
+path_walk_complete:
+	memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid));
+	dprintk("<-- nfs4_path_walk() = 0\n");
+	return 0;
+}
+
+/*
+ * get an NFS4 root dentry from the root filehandle
+ */
+struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
+{
+	struct nfs_server *server = NFS_SB(sb);
+	struct nfs_fattr fattr;
+	struct dentry *mntroot;
+	struct inode *inode;
+	int error;
+
+	dprintk("--> nfs4_get_root()\n");
+
+	/* create a dummy root dentry with dummy inode for this superblock */
+	if (!sb->s_root) {
+		struct nfs_fh dummyfh;
+		struct dentry *root;
+		struct inode *iroot;
+
+		memset(&dummyfh, 0, sizeof(dummyfh));
+		memset(&fattr, 0, sizeof(fattr));
+		nfs_fattr_init(&fattr);
+		fattr.valid = NFS_ATTR_FATTR;
+		fattr.type = NFDIR;
+		fattr.mode = S_IFDIR | S_IRUSR | S_IWUSR;
+		fattr.nlink = 2;
+
+		iroot = nfs_fhget(sb, &dummyfh, &fattr);
+		if (IS_ERR(iroot))
+			return ERR_PTR(PTR_ERR(iroot));
+
+		root = d_alloc_root(iroot);
+		if (!root) {
+			iput(iroot);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		sb->s_root = root;
+	}
+
+	/* get the info about the server and filesystem */
+	error = nfs4_server_capabilities(server, mntfh);
+	if (error < 0) {
+		dprintk("nfs_get_root: getcaps error = %d\n",
+			-error);
+		return ERR_PTR(error);
+	}
+
+	/* get the actual root for this mount */
+	error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr);
+	if (error < 0) {
+		dprintk("nfs_get_root: getattr error = %d\n", -error);
+		return ERR_PTR(error);
+	}
+
+	inode = nfs_fhget(sb, mntfh, &fattr);
+	if (IS_ERR(inode)) {
+		dprintk("nfs_get_root: get root inode failed\n");
+		return ERR_PTR(PTR_ERR(inode));
+	}
+
+	/* root dentries normally start off anonymous and get spliced in later
+	 * if the dentry tree reaches them; however if the dentry already
+	 * exists, we'll pick it up at this point and use it as the root
+	 */
+	mntroot = d_alloc_anon(inode);
+	if (!mntroot) {
+		iput(inode);
+		dprintk("nfs_get_root: get root dentry failed\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	security_d_instantiate(mntroot, inode);
+
+	if (!mntroot->d_op)
+		mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops;
+
+	dprintk("<-- nfs4_get_root()\n");
+	return mntroot;
+}
+
+#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 07a5dd57646..82ad7110a1c 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -57,6 +57,20 @@
 /* Default cache timeout is 10 minutes */
 unsigned int nfs_idmap_cache_timeout = 600 * HZ;
 
+static int param_set_idmap_timeout(const char *val, struct kernel_param *kp)
+{
+	char *endp;
+	int num = simple_strtol(val, &endp, 0);
+	int jif = num * HZ;
+	if (endp == val || *endp || num < 0 || jif < num)
+		return -EINVAL;
+	*((int *)kp->arg) = jif;
+	return 0;
+}
+
+module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int,
+		 &nfs_idmap_cache_timeout, 0644);
+
 struct idmap_hashent {
 	unsigned long ih_expires;
 	__u32 ih_id;
@@ -70,7 +84,6 @@ struct idmap_hashtable {
 };
 
 struct idmap {
-	char                  idmap_path[48];
 	struct dentry        *idmap_dentry;
 	wait_queue_head_t     idmap_wq;
 	struct idmap_msg      idmap_im;
@@ -94,24 +107,23 @@ static struct rpc_pipe_ops idmap_upcall_ops = {
         .destroy_msg    = idmap_pipe_destroy_msg,
 };
 
-void
-nfs_idmap_new(struct nfs4_client *clp)
+int
+nfs_idmap_new(struct nfs_client *clp)
 {
 	struct idmap *idmap;
+	int error;
 
-	if (clp->cl_idmap != NULL)
-		return;
-        if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
-                return;
+	BUG_ON(clp->cl_idmap != NULL);
 
-	snprintf(idmap->idmap_path, sizeof(idmap->idmap_path),
-	    "%s/idmap", clp->cl_rpcclient->cl_pathname);
+        if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
+                return -ENOMEM;
 
-        idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path,
+        idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_dentry, "idmap",
 	    idmap, &idmap_upcall_ops, 0);
         if (IS_ERR(idmap->idmap_dentry)) {
+		error = PTR_ERR(idmap->idmap_dentry);
 		kfree(idmap);
-		return;
+		return error;
 	}
 
         mutex_init(&idmap->idmap_lock);
@@ -121,10 +133,11 @@ nfs_idmap_new(struct nfs4_client *clp)
 	idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP;
 
 	clp->cl_idmap = idmap;
+	return 0;
 }
 
 void
-nfs_idmap_delete(struct nfs4_client *clp)
+nfs_idmap_delete(struct nfs_client *clp)
 {
 	struct idmap *idmap = clp->cl_idmap;
 
@@ -477,27 +490,27 @@ static unsigned int fnvhash32(const void *buf, size_t buflen)
 	return (hash);
 }
 
-int nfs_map_name_to_uid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid)
+int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid)
 {
 	struct idmap *idmap = clp->cl_idmap;
 
 	return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid);
 }
 
-int nfs_map_group_to_gid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid)
+int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid)
 {
 	struct idmap *idmap = clp->cl_idmap;
 
 	return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid);
 }
 
-int nfs_map_uid_to_name(struct nfs4_client *clp, __u32 uid, char *buf)
+int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf)
 {
 	struct idmap *idmap = clp->cl_idmap;
 
 	return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
 }
-int nfs_map_gid_to_group(struct nfs4_client *clp, __u32 uid, char *buf)
+int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf)
 {
 	struct idmap *idmap = clp->cl_idmap;
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index d349fb2245d..e8c143d182c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -76,19 +76,14 @@ int nfs_write_inode(struct inode *inode, int sync)
 
 void nfs_clear_inode(struct inode *inode)
 {
-	struct nfs_inode *nfsi = NFS_I(inode);
-	struct rpc_cred *cred;
-
 	/*
 	 * The following should never happen...
 	 */
 	BUG_ON(nfs_have_writebacks(inode));
-	BUG_ON (!list_empty(&nfsi->open_files));
+	BUG_ON(!list_empty(&NFS_I(inode)->open_files));
+	BUG_ON(atomic_read(&NFS_I(inode)->data_updates) != 0);
 	nfs_zap_acl_cache(inode);
-	cred = nfsi->cache_access.cred;
-	if (cred)
-		put_rpccred(cred);
-	BUG_ON(atomic_read(&nfsi->data_updates) != 0);
+	nfs_access_zap_cache(inode);
 }
 
 /**
@@ -242,13 +237,13 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 		/* Why so? Because we want revalidate for devices/FIFOs, and
 		 * that's precisely what we have in nfs_file_inode_operations.
 		 */
-		inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops;
+		inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops;
 		if (S_ISREG(inode->i_mode)) {
 			inode->i_fop = &nfs_file_operations;
 			inode->i_data.a_ops = &nfs_file_aops;
 			inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info;
 		} else if (S_ISDIR(inode->i_mode)) {
-			inode->i_op = NFS_SB(sb)->rpc_ops->dir_inode_ops;
+			inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
 			inode->i_fop = &nfs_dir_operations;
 			if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)
 			    && fattr->size <= NFS_LIMIT_READDIRPLUS)
@@ -290,7 +285,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 		nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
 		nfsi->attrtimeo_timestamp = jiffies;
 		memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
-		nfsi->cache_access.cred = NULL;
+		nfsi->access_cache = RB_ROOT;
 
 		unlock_new_inode(inode);
 	} else
@@ -722,13 +717,11 @@ void nfs_end_data_update(struct inode *inode)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 
-	if (!nfs_have_delegation(inode, FMODE_READ)) {
-		/* Directories and symlinks: invalidate page cache */
-		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) {
-			spin_lock(&inode->i_lock);
-			nfsi->cache_validity |= NFS_INO_INVALID_DATA;
-			spin_unlock(&inode->i_lock);
-		}
+	/* Directories: invalidate page cache */
+	if (S_ISDIR(inode->i_mode)) {
+		spin_lock(&inode->i_lock);
+		nfsi->cache_validity |= NFS_INO_INVALID_DATA;
+		spin_unlock(&inode->i_lock);
 	}
 	nfsi->cache_change_attribute = jiffies;
 	atomic_dec(&nfsi->data_updates);
@@ -847,6 +840,12 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
  *
  * After an operation that has changed the inode metadata, mark the
  * attribute cache as being invalid, then try to update it.
+ *
+ * NB: if the server didn't return any post op attributes, this
+ * function will force the retrieval of attributes before the next
+ * NFS request.  Thus it should be used only for operations that
+ * are expected to change one or more attributes, to avoid
+ * unnecessary NFS requests and trips through nfs_update_inode().
  */
 int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 {
@@ -1025,7 +1024,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
  out_fileid:
 	printk(KERN_ERR "NFS: server %s error: fileid changed\n"
 		"fsid %s: expected fileid 0x%Lx, got 0x%Lx\n",
-		NFS_SERVER(inode)->hostname, inode->i_sb->s_id,
+		NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id,
 		(long long)nfsi->fileid, (long long)fattr->fileid);
 	goto out_err;
 }
@@ -1109,6 +1108,8 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
 		INIT_LIST_HEAD(&nfsi->dirty);
 		INIT_LIST_HEAD(&nfsi->commit);
 		INIT_LIST_HEAD(&nfsi->open_files);
+		INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
+		INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
 		INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
 		atomic_set(&nfsi->data_updates, 0);
 		nfsi->ndirty = 0;
@@ -1144,6 +1145,10 @@ static int __init init_nfs_fs(void)
 {
 	int err;
 
+	err = nfs_fs_proc_init();
+	if (err)
+		goto out5;
+
 	err = nfs_init_nfspagecache();
 	if (err)
 		goto out4;
@@ -1184,6 +1189,8 @@ out2:
 out3:
 	nfs_destroy_nfspagecache();
 out4:
+	nfs_fs_proc_exit();
+out5:
 	return err;
 }
 
@@ -1198,6 +1205,7 @@ static void __exit exit_nfs_fs(void)
 	rpc_proc_unregister("nfs");
 #endif
 	unregister_nfs_fs();
+	nfs_fs_proc_exit();
 }
 
 /* Not quite true; I just maintain it */
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e4f4e5def0f..bea0b016bd7 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -4,6 +4,18 @@
 
 #include <linux/mount.h>
 
+struct nfs_string;
+struct nfs_mount_data;
+struct nfs4_mount_data;
+
+/* Maximum number of readahead requests
+ * FIXME: this should really be a sysctl so that users may tune it to suit
+ *        their needs. People that do NFS over a slow network, might for
+ *        instance want to reduce it to something closer to 1 for improved
+ *        interactive response.
+ */
+#define NFS_MAX_READAHEAD	(RPC_DEF_SLOT_TABLE - 1)
+
 struct nfs_clone_mount {
 	const struct super_block *sb;
 	const struct dentry *dentry;
@@ -15,7 +27,40 @@ struct nfs_clone_mount {
 	rpc_authflavor_t authflavor;
 };
 
-/* namespace-nfs4.c */
+/* client.c */
+extern struct rpc_program nfs_program;
+
+extern void nfs_put_client(struct nfs_client *);
+extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int);
+extern struct nfs_server *nfs_create_server(const struct nfs_mount_data *,
+					    struct nfs_fh *);
+extern struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *,
+					     const char *,
+					     const struct sockaddr_in *,
+					     const char *,
+					     const char *,
+					     rpc_authflavor_t,
+					     struct nfs_fh *);
+extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *,
+						      struct nfs_fh *);
+extern void nfs_free_server(struct nfs_server *server);
+extern struct nfs_server *nfs_clone_server(struct nfs_server *,
+					   struct nfs_fh *,
+					   struct nfs_fattr *);
+#ifdef CONFIG_PROC_FS
+extern int __init nfs_fs_proc_init(void);
+extern void nfs_fs_proc_exit(void);
+#else
+static inline int nfs_fs_proc_init(void)
+{
+	return 0;
+}
+static inline void nfs_fs_proc_exit(void)
+{
+}
+#endif
+
+/* nfs4namespace.c */
 #ifdef CONFIG_NFS_V4
 extern struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry);
 #else
@@ -46,6 +91,7 @@ extern void nfs_destroy_directcache(void);
 #endif
 
 /* nfs2xdr.c */
+extern int nfs_stat_to_errno(int);
 extern struct rpc_procinfo nfs_procedures[];
 extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
 
@@ -54,8 +100,9 @@ extern struct rpc_procinfo nfs3_procedures[];
 extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
 
 /* nfs4xdr.c */
-extern int nfs_stat_to_errno(int);
+#ifdef CONFIG_NFS_V4
 extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
+#endif
 
 /* nfs4proc.c */
 #ifdef CONFIG_NFS_V4
@@ -66,6 +113,9 @@ extern int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry,
 				  struct page *page);
 #endif
 
+/* dir.c */
+extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask);
+
 /* inode.c */
 extern struct inode *nfs_alloc_inode(struct super_block *sb);
 extern void nfs_destroy_inode(struct inode *);
@@ -76,10 +126,10 @@ extern void nfs4_clear_inode(struct inode *);
 #endif
 
 /* super.c */
-extern struct file_system_type nfs_referral_nfs4_fs_type;
-extern struct file_system_type clone_nfs_fs_type;
+extern struct file_system_type nfs_xdev_fs_type;
 #ifdef CONFIG_NFS_V4
-extern struct file_system_type clone_nfs4_fs_type;
+extern struct file_system_type nfs4_xdev_fs_type;
+extern struct file_system_type nfs4_referral_fs_type;
 #endif
 
 extern struct rpc_stat nfs_rpcstat;
@@ -88,30 +138,30 @@ extern int __init register_nfs_fs(void);
 extern void __exit unregister_nfs_fs(void);
 
 /* namespace.c */
-extern char *nfs_path(const char *base, const struct dentry *dentry,
+extern char *nfs_path(const char *base,
+		      const struct dentry *droot,
+		      const struct dentry *dentry,
 		      char *buffer, ssize_t buflen);
 
-/*
- * Determine the mount path as a string
- */
-static inline char *
-nfs4_path(const struct dentry *dentry, char *buffer, ssize_t buflen)
-{
+/* getroot.c */
+extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *);
 #ifdef CONFIG_NFS_V4
-	return nfs_path(NFS_SB(dentry->d_sb)->mnt_path, dentry, buffer, buflen);
-#else
-	return NULL;
+extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *);
+
+extern int nfs4_path_walk(struct nfs_server *server,
+			  struct nfs_fh *mntfh,
+			  const char *path);
 #endif
-}
 
 /*
  * Determine the device name as a string
  */
 static inline char *nfs_devname(const struct vfsmount *mnt_parent,
-			 const struct dentry *dentry,
-			 char *buffer, ssize_t buflen)
+				const struct dentry *dentry,
+				char *buffer, ssize_t buflen)
 {
-	return nfs_path(mnt_parent->mnt_devname, dentry, buffer, buflen);
+	return nfs_path(mnt_parent->mnt_devname, mnt_parent->mnt_root,
+			dentry, buffer, buflen);
 }
 
 /*
@@ -167,20 +217,3 @@ void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize)
 	if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0)
 		sb->s_maxbytes = MAX_LFS_FILESIZE;
 }
-
-/*
- * Check if the string represents a "valid" IPv4 address
- */
-static inline int valid_ipaddr4(const char *buf)
-{
-	int rc, count, in[4];
-
-	rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]);
-	if (rc != 4)
-		return -EINVAL;
-	for (count = 0; count < 4; count++) {
-		if (in[count] > 255)
-			return -EINVAL;
-	}
-	return 0;
-}
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 445abb4d421..d507b021207 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -14,7 +14,6 @@
 #include <linux/net.h>
 #include <linux/in.h>
 #include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/xprt.h>
 #include <linux/sunrpc/sched.h>
 #include <linux/nfs_fs.h>
 
@@ -77,22 +76,19 @@ static struct rpc_clnt *
 mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version,
 		int protocol)
 {
-	struct rpc_xprt	*xprt;
-	struct rpc_clnt	*clnt;
-
-	xprt = xprt_create_proto(protocol, srvaddr, NULL);
-	if (IS_ERR(xprt))
-		return (struct rpc_clnt *)xprt;
-
-	clnt = rpc_create_client(xprt, hostname,
-				&mnt_program, version,
-				RPC_AUTH_UNIX);
-	if (!IS_ERR(clnt)) {
-		clnt->cl_softrtry = 1;
-		clnt->cl_oneshot  = 1;
-		clnt->cl_intr = 1;
-	}
-	return clnt;
+	struct rpc_create_args args = {
+		.protocol	= protocol,
+		.address	= (struct sockaddr *)srvaddr,
+		.addrsize	= sizeof(*srvaddr),
+		.servername	= hostname,
+		.program	= &mnt_program,
+		.version	= version,
+		.authflavor	= RPC_AUTH_UNIX,
+		.flags		= (RPC_CLNT_CREATE_ONESHOT |
+				   RPC_CLNT_CREATE_INTR),
+	};
+
+	return rpc_create(&args);
 }
 
 /*
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 86b3169c8ca..77b00684894 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -2,6 +2,7 @@
  * linux/fs/nfs/namespace.c
  *
  * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com>
+ * - Modified by David Howells <dhowells@redhat.com>
  *
  * NFS namespace
  */
@@ -28,6 +29,7 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
 /*
  * nfs_path - reconstruct the path given an arbitrary dentry
  * @base - arbitrary string to prepend to the path
+ * @droot - pointer to root dentry for mountpoint
  * @dentry - pointer to dentry
  * @buffer - result buffer
  * @buflen - length of buffer
@@ -38,7 +40,9 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
  * This is mainly for use in figuring out the path on the
  * server side when automounting on top of an existing partition.
  */
-char *nfs_path(const char *base, const struct dentry *dentry,
+char *nfs_path(const char *base,
+	       const struct dentry *droot,
+	       const struct dentry *dentry,
 	       char *buffer, ssize_t buflen)
 {
 	char *end = buffer+buflen;
@@ -47,7 +51,7 @@ char *nfs_path(const char *base, const struct dentry *dentry,
 	*--end = '\0';
 	buflen--;
 	spin_lock(&dcache_lock);
-	while (!IS_ROOT(dentry)) {
+	while (!IS_ROOT(dentry) && dentry != droot) {
 		namelen = dentry->d_name.len;
 		buflen -= namelen + 1;
 		if (buflen < 0)
@@ -96,15 +100,18 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
 	struct nfs_fattr fattr;
 	int err;
 
+	dprintk("--> nfs_follow_mountpoint()\n");
+
 	BUG_ON(IS_ROOT(dentry));
 	dprintk("%s: enter\n", __FUNCTION__);
 	dput(nd->dentry);
 	nd->dentry = dget(dentry);
-	if (d_mountpoint(nd->dentry))
-		goto out_follow;
+
 	/* Look it up again */
 	parent = dget_parent(nd->dentry);
-	err = server->rpc_ops->lookup(parent->d_inode, &nd->dentry->d_name, &fh, &fattr);
+	err = server->nfs_client->rpc_ops->lookup(parent->d_inode,
+						  &nd->dentry->d_name,
+						  &fh, &fattr);
 	dput(parent);
 	if (err != 0)
 		goto out_err;
@@ -132,6 +139,8 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
 	schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
 out:
 	dprintk("%s: done, returned %d\n", __FUNCTION__, err);
+
+	dprintk("<-- nfs_follow_mountpoint() = %d\n", err);
 	return ERR_PTR(err);
 out_err:
 	path_release(nd);
@@ -172,22 +181,23 @@ void nfs_release_automount_timer(void)
 /*
  * Clone a mountpoint of the appropriate type
  */
-static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, char *devname,
+static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
+					   const char *devname,
 					   struct nfs_clone_mount *mountdata)
 {
 #ifdef CONFIG_NFS_V4
 	struct vfsmount *mnt = NULL;
-	switch (server->rpc_ops->version) {
+	switch (server->nfs_client->cl_nfsversion) {
 		case 2:
 		case 3:
-			mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata);
+			mnt = vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata);
 			break;
 		case 4:
-			mnt = vfs_kern_mount(&clone_nfs4_fs_type, 0, devname, mountdata);
+			mnt = vfs_kern_mount(&nfs4_xdev_fs_type, 0, devname, mountdata);
 	}
 	return mnt;
 #else
-	return vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata);
+	return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata);
 #endif
 }
 
@@ -213,6 +223,8 @@ struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
 	char *page = (char *) __get_free_page(GFP_USER);
 	char *devname;
 
+	dprintk("--> nfs_do_submount()\n");
+
 	dprintk("%s: submounting on %s/%s\n", __FUNCTION__,
 			dentry->d_parent->d_name.name,
 			dentry->d_name.name);
@@ -227,5 +239,7 @@ free_page:
 	free_page((unsigned long)page);
 out:
 	dprintk("%s: done\n", __FUNCTION__);
+
+	dprintk("<-- nfs_do_submount() = %p\n", mnt);
 	return mnt;
 }
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 67391eef6b9..b49501fc0a7 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -51,7 +51,7 @@
 #define NFS_createargs_sz	(NFS_diropargs_sz+NFS_sattr_sz)
 #define NFS_renameargs_sz	(NFS_diropargs_sz+NFS_diropargs_sz)
 #define NFS_linkargs_sz		(NFS_fhandle_sz+NFS_diropargs_sz)
-#define NFS_symlinkargs_sz	(NFS_diropargs_sz+NFS_path_sz+NFS_sattr_sz)
+#define NFS_symlinkargs_sz	(NFS_diropargs_sz+1+NFS_sattr_sz)
 #define NFS_readdirargs_sz	(NFS_fhandle_sz+2)
 
 #define NFS_attrstat_sz		(1+NFS_fattr_sz)
@@ -351,11 +351,26 @@ nfs_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct nfs_linkargs *args)
 static int
 nfs_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_symlinkargs *args)
 {
+	struct xdr_buf *sndbuf = &req->rq_snd_buf;
+	size_t pad;
+
 	p = xdr_encode_fhandle(p, args->fromfh);
 	p = xdr_encode_array(p, args->fromname, args->fromlen);
-	p = xdr_encode_array(p, args->topath, args->tolen);
+	*p++ = htonl(args->pathlen);
+	sndbuf->len = xdr_adjust_iovec(sndbuf->head, p);
+
+	xdr_encode_pages(sndbuf, args->pages, 0, args->pathlen);
+
+	/*
+	 * xdr_encode_pages may have added a few bytes to ensure the
+	 * pathname ends on a 4-byte boundary.  Start encoding the
+	 * attributes after the pad bytes.
+	 */
+	pad = sndbuf->tail->iov_len;
+	if (pad > 0)
+		p++;
 	p = xdr_encode_sattr(p, args->sattr);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	sndbuf->len += xdr_adjust_iovec(sndbuf->tail, p) - pad;
 	return 0;
 }
 
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 7143b1f82ce..f8688eaa000 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -81,7 +81,7 @@ do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle,
 }
 
 /*
- * Bare-bones access to getattr: this is for nfs_read_super.
+ * Bare-bones access to getattr: this is for nfs_get_root/nfs_get_sb
  */
 static int
 nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
@@ -90,8 +90,8 @@ nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
 	int	status;
 
 	status = do_proc_get_root(server->client, fhandle, info);
-	if (status && server->client_sys != server->client)
-		status = do_proc_get_root(server->client_sys, fhandle, info);
+	if (status && server->nfs_client->cl_rpcclient != server->client)
+		status = do_proc_get_root(server->nfs_client->cl_rpcclient, fhandle, info);
 	return status;
 }
 
@@ -544,23 +544,23 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
 }
 
 static int
-nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
-		  struct iattr *sattr, struct nfs_fh *fhandle,
-		  struct nfs_fattr *fattr)
+nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
+		  unsigned int len, struct iattr *sattr)
 {
-	struct nfs_fattr	dir_attr;
+	struct nfs_fh fhandle;
+	struct nfs_fattr fattr, dir_attr;
 	struct nfs3_symlinkargs	arg = {
 		.fromfh		= NFS_FH(dir),
-		.fromname	= name->name,
-		.fromlen	= name->len,
-		.topath		= path->name,
-		.tolen		= path->len,
+		.fromname	= dentry->d_name.name,
+		.fromlen	= dentry->d_name.len,
+		.pages		= &page,
+		.pathlen	= len,
 		.sattr		= sattr
 	};
 	struct nfs3_diropres	res = {
 		.dir_attr	= &dir_attr,
-		.fh		= fhandle,
-		.fattr		= fattr
+		.fh		= &fhandle,
+		.fattr		= &fattr
 	};
 	struct rpc_message msg = {
 		.rpc_proc	= &nfs3_procedures[NFS3PROC_SYMLINK],
@@ -569,13 +569,19 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
 	};
 	int			status;
 
-	if (path->len > NFS3_MAXPATHLEN)
+	if (len > NFS3_MAXPATHLEN)
 		return -ENAMETOOLONG;
-	dprintk("NFS call  symlink %s -> %s\n", name->name, path->name);
+
+	dprintk("NFS call  symlink %s\n", dentry->d_name.name);
+
 	nfs_fattr_init(&dir_attr);
-	nfs_fattr_init(fattr);
+	nfs_fattr_init(&fattr);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_post_op_update_inode(dir, &dir_attr);
+	if (status != 0)
+		goto out;
+	status = nfs_instantiate(dentry, &fhandle, &fattr);
+out:
 	dprintk("NFS reply symlink: %d\n", status);
 	return status;
 }
@@ -785,7 +791,7 @@ nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
 
 	dprintk("NFS call  fsinfo\n");
 	nfs_fattr_init(info->fattr);
-	status = rpc_call_sync(server->client_sys, &msg, 0);
+	status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
 	dprintk("NFS reply fsinfo: %d\n", status);
 	return status;
 }
@@ -886,7 +892,7 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
 	return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl);
 }
 
-struct nfs_rpc_ops	nfs_v3_clientops = {
+const struct nfs_rpc_ops nfs_v3_clientops = {
 	.version	= 3,			/* protocol version */
 	.dentry_ops	= &nfs_dentry_operations,
 	.dir_inode_ops	= &nfs3_dir_inode_operations,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 0250269e975..16556fa4eff 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -56,7 +56,7 @@
 #define NFS3_writeargs_sz	(NFS3_fh_sz+5)
 #define NFS3_createargs_sz	(NFS3_diropargs_sz+NFS3_sattr_sz)
 #define NFS3_mkdirargs_sz	(NFS3_diropargs_sz+NFS3_sattr_sz)
-#define NFS3_symlinkargs_sz	(NFS3_diropargs_sz+NFS3_path_sz+NFS3_sattr_sz)
+#define NFS3_symlinkargs_sz	(NFS3_diropargs_sz+1+NFS3_sattr_sz)
 #define NFS3_mknodargs_sz	(NFS3_diropargs_sz+2+NFS3_sattr_sz)
 #define NFS3_renameargs_sz	(NFS3_diropargs_sz+NFS3_diropargs_sz)
 #define NFS3_linkargs_sz		(NFS3_fh_sz+NFS3_diropargs_sz)
@@ -398,8 +398,11 @@ nfs3_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs3_symlinkargs *args
 	p = xdr_encode_fhandle(p, args->fromfh);
 	p = xdr_encode_array(p, args->fromname, args->fromlen);
 	p = xdr_encode_sattr(p, args->sattr);
-	p = xdr_encode_array(p, args->topath, args->tolen);
+	*p++ = htonl(args->pathlen);
 	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+
+	/* Copy the page */
+	xdr_encode_pages(&req->rq_snd_buf, args->pages, 0, args->pathlen);
 	return 0;
 }
 
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 9a102860df3..61095fe4b5c 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -43,55 +43,6 @@ enum nfs4_client_state {
 };
 
 /*
- * The nfs4_client identifies our client state to the server.
- */
-struct nfs4_client {
-	struct list_head	cl_servers;	/* Global list of servers */
-	struct in_addr		cl_addr;	/* Server identifier */
-	u64			cl_clientid;	/* constant */
-	nfs4_verifier		cl_confirm;
-	unsigned long		cl_state;
-
-	u32			cl_lockowner_id;
-
-	/*
-	 * The following rwsem ensures exclusive access to the server
-	 * while we recover the state following a lease expiration.
-	 */
-	struct rw_semaphore	cl_sem;
-
-	struct list_head	cl_delegations;
-	struct list_head	cl_state_owners;
-	struct list_head	cl_unused;
-	int			cl_nunused;
-	spinlock_t		cl_lock;
-	atomic_t		cl_count;
-
-	struct rpc_clnt *	cl_rpcclient;
-
-	struct list_head	cl_superblocks;	/* List of nfs_server structs */
-
-	unsigned long		cl_lease_time;
-	unsigned long		cl_last_renewal;
-	struct work_struct	cl_renewd;
-	struct work_struct	cl_recoverd;
-
-	struct rpc_wait_queue	cl_rpcwaitq;
-
-	/* used for the setclientid verifier */
-	struct timespec		cl_boot_time;
-
-	/* idmapper */
-	struct idmap *		cl_idmap;
-
-	/* Our own IP address, as a null-terminated string.
-	 * This is used to generate the clientid, and the callback address.
-	 */
-	char			cl_ipaddr[16];
-	unsigned char		cl_id_uniquifier;
-};
-
-/*
  * struct rpc_sequence ensures that RPC calls are sent in the exact
  * order that they appear on the list.
  */
@@ -127,7 +78,7 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status
 struct nfs4_state_owner {
 	spinlock_t	     so_lock;
 	struct list_head     so_list;	 /* per-clientid list of state_owners */
-	struct nfs4_client   *so_client;
+	struct nfs_client    *so_client;
 	u32                  so_id;      /* 32-bit identifier, unique */
 	atomic_t	     so_count;
 
@@ -210,10 +161,10 @@ extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t);
 
 /* nfs4proc.c */
 extern int nfs4_map_errors(int err);
-extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short, struct rpc_cred *);
-extern int nfs4_proc_setclientid_confirm(struct nfs4_client *, struct rpc_cred *);
-extern int nfs4_proc_async_renew(struct nfs4_client *, struct rpc_cred *);
-extern int nfs4_proc_renew(struct nfs4_client *, struct rpc_cred *);
+extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *);
+extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
+extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
+extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state);
 extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
 extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
@@ -231,19 +182,14 @@ extern const u32 nfs4_fsinfo_bitmap[2];
 extern const u32 nfs4_fs_locations_bitmap[2];
 
 /* nfs4renewd.c */
-extern void nfs4_schedule_state_renewal(struct nfs4_client *);
+extern void nfs4_schedule_state_renewal(struct nfs_client *);
 extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
-extern void nfs4_kill_renewd(struct nfs4_client *);
+extern void nfs4_kill_renewd(struct nfs_client *);
 extern void nfs4_renew_state(void *);
 
 /* nfs4state.c */
-extern void init_nfsv4_state(struct nfs_server *);
-extern void destroy_nfsv4_state(struct nfs_server *);
-extern struct nfs4_client *nfs4_get_client(struct in_addr *);
-extern void nfs4_put_client(struct nfs4_client *clp);
-extern struct nfs4_client *nfs4_find_client(struct in_addr *);
-struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp);
-extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *);
+struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp);
+extern u32 nfs4_alloc_lockowner_id(struct nfs_client *);
 
 extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
 extern void nfs4_put_state_owner(struct nfs4_state_owner *);
@@ -252,7 +198,7 @@ extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state
 extern void nfs4_put_open_state(struct nfs4_state *);
 extern void nfs4_close_state(struct nfs4_state *, mode_t);
 extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t);
-extern void nfs4_schedule_state_recovery(struct nfs4_client *);
+extern void nfs4_schedule_state_recovery(struct nfs_client *);
 extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
 extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
 extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
@@ -276,10 +222,6 @@ extern struct svc_version nfs4_callback_version1;
 
 #else
 
-#define init_nfsv4_state(server)  do { } while (0)
-#define destroy_nfsv4_state(server)       do { } while (0)
-#define nfs4_put_state_owner(inode, owner) do { } while (0)
-#define nfs4_put_open_state(state) do { } while (0)
 #define nfs4_close_state(a, b) do { } while (0)
 
 #endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index ea38d27b74e..24e47f3bbd1 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -2,6 +2,7 @@
  * linux/fs/nfs/nfs4namespace.c
  *
  * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com>
+ * - Modified by David Howells <dhowells@redhat.com>
  *
  * NFSv4 namespace
  */
@@ -23,7 +24,7 @@
 /*
  * Check if fs_root is valid
  */
-static inline char *nfs4_pathname_string(struct nfs4_pathname *pathname,
+static inline char *nfs4_pathname_string(const struct nfs4_pathname *pathname,
 					 char *buffer, ssize_t buflen)
 {
 	char *end = buffer + buflen;
@@ -34,7 +35,7 @@ static inline char *nfs4_pathname_string(struct nfs4_pathname *pathname,
 
 	n = pathname->ncomponents;
 	while (--n >= 0) {
-		struct nfs4_string *component = &pathname->components[n];
+		const struct nfs4_string *component = &pathname->components[n];
 		buflen -= component->len + 1;
 		if (buflen < 0)
 			goto Elong;
@@ -47,6 +48,68 @@ Elong:
 	return ERR_PTR(-ENAMETOOLONG);
 }
 
+/*
+ * Determine the mount path as a string
+ */
+static char *nfs4_path(const struct vfsmount *mnt_parent,
+		       const struct dentry *dentry,
+		       char *buffer, ssize_t buflen)
+{
+	const char *srvpath;
+
+	srvpath = strchr(mnt_parent->mnt_devname, ':');
+	if (srvpath)
+		srvpath++;
+	else
+		srvpath = mnt_parent->mnt_devname;
+
+	return nfs_path(srvpath, mnt_parent->mnt_root, dentry, buffer, buflen);
+}
+
+/*
+ * Check that fs_locations::fs_root [RFC3530 6.3] is a prefix for what we
+ * believe to be the server path to this dentry
+ */
+static int nfs4_validate_fspath(const struct vfsmount *mnt_parent,
+				const struct dentry *dentry,
+				const struct nfs4_fs_locations *locations,
+				char *page, char *page2)
+{
+	const char *path, *fs_path;
+
+	path = nfs4_path(mnt_parent, dentry, page, PAGE_SIZE);
+	if (IS_ERR(path))
+		return PTR_ERR(path);
+
+	fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE);
+	if (IS_ERR(fs_path))
+		return PTR_ERR(fs_path);
+
+	if (strncmp(path, fs_path, strlen(fs_path)) != 0) {
+		dprintk("%s: path %s does not begin with fsroot %s\n",
+			__FUNCTION__, path, fs_path);
+		return -ENOENT;
+	}
+
+	return 0;
+}
+
+/*
+ * Check if the string represents a "valid" IPv4 address
+ */
+static inline int valid_ipaddr4(const char *buf)
+{
+	int rc, count, in[4];
+
+	rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]);
+	if (rc != 4)
+		return -EINVAL;
+	for (count = 0; count < 4; count++) {
+		if (in[count] > 255)
+			return -EINVAL;
+	}
+	return 0;
+}
 
 /**
  * nfs_follow_referral - set up mountpoint when hitting a referral on moved error
@@ -60,7 +123,7 @@ Elong:
  */
 static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
 					    const struct dentry *dentry,
-					    struct nfs4_fs_locations *locations)
+					    const struct nfs4_fs_locations *locations)
 {
 	struct vfsmount *mnt = ERR_PTR(-ENOENT);
 	struct nfs_clone_mount mountdata = {
@@ -68,10 +131,9 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
 		.dentry = dentry,
 		.authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor,
 	};
-	char *page, *page2;
-	char *path, *fs_path;
+	char *page = NULL, *page2 = NULL;
 	char *devname;
-	int loc, s;
+	int loc, s, error;
 
 	if (locations == NULL || locations->nlocations <= 0)
 		goto out;
@@ -79,36 +141,30 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
 	dprintk("%s: referral at %s/%s\n", __FUNCTION__,
 		dentry->d_parent->d_name.name, dentry->d_name.name);
 
-	/* Ensure fs path is a prefix of current dentry path */
 	page = (char *) __get_free_page(GFP_USER);
-	if (page == NULL)
+	if (!page)
 		goto out;
+
 	page2 = (char *) __get_free_page(GFP_USER);
-	if (page2 == NULL)
+	if (!page2)
 		goto out;
 
-	path = nfs4_path(dentry, page, PAGE_SIZE);
-	if (IS_ERR(path))
-		goto out_free;
-
-	fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE);
-	if (IS_ERR(fs_path))
-		goto out_free;
-
-	if (strncmp(path, fs_path, strlen(fs_path)) != 0) {
-		dprintk("%s: path %s does not begin with fsroot %s\n", __FUNCTION__, path, fs_path);
-		goto out_free;
+	/* Ensure fs path is a prefix of current dentry path */
+	error = nfs4_validate_fspath(mnt_parent, dentry, locations, page, page2);
+	if (error < 0) {
+		mnt = ERR_PTR(error);
+		goto out;
 	}
 
 	devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE);
 	if (IS_ERR(devname)) {
 		mnt = (struct vfsmount *)devname;
-		goto out_free;
+		goto out;
 	}
 
 	loc = 0;
 	while (loc < locations->nlocations && IS_ERR(mnt)) {
-		struct nfs4_fs_location *location = &locations->locations[loc];
+		const struct nfs4_fs_location *location = &locations->locations[loc];
 		char *mnt_path;
 
 		if (location == NULL || location->nservers <= 0 ||
@@ -140,7 +196,7 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
 			addr.sin_port = htons(NFS_PORT);
 			mountdata.addr = &addr;
 
-			mnt = vfs_kern_mount(&nfs_referral_nfs4_fs_type, 0, devname, &mountdata);
+			mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, devname, &mountdata);
 			if (!IS_ERR(mnt)) {
 				break;
 			}
@@ -149,10 +205,9 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
 		loc++;
 	}
 
-out_free:
-	free_page((unsigned long)page);
-	free_page((unsigned long)page2);
 out:
+	free_page((unsigned long) page);
+	free_page((unsigned long) page2);
 	dprintk("%s: done\n", __FUNCTION__);
 	return mnt;
 }
@@ -165,7 +220,7 @@ out:
  */
 struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry)
 {
-	struct vfsmount *mnt = ERR_PTR(-ENOENT);
+	struct vfsmount *mnt = ERR_PTR(-ENOMEM);
 	struct dentry *parent;
 	struct nfs4_fs_locations *fs_locations = NULL;
 	struct page *page;
@@ -183,11 +238,16 @@ struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentr
 		goto out_free;
 
 	/* Get locations */
+	mnt = ERR_PTR(-ENOENT);
+
 	parent = dget_parent(dentry);
-	dprintk("%s: getting locations for %s/%s\n", __FUNCTION__, parent->d_name.name, dentry->d_name.name);
+	dprintk("%s: getting locations for %s/%s\n",
+		__FUNCTION__, parent->d_name.name, dentry->d_name.name);
+
 	err = nfs4_proc_fs_locations(parent->d_inode, dentry, fs_locations, page);
 	dput(parent);
-	if (err != 0 || fs_locations->nlocations <= 0 ||
+	if (err != 0 ||
+	    fs_locations->nlocations <= 0 ||
 	    fs_locations->fs_path.ncomponents <= 0)
 		goto out_free;
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 153898e1331..47c7e6e3910 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -55,7 +55,7 @@
 
 #define NFSDBG_FACILITY		NFSDBG_PROC
 
-#define NFS4_POLL_RETRY_MIN	(1*HZ)
+#define NFS4_POLL_RETRY_MIN	(HZ/10)
 #define NFS4_POLL_RETRY_MAX	(15*HZ)
 
 struct nfs4_opendata;
@@ -64,7 +64,7 @@ static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinf
 static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *);
 static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
 static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
-static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp);
+static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp);
 
 /* Prevent leaks of NFSv4 errors into userland */
 int nfs4_map_errors(int err)
@@ -195,7 +195,7 @@ static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry,
 
 static void renew_lease(const struct nfs_server *server, unsigned long timestamp)
 {
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 	spin_lock(&clp->cl_lock);
 	if (time_before(clp->cl_last_renewal,timestamp))
 		clp->cl_last_renewal = timestamp;
@@ -252,7 +252,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
 	atomic_inc(&sp->so_count);
 	p->o_arg.fh = NFS_FH(dir);
 	p->o_arg.open_flags = flags,
-	p->o_arg.clientid = server->nfs4_state->cl_clientid;
+	p->o_arg.clientid = server->nfs_client->cl_clientid;
 	p->o_arg.id = sp->so_id;
 	p->o_arg.name = &dentry->d_name;
 	p->o_arg.server = server;
@@ -550,7 +550,7 @@ int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
 			case -NFS4ERR_STALE_STATEID:
 			case -NFS4ERR_EXPIRED:
 				/* Don't recall a delegation if it was lost */
-				nfs4_schedule_state_recovery(server->nfs4_state);
+				nfs4_schedule_state_recovery(server->nfs_client);
 				return err;
 		}
 		err = nfs4_handle_exception(server, err, &exception);
@@ -758,7 +758,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 	}
 	nfs_confirm_seqid(&data->owner->so_seqid, 0);
 	if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
-		return server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
+		return server->nfs_client->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
 	return 0;
 }
 
@@ -792,11 +792,18 @@ out:
 
 int nfs4_recover_expired_lease(struct nfs_server *server)
 {
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
+	int ret;
 
-	if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
+	for (;;) {
+		ret = nfs4_wait_clnt_recover(server->client, clp);
+		if (ret != 0)
+			return ret;
+		if (!test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
+			break;
 		nfs4_schedule_state_recovery(clp);
-	return nfs4_wait_clnt_recover(server->client, clp);
+	}
+	return 0;
 }
 
 /*
@@ -867,7 +874,7 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred
 {
 	struct nfs_delegation *delegation;
 	struct nfs_server *server = NFS_SERVER(inode);
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs4_state_owner *sp = NULL;
 	struct nfs4_state *state = NULL;
@@ -953,7 +960,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
 	struct nfs4_state_owner  *sp;
 	struct nfs4_state     *state = NULL;
 	struct nfs_server       *server = NFS_SERVER(dir);
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_opendata *opendata;
 	int                     status;
 
@@ -970,7 +977,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
 	status = -ENOMEM;
 	opendata = nfs4_opendata_alloc(dentry, sp, flags, sattr);
 	if (opendata == NULL)
-		goto err_put_state_owner;
+		goto err_release_rwsem;
 
 	status = _nfs4_proc_open(opendata);
 	if (status != 0)
@@ -989,11 +996,11 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
 	return 0;
 err_opendata_free:
 	nfs4_opendata_free(opendata);
+err_release_rwsem:
+	up_read(&clp->cl_sem);
 err_put_state_owner:
 	nfs4_put_state_owner(sp);
 out_err:
-	/* Note: clp->cl_sem must be released before nfs4_put_open_state()! */
-	up_read(&clp->cl_sem);
 	*res = NULL;
 	return status;
 }
@@ -1133,7 +1140,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
 			break;
 		case -NFS4ERR_STALE_STATEID:
 		case -NFS4ERR_EXPIRED:
-			nfs4_schedule_state_recovery(server->nfs4_state);
+			nfs4_schedule_state_recovery(server->nfs_client);
 			break;
 		default:
 			if (nfs4_async_handle_error(task, server) == -EAGAIN) {
@@ -1268,7 +1275,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 		BUG_ON(nd->intent.open.flags & O_CREAT);
 	}
 
-	cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
+	cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
 	if (IS_ERR(cred))
 		return (struct dentry *)cred;
 	state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
@@ -1291,7 +1298,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
 
-	cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
+	cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
 	if (IS_ERR(cred))
 		return PTR_ERR(cred);
 	state = nfs4_open_delegated(dentry->d_inode, openflags, cred);
@@ -1393,70 +1400,19 @@ static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
 	return err;
 }
 
+/*
+ * get the file handle for the "/" directory on the server
+ */
 static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
-		struct nfs_fsinfo *info)
+			      struct nfs_fsinfo *info)
 {
-	struct nfs_fattr *	fattr = info->fattr;
-	unsigned char *		p;
-	struct qstr		q;
-	struct nfs4_lookup_arg args = {
-		.dir_fh = fhandle,
-		.name = &q,
-		.bitmask = nfs4_fattr_bitmap,
-	};
-	struct nfs4_lookup_res res = {
-		.server = server,
-		.fattr = fattr,
-		.fh = fhandle,
-	};
-	struct rpc_message msg = {
-		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
-		.rpc_argp = &args,
-		.rpc_resp = &res,
-	};
 	int status;
 
-	/*
-	 * Now we do a separate LOOKUP for each component of the mount path.
-	 * The LOOKUPs are done separately so that we can conveniently
-	 * catch an ERR_WRONGSEC if it occurs along the way...
-	 */
 	status = nfs4_lookup_root(server, fhandle, info);
-	if (status)
-		goto out;
-
-	p = server->mnt_path;
-	for (;;) {
-		struct nfs4_exception exception = { };
-
-		while (*p == '/')
-			p++;
-		if (!*p)
-			break;
-		q.name = p;
-		while (*p && (*p != '/'))
-			p++;
-		q.len = p - q.name;
-
-		do {
-			nfs_fattr_init(fattr);
-			status = nfs4_handle_exception(server,
-					rpc_call_sync(server->client, &msg, 0),
-					&exception);
-		} while (exception.retry);
-		if (status == 0)
-			continue;
-		if (status == -ENOENT) {
-			printk(KERN_NOTICE "NFS: mount path %s does not exist!\n", server->mnt_path);
-			printk(KERN_NOTICE "NFS: suggestion: try mounting '/' instead.\n");
-		}
-		break;
-	}
 	if (status == 0)
 		status = nfs4_server_capabilities(server, fhandle);
 	if (status == 0)
 		status = nfs4_do_fsinfo(server, fhandle, info);
-out:
 	return nfs4_map_errors(status);
 }
 
@@ -1565,7 +1521,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 
 	nfs_fattr_init(fattr);
 	
-	cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
+	cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
 	if (IS_ERR(cred))
 		return PTR_ERR(cred);
 
@@ -1583,6 +1539,52 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 	return status;
 }
 
+static int _nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
+		struct qstr *name, struct nfs_fh *fhandle,
+		struct nfs_fattr *fattr)
+{
+	int		       status;
+	struct nfs4_lookup_arg args = {
+		.bitmask = server->attr_bitmask,
+		.dir_fh = dirfh,
+		.name = name,
+	};
+	struct nfs4_lookup_res res = {
+		.server = server,
+		.fattr = fattr,
+		.fh = fhandle,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+	};
+
+	nfs_fattr_init(fattr);
+
+	dprintk("NFS call  lookupfh %s\n", name->name);
+	status = rpc_call_sync(server->client, &msg, 0);
+	dprintk("NFS reply lookupfh: %d\n", status);
+	if (status == -NFS4ERR_MOVED)
+		status = -EREMOTE;
+	return status;
+}
+
+static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
+			      struct qstr *name, struct nfs_fh *fhandle,
+			      struct nfs_fattr *fattr)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = nfs4_handle_exception(server,
+				_nfs4_proc_lookupfh(server, dirfh, name,
+						    fhandle, fattr),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
 static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name,
 		struct nfs_fh *fhandle, struct nfs_fattr *fattr)
 {
@@ -1881,7 +1883,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 	struct rpc_cred *cred;
 	int status = 0;
 
-	cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
+	cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
 	if (IS_ERR(cred)) {
 		status = PTR_ERR(cred);
 		goto out;
@@ -2089,24 +2091,24 @@ static int nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *n
 	return err;
 }
 
-static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name,
-		struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle,
-		struct nfs_fattr *fattr)
+static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
+		struct page *page, unsigned int len, struct iattr *sattr)
 {
 	struct nfs_server *server = NFS_SERVER(dir);
-	struct nfs_fattr dir_fattr;
+	struct nfs_fh fhandle;
+	struct nfs_fattr fattr, dir_fattr;
 	struct nfs4_create_arg arg = {
 		.dir_fh = NFS_FH(dir),
 		.server = server,
-		.name = name,
+		.name = &dentry->d_name,
 		.attrs = sattr,
 		.ftype = NF4LNK,
 		.bitmask = server->attr_bitmask,
 	};
 	struct nfs4_create_res res = {
 		.server = server,
-		.fh = fhandle,
-		.fattr = fattr,
+		.fh = &fhandle,
+		.fattr = &fattr,
 		.dir_fattr = &dir_fattr,
 	};
 	struct rpc_message msg = {
@@ -2116,29 +2118,32 @@ static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name,
 	};
 	int			status;
 
-	if (path->len > NFS4_MAXPATHLEN)
+	if (len > NFS4_MAXPATHLEN)
 		return -ENAMETOOLONG;
-	arg.u.symlink = path;
-	nfs_fattr_init(fattr);
+
+	arg.u.symlink.pages = &page;
+	arg.u.symlink.len = len;
+	nfs_fattr_init(&fattr);
 	nfs_fattr_init(&dir_fattr);
 	
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
-	if (!status)
+	if (!status) {
 		update_changeattr(dir, &res.dir_cinfo);
-	nfs_post_op_update_inode(dir, res.dir_fattr);
+		nfs_post_op_update_inode(dir, res.dir_fattr);
+		status = nfs_instantiate(dentry, &fhandle, &fattr);
+	}
 	return status;
 }
 
-static int nfs4_proc_symlink(struct inode *dir, struct qstr *name,
-		struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle,
-		struct nfs_fattr *fattr)
+static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
+		struct page *page, unsigned int len, struct iattr *sattr)
 {
 	struct nfs4_exception exception = { };
 	int err;
 	do {
 		err = nfs4_handle_exception(NFS_SERVER(dir),
-				_nfs4_proc_symlink(dir, name, path, sattr,
-					fhandle, fattr),
+				_nfs4_proc_symlink(dir, dentry, page,
+							len, sattr),
 				&exception);
 	} while (exception.retry);
 	return err;
@@ -2521,7 +2526,7 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
  */
 static void nfs4_renew_done(struct rpc_task *task, void *data)
 {
-	struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp;
+	struct nfs_client *clp = (struct nfs_client *)task->tk_msg.rpc_argp;
 	unsigned long timestamp = (unsigned long)data;
 
 	if (task->tk_status < 0) {
@@ -2543,7 +2548,7 @@ static const struct rpc_call_ops nfs4_renew_ops = {
 	.rpc_call_done = nfs4_renew_done,
 };
 
-int nfs4_proc_async_renew(struct nfs4_client *clp, struct rpc_cred *cred)
+int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	struct rpc_message msg = {
 		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_RENEW],
@@ -2555,7 +2560,7 @@ int nfs4_proc_async_renew(struct nfs4_client *clp, struct rpc_cred *cred)
 			&nfs4_renew_ops, (void *)jiffies);
 }
 
-int nfs4_proc_renew(struct nfs4_client *clp, struct rpc_cred *cred)
+int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	struct rpc_message msg = {
 		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_RENEW],
@@ -2770,7 +2775,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
 		return -EOPNOTSUPP;
 	nfs_inode_return_delegation(inode);
 	buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
-	ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0);
+	ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	if (ret == 0)
 		nfs4_write_cached_acl(inode, buf, buflen);
 	return ret;
@@ -2791,7 +2796,7 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
 static int
 nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
 {
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 
 	if (!clp || task->tk_status >= 0)
 		return 0;
@@ -2828,7 +2833,7 @@ static int nfs4_wait_bit_interruptible(void *word)
 	return 0;
 }
 
-static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp)
+static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp)
 {
 	sigset_t oldset;
 	int res;
@@ -2871,7 +2876,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
  */
 int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
 {
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 	int ret = errorcode;
 
 	exception->retry = 0;
@@ -2886,6 +2891,7 @@ int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct
 			if (ret == 0)
 				exception->retry = 1;
 			break;
+		case -NFS4ERR_FILE_OPEN:
 		case -NFS4ERR_GRACE:
 		case -NFS4ERR_DELAY:
 			ret = nfs4_delay(server->client, &exception->timeout);
@@ -2898,7 +2904,7 @@ int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct
 	return nfs4_map_errors(ret);
 }
 
-int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short port, struct rpc_cred *cred)
+int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred)
 {
 	nfs4_verifier sc_verifier;
 	struct nfs4_setclientid setclientid = {
@@ -2922,7 +2928,7 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p
 	for(;;) {
 		setclientid.sc_name_len = scnprintf(setclientid.sc_name,
 				sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u %s %u",
-				clp->cl_ipaddr, NIPQUAD(clp->cl_addr.s_addr),
+				clp->cl_ipaddr, NIPQUAD(clp->cl_addr.sin_addr),
 				cred->cr_ops->cr_name,
 				clp->cl_id_uniquifier);
 		setclientid.sc_netid_len = scnprintf(setclientid.sc_netid,
@@ -2945,7 +2951,7 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p
 	return status;
 }
 
-static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
+static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	struct nfs_fsinfo fsinfo;
 	struct rpc_message msg = {
@@ -2969,7 +2975,7 @@ static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cr
 	return status;
 }
 
-int nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
+int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	long timeout;
 	int err;
@@ -3077,7 +3083,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4
 		switch (err) {
 			case -NFS4ERR_STALE_STATEID:
 			case -NFS4ERR_EXPIRED:
-				nfs4_schedule_state_recovery(server->nfs4_state);
+				nfs4_schedule_state_recovery(server->nfs_client);
 			case 0:
 				return 0;
 		}
@@ -3106,7 +3112,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 {
 	struct inode *inode = state->inode;
 	struct nfs_server *server = NFS_SERVER(inode);
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 	struct nfs_lockt_args arg = {
 		.fh = NFS_FH(inode),
 		.fl = request,
@@ -3231,7 +3237,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
 			break;
 		case -NFS4ERR_STALE_STATEID:
 		case -NFS4ERR_EXPIRED:
-			nfs4_schedule_state_recovery(calldata->server->nfs4_state);
+			nfs4_schedule_state_recovery(calldata->server->nfs_client);
 			break;
 		default:
 			if (nfs4_async_handle_error(task, calldata->server) == -EAGAIN) {
@@ -3343,7 +3349,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
 	if (p->arg.lock_seqid == NULL)
 		goto out_free;
 	p->arg.lock_stateid = &lsp->ls_stateid;
-	p->arg.lock_owner.clientid = server->nfs4_state->cl_clientid;
+	p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
 	p->arg.lock_owner.id = lsp->ls_id;
 	p->lsp = lsp;
 	atomic_inc(&lsp->ls_count);
@@ -3513,7 +3519,7 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
 
 static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
 {
-	struct nfs4_client *clp = state->owner->so_client;
+	struct nfs_client *clp = state->owner->so_client;
 	unsigned char fl_flags = request->fl_flags;
 	int status;
 
@@ -3715,7 +3721,7 @@ static struct inode_operations nfs4_file_inode_operations = {
 	.listxattr	= nfs4_listxattr,
 };
 
-struct nfs_rpc_ops	nfs_v4_clientops = {
+const struct nfs_rpc_ops nfs_v4_clientops = {
 	.version	= 4,			/* protocol version */
 	.dentry_ops	= &nfs4_dentry_operations,
 	.dir_inode_ops	= &nfs4_dir_inode_operations,
@@ -3723,6 +3729,7 @@ struct nfs_rpc_ops	nfs_v4_clientops = {
 	.getroot	= nfs4_proc_get_root,
 	.getattr	= nfs4_proc_getattr,
 	.setattr	= nfs4_proc_setattr,
+	.lookupfh	= nfs4_proc_lookupfh,
 	.lookup		= nfs4_proc_lookup,
 	.access		= nfs4_proc_access,
 	.readlink	= nfs4_proc_readlink,
@@ -3743,6 +3750,7 @@ struct nfs_rpc_ops	nfs_v4_clientops = {
 	.statfs		= nfs4_proc_statfs,
 	.fsinfo		= nfs4_proc_fsinfo,
 	.pathconf	= nfs4_proc_pathconf,
+	.set_capabilities = nfs4_server_capabilities,
 	.decode_dirent	= nfs4_decode_dirent,
 	.read_setup	= nfs4_proc_read_setup,
 	.read_done	= nfs4_read_done,
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 5d764d8e6d8..7b6df1852e7 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -61,7 +61,7 @@
 void
 nfs4_renew_state(void *data)
 {
-	struct nfs4_client *clp = (struct nfs4_client *)data;
+	struct nfs_client *clp = (struct nfs_client *)data;
 	struct rpc_cred *cred;
 	long lease, timeout;
 	unsigned long last, now;
@@ -108,7 +108,7 @@ out:
 
 /* Must be called with clp->cl_sem locked for writes */
 void
-nfs4_schedule_state_renewal(struct nfs4_client *clp)
+nfs4_schedule_state_renewal(struct nfs_client *clp)
 {
 	long timeout;
 
@@ -121,32 +121,20 @@ nfs4_schedule_state_renewal(struct nfs4_client *clp)
 			__FUNCTION__, (timeout + HZ - 1) / HZ);
 	cancel_delayed_work(&clp->cl_renewd);
 	schedule_delayed_work(&clp->cl_renewd, timeout);
+	set_bit(NFS_CS_RENEWD, &clp->cl_res_state);
 	spin_unlock(&clp->cl_lock);
 }
 
 void
 nfs4_renewd_prepare_shutdown(struct nfs_server *server)
 {
-	struct nfs4_client *clp = server->nfs4_state;
-
-	if (!clp)
-		return;
 	flush_scheduled_work();
-	down_write(&clp->cl_sem);
-	if (!list_empty(&server->nfs4_siblings))
-		list_del_init(&server->nfs4_siblings);
-	up_write(&clp->cl_sem);
 }
 
-/* Must be called with clp->cl_sem locked for writes */
 void
-nfs4_kill_renewd(struct nfs4_client *clp)
+nfs4_kill_renewd(struct nfs_client *clp)
 {
 	down_read(&clp->cl_sem);
-	if (!list_empty(&clp->cl_superblocks)) {
-		up_read(&clp->cl_sem);
-		return;
-	}
 	cancel_delayed_work(&clp->cl_renewd);
 	up_read(&clp->cl_sem);
 	flush_scheduled_work();
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 090a36b07a2..5fffbdfa971 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -50,149 +50,15 @@
 #include "nfs4_fs.h"
 #include "callback.h"
 #include "delegation.h"
+#include "internal.h"
 
 #define OPENOWNER_POOL_SIZE	8
 
 const nfs4_stateid zero_stateid;
 
-static DEFINE_SPINLOCK(state_spinlock);
 static LIST_HEAD(nfs4_clientid_list);
 
-void
-init_nfsv4_state(struct nfs_server *server)
-{
-	server->nfs4_state = NULL;
-	INIT_LIST_HEAD(&server->nfs4_siblings);
-}
-
-void
-destroy_nfsv4_state(struct nfs_server *server)
-{
-	kfree(server->mnt_path);
-	server->mnt_path = NULL;
-	if (server->nfs4_state) {
-		nfs4_put_client(server->nfs4_state);
-		server->nfs4_state = NULL;
-	}
-}
-
-/*
- * nfs4_get_client(): returns an empty client structure
- * nfs4_put_client(): drops reference to client structure
- *
- * Since these are allocated/deallocated very rarely, we don't
- * bother putting them in a slab cache...
- */
-static struct nfs4_client *
-nfs4_alloc_client(struct in_addr *addr)
-{
-	struct nfs4_client *clp;
-
-	if (nfs_callback_up() < 0)
-		return NULL;
-	if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) {
-		nfs_callback_down();
-		return NULL;
-	}
-	memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr));
-	init_rwsem(&clp->cl_sem);
-	INIT_LIST_HEAD(&clp->cl_delegations);
-	INIT_LIST_HEAD(&clp->cl_state_owners);
-	INIT_LIST_HEAD(&clp->cl_unused);
-	spin_lock_init(&clp->cl_lock);
-	atomic_set(&clp->cl_count, 1);
-	INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp);
-	INIT_LIST_HEAD(&clp->cl_superblocks);
-	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client");
-	clp->cl_rpcclient = ERR_PTR(-EINVAL);
-	clp->cl_boot_time = CURRENT_TIME;
-	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
-	return clp;
-}
-
-static void
-nfs4_free_client(struct nfs4_client *clp)
-{
-	struct nfs4_state_owner *sp;
-
-	while (!list_empty(&clp->cl_unused)) {
-		sp = list_entry(clp->cl_unused.next,
-				struct nfs4_state_owner,
-				so_list);
-		list_del(&sp->so_list);
-		kfree(sp);
-	}
-	BUG_ON(!list_empty(&clp->cl_state_owners));
-	nfs_idmap_delete(clp);
-	if (!IS_ERR(clp->cl_rpcclient))
-		rpc_shutdown_client(clp->cl_rpcclient);
-	kfree(clp);
-	nfs_callback_down();
-}
-
-static struct nfs4_client *__nfs4_find_client(struct in_addr *addr)
-{
-	struct nfs4_client *clp;
-	list_for_each_entry(clp, &nfs4_clientid_list, cl_servers) {
-		if (memcmp(&clp->cl_addr, addr, sizeof(clp->cl_addr)) == 0) {
-			atomic_inc(&clp->cl_count);
-			return clp;
-		}
-	}
-	return NULL;
-}
-
-struct nfs4_client *nfs4_find_client(struct in_addr *addr)
-{
-	struct nfs4_client *clp;
-	spin_lock(&state_spinlock);
-	clp = __nfs4_find_client(addr);
-	spin_unlock(&state_spinlock);
-	return clp;
-}
-
-struct nfs4_client *
-nfs4_get_client(struct in_addr *addr)
-{
-	struct nfs4_client *clp, *new = NULL;
-
-	spin_lock(&state_spinlock);
-	for (;;) {
-		clp = __nfs4_find_client(addr);
-		if (clp != NULL)
-			break;
-		clp = new;
-		if (clp != NULL) {
-			list_add(&clp->cl_servers, &nfs4_clientid_list);
-			new = NULL;
-			break;
-		}
-		spin_unlock(&state_spinlock);
-		new = nfs4_alloc_client(addr);
-		spin_lock(&state_spinlock);
-		if (new == NULL)
-			break;
-	}
-	spin_unlock(&state_spinlock);
-	if (new)
-		nfs4_free_client(new);
-	return clp;
-}
-
-void
-nfs4_put_client(struct nfs4_client *clp)
-{
-	if (!atomic_dec_and_lock(&clp->cl_count, &state_spinlock))
-		return;
-	list_del(&clp->cl_servers);
-	spin_unlock(&state_spinlock);
-	BUG_ON(!list_empty(&clp->cl_superblocks));
-	rpc_wake_up(&clp->cl_rpcwaitq);
-	nfs4_kill_renewd(clp);
-	nfs4_free_client(clp);
-}
-
-static int nfs4_init_client(struct nfs4_client *clp, struct rpc_cred *cred)
+static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK,
 			nfs_callback_tcpport, cred);
@@ -204,13 +70,13 @@ static int nfs4_init_client(struct nfs4_client *clp, struct rpc_cred *cred)
 }
 
 u32
-nfs4_alloc_lockowner_id(struct nfs4_client *clp)
+nfs4_alloc_lockowner_id(struct nfs_client *clp)
 {
 	return clp->cl_lockowner_id ++;
 }
 
 static struct nfs4_state_owner *
-nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred)
+nfs4_client_grab_unused(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	struct nfs4_state_owner *sp = NULL;
 
@@ -224,7 +90,7 @@ nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred)
 	return sp;
 }
 
-struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp)
+struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
 {
 	struct nfs4_state_owner *sp;
 	struct rpc_cred *cred = NULL;
@@ -238,7 +104,7 @@ struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp)
 	return cred;
 }
 
-struct rpc_cred *nfs4_get_setclientid_cred(struct nfs4_client *clp)
+struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
 {
 	struct nfs4_state_owner *sp;
 
@@ -251,7 +117,7 @@ struct rpc_cred *nfs4_get_setclientid_cred(struct nfs4_client *clp)
 }
 
 static struct nfs4_state_owner *
-nfs4_find_state_owner(struct nfs4_client *clp, struct rpc_cred *cred)
+nfs4_find_state_owner(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	struct nfs4_state_owner *sp, *res = NULL;
 
@@ -294,7 +160,7 @@ nfs4_alloc_state_owner(void)
 void
 nfs4_drop_state_owner(struct nfs4_state_owner *sp)
 {
-	struct nfs4_client *clp = sp->so_client;
+	struct nfs_client *clp = sp->so_client;
 	spin_lock(&clp->cl_lock);
 	list_del_init(&sp->so_list);
 	spin_unlock(&clp->cl_lock);
@@ -306,7 +172,7 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp)
  */
 struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
 {
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_state_owner *sp, *new;
 
 	get_rpccred(cred);
@@ -337,7 +203,7 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
  */
 void nfs4_put_state_owner(struct nfs4_state_owner *sp)
 {
-	struct nfs4_client *clp = sp->so_client;
+	struct nfs_client *clp = sp->so_client;
 	struct rpc_cred *cred = sp->so_cred;
 
 	if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
@@ -540,7 +406,7 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
 static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
 {
 	struct nfs4_lock_state *lsp;
-	struct nfs4_client *clp = state->owner->so_client;
+	struct nfs_client *clp = state->owner->so_client;
 
 	lsp = kzalloc(sizeof(*lsp), GFP_KERNEL);
 	if (lsp == NULL)
@@ -752,7 +618,7 @@ out:
 
 static int reclaimer(void *);
 
-static inline void nfs4_clear_recover_bit(struct nfs4_client *clp)
+static inline void nfs4_clear_recover_bit(struct nfs_client *clp)
 {
 	smp_mb__before_clear_bit();
 	clear_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state);
@@ -764,25 +630,25 @@ static inline void nfs4_clear_recover_bit(struct nfs4_client *clp)
 /*
  * State recovery routine
  */
-static void nfs4_recover_state(struct nfs4_client *clp)
+static void nfs4_recover_state(struct nfs_client *clp)
 {
 	struct task_struct *task;
 
 	__module_get(THIS_MODULE);
 	atomic_inc(&clp->cl_count);
 	task = kthread_run(reclaimer, clp, "%u.%u.%u.%u-reclaim",
-			NIPQUAD(clp->cl_addr));
+			NIPQUAD(clp->cl_addr.sin_addr));
 	if (!IS_ERR(task))
 		return;
 	nfs4_clear_recover_bit(clp);
-	nfs4_put_client(clp);
+	nfs_put_client(clp);
 	module_put(THIS_MODULE);
 }
 
 /*
  * Schedule a state recovery attempt
  */
-void nfs4_schedule_state_recovery(struct nfs4_client *clp)
+void nfs4_schedule_state_recovery(struct nfs_client *clp)
 {
 	if (!clp)
 		return;
@@ -879,7 +745,7 @@ out_err:
 	return status;
 }
 
-static void nfs4_state_mark_reclaim(struct nfs4_client *clp)
+static void nfs4_state_mark_reclaim(struct nfs_client *clp)
 {
 	struct nfs4_state_owner *sp;
 	struct nfs4_state *state;
@@ -903,7 +769,7 @@ static void nfs4_state_mark_reclaim(struct nfs4_client *clp)
 
 static int reclaimer(void *ptr)
 {
-	struct nfs4_client *clp = ptr;
+	struct nfs_client *clp = ptr;
 	struct nfs4_state_owner *sp;
 	struct nfs4_state_recovery_ops *ops;
 	struct rpc_cred *cred;
@@ -970,12 +836,12 @@ out:
 	if (status == -NFS4ERR_CB_PATH_DOWN)
 		nfs_handle_cb_pathdown(clp);
 	nfs4_clear_recover_bit(clp);
-	nfs4_put_client(clp);
+	nfs_put_client(clp);
 	module_put_and_exit(0);
 	return 0;
 out_error:
 	printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n",
-				NIPQUAD(clp->cl_addr.s_addr), -status);
+				NIPQUAD(clp->cl_addr.sin_addr), -status);
 	set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
 	goto out;
 }
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 730ec8fb31c..3dd413f52da 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -58,7 +58,7 @@
 /* Mapping from NFS error code to "errno" error code. */
 #define errno_NFSERR_IO		EIO
 
-static int nfs_stat_to_errno(int);
+static int nfs4_stat_to_errno(int);
 
 /* NFSv4 COMPOUND tags are only wanted for debugging purposes */
 #ifdef DEBUG
@@ -128,7 +128,7 @@ static int nfs_stat_to_errno(int);
 #define decode_link_maxsz	(op_decode_hdr_maxsz + 5)
 #define encode_symlink_maxsz	(op_encode_hdr_maxsz + \
 				1 + nfs4_name_maxsz + \
-				nfs4_path_maxsz + \
+				1 + \
 				nfs4_fattr_maxsz)
 #define decode_symlink_maxsz	(op_decode_hdr_maxsz + 8)
 #define encode_create_maxsz	(op_encode_hdr_maxsz + \
@@ -529,7 +529,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
 	if (iap->ia_valid & ATTR_MODE)
 		len += 4;
 	if (iap->ia_valid & ATTR_UID) {
-		owner_namelen = nfs_map_uid_to_name(server->nfs4_state, iap->ia_uid, owner_name);
+		owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name);
 		if (owner_namelen < 0) {
 			printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n",
 			       iap->ia_uid);
@@ -541,7 +541,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
 		len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
 	}
 	if (iap->ia_valid & ATTR_GID) {
-		owner_grouplen = nfs_map_gid_to_group(server->nfs4_state, iap->ia_gid, owner_group);
+		owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group);
 		if (owner_grouplen < 0) {
 			printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n",
 			       iap->ia_gid);
@@ -673,9 +673,9 @@ static int encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *c
 
 	switch (create->ftype) {
 	case NF4LNK:
-		RESERVE_SPACE(4 + create->u.symlink->len);
-		WRITE32(create->u.symlink->len);
-		WRITEMEM(create->u.symlink->name, create->u.symlink->len);
+		RESERVE_SPACE(4);
+		WRITE32(create->u.symlink.len);
+		xdr_write_pages(xdr, create->u.symlink.pages, 0, create->u.symlink.len);
 		break;
 
 	case NF4BLK: case NF4CHR:
@@ -1160,7 +1160,7 @@ static int encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, con
 	return 0;
 }
 
-static int encode_renew(struct xdr_stream *xdr, const struct nfs4_client *client_stateid)
+static int encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid)
 {
 	uint32_t *p;
 
@@ -1246,7 +1246,7 @@ static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclien
 	return 0;
 }
 
-static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_client *client_state)
+static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state)
 {
         uint32_t *p;
 
@@ -1945,7 +1945,7 @@ static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, uint32_t *p, const str
 /*
  * a RENEW request
  */
-static int nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp)
+static int nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs_client *clp)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1975,7 +1975,7 @@ static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, uint32_t *p, struct nf
 /*
  * a SETCLIENTID_CONFIRM request
  */
-static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp)
+static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_client *clp)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -2127,12 +2127,12 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
 	}
 	READ32(nfserr);
 	if (nfserr != NFS_OK)
-		return -nfs_stat_to_errno(nfserr);
+		return -nfs4_stat_to_errno(nfserr);
 	return 0;
 }
 
 /* Dummy routine */
-static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs4_client *clp)
+static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs_client *clp)
 {
 	uint32_t *p;
 	unsigned int strlen;
@@ -2636,7 +2636,7 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t
 	return 0;
 }
 
-static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *uid)
+static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *uid)
 {
 	uint32_t len, *p;
 
@@ -2660,7 +2660,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
 	return 0;
 }
 
-static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *gid)
+static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *gid)
 {
 	uint32_t len, *p;
 
@@ -3051,9 +3051,9 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons
 	fattr->mode |= fmode;
 	if ((status = decode_attr_nlink(xdr, bitmap, &fattr->nlink)) != 0)
 		goto xdr_error;
-	if ((status = decode_attr_owner(xdr, bitmap, server->nfs4_state, &fattr->uid)) != 0)
+	if ((status = decode_attr_owner(xdr, bitmap, server->nfs_client, &fattr->uid)) != 0)
 		goto xdr_error;
-	if ((status = decode_attr_group(xdr, bitmap, server->nfs4_state, &fattr->gid)) != 0)
+	if ((status = decode_attr_group(xdr, bitmap, server->nfs_client, &fattr->gid)) != 0)
 		goto xdr_error;
 	if ((status = decode_attr_rdev(xdr, bitmap, &fattr->rdev)) != 0)
 		goto xdr_error;
@@ -3254,7 +3254,7 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
 			if (decode_space_limit(xdr, &res->maxsize) < 0)
 				return -EIO;
 	}
-	return decode_ace(xdr, NULL, res->server->nfs4_state);
+	return decode_ace(xdr, NULL, res->server->nfs_client);
 }
 
 static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
@@ -3565,7 +3565,7 @@ static int decode_setattr(struct xdr_stream *xdr, struct nfs_setattrres *res)
 	return 0;
 }
 
-static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_client *clp)
+static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
 {
 	uint32_t *p;
 	uint32_t opnum;
@@ -3598,7 +3598,7 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_client *clp)
 		READ_BUF(len);
 		return -NFSERR_CLID_INUSE;
 	} else
-		return -nfs_stat_to_errno(nfserr);
+		return -nfs4_stat_to_errno(nfserr);
 
 	return 0;
 }
@@ -4256,7 +4256,7 @@ static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs_fsi
 	if (!status)
 		status = decode_fsinfo(&xdr, fsinfo);
 	if (!status)
-		status = -nfs_stat_to_errno(hdr.status);
+		status = -nfs4_stat_to_errno(hdr.status);
 	return status;
 }
 
@@ -4335,7 +4335,7 @@ static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, uint32_t *p, void *dummy)
  * a SETCLIENTID request
  */
 static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p,
-		struct nfs4_client *clp)
+		struct nfs_client *clp)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4346,7 +4346,7 @@ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p,
 	if (!status)
 		status = decode_setclientid(&xdr, clp);
 	if (!status)
-		status = -nfs_stat_to_errno(hdr.status);
+		status = -nfs4_stat_to_errno(hdr.status);
 	return status;
 }
 
@@ -4368,7 +4368,7 @@ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, s
 	if (!status)
 		status = decode_fsinfo(&xdr, fsinfo);
 	if (!status)
-		status = -nfs_stat_to_errno(hdr.status);
+		status = -nfs4_stat_to_errno(hdr.status);
 	return status;
 }
 
@@ -4521,7 +4521,7 @@ static struct {
  * This one is used jointly by NFSv2 and NFSv3.
  */
 static int
-nfs_stat_to_errno(int stat)
+nfs4_stat_to_errno(int stat)
 {
 	int i;
 	for (i = 0; nfs_errtbl[i].stat != -1; i++) {
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index b3899ea3229..630e50647bb 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -66,14 +66,14 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
 
 	dprintk("%s: call getattr\n", __FUNCTION__);
 	nfs_fattr_init(fattr);
-	status = rpc_call_sync(server->client_sys, &msg, 0);
+	status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
 	dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
 	if (status)
 		return status;
 	dprintk("%s: call statfs\n", __FUNCTION__);
 	msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS];
 	msg.rpc_resp = &fsinfo;
-	status = rpc_call_sync(server->client_sys, &msg, 0);
+	status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
 	dprintk("%s: reply statfs: %d\n", __FUNCTION__, status);
 	if (status)
 		return status;
@@ -425,16 +425,17 @@ nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
 }
 
 static int
-nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
-		 struct iattr *sattr, struct nfs_fh *fhandle,
-		 struct nfs_fattr *fattr)
+nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
+		 unsigned int len, struct iattr *sattr)
 {
+	struct nfs_fh fhandle;
+	struct nfs_fattr fattr;
 	struct nfs_symlinkargs	arg = {
 		.fromfh		= NFS_FH(dir),
-		.fromname	= name->name,
-		.fromlen	= name->len,
-		.topath		= path->name,
-		.tolen		= path->len,
+		.fromname	= dentry->d_name.name,
+		.fromlen	= dentry->d_name.len,
+		.pages		= &page,
+		.pathlen	= len,
 		.sattr		= sattr
 	};
 	struct rpc_message msg = {
@@ -443,13 +444,25 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
 	};
 	int			status;
 
-	if (path->len > NFS2_MAXPATHLEN)
+	if (len > NFS2_MAXPATHLEN)
 		return -ENAMETOOLONG;
-	dprintk("NFS call  symlink %s -> %s\n", name->name, path->name);
-	nfs_fattr_init(fattr);
-	fhandle->size = 0;
+
+	dprintk("NFS call  symlink %s\n", dentry->d_name.name);
+
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_mark_for_revalidate(dir);
+
+	/*
+	 * V2 SYMLINK requests don't return any attributes.  Setting the
+	 * filehandle size to zero indicates to nfs_instantiate that it
+	 * should fill in the data with a LOOKUP call on the wire.
+	 */
+	if (status == 0) {
+		nfs_fattr_init(&fattr);
+		fhandle.size = 0;
+		status = nfs_instantiate(dentry, &fhandle, &fattr);
+	}
+
 	dprintk("NFS reply symlink: %d\n", status);
 	return status;
 }
@@ -671,7 +684,7 @@ nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
 }
 
 
-struct nfs_rpc_ops	nfs_v2_clientops = {
+const struct nfs_rpc_ops nfs_v2_clientops = {
 	.version	= 2,		       /* protocol version */
 	.dentry_ops	= &nfs_dentry_operations,
 	.dir_inode_ops	= &nfs_dir_inode_operations,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 7a9ee00e0c6..69f1549da2b 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -171,7 +171,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
 		rdata->args.offset = page_offset(page) + rdata->args.pgbase;
 
 		dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n",
-			NFS_SERVER(inode)->hostname,
+			NFS_SERVER(inode)->nfs_client->cl_hostname,
 			inode->i_sb->s_id,
 			(long long)NFS_FILEID(inode),
 			(unsigned long long)rdata->args.pgbase,
@@ -204,9 +204,11 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
 	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
 	spin_unlock(&inode->i_lock);
 
-	nfs_readpage_truncate_uninitialised_page(rdata);
-	if (rdata->res.eof || rdata->res.count == rdata->args.count)
+	if (rdata->res.eof || rdata->res.count == rdata->args.count) {
 		SetPageUptodate(page);
+		if (rdata->res.eof && count != 0)
+			memclear_highpage_flush(page, rdata->args.pgbase, count);
+	}
 	result = 0;
 
 io_error:
@@ -566,8 +568,13 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
 
 	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);
 
-	/* Is this a short read? */
-	if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) {
+	if (task->tk_status < 0) {
+		if (task->tk_status == -ESTALE) {
+			set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
+			nfs_mark_for_revalidate(data->inode);
+		}
+	} else if (resp->count < argp->count && !resp->eof) {
+		/* This is a short read! */
 		nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
 		/* Has the server at least made some progress? */
 		if (resp->count != 0) {
@@ -614,6 +621,10 @@ int nfs_readpage(struct file *file, struct page *page)
 	if (error)
 		goto out_error;
 
+	error = -ESTALE;
+	if (NFS_STALE(inode))
+		goto out_error;
+
 	if (file == NULL) {
 		ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
 		if (ctx == NULL)
@@ -676,7 +687,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
 	};
 	struct inode *inode = mapping->host;
 	struct nfs_server *server = NFS_SERVER(inode);
-	int ret;
+	int ret = -ESTALE;
 
 	dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
 			inode->i_sb->s_id,
@@ -684,6 +695,9 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
 			nr_pages);
 	nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
 
+	if (NFS_STALE(inode))
+		goto out;
+
 	if (filp == NULL) {
 		desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
 		if (desc.ctx == NULL)
@@ -699,6 +713,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
 			ret = err;
 	}
 	put_nfs_open_context(desc.ctx);
+out:
 	return ret;
 }
 
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index e8a9bee74d9..b99113b0f65 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -13,6 +13,11 @@
  *
  *  Split from inode.c by David Howells <dhowells@redhat.com>
  *
+ * - superblocks are indexed on server only - all inodes, dentries, etc. associated with a
+ *   particular server are held in the same superblock
+ * - NFS superblocks can have several effective roots to the dentry tree
+ * - directory type roots are spliced into the tree when a path from one root reaches the root
+ *   of another (see nfs_lookup())
  */
 
 #include <linux/config.h>
@@ -52,66 +57,12 @@
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 
-/* Maximum number of readahead requests
- * FIXME: this should really be a sysctl so that users may tune it to suit
- *        their needs. People that do NFS over a slow network, might for
- *        instance want to reduce it to something closer to 1 for improved
- *        interactive response.
- */
-#define NFS_MAX_READAHEAD	(RPC_DEF_SLOT_TABLE - 1)
-
-/*
- * RPC cruft for NFS
- */
-static struct rpc_version * nfs_version[] = {
-	NULL,
-	NULL,
-	&nfs_version2,
-#if defined(CONFIG_NFS_V3)
-	&nfs_version3,
-#elif defined(CONFIG_NFS_V4)
-	NULL,
-#endif
-#if defined(CONFIG_NFS_V4)
-	&nfs_version4,
-#endif
-};
-
-static struct rpc_program nfs_program = {
-	.name			= "nfs",
-	.number			= NFS_PROGRAM,
-	.nrvers			= ARRAY_SIZE(nfs_version),
-	.version		= nfs_version,
-	.stats			= &nfs_rpcstat,
-	.pipe_dir_name		= "/nfs",
-};
-
-struct rpc_stat nfs_rpcstat = {
-	.program		= &nfs_program
-};
-
-
-#ifdef CONFIG_NFS_V3_ACL
-static struct rpc_stat		nfsacl_rpcstat = { &nfsacl_program };
-static struct rpc_version *	nfsacl_version[] = {
-	[3]			= &nfsacl_version3,
-};
-
-struct rpc_program		nfsacl_program = {
-	.name =			"nfsacl",
-	.number =		NFS_ACL_PROGRAM,
-	.nrvers =		ARRAY_SIZE(nfsacl_version),
-	.version =		nfsacl_version,
-	.stats =		&nfsacl_rpcstat,
-};
-#endif  /* CONFIG_NFS_V3_ACL */
-
 static void nfs_umount_begin(struct vfsmount *, int);
 static int  nfs_statfs(struct dentry *, struct kstatfs *);
 static int  nfs_show_options(struct seq_file *, struct vfsmount *);
 static int  nfs_show_stats(struct seq_file *, struct vfsmount *);
 static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *);
-static int nfs_clone_nfs_sb(struct file_system_type *fs_type,
+static int nfs_xdev_get_sb(struct file_system_type *fs_type,
 		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
 static void nfs_kill_super(struct super_block *);
 
@@ -123,10 +74,10 @@ static struct file_system_type nfs_fs_type = {
 	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
 
-struct file_system_type clone_nfs_fs_type = {
+struct file_system_type nfs_xdev_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "nfs",
-	.get_sb		= nfs_clone_nfs_sb,
+	.get_sb		= nfs_xdev_get_sb,
 	.kill_sb	= nfs_kill_super,
 	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
@@ -145,10 +96,10 @@ static struct super_operations nfs_sops = {
 #ifdef CONFIG_NFS_V4
 static int nfs4_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
-static int nfs_clone_nfs4_sb(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
-static int nfs_referral_nfs4_sb(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+static int nfs4_xdev_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+static int nfs4_referral_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
 static void nfs4_kill_super(struct super_block *sb);
 
 static struct file_system_type nfs4_fs_type = {
@@ -159,18 +110,18 @@ static struct file_system_type nfs4_fs_type = {
 	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
 
-struct file_system_type clone_nfs4_fs_type = {
+struct file_system_type nfs4_xdev_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "nfs4",
-	.get_sb		= nfs_clone_nfs4_sb,
+	.get_sb		= nfs4_xdev_get_sb,
 	.kill_sb	= nfs4_kill_super,
 	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
 
-struct file_system_type nfs_referral_nfs4_fs_type = {
+struct file_system_type nfs4_referral_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "nfs4",
-	.get_sb		= nfs_referral_nfs4_sb,
+	.get_sb		= nfs4_referral_get_sb,
 	.kill_sb	= nfs4_kill_super,
 	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
@@ -187,39 +138,7 @@ static struct super_operations nfs4_sops = {
 };
 #endif
 
-#ifdef CONFIG_NFS_V4
-static const int nfs_set_port_min = 0;
-static const int nfs_set_port_max = 65535;
-
-static int param_set_port(const char *val, struct kernel_param *kp)
-{
-	char *endp;
-	int num = simple_strtol(val, &endp, 0);
-	if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max)
-		return -EINVAL;
-	*((int *)kp->arg) = num;
-	return 0;
-}
-
-module_param_call(callback_tcpport, param_set_port, param_get_int,
-		 &nfs_callback_set_tcpport, 0644);
-#endif
-
-#ifdef CONFIG_NFS_V4
-static int param_set_idmap_timeout(const char *val, struct kernel_param *kp)
-{
-	char *endp;
-	int num = simple_strtol(val, &endp, 0);
-	int jif = num * HZ;
-	if (endp == val || *endp || num < 0 || jif < num)
-		return -EINVAL;
-	*((int *)kp->arg) = jif;
-	return 0;
-}
-
-module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int,
-		 &nfs_idmap_cache_timeout, 0644);
-#endif
+static struct shrinker *acl_shrinker;
 
 /*
  * Register the NFS filesystems
@@ -240,6 +159,7 @@ int __init register_nfs_fs(void)
 	if (ret < 0)
 		goto error_2;
 #endif
+	acl_shrinker = set_shrinker(DEFAULT_SEEKS, nfs_access_cache_shrinker);
 	return 0;
 
 #ifdef CONFIG_NFS_V4
@@ -257,6 +177,8 @@ error_0:
  */
 void __exit unregister_nfs_fs(void)
 {
+	if (acl_shrinker != NULL)
+		remove_shrinker(acl_shrinker);
 #ifdef CONFIG_NFS_V4
 	unregister_filesystem(&nfs4_fs_type);
 	nfs_unregister_sysctl();
@@ -269,11 +191,10 @@ void __exit unregister_nfs_fs(void)
  */
 static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
-	struct super_block *sb = dentry->d_sb;
-	struct nfs_server *server = NFS_SB(sb);
+	struct nfs_server *server = NFS_SB(dentry->d_sb);
 	unsigned char blockbits;
 	unsigned long blockres;
-	struct nfs_fh *rootfh = NFS_FH(sb->s_root->d_inode);
+	struct nfs_fh *fh = NFS_FH(dentry->d_inode);
 	struct nfs_fattr fattr;
 	struct nfs_fsstat res = {
 			.fattr = &fattr,
@@ -282,7 +203,7 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 
 	lock_kernel();
 
-	error = server->rpc_ops->statfs(server, rootfh, &res);
+	error = server->nfs_client->rpc_ops->statfs(server, fh, &res);
 	buf->f_type = NFS_SUPER_MAGIC;
 	if (error < 0)
 		goto out_err;
@@ -292,7 +213,7 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	 * case where f_frsize != f_bsize.  Eventually we want to
 	 * report the value of wtmult in this field.
 	 */
-	buf->f_frsize = sb->s_blocksize;
+	buf->f_frsize = dentry->d_sb->s_blocksize;
 
 	/*
 	 * On most *nix systems, f_blocks, f_bfree, and f_bavail
@@ -301,8 +222,8 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	 * thus historically Linux's sys_statfs reports these
 	 * fields in units of f_bsize.
 	 */
-	buf->f_bsize = sb->s_blocksize;
-	blockbits = sb->s_blocksize_bits;
+	buf->f_bsize = dentry->d_sb->s_blocksize;
+	blockbits = dentry->d_sb->s_blocksize_bits;
 	blockres = (1 << blockbits) - 1;
 	buf->f_blocks = (res.tbytes + blockres) >> blockbits;
 	buf->f_bfree = (res.fbytes + blockres) >> blockbits;
@@ -323,9 +244,12 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 
 }
 
+/*
+ * Map the security flavour number to a name
+ */
 static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
 {
-	static struct {
+	static const struct {
 		rpc_authflavor_t flavour;
 		const char *str;
 	} sec_flavours[] = {
@@ -356,10 +280,10 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
  */
 static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults)
 {
-	static struct proc_nfs_info {
+	static const struct proc_nfs_info {
 		int flag;
-		char *str;
-		char *nostr;
+		const char *str;
+		const char *nostr;
 	} nfs_info[] = {
 		{ NFS_MOUNT_SOFT, ",soft", ",hard" },
 		{ NFS_MOUNT_INTR, ",intr", "" },
@@ -369,11 +293,12 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 		{ NFS_MOUNT_NOACL, ",noacl", "" },
 		{ 0, NULL, NULL }
 	};
-	struct proc_nfs_info *nfs_infop;
+	const struct proc_nfs_info *nfs_infop;
+	struct nfs_client *clp = nfss->nfs_client;
 	char buf[12];
-	char *proto;
+	const char *proto;
 
-	seq_printf(m, ",vers=%d", nfss->rpc_ops->version);
+	seq_printf(m, ",vers=%d", clp->rpc_ops->version);
 	seq_printf(m, ",rsize=%d", nfss->rsize);
 	seq_printf(m, ",wsize=%d", nfss->wsize);
 	if (nfss->acregmin != 3*HZ || showdefaults)
@@ -402,8 +327,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 			proto = buf;
 	}
 	seq_printf(m, ",proto=%s", proto);
-	seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ);
-	seq_printf(m, ",retrans=%u", nfss->retrans_count);
+	seq_printf(m, ",timeo=%lu", 10U * clp->retrans_timeo / HZ);
+	seq_printf(m, ",retrans=%u", clp->retrans_count);
 	seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor));
 }
 
@@ -417,7 +342,7 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
 	nfs_show_mount_options(m, nfss, 0);
 
 	seq_puts(m, ",addr=");
-	seq_escape(m, nfss->hostname, " \t\n\\");
+	seq_escape(m, nfss->nfs_client->cl_hostname, " \t\n\\");
 
 	return 0;
 }
@@ -454,7 +379,7 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
 	seq_printf(m, ",namelen=%d", nfss->namelen);
 
 #ifdef CONFIG_NFS_V4
-	if (nfss->rpc_ops->version == 4) {
+	if (nfss->nfs_client->cl_nfsversion == 4) {
 		seq_printf(m, "\n\tnfsv4:\t");
 		seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
 		seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
@@ -501,782 +426,353 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
 
 /*
  * Begin unmount by attempting to remove all automounted mountpoints we added
- * in response to traversals
+ * in response to xdev traversals and referrals
  */
 static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags)
 {
-	struct nfs_server *server;
-	struct rpc_clnt	*rpc;
-
 	shrink_submounts(vfsmnt, &nfs_automount_list);
-	if (!(flags & MNT_FORCE))
-		return;
-	/* -EIO all pending I/O */
-	server = NFS_SB(vfsmnt->mnt_sb);
-	rpc = server->client;
-	if (!IS_ERR(rpc))
-		rpc_killall_tasks(rpc);
-	rpc = server->client_acl;
-	if (!IS_ERR(rpc))
-		rpc_killall_tasks(rpc);
 }
 
 /*
- * Obtain the root inode of the file system.
+ * Validate the NFS2/NFS3 mount data
+ * - fills in the mount root filehandle
  */
-static struct inode *
-nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo)
+static int nfs_validate_mount_data(struct nfs_mount_data *data,
+				   struct nfs_fh *mntfh)
 {
-	struct nfs_server	*server = NFS_SB(sb);
-	int			error;
-
-	error = server->rpc_ops->getroot(server, rootfh, fsinfo);
-	if (error < 0) {
-		dprintk("nfs_get_root: getattr error = %d\n", -error);
-		return ERR_PTR(error);
+	if (data == NULL) {
+		dprintk("%s: missing data argument\n", __FUNCTION__);
+		return -EINVAL;
 	}
 
-	server->fsid = fsinfo->fattr->fsid;
-	return nfs_fhget(sb, rootfh, fsinfo->fattr);
-}
-
-/*
- * Do NFS version-independent mount processing, and sanity checking
- */
-static int
-nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
-{
-	struct nfs_server	*server;
-	struct inode		*root_inode;
-	struct nfs_fattr	fattr;
-	struct nfs_fsinfo	fsinfo = {
-					.fattr = &fattr,
-				};
-	struct nfs_pathconf pathinfo = {
-			.fattr = &fattr,
-	};
-	int no_root_error = 0;
-	unsigned long max_rpc_payload;
-
-	/* We probably want something more informative here */
-	snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev));
-
-	server = NFS_SB(sb);
+	if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) {
+		dprintk("%s: bad mount version\n", __FUNCTION__);
+		return -EINVAL;
+	}
 
-	sb->s_magic      = NFS_SUPER_MAGIC;
+	switch (data->version) {
+		case 1:
+			data->namlen = 0;
+		case 2:
+			data->bsize  = 0;
+		case 3:
+			if (data->flags & NFS_MOUNT_VER3) {
+				dprintk("%s: mount structure version %d does not support NFSv3\n",
+						__FUNCTION__,
+						data->version);
+				return -EINVAL;
+			}
+			data->root.size = NFS2_FHSIZE;
+			memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
+		case 4:
+			if (data->flags & NFS_MOUNT_SECFLAVOUR) {
+				dprintk("%s: mount structure version %d does not support strong security\n",
+						__FUNCTION__,
+						data->version);
+				return -EINVAL;
+			}
+		case 5:
+			memset(data->context, 0, sizeof(data->context));
+	}
 
-	server->io_stats = nfs_alloc_iostats();
-	if (server->io_stats == NULL)
-		return -ENOMEM;
+	/* Set the pseudoflavor */
+	if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
+		data->pseudoflavor = RPC_AUTH_UNIX;
 
-	root_inode = nfs_get_root(sb, &server->fh, &fsinfo);
-	/* Did getting the root inode fail? */
-	if (IS_ERR(root_inode)) {
-		no_root_error = PTR_ERR(root_inode);
-		goto out_no_root;
-	}
-	sb->s_root = d_alloc_root(root_inode);
-	if (!sb->s_root) {
-		no_root_error = -ENOMEM;
-		goto out_no_root;
+#ifndef CONFIG_NFS_V3
+	/* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */
+	if (data->flags & NFS_MOUNT_VER3) {
+		dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__);
+		return -EPROTONOSUPPORT;
 	}
-	sb->s_root->d_op = server->rpc_ops->dentry_ops;
-
-	/* mount time stamp, in seconds */
-	server->mount_time = jiffies;
-
-	/* Get some general file system info */
-	if (server->namelen == 0 &&
-	    server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
-		server->namelen = pathinfo.max_namelen;
-	/* Work out a lot of parameters */
-	if (server->rsize == 0)
-		server->rsize = nfs_block_size(fsinfo.rtpref, NULL);
-	if (server->wsize == 0)
-		server->wsize = nfs_block_size(fsinfo.wtpref, NULL);
-
-	if (fsinfo.rtmax >= 512 && server->rsize > fsinfo.rtmax)
-		server->rsize = nfs_block_size(fsinfo.rtmax, NULL);
-	if (fsinfo.wtmax >= 512 && server->wsize > fsinfo.wtmax)
-		server->wsize = nfs_block_size(fsinfo.wtmax, NULL);
-
-	max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL);
-	if (server->rsize > max_rpc_payload)
-		server->rsize = max_rpc_payload;
-	if (server->rsize > NFS_MAX_FILE_IO_SIZE)
-		server->rsize = NFS_MAX_FILE_IO_SIZE;
-	server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-
-	if (server->wsize > max_rpc_payload)
-		server->wsize = max_rpc_payload;
-	if (server->wsize > NFS_MAX_FILE_IO_SIZE)
-		server->wsize = NFS_MAX_FILE_IO_SIZE;
-	server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+#endif /* CONFIG_NFS_V3 */
 
-	if (sb->s_blocksize == 0)
-		sb->s_blocksize = nfs_block_bits(server->wsize,
-							 &sb->s_blocksize_bits);
-	server->wtmult = nfs_block_bits(fsinfo.wtmult, NULL);
-
-	server->dtsize = nfs_block_size(fsinfo.dtpref, NULL);
-	if (server->dtsize > PAGE_CACHE_SIZE)
-		server->dtsize = PAGE_CACHE_SIZE;
-	if (server->dtsize > server->rsize)
-		server->dtsize = server->rsize;
-
-	if (server->flags & NFS_MOUNT_NOAC) {
-		server->acregmin = server->acregmax = 0;
-		server->acdirmin = server->acdirmax = 0;
-		sb->s_flags |= MS_SYNCHRONOUS;
+	/* We now require that the mount process passes the remote address */
+	if (data->addr.sin_addr.s_addr == INADDR_ANY) {
+		dprintk("%s: mount program didn't pass remote address!\n",
+			__FUNCTION__);
+		return -EINVAL;
 	}
-	server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
 
-	nfs_super_set_maxbytes(sb, fsinfo.maxfilesize);
+	/* Prepare the root filehandle */
+	if (data->flags & NFS_MOUNT_VER3)
+		mntfh->size = data->root.size;
+	else
+		mntfh->size = NFS2_FHSIZE;
+
+	if (mntfh->size > sizeof(mntfh->data)) {
+		dprintk("%s: invalid root filehandle\n", __FUNCTION__);
+		return -EINVAL;
+	}
 
-	server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0;
-	server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0;
+	memcpy(mntfh->data, data->root.data, mntfh->size);
+	if (mntfh->size < sizeof(mntfh->data))
+		memset(mntfh->data + mntfh->size, 0,
+		       sizeof(mntfh->data) - mntfh->size);
 
-	/* We're airborne Set socket buffersize */
-	rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
 	return 0;
-	/* Yargs. It didn't work out. */
-out_no_root:
-	dprintk("nfs_sb_init: get root inode failed: errno %d\n", -no_root_error);
-	if (!IS_ERR(root_inode))
-		iput(root_inode);
-	return no_root_error;
 }
 
 /*
- * Initialise the timeout values for a connection
+ * Initialise the common bits of the superblock
  */
-static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned int timeo, unsigned int retrans)
+static inline void nfs_initialise_sb(struct super_block *sb)
 {
-	to->to_initval = timeo * HZ / 10;
-	to->to_retries = retrans;
-	if (!to->to_retries)
-		to->to_retries = 2;
-
-	switch (proto) {
-	case IPPROTO_TCP:
-		if (!to->to_initval)
-			to->to_initval = 60 * HZ;
-		if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
-			to->to_initval = NFS_MAX_TCP_TIMEOUT;
-		to->to_increment = to->to_initval;
-		to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
-		to->to_exponential = 0;
-		break;
-	case IPPROTO_UDP:
-	default:
-		if (!to->to_initval)
-			to->to_initval = 11 * HZ / 10;
-		if (to->to_initval > NFS_MAX_UDP_TIMEOUT)
-			to->to_initval = NFS_MAX_UDP_TIMEOUT;
-		to->to_maxval = NFS_MAX_UDP_TIMEOUT;
-		to->to_exponential = 1;
-		break;
-	}
-}
+	struct nfs_server *server = NFS_SB(sb);
 
-/*
- * Create an RPC client handle.
- */
-static struct rpc_clnt *
-nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
-{
-	struct rpc_timeout	timeparms;
-	struct rpc_xprt		*xprt = NULL;
-	struct rpc_clnt		*clnt = NULL;
-	int			proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
-
-	nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans);
-
-	server->retrans_timeo = timeparms.to_initval;
-	server->retrans_count = timeparms.to_retries;
-
-	/* create transport and client */
-	xprt = xprt_create_proto(proto, &server->addr, &timeparms);
-	if (IS_ERR(xprt)) {
-		dprintk("%s: cannot create RPC transport. Error = %ld\n",
-				__FUNCTION__, PTR_ERR(xprt));
-		return (struct rpc_clnt *)xprt;
-	}
-	clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
-				 server->rpc_ops->version, data->pseudoflavor);
-	if (IS_ERR(clnt)) {
-		dprintk("%s: cannot create RPC client. Error = %ld\n",
-				__FUNCTION__, PTR_ERR(xprt));
-		goto out_fail;
-	}
+	sb->s_magic = NFS_SUPER_MAGIC;
 
-	clnt->cl_intr     = 1;
-	clnt->cl_softrtry = 1;
+	/* We probably want something more informative here */
+	snprintf(sb->s_id, sizeof(sb->s_id),
+		 "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev));
+
+	if (sb->s_blocksize == 0)
+		sb->s_blocksize = nfs_block_bits(server->wsize,
+						 &sb->s_blocksize_bits);
 
-	return clnt;
+	if (server->flags & NFS_MOUNT_NOAC)
+		sb->s_flags |= MS_SYNCHRONOUS;
 
-out_fail:
-	return clnt;
+	nfs_super_set_maxbytes(sb, server->maxfilesize);
 }
 
 /*
- * Clone a server record
+ * Finish setting up an NFS2/3 superblock
  */
-static struct nfs_server *nfs_clone_server(struct super_block *sb, struct nfs_clone_mount *data)
+static void nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data)
 {
 	struct nfs_server *server = NFS_SB(sb);
-	struct nfs_server *parent = NFS_SB(data->sb);
-	struct inode *root_inode;
-	struct nfs_fsinfo fsinfo;
-	void *err = ERR_PTR(-ENOMEM);
-
-	sb->s_op = data->sb->s_op;
-	sb->s_blocksize = data->sb->s_blocksize;
-	sb->s_blocksize_bits = data->sb->s_blocksize_bits;
-	sb->s_maxbytes = data->sb->s_maxbytes;
-
-	server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
-	server->io_stats = nfs_alloc_iostats();
-	if (server->io_stats == NULL)
-		goto out;
-
-	server->client = rpc_clone_client(parent->client);
-	if (IS_ERR((err = server->client)))
-		goto out;
-
-	if (!IS_ERR(parent->client_sys)) {
-		server->client_sys = rpc_clone_client(parent->client_sys);
-		if (IS_ERR((err = server->client_sys)))
-			goto out;
-	}
-	if (!IS_ERR(parent->client_acl)) {
-		server->client_acl = rpc_clone_client(parent->client_acl);
-		if (IS_ERR((err = server->client_acl)))
-			goto out;
-	}
-	root_inode = nfs_fhget(sb, data->fh, data->fattr);
-	if (!root_inode)
-		goto out;
-	sb->s_root = d_alloc_root(root_inode);
-	if (!sb->s_root)
-		goto out_put_root;
-	fsinfo.fattr = data->fattr;
-	if (NFS_PROTO(root_inode)->fsinfo(server, data->fh, &fsinfo) == 0)
-		nfs_super_set_maxbytes(sb, fsinfo.maxfilesize);
-	sb->s_root->d_op = server->rpc_ops->dentry_ops;
-	sb->s_flags |= MS_ACTIVE;
-	return server;
-out_put_root:
-	iput(root_inode);
-out:
-	return err;
-}
 
-/*
- * Copy an existing superblock and attach revised data
- */
-static int nfs_clone_generic_sb(struct nfs_clone_mount *data,
-		struct super_block *(*fill_sb)(struct nfs_server *, struct nfs_clone_mount *),
-		struct nfs_server *(*fill_server)(struct super_block *, struct nfs_clone_mount *),
-		struct vfsmount *mnt)
-{
-	struct nfs_server *server;
-	struct nfs_server *parent = NFS_SB(data->sb);
-	struct super_block *sb = ERR_PTR(-EINVAL);
-	char *hostname;
-	int error = -ENOMEM;
-	int len;
-
-	server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
-	if (server == NULL)
-		goto out_err;
-	memcpy(server, parent, sizeof(*server));
-	hostname = (data->hostname != NULL) ? data->hostname : parent->hostname;
-	len = strlen(hostname) + 1;
-	server->hostname = kmalloc(len, GFP_KERNEL);
-	if (server->hostname == NULL)
-		goto free_server;
-	memcpy(server->hostname, hostname, len);
-	error = rpciod_up();
-	if (error != 0)
-		goto free_hostname;
-
-	sb = fill_sb(server, data);
-	if (IS_ERR(sb)) {
-		error = PTR_ERR(sb);
-		goto kill_rpciod;
-	}
-		
-	if (sb->s_root)
-		goto out_rpciod_down;
+	sb->s_blocksize_bits = 0;
+	sb->s_blocksize = 0;
+	if (data->bsize)
+		sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
 
-	server = fill_server(sb, data);
-	if (IS_ERR(server)) {
-		error = PTR_ERR(server);
-		goto out_deactivate;
+	if (server->flags & NFS_MOUNT_VER3) {
+		/* The VFS shouldn't apply the umask to mode bits. We will do
+		 * so ourselves when necessary.
+		 */
+		sb->s_flags |= MS_POSIXACL;
+		sb->s_time_gran = 1;
 	}
-	return simple_set_mnt(mnt, sb);
-out_deactivate:
-	up_write(&sb->s_umount);
-	deactivate_super(sb);
-	return error;
-out_rpciod_down:
-	rpciod_down();
-	kfree(server->hostname);
-	kfree(server);
-	return simple_set_mnt(mnt, sb);
-kill_rpciod:
-	rpciod_down();
-free_hostname:
-	kfree(server->hostname);
-free_server:
-	kfree(server);
-out_err:
-	return error;
+
+	sb->s_op = &nfs_sops;
+ 	nfs_initialise_sb(sb);
 }
 
 /*
- * Set up an NFS2/3 superblock
- *
- * The way this works is that the mount process passes a structure
- * in the data argument which contains the server's IP address
- * and the root file handle obtained from the server's mount
- * daemon. We stash these away in the private superblock fields.
+ * Finish setting up a cloned NFS2/3 superblock
  */
-static int
-nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent)
+static void nfs_clone_super(struct super_block *sb,
+			    const struct super_block *old_sb)
 {
-	struct nfs_server	*server;
-	rpc_authflavor_t	authflavor;
+	struct nfs_server *server = NFS_SB(sb);
 
-	server           = NFS_SB(sb);
-	sb->s_blocksize_bits = 0;
-	sb->s_blocksize = 0;
-	if (data->bsize)
-		sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
-	if (data->rsize)
-		server->rsize = nfs_block_size(data->rsize, NULL);
-	if (data->wsize)
-		server->wsize = nfs_block_size(data->wsize, NULL);
-	server->flags    = data->flags & NFS_MOUNT_FLAGMASK;
-
-	server->acregmin = data->acregmin*HZ;
-	server->acregmax = data->acregmax*HZ;
-	server->acdirmin = data->acdirmin*HZ;
-	server->acdirmax = data->acdirmax*HZ;
-
-	/* Start lockd here, before we might error out */
-	if (!(server->flags & NFS_MOUNT_NONLM))
-		lockd_up();
-
-	server->namelen  = data->namlen;
-	server->hostname = kmalloc(strlen(data->hostname) + 1, GFP_KERNEL);
-	if (!server->hostname)
-		return -ENOMEM;
-	strcpy(server->hostname, data->hostname);
-
-	/* Check NFS protocol revision and initialize RPC op vector
-	 * and file handle pool. */
-#ifdef CONFIG_NFS_V3
-	if (server->flags & NFS_MOUNT_VER3) {
-		server->rpc_ops = &nfs_v3_clientops;
-		server->caps |= NFS_CAP_READDIRPLUS;
-	} else {
-		server->rpc_ops = &nfs_v2_clientops;
-	}
-#else
-	server->rpc_ops = &nfs_v2_clientops;
-#endif
+	sb->s_blocksize_bits = old_sb->s_blocksize_bits;
+	sb->s_blocksize = old_sb->s_blocksize;
+	sb->s_maxbytes = old_sb->s_maxbytes;
 
-	/* Fill in pseudoflavor for mount version < 5 */
-	if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
-		data->pseudoflavor = RPC_AUTH_UNIX;
-	authflavor = data->pseudoflavor;	/* save for sb_init() */
-	/* XXX maybe we want to add a server->pseudoflavor field */
-
-	/* Create RPC client handles */
-	server->client = nfs_create_client(server, data);
-	if (IS_ERR(server->client))
-		return PTR_ERR(server->client);
-	/* RFC 2623, sec 2.3.2 */
-	if (authflavor != RPC_AUTH_UNIX) {
-		struct rpc_auth *auth;
-
-		server->client_sys = rpc_clone_client(server->client);
-		if (IS_ERR(server->client_sys))
-			return PTR_ERR(server->client_sys);
-		auth = rpcauth_create(RPC_AUTH_UNIX, server->client_sys);
-		if (IS_ERR(auth))
-			return PTR_ERR(auth);
-	} else {
-		atomic_inc(&server->client->cl_count);
-		server->client_sys = server->client;
-	}
 	if (server->flags & NFS_MOUNT_VER3) {
-#ifdef CONFIG_NFS_V3_ACL
-		if (!(server->flags & NFS_MOUNT_NOACL)) {
-			server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3);
-			/* No errors! Assume that Sun nfsacls are supported */
-			if (!IS_ERR(server->client_acl))
-				server->caps |= NFS_CAP_ACLS;
-		}
-#else
-		server->flags &= ~NFS_MOUNT_NOACL;
-#endif /* CONFIG_NFS_V3_ACL */
-		/*
-		 * The VFS shouldn't apply the umask to mode bits. We will
-		 * do so ourselves when necessary.
+		/* The VFS shouldn't apply the umask to mode bits. We will do
+		 * so ourselves when necessary.
 		 */
 		sb->s_flags |= MS_POSIXACL;
-		if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
-			server->namelen = NFS3_MAXNAMLEN;
 		sb->s_time_gran = 1;
-	} else {
-		if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
-			server->namelen = NFS2_MAXNAMLEN;
 	}
 
-	sb->s_op = &nfs_sops;
-	return nfs_sb_init(sb, authflavor);
+	sb->s_op = old_sb->s_op;
+ 	nfs_initialise_sb(sb);
 }
 
-static int nfs_set_super(struct super_block *s, void *data)
+static int nfs_set_super(struct super_block *s, void *_server)
 {
-	s->s_fs_info = data;
-	return set_anon_super(s, data);
+	struct nfs_server *server = _server;
+	int ret;
+
+	s->s_fs_info = server;
+	ret = set_anon_super(s, server);
+	if (ret == 0)
+		server->s_dev = s->s_dev;
+	return ret;
 }
 
 static int nfs_compare_super(struct super_block *sb, void *data)
 {
-	struct nfs_server *server = data;
-	struct nfs_server *old = NFS_SB(sb);
+	struct nfs_server *server = data, *old = NFS_SB(sb);
 
-	if (old->addr.sin_addr.s_addr != server->addr.sin_addr.s_addr)
+	if (old->nfs_client != server->nfs_client)
 		return 0;
-	if (old->addr.sin_port != server->addr.sin_port)
+	if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0)
 		return 0;
-	return !nfs_compare_fh(&old->fh, &server->fh);
+	return 1;
 }
 
 static int nfs_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
 {
-	int error;
 	struct nfs_server *server = NULL;
 	struct super_block *s;
-	struct nfs_fh *root;
+	struct nfs_fh mntfh;
 	struct nfs_mount_data *data = raw_data;
+	struct dentry *mntroot;
+	int error;
 
-	error = -EINVAL;
-	if (data == NULL) {
-		dprintk("%s: missing data argument\n", __FUNCTION__);
-		goto out_err_noserver;
-	}
-	if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) {
-		dprintk("%s: bad mount version\n", __FUNCTION__);
-		goto out_err_noserver;
-	}
-	switch (data->version) {
-		case 1:
-			data->namlen = 0;
-		case 2:
-			data->bsize  = 0;
-		case 3:
-			if (data->flags & NFS_MOUNT_VER3) {
-				dprintk("%s: mount structure version %d does not support NFSv3\n",
-						__FUNCTION__,
-						data->version);
-				goto out_err_noserver;
-			}
-			data->root.size = NFS2_FHSIZE;
-			memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
-		case 4:
-			if (data->flags & NFS_MOUNT_SECFLAVOUR) {
-				dprintk("%s: mount structure version %d does not support strong security\n",
-						__FUNCTION__,
-						data->version);
-				goto out_err_noserver;
-			}
-		case 5:
-			memset(data->context, 0, sizeof(data->context));
-	}
-#ifndef CONFIG_NFS_V3
-	/* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */
-	error = -EPROTONOSUPPORT;
-	if (data->flags & NFS_MOUNT_VER3) {
-		dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__);
-		goto out_err_noserver;
-	}
-#endif /* CONFIG_NFS_V3 */
+	/* Validate the mount data */
+	error = nfs_validate_mount_data(data, &mntfh);
+	if (error < 0)
+		return error;
 
-	error = -ENOMEM;
-	server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
-	if (!server)
+	/* Get a volume representation */
+	server = nfs_create_server(data, &mntfh);
+	if (IS_ERR(server)) {
+		error = PTR_ERR(server);
 		goto out_err_noserver;
-	/* Zero out the NFS state stuff */
-	init_nfsv4_state(server);
-	server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
-
-	root = &server->fh;
-	if (data->flags & NFS_MOUNT_VER3)
-		root->size = data->root.size;
-	else
-		root->size = NFS2_FHSIZE;
-	error = -EINVAL;
-	if (root->size > sizeof(root->data)) {
-		dprintk("%s: invalid root filehandle\n", __FUNCTION__);
-		goto out_err;
-	}
-	memcpy(root->data, data->root.data, root->size);
-
-	/* We now require that the mount process passes the remote address */
-	memcpy(&server->addr, &data->addr, sizeof(server->addr));
-	if (server->addr.sin_addr.s_addr == INADDR_ANY) {
-		dprintk("%s: mount program didn't pass remote address!\n",
-				__FUNCTION__);
-		goto out_err;
-	}
-
-	/* Fire up rpciod if not yet running */
-	error = rpciod_up();
-	if (error < 0) {
-		dprintk("%s: couldn't start rpciod! Error = %d\n",
-				__FUNCTION__, error);
-		goto out_err;
 	}
 
+	/* Get a superblock - note that we may end up sharing one that already exists */
 	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
-		goto out_err_rpciod;
+		goto out_err_nosb;
 	}
 
-	if (s->s_root)
-		goto out_rpciod_down;
+	if (s->s_fs_info != server) {
+		nfs_free_server(server);
+		server = NULL;
+	}
 
-	s->s_flags = flags;
+	if (!s->s_root) {
+		/* initial superblock/root creation */
+		s->s_flags = flags;
+		nfs_fill_super(s, data);
+	}
 
-	error = nfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
-	if (error) {
-		up_write(&s->s_umount);
-		deactivate_super(s);
-		return error;
+	mntroot = nfs_get_root(s, &mntfh);
+	if (IS_ERR(mntroot)) {
+		error = PTR_ERR(mntroot);
+		goto error_splat_super;
 	}
-	s->s_flags |= MS_ACTIVE;
-	return simple_set_mnt(mnt, s);
 
-out_rpciod_down:
-	rpciod_down();
-	kfree(server);
-	return simple_set_mnt(mnt, s);
+	s->s_flags |= MS_ACTIVE;
+	mnt->mnt_sb = s;
+	mnt->mnt_root = mntroot;
+	return 0;
 
-out_err_rpciod:
-	rpciod_down();
-out_err:
-	kfree(server);
+out_err_nosb:
+	nfs_free_server(server);
 out_err_noserver:
 	return error;
+
+error_splat_super:
+	up_write(&s->s_umount);
+	deactivate_super(s);
+	return error;
 }
 
+/*
+ * Destroy an NFS2/3 superblock
+ */
 static void nfs_kill_super(struct super_block *s)
 {
 	struct nfs_server *server = NFS_SB(s);
 
 	kill_anon_super(s);
-
-	if (!IS_ERR(server->client))
-		rpc_shutdown_client(server->client);
-	if (!IS_ERR(server->client_sys))
-		rpc_shutdown_client(server->client_sys);
-	if (!IS_ERR(server->client_acl))
-		rpc_shutdown_client(server->client_acl);
-
-	if (!(server->flags & NFS_MOUNT_NONLM))
-		lockd_down();	/* release rpc.lockd */
-
-	rpciod_down();		/* release rpciod */
-
-	nfs_free_iostats(server->io_stats);
-	kfree(server->hostname);
-	kfree(server);
-	nfs_release_automount_timer();
-}
-
-static struct super_block *nfs_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data)
-{
-	struct super_block *sb;
-
-	server->fsid = data->fattr->fsid;
-	nfs_copy_fh(&server->fh, data->fh);
-	sb = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
-	if (!IS_ERR(sb) && sb->s_root == NULL && !(server->flags & NFS_MOUNT_NONLM))
-		lockd_up();
-	return sb;
+	nfs_free_server(server);
 }
 
-static int nfs_clone_nfs_sb(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
+/*
+ * Clone an NFS2/3 server record on xdev traversal (FSID-change)
+ */
+static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
+			   const char *dev_name, void *raw_data,
+			   struct vfsmount *mnt)
 {
 	struct nfs_clone_mount *data = raw_data;
-	return nfs_clone_generic_sb(data, nfs_clone_sb, nfs_clone_server, mnt);
-}
+	struct super_block *s;
+	struct nfs_server *server;
+	struct dentry *mntroot;
+	int error;
 
-#ifdef CONFIG_NFS_V4
-static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
-	struct rpc_timeout *timeparms, int proto, rpc_authflavor_t flavor)
-{
-	struct nfs4_client *clp;
-	struct rpc_xprt *xprt = NULL;
-	struct rpc_clnt *clnt = NULL;
-	int err = -EIO;
-
-	clp = nfs4_get_client(&server->addr.sin_addr);
-	if (!clp) {
-		dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
-		return ERR_PTR(err);
-	}
+	dprintk("--> nfs_xdev_get_sb()\n");
 
-	/* Now create transport and client */
-	down_write(&clp->cl_sem);
-	if (IS_ERR(clp->cl_rpcclient)) {
-		xprt = xprt_create_proto(proto, &server->addr, timeparms);
-		if (IS_ERR(xprt)) {
-			up_write(&clp->cl_sem);
-			err = PTR_ERR(xprt);
-			dprintk("%s: cannot create RPC transport. Error = %d\n",
-					__FUNCTION__, err);
-			goto out_fail;
-		}
-		/* Bind to a reserved port! */
-		xprt->resvport = 1;
-		clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
-				server->rpc_ops->version, flavor);
-		if (IS_ERR(clnt)) {
-			up_write(&clp->cl_sem);
-			err = PTR_ERR(clnt);
-			dprintk("%s: cannot create RPC client. Error = %d\n",
-					__FUNCTION__, err);
-			goto out_fail;
-		}
-		clnt->cl_intr     = 1;
-		clnt->cl_softrtry = 1;
-		clp->cl_rpcclient = clnt;
-		memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr));
-		nfs_idmap_new(clp);
-	}
-	list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
-	clnt = rpc_clone_client(clp->cl_rpcclient);
-	if (!IS_ERR(clnt))
-		server->nfs4_state = clp;
-	up_write(&clp->cl_sem);
-	clp = NULL;
-
-	if (IS_ERR(clnt)) {
-		dprintk("%s: cannot create RPC client. Error = %d\n",
-				__FUNCTION__, err);
-		return clnt;
+	/* create a new volume representation */
+	server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
+	if (IS_ERR(server)) {
+		error = PTR_ERR(server);
+		goto out_err_noserver;
 	}
 
-	if (server->nfs4_state->cl_idmap == NULL) {
-		dprintk("%s: failed to create idmapper.\n", __FUNCTION__);
-		return ERR_PTR(-ENOMEM);
+	/* Get a superblock - note that we may end up sharing one that already exists */
+	s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+	if (IS_ERR(s)) {
+		error = PTR_ERR(s);
+		goto out_err_nosb;
 	}
 
-	if (clnt->cl_auth->au_flavor != flavor) {
-		struct rpc_auth *auth;
-
-		auth = rpcauth_create(flavor, clnt);
-		if (IS_ERR(auth)) {
-			dprintk("%s: couldn't create credcache!\n", __FUNCTION__);
-			return (struct rpc_clnt *)auth;
-		}
+	if (s->s_fs_info != server) {
+		nfs_free_server(server);
+		server = NULL;
 	}
-	return clnt;
-
- out_fail:
-	if (clp)
-		nfs4_put_client(clp);
-	return ERR_PTR(err);
-}
-
-/*
- * Set up an NFS4 superblock
- */
-static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent)
-{
-	struct nfs_server *server;
-	struct rpc_timeout timeparms;
-	rpc_authflavor_t authflavour;
-	int err = -EIO;
 
-	sb->s_blocksize_bits = 0;
-	sb->s_blocksize = 0;
-	server = NFS_SB(sb);
-	if (data->rsize != 0)
-		server->rsize = nfs_block_size(data->rsize, NULL);
-	if (data->wsize != 0)
-		server->wsize = nfs_block_size(data->wsize, NULL);
-	server->flags = data->flags & NFS_MOUNT_FLAGMASK;
-	server->caps = NFS_CAP_ATOMIC_OPEN;
+	if (!s->s_root) {
+		/* initial superblock/root creation */
+		s->s_flags = flags;
+		nfs_clone_super(s, data->sb);
+	}
 
-	server->acregmin = data->acregmin*HZ;
-	server->acregmax = data->acregmax*HZ;
-	server->acdirmin = data->acdirmin*HZ;
-	server->acdirmax = data->acdirmax*HZ;
+	mntroot = nfs_get_root(s, data->fh);
+	if (IS_ERR(mntroot)) {
+		error = PTR_ERR(mntroot);
+		goto error_splat_super;
+	}
 
-	server->rpc_ops = &nfs_v4_clientops;
+	s->s_flags |= MS_ACTIVE;
+	mnt->mnt_sb = s;
+	mnt->mnt_root = mntroot;
 
-	nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans);
+	dprintk("<-- nfs_xdev_get_sb() = 0\n");
+	return 0;
 
-	server->retrans_timeo = timeparms.to_initval;
-	server->retrans_count = timeparms.to_retries;
+out_err_nosb:
+	nfs_free_server(server);
+out_err_noserver:
+	dprintk("<-- nfs_xdev_get_sb() = %d [error]\n", error);
+	return error;
 
-	/* Now create transport and client */
-	authflavour = RPC_AUTH_UNIX;
-	if (data->auth_flavourlen != 0) {
-		if (data->auth_flavourlen != 1) {
-			dprintk("%s: Invalid number of RPC auth flavours %d.\n",
-					__FUNCTION__, data->auth_flavourlen);
-			err = -EINVAL;
-			goto out_fail;
-		}
-		if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) {
-			err = -EFAULT;
-			goto out_fail;
-		}
-	}
+error_splat_super:
+	up_write(&s->s_umount);
+	deactivate_super(s);
+	dprintk("<-- nfs_xdev_get_sb() = %d [splat]\n", error);
+	return error;
+}
 
-	server->client = nfs4_create_client(server, &timeparms, data->proto, authflavour);
-	if (IS_ERR(server->client)) {
-		err = PTR_ERR(server->client);
-			dprintk("%s: cannot create RPC client. Error = %d\n",
-					__FUNCTION__, err);
-			goto out_fail;
-	}
+#ifdef CONFIG_NFS_V4
 
+/*
+ * Finish setting up a cloned NFS4 superblock
+ */
+static void nfs4_clone_super(struct super_block *sb,
+			    const struct super_block *old_sb)
+{
+	sb->s_blocksize_bits = old_sb->s_blocksize_bits;
+	sb->s_blocksize = old_sb->s_blocksize;
+	sb->s_maxbytes = old_sb->s_maxbytes;
 	sb->s_time_gran = 1;
-
-	sb->s_op = &nfs4_sops;
-	err = nfs_sb_init(sb, authflavour);
-
- out_fail:
-	return err;
+	sb->s_op = old_sb->s_op;
+ 	nfs_initialise_sb(sb);
 }
 
-static int nfs4_compare_super(struct super_block *sb, void *data)
+/*
+ * Set up an NFS4 superblock
+ */
+static void nfs4_fill_super(struct super_block *sb)
 {
-	struct nfs_server *server = data;
-	struct nfs_server *old = NFS_SB(sb);
-
-	if (strcmp(server->hostname, old->hostname) != 0)
-		return 0;
-	if (strcmp(server->mnt_path, old->mnt_path) != 0)
-		return 0;
-	return 1;
+	sb->s_time_gran = 1;
+	sb->s_op = &nfs4_sops;
+	nfs_initialise_sb(sb);
 }
 
-static void *
-nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
+static void *nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
 {
 	void *p = NULL;
 
@@ -1297,14 +793,22 @@ nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
 	return dst;
 }
 
+/*
+ * Get the superblock for an NFS4 mountpoint
+ */
 static int nfs4_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
 {
-	int error;
-	struct nfs_server *server;
-	struct super_block *s;
 	struct nfs4_mount_data *data = raw_data;
+	struct super_block *s;
+	struct nfs_server *server;
+	struct sockaddr_in addr;
+	rpc_authflavor_t authflavour;
+	struct nfs_fh mntfh;
+	struct dentry *mntroot;
+	char *mntpath = NULL, *hostname = NULL, ip_addr[16];
 	void *p;
+	int error;
 
 	if (data == NULL) {
 		dprintk("%s: missing data argument\n", __FUNCTION__);
@@ -1315,84 +819,112 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 		return -EINVAL;
 	}
 
-	server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
-	if (!server)
-		return -ENOMEM;
-	/* Zero out the NFS state stuff */
-	init_nfsv4_state(server);
-	server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
+	/* We now require that the mount process passes the remote address */
+	if (data->host_addrlen != sizeof(addr))
+		return -EINVAL;
+
+	if (copy_from_user(&addr, data->host_addr, sizeof(addr)))
+		return -EFAULT;
+
+	if (addr.sin_family != AF_INET ||
+	    addr.sin_addr.s_addr == INADDR_ANY
+	    ) {
+		dprintk("%s: mount program didn't pass remote IP address!\n",
+				__FUNCTION__);
+		return -EINVAL;
+	}
+	/* RFC3530: The default port for NFS is 2049 */
+	if (addr.sin_port == 0)
+		addr.sin_port = NFS_PORT;
+
+	/* Grab the authentication type */
+	authflavour = RPC_AUTH_UNIX;
+	if (data->auth_flavourlen != 0) {
+		if (data->auth_flavourlen != 1) {
+			dprintk("%s: Invalid number of RPC auth flavours %d.\n",
+					__FUNCTION__, data->auth_flavourlen);
+			error = -EINVAL;
+			goto out_err_noserver;
+		}
+
+		if (copy_from_user(&authflavour, data->auth_flavours,
+				   sizeof(authflavour))) {
+			error = -EFAULT;
+			goto out_err_noserver;
+		}
+	}
 
 	p = nfs_copy_user_string(NULL, &data->hostname, 256);
 	if (IS_ERR(p))
 		goto out_err;
-	server->hostname = p;
+	hostname = p;
 
 	p = nfs_copy_user_string(NULL, &data->mnt_path, 1024);
 	if (IS_ERR(p))
 		goto out_err;
-	server->mnt_path = p;
+	mntpath = p;
+
+	dprintk("MNTPATH: %s\n", mntpath);
 
-	p = nfs_copy_user_string(server->ip_addr, &data->client_addr,
-			sizeof(server->ip_addr) - 1);
+	p = nfs_copy_user_string(ip_addr, &data->client_addr,
+				 sizeof(ip_addr) - 1);
 	if (IS_ERR(p))
 		goto out_err;
 
-	/* We now require that the mount process passes the remote address */
-	if (data->host_addrlen != sizeof(server->addr)) {
-		error = -EINVAL;
-		goto out_free;
-	}
-	if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) {
-		error = -EFAULT;
-		goto out_free;
-	}
-	if (server->addr.sin_family != AF_INET ||
-	    server->addr.sin_addr.s_addr == INADDR_ANY) {
-		dprintk("%s: mount program didn't pass remote IP address!\n",
-				__FUNCTION__);
-		error = -EINVAL;
-		goto out_free;
-	}
-
-	/* Fire up rpciod if not yet running */
-	error = rpciod_up();
-	if (error < 0) {
-		dprintk("%s: couldn't start rpciod! Error = %d\n",
-				__FUNCTION__, error);
-		goto out_free;
+	/* Get a volume representation */
+	server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr,
+				    authflavour, &mntfh);
+	if (IS_ERR(server)) {
+		error = PTR_ERR(server);
+		goto out_err_noserver;
 	}
 
-	s = sget(fs_type, nfs4_compare_super, nfs_set_super, server);
-
+	/* Get a superblock - note that we may end up sharing one that already exists */
+	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
 		goto out_free;
 	}
 
-	if (s->s_root) {
-		kfree(server->mnt_path);
-		kfree(server->hostname);
-		kfree(server);
-		return simple_set_mnt(mnt, s);
+	if (s->s_fs_info != server) {
+		nfs_free_server(server);
+		server = NULL;
 	}
 
-	s->s_flags = flags;
+	if (!s->s_root) {
+		/* initial superblock/root creation */
+		s->s_flags = flags;
+		nfs4_fill_super(s);
+	}
 
-	error = nfs4_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
-	if (error) {
-		up_write(&s->s_umount);
-		deactivate_super(s);
-		return error;
+	mntroot = nfs4_get_root(s, &mntfh);
+	if (IS_ERR(mntroot)) {
+		error = PTR_ERR(mntroot);
+		goto error_splat_super;
 	}
+
 	s->s_flags |= MS_ACTIVE;
-	return simple_set_mnt(mnt, s);
+	mnt->mnt_sb = s;
+	mnt->mnt_root = mntroot;
+	kfree(mntpath);
+	kfree(hostname);
+	return 0;
+
 out_err:
 	error = PTR_ERR(p);
+	goto out_err_noserver;
+
 out_free:
-	kfree(server->mnt_path);
-	kfree(server->hostname);
-	kfree(server);
+	nfs_free_server(server);
+out_err_noserver:
+	kfree(mntpath);
+	kfree(hostname);
 	return error;
+
+error_splat_super:
+	up_write(&s->s_umount);
+	deactivate_super(s);
+	goto out_err_noserver;
 }
 
 static void nfs4_kill_super(struct super_block *sb)
@@ -1403,135 +935,140 @@ static void nfs4_kill_super(struct super_block *sb)
 	kill_anon_super(sb);
 
 	nfs4_renewd_prepare_shutdown(server);
+	nfs_free_server(server);
+}
+
+/*
+ * Clone an NFS4 server record on xdev traversal (FSID-change)
+ */
+static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
+			    const char *dev_name, void *raw_data,
+			    struct vfsmount *mnt)
+{
+	struct nfs_clone_mount *data = raw_data;
+	struct super_block *s;
+	struct nfs_server *server;
+	struct dentry *mntroot;
+	int error;
+
+	dprintk("--> nfs4_xdev_get_sb()\n");
+
+	/* create a new volume representation */
+	server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
+	if (IS_ERR(server)) {
+		error = PTR_ERR(server);
+		goto out_err_noserver;
+	}
+
+	/* Get a superblock - note that we may end up sharing one that already exists */
+	s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+	if (IS_ERR(s)) {
+		error = PTR_ERR(s);
+		goto out_err_nosb;
+	}
 
-	if (server->client != NULL && !IS_ERR(server->client))
-		rpc_shutdown_client(server->client);
+	if (s->s_fs_info != server) {
+		nfs_free_server(server);
+		server = NULL;
+	}
 
-	destroy_nfsv4_state(server);
+	if (!s->s_root) {
+		/* initial superblock/root creation */
+		s->s_flags = flags;
+		nfs4_clone_super(s, data->sb);
+	}
+
+	mntroot = nfs4_get_root(s, data->fh);
+	if (IS_ERR(mntroot)) {
+		error = PTR_ERR(mntroot);
+		goto error_splat_super;
+	}
 
-	rpciod_down();
+	s->s_flags |= MS_ACTIVE;
+	mnt->mnt_sb = s;
+	mnt->mnt_root = mntroot;
+
+	dprintk("<-- nfs4_xdev_get_sb() = 0\n");
+	return 0;
+
+out_err_nosb:
+	nfs_free_server(server);
+out_err_noserver:
+	dprintk("<-- nfs4_xdev_get_sb() = %d [error]\n", error);
+	return error;
 
-	nfs_free_iostats(server->io_stats);
-	kfree(server->hostname);
-	kfree(server);
-	nfs_release_automount_timer();
+error_splat_super:
+	up_write(&s->s_umount);
+	deactivate_super(s);
+	dprintk("<-- nfs4_xdev_get_sb() = %d [splat]\n", error);
+	return error;
 }
 
 /*
- * Constructs the SERVER-side path
+ * Create an NFS4 server record on referral traversal
  */
-static inline char *nfs4_dup_path(const struct dentry *dentry)
+static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
+				const char *dev_name, void *raw_data,
+				struct vfsmount *mnt)
 {
-	char *page = (char *) __get_free_page(GFP_USER);
-	char *path;
+	struct nfs_clone_mount *data = raw_data;
+	struct super_block *s;
+	struct nfs_server *server;
+	struct dentry *mntroot;
+	struct nfs_fh mntfh;
+	int error;
 
-	path = nfs4_path(dentry, page, PAGE_SIZE);
-	if (!IS_ERR(path)) {
-		int len = PAGE_SIZE + page - path;
-		char *tmp = path;
+	dprintk("--> nfs4_referral_get_sb()\n");
 
-		path = kmalloc(len, GFP_KERNEL);
-		if (path)
-			memcpy(path, tmp, len);
-		else
-			path = ERR_PTR(-ENOMEM);
+	/* create a new volume representation */
+	server = nfs4_create_referral_server(data, &mntfh);
+	if (IS_ERR(server)) {
+		error = PTR_ERR(server);
+		goto out_err_noserver;
 	}
-	free_page((unsigned long)page);
-	return path;
-}
 
-static struct super_block *nfs4_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data)
-{
-	const struct dentry *dentry = data->dentry;
-	struct nfs4_client *clp = server->nfs4_state;
-	struct super_block *sb;
-
-	server->fsid = data->fattr->fsid;
-	nfs_copy_fh(&server->fh, data->fh);
-	server->mnt_path = nfs4_dup_path(dentry);
-	if (IS_ERR(server->mnt_path)) {
-		sb = (struct super_block *)server->mnt_path;
-		goto err;
+	/* Get a superblock - note that we may end up sharing one that already exists */
+	s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+	if (IS_ERR(s)) {
+		error = PTR_ERR(s);
+		goto out_err_nosb;
 	}
-	sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server);
-	if (IS_ERR(sb) || sb->s_root)
-		goto free_path;
-	nfs4_server_capabilities(server, &server->fh);
-
-	down_write(&clp->cl_sem);
-	atomic_inc(&clp->cl_count);
-	list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
-	up_write(&clp->cl_sem);
-	return sb;
-free_path:
-	kfree(server->mnt_path);
-err:
-	server->mnt_path = NULL;
-	return sb;
-}
 
-static int nfs_clone_nfs4_sb(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
-{
-	struct nfs_clone_mount *data = raw_data;
-	return nfs_clone_generic_sb(data, nfs4_clone_sb, nfs_clone_server, mnt);
-}
+	if (s->s_fs_info != server) {
+		nfs_free_server(server);
+		server = NULL;
+	}
 
-static struct super_block *nfs4_referral_sb(struct nfs_server *server, struct nfs_clone_mount *data)
-{
-	struct super_block *sb = ERR_PTR(-ENOMEM);
-	int len;
-
-	len = strlen(data->mnt_path) + 1;
-	server->mnt_path = kmalloc(len, GFP_KERNEL);
-	if (server->mnt_path == NULL)
-		goto err;
-	memcpy(server->mnt_path, data->mnt_path, len);
-	memcpy(&server->addr, data->addr, sizeof(struct sockaddr_in));
-
-	sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server);
-	if (IS_ERR(sb) || sb->s_root)
-		goto free_path;
-	return sb;
-free_path:
-	kfree(server->mnt_path);
-err:
-	server->mnt_path = NULL;
-	return sb;
-}
+	if (!s->s_root) {
+		/* initial superblock/root creation */
+		s->s_flags = flags;
+		nfs4_fill_super(s);
+	}
 
-static struct nfs_server *nfs4_referral_server(struct super_block *sb, struct nfs_clone_mount *data)
-{
-	struct nfs_server *server = NFS_SB(sb);
-	struct rpc_timeout timeparms;
-	int proto, timeo, retrans;
-	void *err;
-
-	proto = IPPROTO_TCP;
-	/* Since we are following a referral and there may be alternatives,
-	   set the timeouts and retries to low values */
-	timeo = 2;
-	retrans = 1;
-	nfs_init_timeout_values(&timeparms, proto, timeo, retrans);
-
-	server->client = nfs4_create_client(server, &timeparms, proto, data->authflavor);
-	if (IS_ERR((err = server->client)))
-		goto out_err;
+	mntroot = nfs4_get_root(s, data->fh);
+	if (IS_ERR(mntroot)) {
+		error = PTR_ERR(mntroot);
+		goto error_splat_super;
+	}
 
-	sb->s_time_gran = 1;
-	sb->s_op = &nfs4_sops;
-	err = ERR_PTR(nfs_sb_init(sb, data->authflavor));
-	if (!IS_ERR(err))
-		return server;
-out_err:
-	return (struct nfs_server *)err;
-}
+	s->s_flags |= MS_ACTIVE;
+	mnt->mnt_sb = s;
+	mnt->mnt_root = mntroot;
 
-static int nfs_referral_nfs4_sb(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
-{
-	struct nfs_clone_mount *data = raw_data;
-	return nfs_clone_generic_sb(data, nfs4_referral_sb, nfs4_referral_server, mnt);
+	dprintk("<-- nfs4_referral_get_sb() = 0\n");
+	return 0;
+
+out_err_nosb:
+	nfs_free_server(server);
+out_err_noserver:
+	dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error);
+	return error;
+
+error_splat_super:
+	up_write(&s->s_umount);
+	deactivate_super(s);
+	dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error);
+	return error;
 }
 
-#endif
+#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 8ab3cf10d79..c12effb46fe 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -396,6 +396,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 out:
 	clear_bit(BDI_write_congested, &bdi->state);
 	wake_up_all(&nfs_write_congestion);
+	writeback_congestion_end();
 	return err;
 }
 
@@ -590,8 +591,8 @@ static void nfs_cancel_commit_list(struct list_head *head)
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
 		nfs_inode_remove_request(req);
-		nfs_clear_page_writeback(req);
 		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+		nfs_clear_page_writeback(req);
 	}
 }
 
@@ -1252,7 +1253,13 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 	dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
 		task->tk_pid, task->tk_status);
 
-	/* Call the NFS version-specific code */
+	/*
+	 * ->write_done will attempt to use post-op attributes to detect
+	 * conflicting writes by other clients.  A strict interpretation
+	 * of close-to-open would allow us to continue caching even if
+	 * another writer had changed the file, but some applications
+	 * depend on tighter cache coherency when writing.
+	 */
 	status = NFS_PROTO(data->inode)->write_done(task, data);
 	if (status != 0)
 		return status;
@@ -1273,7 +1280,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 		if (time_before(complain, jiffies)) {
 			dprintk("NFS: faulty NFS server %s:"
 				" (committed = %d) != (stable = %d)\n",
-				NFS_SERVER(data->inode)->hostname,
+				NFS_SERVER(data->inode)->nfs_client->cl_hostname,
 				resp->verf->committed, argp->stable);
 			complain = jiffies + 300 * HZ;
 		}
@@ -1386,8 +1393,8 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
 		nfs_mark_request_commit(req);
-		nfs_clear_page_writeback(req);
 		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+		nfs_clear_page_writeback(req);
 	}
 	return -ENOMEM;
 }
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 54b37b1d2e3..8583d99ee74 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -375,16 +375,28 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 {
 	struct sockaddr_in	addr;
 	struct nfs4_callback    *cb = &clp->cl_callback;
-	struct rpc_timeout	timeparms;
-	struct rpc_xprt *	xprt;
+	struct rpc_timeout	timeparms = {
+		.to_initval	= (NFSD_LEASE_TIME/4) * HZ,
+		.to_retries	= 5,
+		.to_maxval	= (NFSD_LEASE_TIME/2) * HZ,
+		.to_exponential	= 1,
+	};
 	struct rpc_program *	program = &cb->cb_program;
-	struct rpc_stat *	stat = &cb->cb_stat;
-	struct rpc_clnt *	clnt;
+	struct rpc_create_args args = {
+		.protocol	= IPPROTO_TCP,
+		.address	= (struct sockaddr *)&addr,
+		.addrsize	= sizeof(addr),
+		.timeout	= &timeparms,
+		.servername	= clp->cl_name.data,
+		.program	= program,
+		.version	= nfs_cb_version[1]->number,
+		.authflavor	= RPC_AUTH_UNIX,	/* XXX: need AUTH_GSS... */
+		.flags		= (RPC_CLNT_CREATE_NOPING),
+	};
 	struct rpc_message msg = {
 		.rpc_proc       = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
 		.rpc_argp       = clp,
 	};
-	char                    hostname[32];
 	int status;
 
 	if (atomic_read(&cb->cb_set))
@@ -396,51 +408,27 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 	addr.sin_port = htons(cb->cb_port);
 	addr.sin_addr.s_addr = htonl(cb->cb_addr);
 
-	/* Initialize timeout */
-	timeparms.to_initval = (NFSD_LEASE_TIME/4) * HZ;
-	timeparms.to_retries = 0;
-	timeparms.to_maxval = (NFSD_LEASE_TIME/2) * HZ;
-	timeparms.to_exponential = 1;
-
-	/* Create RPC transport */
-	xprt = xprt_create_proto(IPPROTO_TCP, &addr, &timeparms);
-	if (IS_ERR(xprt)) {
-		dprintk("NFSD: couldn't create callback transport!\n");
-		goto out_err;
-	}
-
 	/* Initialize rpc_program */
 	program->name = "nfs4_cb";
 	program->number = cb->cb_prog;
 	program->nrvers = ARRAY_SIZE(nfs_cb_version);
 	program->version = nfs_cb_version;
-	program->stats = stat;
+	program->stats = &cb->cb_stat;
 
 	/* Initialize rpc_stat */
-	memset(stat, 0, sizeof(struct rpc_stat));
-	stat->program = program;
-
-	/* Create RPC client
- 	 *
-	 * XXX AUTH_UNIX only - need AUTH_GSS....
-	 */
-	sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr.sin_addr.s_addr));
-	clnt = rpc_new_client(xprt, hostname, program, 1, RPC_AUTH_UNIX);
-	if (IS_ERR(clnt)) {
+	memset(program->stats, 0, sizeof(cb->cb_stat));
+	program->stats->program = program;
+
+	/* Create RPC client */
+	cb->cb_client = rpc_create(&args);
+	if (!cb->cb_client) {
 		dprintk("NFSD: couldn't create callback client\n");
 		goto out_err;
 	}
-	clnt->cl_intr = 0;
-	clnt->cl_softrtry = 1;
 
 	/* Kick rpciod, put the call on the wire. */
-
-	if (rpciod_up() != 0) {
-		dprintk("nfsd: couldn't start rpciod for callbacks!\n");
+	if (rpciod_up() != 0)
 		goto out_clnt;
-	}
-
-	cb->cb_client = clnt;
 
 	/* the task holds a reference to the nfs4_client struct */
 	atomic_inc(&clp->cl_count);
@@ -448,7 +436,7 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 	msg.rpc_cred = nfsd4_lookupcred(clp,0);
 	if (IS_ERR(msg.rpc_cred))
 		goto out_rpciod;
-	status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL);
+	status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL);
 	put_rpccred(msg.rpc_cred);
 
 	if (status != 0) {
@@ -462,7 +450,7 @@ out_rpciod:
 	rpciod_down();
 	cb->cb_client = NULL;
 out_clnt:
-	rpc_shutdown_client(clnt);
+	rpc_shutdown_client(cb->cb_client);
 out_err:
 	dprintk("NFSD: warning: no callback path to client %.*s\n",
 		(int)clp->cl_name.len, clp->cl_name.data);
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 06da7506363..e35d7e52fde 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -33,7 +33,7 @@
 *
 */
 
-
+#include <linux/err.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/nfsd/nfsd.h>
 #include <linux/nfs4.h>
@@ -87,34 +87,35 @@ int
 nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
 {
 	struct xdr_netobj cksum;
-	struct crypto_tfm *tfm;
+	struct hash_desc desc;
 	struct scatterlist sg[1];
 	int status = nfserr_resource;
 
 	dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
 			clname->len, clname->data);
-	tfm = crypto_alloc_tfm("md5", CRYPTO_TFM_REQ_MAY_SLEEP);
-	if (tfm == NULL)
-		goto out;
-	cksum.len = crypto_tfm_alg_digestsize(tfm);
+	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+	desc.tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(desc.tfm))
+		goto out_no_tfm;
+	cksum.len = crypto_hash_digestsize(desc.tfm);
 	cksum.data = kmalloc(cksum.len, GFP_KERNEL);
 	if (cksum.data == NULL)
  		goto out;
-	crypto_digest_init(tfm);
 
 	sg[0].page = virt_to_page(clname->data);
 	sg[0].offset = offset_in_page(clname->data);
 	sg[0].length = clname->len;
 
-	crypto_digest_update(tfm, sg, 1);
-	crypto_digest_final(tfm, cksum.data);
+	if (crypto_hash_digest(&desc, sg, sg->length, cksum.data))
+		goto out;
 
 	md5_to_hex(dname, cksum.data);
 
 	kfree(cksum.data);
 	status = nfs_ok;
 out:
-	crypto_free_tfm(tfm);
+	crypto_free_hash(desc.tfm);
+out_no_tfm:
 	return status;
 }
 
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 7d3be845a61..9fb8132f19b 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -16,6 +16,7 @@ ocfs2-objs := \
 	file.o 			\
 	heartbeat.o 		\
 	inode.o 		\
+	ioctl.o 		\
 	journal.o 		\
 	localalloc.o 		\
 	mmap.o 			\
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index edaab05a93e..f43bc5f18a3 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1717,17 +1717,29 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
 
 			ocfs2_remove_from_cache(inode, eb_bh);
 
-			BUG_ON(eb->h_suballoc_slot);
 			BUG_ON(el->l_recs[0].e_clusters);
 			BUG_ON(el->l_recs[0].e_cpos);
 			BUG_ON(el->l_recs[0].e_blkno);
-			status = ocfs2_free_extent_block(handle,
-							 tc->tc_ext_alloc_inode,
-							 tc->tc_ext_alloc_bh,
-							 eb);
-			if (status < 0) {
-				mlog_errno(status);
-				goto bail;
+			if (eb->h_suballoc_slot == 0) {
+				/*
+				 * This code only understands how to
+				 * lock the suballocator in slot 0,
+				 * which is fine because allocation is
+				 * only ever done out of that
+				 * suballocator too. A future version
+				 * might change that however, so avoid
+				 * a free if we don't know how to
+				 * handle it. This way an fs incompat
+				 * bit will not be necessary.
+				 */
+				status = ocfs2_free_extent_block(handle,
+								 tc->tc_ext_alloc_inode,
+								 tc->tc_ext_alloc_bh,
+								 eb);
+				if (status < 0) {
+					mlog_errno(status);
+					goto bail;
+				}
 			}
 		}
 		brelse(eb_bh);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index f1d1c342ce0..3d7c082a8f5 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -391,31 +391,28 @@ out:
 static int ocfs2_commit_write(struct file *file, struct page *page,
 			      unsigned from, unsigned to)
 {
-	int ret, extending = 0, locklevel = 0;
-	loff_t new_i_size;
+	int ret;
 	struct buffer_head *di_bh = NULL;
 	struct inode *inode = page->mapping->host;
 	struct ocfs2_journal_handle *handle = NULL;
+	struct ocfs2_dinode *di;
 
 	mlog_entry("(0x%p, 0x%p, %u, %u)\n", file, page, from, to);
 
 	/* NOTE: ocfs2_file_aio_write has ensured that it's safe for
-	 * us to sample inode->i_size here without the metadata lock:
+	 * us to continue here without rechecking the I/O against
+	 * changed inode values.
 	 *
 	 * 1) We're currently holding the inode alloc lock, so no
 	 *    nodes can change it underneath us.
 	 *
 	 * 2) We've had to take the metadata lock at least once
-	 *    already to check for extending writes, hence insuring
-	 *    that our current copy is also up to date.
+	 *    already to check for extending writes, suid removal, etc.
+	 *    The meta data update code then ensures that we don't get a
+	 *    stale inode allocation image (i_size, i_clusters, etc).
 	 */
-	new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
-	if (new_i_size > i_size_read(inode)) {
-		extending = 1;
-		locklevel = 1;
-	}
 
-	ret = ocfs2_meta_lock_with_page(inode, NULL, &di_bh, locklevel, page);
+	ret = ocfs2_meta_lock_with_page(inode, NULL, &di_bh, 1, page);
 	if (ret != 0) {
 		mlog_errno(ret);
 		goto out;
@@ -427,23 +424,20 @@ static int ocfs2_commit_write(struct file *file, struct page *page,
 		goto out_unlock_meta;
 	}
 
-	if (extending) {
-		handle = ocfs2_start_walk_page_trans(inode, page, from, to);
-		if (IS_ERR(handle)) {
-			ret = PTR_ERR(handle);
-			handle = NULL;
-			goto out_unlock_data;
-		}
+	handle = ocfs2_start_walk_page_trans(inode, page, from, to);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		goto out_unlock_data;
+	}
 
-		/* Mark our buffer early. We'd rather catch this error up here
-		 * as opposed to after a successful commit_write which would
-		 * require us to set back inode->i_size. */
-		ret = ocfs2_journal_access(handle, inode, di_bh,
-					   OCFS2_JOURNAL_ACCESS_WRITE);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto out_commit;
-		}
+	/* Mark our buffer early. We'd rather catch this error up here
+	 * as opposed to after a successful commit_write which would
+	 * require us to set back inode->i_size. */
+	ret = ocfs2_journal_access(handle, inode, di_bh,
+				   OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out_commit;
 	}
 
 	/* might update i_size */
@@ -453,37 +447,28 @@ static int ocfs2_commit_write(struct file *file, struct page *page,
 		goto out_commit;
 	}
 
-	if (extending) {
-		loff_t size = (u64) i_size_read(inode);
-		struct ocfs2_dinode *di =
-			(struct ocfs2_dinode *)di_bh->b_data;
+	di = (struct ocfs2_dinode *)di_bh->b_data;
 
-		/* ocfs2_mark_inode_dirty is too heavy to use here. */
-		inode->i_blocks = ocfs2_align_bytes_to_sectors(size);
-		inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+	/* ocfs2_mark_inode_dirty() is too heavy to use here. */
+	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
+	di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
 
-		di->i_size = cpu_to_le64(size);
-		di->i_ctime = di->i_mtime = 
-				cpu_to_le64(inode->i_mtime.tv_sec);
-		di->i_ctime_nsec = di->i_mtime_nsec = 
-				cpu_to_le32(inode->i_mtime.tv_nsec);
+	inode->i_blocks = ocfs2_align_bytes_to_sectors((u64)(i_size_read(inode)));
+	di->i_size = cpu_to_le64((u64)i_size_read(inode));
 
-		ret = ocfs2_journal_dirty(handle, di_bh);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto out_commit;
-		}
+	ret = ocfs2_journal_dirty(handle, di_bh);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out_commit;
 	}
 
-	BUG_ON(extending && (i_size_read(inode) != new_i_size));
-
 out_commit:
-	if (handle)
-		ocfs2_commit_trans(handle);
+	ocfs2_commit_trans(handle);
 out_unlock_data:
 	ocfs2_data_unlock(inode, 1);
 out_unlock_meta:
-	ocfs2_meta_unlock(inode, locklevel);
+	ocfs2_meta_unlock(inode, 1);
 out:
 	if (di_bh)
 		brelse(di_bh);
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 9a24adf9be6..c9037414f4f 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -100,6 +100,9 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 	mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n",
 		   (unsigned long long)block, nr, flags, inode);
 
+	BUG_ON((flags & OCFS2_BH_READAHEAD) &&
+	       (!inode || !(flags & OCFS2_BH_CACHED)));
+
 	if (osb == NULL || osb->sb == NULL || bhs == NULL) {
 		status = -EINVAL;
 		mlog_errno(status);
@@ -140,6 +143,30 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 		bh = bhs[i];
 		ignore_cache = 0;
 
+		/* There are three read-ahead cases here which we need to
+		 * be concerned with. All three assume a buffer has
+		 * previously been submitted with OCFS2_BH_READAHEAD
+		 * and it hasn't yet completed I/O.
+		 *
+		 * 1) The current request is sync to disk. This rarely
+		 *    happens these days, and never when performance
+		 *    matters - the code can just wait on the buffer
+		 *    lock and re-submit.
+		 *
+		 * 2) The current request is cached, but not
+		 *    readahead. ocfs2_buffer_uptodate() will return
+		 *    false anyway, so we'll wind up waiting on the
+		 *    buffer lock to do I/O. We re-check the request
+		 *    with after getting the lock to avoid a re-submit.
+		 *
+		 * 3) The current request is readahead (and so must
+		 *    also be a caching one). We short circuit if the
+		 *    buffer is locked (under I/O) and if it's in the
+		 *    uptodate cache. The re-check from #2 catches the
+		 *    case that the previous read-ahead completes just
+		 *    before our is-it-in-flight check.
+		 */
+
 		if (flags & OCFS2_BH_CACHED &&
 		    !ocfs2_buffer_uptodate(inode, bh)) {
 			mlog(ML_UPTODATE,
@@ -169,6 +196,14 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 				continue;
 			}
 
+			/* A read-ahead request was made - if the
+			 * buffer is already under read-ahead from a
+			 * previously submitted request than we are
+			 * done here. */
+			if ((flags & OCFS2_BH_READAHEAD)
+			    && ocfs2_buffer_read_ahead(inode, bh))
+				continue;
+
 			lock_buffer(bh);
 			if (buffer_jbd(bh)) {
 #ifdef CATCH_BH_JBD_RACES
@@ -181,13 +216,22 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 				continue;
 #endif
 			}
+
+			/* Re-check ocfs2_buffer_uptodate() as a
+			 * previously read-ahead buffer may have
+			 * completed I/O while we were waiting for the
+			 * buffer lock. */
+			if ((flags & OCFS2_BH_CACHED)
+			    && !(flags & OCFS2_BH_READAHEAD)
+			    && ocfs2_buffer_uptodate(inode, bh)) {
+				unlock_buffer(bh);
+				continue;
+			}
+
 			clear_buffer_uptodate(bh);
 			get_bh(bh); /* for end_buffer_read_sync() */
 			bh->b_end_io = end_buffer_read_sync;
-			if (flags & OCFS2_BH_READAHEAD)
-				submit_bh(READA, bh);
-			else
-				submit_bh(READ, bh);
+			submit_bh(READ, bh);
 			continue;
 		}
 	}
@@ -197,34 +241,39 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 	for (i = (nr - 1); i >= 0; i--) {
 		bh = bhs[i];
 
-		/* We know this can't have changed as we hold the
-		 * inode sem. Avoid doing any work on the bh if the
-		 * journal has it. */
-		if (!buffer_jbd(bh))
-			wait_on_buffer(bh);
-
-		if (!buffer_uptodate(bh)) {
-			/* Status won't be cleared from here on out,
-			 * so we can safely record this and loop back
-			 * to cleanup the other buffers. Don't need to
-			 * remove the clustered uptodate information
-			 * for this bh as it's not marked locally
-			 * uptodate. */
-			status = -EIO;
-			brelse(bh);
-			bhs[i] = NULL;
-			continue;
+		if (!(flags & OCFS2_BH_READAHEAD)) {
+			/* We know this can't have changed as we hold the
+			 * inode sem. Avoid doing any work on the bh if the
+			 * journal has it. */
+			if (!buffer_jbd(bh))
+				wait_on_buffer(bh);
+
+			if (!buffer_uptodate(bh)) {
+				/* Status won't be cleared from here on out,
+				 * so we can safely record this and loop back
+				 * to cleanup the other buffers. Don't need to
+				 * remove the clustered uptodate information
+				 * for this bh as it's not marked locally
+				 * uptodate. */
+				status = -EIO;
+				brelse(bh);
+				bhs[i] = NULL;
+				continue;
+			}
 		}
 
+		/* Always set the buffer in the cache, even if it was
+		 * a forced read, or read-ahead which hasn't yet
+		 * completed. */
 		if (inode)
 			ocfs2_set_buffer_uptodate(inode, bh);
 	}
 	if (inode)
 		mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
 
-	mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s\n", 
+	mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", 
 	     (unsigned long long)block, nr,
-	     (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes");
+	     (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes", flags);
 
 bail:
 
diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h
index 6ecb90937b6..6cc20930fac 100644
--- a/fs/ocfs2/buffer_head_io.h
+++ b/fs/ocfs2/buffer_head_io.h
@@ -49,7 +49,7 @@ int ocfs2_read_blocks(struct ocfs2_super          *osb,
 
 
 #define OCFS2_BH_CACHED            1
-#define OCFS2_BH_READAHEAD         8	/* use this to pass READA down to submit_bh */
+#define OCFS2_BH_READAHEAD         8
 
 static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off,
 				   struct buffer_head **bh, int flags,
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 504595d6cf6..305cba3681f 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -320,8 +320,12 @@ static int compute_max_sectors(struct block_device *bdev)
 		max_pages = q->max_hw_segments;
 	max_pages--; /* Handle I/Os that straddle a page */
 
-	max_sectors = max_pages << (PAGE_SHIFT - 9);
-
+	if (max_pages) {
+		max_sectors = max_pages << (PAGE_SHIFT - 9);
+	} else {
+		/* If BIO contains 1 or less than 1 page. */
+		max_sectors = q->max_sectors;
+	}
 	/* Why is fls() 1-based???? */
 	pow_two_sectors = 1 << (fls(max_sectors) - 1);
 
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 3d494d1a5f3..04e01915b86 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -74,14 +74,14 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
 int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
 	int error = 0;
-	unsigned long offset, blk;
-	int i, num, stored;
+	unsigned long offset, blk, last_ra_blk = 0;
+	int i, stored;
 	struct buffer_head * bh, * tmp;
 	struct ocfs2_dir_entry * de;
 	int err;
 	struct inode *inode = filp->f_dentry->d_inode;
 	struct super_block * sb = inode->i_sb;
-	int have_disk_lock = 0;
+	unsigned int ra_sectors = 16;
 
 	mlog_entry("dirino=%llu\n",
 		   (unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -95,9 +95,8 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
 			mlog_errno(error);
 		/* we haven't got any yet, so propagate the error. */
 		stored = error;
-		goto bail;
+		goto bail_nolock;
 	}
-	have_disk_lock = 1;
 
 	offset = filp->f_pos & (sb->s_blocksize - 1);
 
@@ -113,16 +112,21 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
 			continue;
 		}
 
-		/*
-		 * Do the readahead (8k)
-		 */
-		if (!offset) {
-			for (i = 16 >> (sb->s_blocksize_bits - 9), num = 0;
+		/* The idea here is to begin with 8k read-ahead and to stay
+		 * 4k ahead of our current position.
+		 *
+		 * TODO: Use the pagecache for this. We just need to
+		 * make sure it's cluster-safe... */
+		if (!last_ra_blk
+		    || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) {
+			for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
 			     i > 0; i--) {
 				tmp = ocfs2_bread(inode, ++blk, &err, 1);
 				if (tmp)
 					brelse(tmp);
 			}
+			last_ra_blk = blk;
+			ra_sectors = 8;
 		}
 
 revalidate:
@@ -194,9 +198,9 @@ revalidate:
 
 	stored = 0;
 bail:
-	if (have_disk_lock)
-		ocfs2_meta_unlock(inode, 0);
+	ocfs2_meta_unlock(inode, 0);
 
+bail_nolock:
 	mlog_exit(stored);
 
 	return stored;
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 42775e2bbe2..f13a4bac41f 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -367,12 +367,10 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data)
 			goto do_ast;
 	}
 
-	mlog(ML_ERROR, "got %sast for unknown lock!  cookie=%u:%llu, "
-		       "name=%.*s, namelen=%u\n", 
-		       past->type == DLM_AST ? "" : "b", 
-		       dlm_get_lock_cookie_node(cookie),
-		       dlm_get_lock_cookie_seq(cookie),
-		       locklen, name, locklen);
+	mlog(0, "got %sast for unknown lock!  cookie=%u:%llu, "
+	     "name=%.*s, namelen=%u\n", past->type == DLM_AST ? "" : "b", 
+	     dlm_get_lock_cookie_node(cookie), dlm_get_lock_cookie_seq(cookie),
+	     locklen, name, locklen);
 
 	ret = DLM_NORMAL;
 unlock_out:
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 762eb1fbb34..151b41781ea 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1330,6 +1330,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
 	lvb->lvb_imtime_packed =
 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
+	lvb->lvb_iattr    = cpu_to_be32(oi->ip_attr);
 
 	mlog_meta_lvb(0, lockres);
 
@@ -1360,6 +1361,9 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
 	oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters);
 	i_size_write(inode, be64_to_cpu(lvb->lvb_isize));
 
+	oi->ip_attr = be32_to_cpu(lvb->lvb_iattr);
+	ocfs2_set_inode_flags(inode);
+
 	/* fast-symlinks are a special case */
 	if (S_ISLNK(inode->i_mode) && !oi->ip_clusters)
 		inode->i_blocks = 0;
@@ -2899,8 +2903,9 @@ void ocfs2_dump_meta_lvb_info(u64 level,
 	     be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
 	     be16_to_cpu(lvb->lvb_imode));
 	mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, "
-	     "mtime_packed 0x%llx\n", be16_to_cpu(lvb->lvb_inlink),
+	     "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink),
 	     (long long)be64_to_cpu(lvb->lvb_iatime_packed),
 	     (long long)be64_to_cpu(lvb->lvb_ictime_packed),
-	     (long long)be64_to_cpu(lvb->lvb_imtime_packed));
+	     (long long)be64_to_cpu(lvb->lvb_imtime_packed),
+	     be32_to_cpu(lvb->lvb_iattr));
 }
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 8f2d1db2d9e..243ae862ece 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -27,7 +27,7 @@
 #ifndef DLMGLUE_H
 #define DLMGLUE_H
 
-#define OCFS2_LVB_VERSION 2
+#define OCFS2_LVB_VERSION 3
 
 struct ocfs2_meta_lvb {
 	__be32       lvb_version;
@@ -40,7 +40,8 @@ struct ocfs2_meta_lvb {
 	__be64       lvb_isize;
 	__be16       lvb_imode;
 	__be16       lvb_inlink;
-	__be32       lvb_reserved[3];
+	__be32       lvb_iattr;
+	__be32       lvb_reserved[2];
 };
 
 /* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index a9559c87453..2bbfa17090c 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -44,6 +44,7 @@
 #include "file.h"
 #include "sysfile.h"
 #include "inode.h"
+#include "ioctl.h"
 #include "journal.h"
 #include "mmap.h"
 #include "suballoc.h"
@@ -1227,10 +1228,12 @@ const struct file_operations ocfs2_fops = {
 	.open		= ocfs2_file_open,
 	.aio_read	= ocfs2_file_aio_read,
 	.aio_write	= ocfs2_file_aio_write,
+	.ioctl		= ocfs2_ioctl,
 };
 
 const struct file_operations ocfs2_dops = {
 	.read		= generic_read_dir,
 	.readdir	= ocfs2_readdir,
 	.fsync		= ocfs2_sync_file,
+	.ioctl		= ocfs2_ioctl,
 };
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 327a5b7b86e..7bcf6915459 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -71,6 +71,26 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
 				    struct inode *inode,
 				    struct buffer_head *fe_bh);
 
+void ocfs2_set_inode_flags(struct inode *inode)
+{
+	unsigned int flags = OCFS2_I(inode)->ip_attr;
+
+	inode->i_flags &= ~(S_IMMUTABLE |
+		S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
+
+	if (flags & OCFS2_IMMUTABLE_FL)
+		inode->i_flags |= S_IMMUTABLE;
+
+	if (flags & OCFS2_SYNC_FL)
+		inode->i_flags |= S_SYNC;
+	if (flags & OCFS2_APPEND_FL)
+		inode->i_flags |= S_APPEND;
+	if (flags & OCFS2_NOATIME_FL)
+		inode->i_flags |= S_NOATIME;
+	if (flags & OCFS2_DIRSYNC_FL)
+		inode->i_flags |= S_DIRSYNC;
+}
+
 struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb,
 				     u64 blkno,
 				     int delete_vote)
@@ -260,7 +280,6 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 		inode->i_blocks =
 			ocfs2_align_bytes_to_sectors(le64_to_cpu(fe->i_size));
 	inode->i_mapping->a_ops = &ocfs2_aops;
-	inode->i_flags |= S_NOATIME;
 	inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime);
 	inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec);
 	inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime);
@@ -276,6 +295,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 
 	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
 	OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT;
+	OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
 
 	if (create_ino)
 		inode->i_ino = ino_from_blkno(inode->i_sb,
@@ -330,6 +350,9 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 	ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres,
 				  OCFS2_LOCK_TYPE_DATA, inode);
 
+	ocfs2_set_inode_flags(inode);
+	inode->i_flags |= S_NOATIME;
+
 	status = 0;
 bail:
 	mlog_exit(status);
@@ -1027,12 +1050,8 @@ struct buffer_head *ocfs2_bread(struct inode *inode,
 	u64 p_blkno;
 	int readflags = OCFS2_BH_CACHED;
 
-#if 0
-	/* only turn this on if we know we can deal with read_block
-	 * returning nothing */
 	if (reada)
 		readflags |= OCFS2_BH_READAHEAD;
-#endif
 
 	if (((u64)block << inode->i_sb->s_blocksize_bits) >=
 	    i_size_read(inode)) {
@@ -1131,6 +1150,7 @@ int ocfs2_mark_inode_dirty(struct ocfs2_journal_handle *handle,
 
 	spin_lock(&OCFS2_I(inode)->ip_lock);
 	fe->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters);
+	fe->i_attr = cpu_to_le32(OCFS2_I(inode)->ip_attr);
 	spin_unlock(&OCFS2_I(inode)->ip_lock);
 
 	fe->i_size = cpu_to_le64(i_size_read(inode));
@@ -1169,6 +1189,8 @@ void ocfs2_refresh_inode(struct inode *inode,
 	spin_lock(&OCFS2_I(inode)->ip_lock);
 
 	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
+	OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
+	ocfs2_set_inode_flags(inode);
 	i_size_write(inode, le64_to_cpu(fe->i_size));
 	inode->i_nlink = le16_to_cpu(fe->i_links_count);
 	inode->i_uid = le32_to_cpu(fe->i_uid);
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 35140f6cf84..4d1e5399256 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -56,6 +56,7 @@ struct ocfs2_inode_info
 	struct ocfs2_journal_handle	*ip_handle;
 
 	u32				ip_flags; /* see below */
+	u32				ip_attr; /* inode attributes */
 
 	/* protected by recovery_lock. */
 	struct inode			*ip_next_orphan;
@@ -142,4 +143,6 @@ int ocfs2_mark_inode_dirty(struct ocfs2_journal_handle *handle,
 int ocfs2_aio_read(struct file *file, struct kiocb *req, struct iocb *iocb);
 int ocfs2_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb);
 
+void ocfs2_set_inode_flags(struct inode *inode);
+
 #endif /* OCFS2_INODE_H */
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
new file mode 100644
index 00000000000..3663cef8068
--- /dev/null
+++ b/fs/ocfs2/ioctl.c
@@ -0,0 +1,136 @@
+/*
+ * linux/fs/ocfs2/ioctl.c
+ *
+ * Copyright (C) 2006 Herbert Poetzl
+ * adapted from Remy Card's ext2/ioctl.c
+ */
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+
+#define MLOG_MASK_PREFIX ML_INODE
+#include <cluster/masklog.h>
+
+#include "ocfs2.h"
+#include "alloc.h"
+#include "dlmglue.h"
+#include "inode.h"
+#include "journal.h"
+
+#include "ocfs2_fs.h"
+#include "ioctl.h"
+
+#include <linux/ext2_fs.h>
+
+static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags)
+{
+	int status;
+
+	status = ocfs2_meta_lock(inode, NULL, NULL, 0);
+	if (status < 0) {
+		mlog_errno(status);
+		return status;
+	}
+	*flags = OCFS2_I(inode)->ip_attr;
+	ocfs2_meta_unlock(inode, 0);
+
+	mlog_exit(status);
+	return status;
+}
+
+static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
+				unsigned mask)
+{
+	struct ocfs2_inode_info *ocfs2_inode = OCFS2_I(inode);
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_journal_handle *handle = NULL;
+	struct buffer_head *bh = NULL;
+	unsigned oldflags;
+	int status;
+
+	mutex_lock(&inode->i_mutex);
+
+	status = ocfs2_meta_lock(inode, NULL, &bh, 1);
+	if (status < 0) {
+		mlog_errno(status);
+		goto bail;
+	}
+
+	status = -EROFS;
+	if (IS_RDONLY(inode))
+		goto bail_unlock;
+
+	status = -EACCES;
+	if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
+		goto bail_unlock;
+
+	if (!S_ISDIR(inode->i_mode))
+		flags &= ~OCFS2_DIRSYNC_FL;
+
+	handle = ocfs2_start_trans(osb, NULL, OCFS2_INODE_UPDATE_CREDITS);
+	if (IS_ERR(handle)) {
+		status = PTR_ERR(handle);
+		mlog_errno(status);
+		goto bail_unlock;
+	}
+
+	oldflags = ocfs2_inode->ip_attr;
+	flags = flags & mask;
+	flags |= oldflags & ~mask;
+
+	/*
+	 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
+	 * the relevant capability.
+	 */
+	status = -EPERM;
+	if ((oldflags & OCFS2_IMMUTABLE_FL) || ((flags ^ oldflags) &
+		(OCFS2_APPEND_FL | OCFS2_IMMUTABLE_FL))) {
+		if (!capable(CAP_LINUX_IMMUTABLE))
+			goto bail_unlock;
+	}
+
+	ocfs2_inode->ip_attr = flags;
+	ocfs2_set_inode_flags(inode);
+
+	status = ocfs2_mark_inode_dirty(handle, inode, bh);
+	if (status < 0)
+		mlog_errno(status);
+
+	ocfs2_commit_trans(handle);
+bail_unlock:
+	ocfs2_meta_unlock(inode, 1);
+bail:
+	mutex_unlock(&inode->i_mutex);
+
+	if (bh)
+		brelse(bh);
+
+	mlog_exit(status);
+	return status;
+}
+
+int ocfs2_ioctl(struct inode * inode, struct file * filp,
+	unsigned int cmd, unsigned long arg)
+{
+	unsigned int flags;
+	int status;
+
+	switch (cmd) {
+	case OCFS2_IOC_GETFLAGS:
+		status = ocfs2_get_inode_attr(inode, &flags);
+		if (status < 0)
+			return status;
+
+		flags &= OCFS2_FL_VISIBLE;
+		return put_user(flags, (int __user *) arg);
+	case OCFS2_IOC_SETFLAGS:
+		if (get_user(flags, (int __user *) arg))
+			return -EFAULT;
+
+		return ocfs2_set_inode_attr(inode, flags,
+			OCFS2_FL_MODIFIABLE);
+	default:
+		return -ENOTTY;
+	}
+}
+
diff --git a/fs/ocfs2/ioctl.h b/fs/ocfs2/ioctl.h
new file mode 100644
index 00000000000..4a7c82931db
--- /dev/null
+++ b/fs/ocfs2/ioctl.h
@@ -0,0 +1,16 @@
+/*
+ * ioctl.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2006 Herbert Poetzl
+ *
+ */
+
+#ifndef OCFS2_IOCTL_H
+#define OCFS2_IOCTL_H
+
+int ocfs2_ioctl(struct inode * inode, struct file * filp,
+	unsigned int cmd, unsigned long arg);
+
+#endif /* OCFS2_IOCTL_H */
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 0673862c8bd..0d3e939b1f5 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -56,6 +56,7 @@
 #include "journal.h"
 #include "namei.h"
 #include "suballoc.h"
+#include "super.h"
 #include "symlink.h"
 #include "sysfile.h"
 #include "uptodate.h"
@@ -310,13 +311,6 @@ static int ocfs2_mknod(struct inode *dir,
 	/* get our super block */
 	osb = OCFS2_SB(dir->i_sb);
 
-	if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) {
-		mlog(ML_ERROR, "inode %llu has i_nlink of %u\n",
-		     (unsigned long long)OCFS2_I(dir)->ip_blkno, dir->i_nlink);
-		status = -EMLINK;
-		goto leave;
-	}
-
 	handle = ocfs2_alloc_handle(osb);
 	if (handle == NULL) {
 		status = -ENOMEM;
@@ -331,6 +325,11 @@ static int ocfs2_mknod(struct inode *dir,
 		goto leave;
 	}
 
+	if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) {
+		status = -EMLINK;
+		goto leave;
+	}
+
 	dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
 	if (!dirfe->i_links_count) {
 		/* can't make a file in a deleted directory. */
@@ -643,11 +642,6 @@ static int ocfs2_link(struct dentry *old_dentry,
 		goto bail;
 	}
 
-	if (inode->i_nlink >= OCFS2_LINK_MAX) {
-		err = -EMLINK;
-		goto bail;
-	}
-
 	handle = ocfs2_alloc_handle(osb);
 	if (handle == NULL) {
 		err = -ENOMEM;
@@ -661,6 +655,11 @@ static int ocfs2_link(struct dentry *old_dentry,
 		goto bail;
 	}
 
+	if (!dir->i_nlink) {
+		err = -ENOENT;
+		goto bail;
+	}
+
 	err = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
 					dentry->d_name.len);
 	if (err)
@@ -1964,13 +1963,8 @@ restart:
 				}
 				num++;
 
-				/* XXX: questionable readahead stuff here */
 				bh = ocfs2_bread(dir, b++, &err, 1);
 				bh_use[ra_max] = bh;
-#if 0		// ???
-				if (bh)
-					ll_rw_block(READ, 1, &bh);
-#endif
 			}
 		}
 		if ((bh = bh_use[ra_ptr++]) == NULL)
@@ -1978,6 +1972,10 @@ restart:
 		wait_on_buffer(bh);
 		if (!buffer_uptodate(bh)) {
 			/* read error, skip block & hope for the best */
+			ocfs2_error(dir->i_sb, "reading directory %llu, "
+				    "offset %lu\n",
+				    (unsigned long long)OCFS2_I(dir)->ip_blkno,
+				    block);
 			brelse(bh);
 			goto next;
 		}
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index c5b1ac547c1..3330a5dc6be 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -114,6 +114,26 @@
 #define OCFS2_CHAIN_FL		(0x00000400)	/* Chain allocator */
 #define OCFS2_DEALLOC_FL	(0x00000800)	/* Truncate log */
 
+/* Inode attributes, keep in sync with EXT2 */
+#define OCFS2_SECRM_FL		(0x00000001)	/* Secure deletion */
+#define OCFS2_UNRM_FL		(0x00000002)	/* Undelete */
+#define OCFS2_COMPR_FL		(0x00000004)	/* Compress file */
+#define OCFS2_SYNC_FL		(0x00000008)	/* Synchronous updates */
+#define OCFS2_IMMUTABLE_FL	(0x00000010)	/* Immutable file */
+#define OCFS2_APPEND_FL		(0x00000020)	/* writes to file may only append */
+#define OCFS2_NODUMP_FL		(0x00000040)	/* do not dump file */
+#define OCFS2_NOATIME_FL	(0x00000080)	/* do not update atime */
+#define OCFS2_DIRSYNC_FL	(0x00010000)	/* dirsync behaviour (directories only) */
+
+#define OCFS2_FL_VISIBLE	(0x000100FF)	/* User visible flags */
+#define OCFS2_FL_MODIFIABLE	(0x000100FF)	/* User modifiable flags */
+
+/*
+ * ioctl commands
+ */
+#define OCFS2_IOC_GETFLAGS	_IOR('f', 1, long)
+#define OCFS2_IOC_SETFLAGS	_IOW('f', 2, long)
+
 /*
  * Journal Flags (ocfs2_dinode.id1.journal1.i_flags)
  */
@@ -399,7 +419,9 @@ struct ocfs2_dinode {
 	__le32 i_atime_nsec;
 	__le32 i_ctime_nsec;
 	__le32 i_mtime_nsec;
-/*70*/	__le64 i_reserved1[9];
+	__le32 i_attr;
+	__le32 i_reserved1;
+/*70*/	__le64 i_reserved2[8];
 /*B8*/	union {
 		__le64 i_pad1;		/* Generic way to refer to this
 					   64bit union */
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index b8a00a79332..9707ed7a320 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -206,7 +206,10 @@ static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi,
 }
 
 /* Warning: even if it returns true, this does *not* guarantee that
- * the block is stored in our inode metadata cache. */
+ * the block is stored in our inode metadata cache. 
+ * 
+ * This can be called under lock_buffer()
+ */
 int ocfs2_buffer_uptodate(struct inode *inode,
 			  struct buffer_head *bh)
 {
@@ -226,6 +229,16 @@ int ocfs2_buffer_uptodate(struct inode *inode,
 	return ocfs2_buffer_cached(OCFS2_I(inode), bh);
 }
 
+/* 
+ * Determine whether a buffer is currently out on a read-ahead request.
+ * ip_io_sem should be held to serialize submitters with the logic here.
+ */
+int ocfs2_buffer_read_ahead(struct inode *inode,
+			    struct buffer_head *bh)
+{
+	return buffer_locked(bh) && ocfs2_buffer_cached(OCFS2_I(inode), bh);
+}
+
 /* Requires ip_lock */
 static void ocfs2_append_cache_array(struct ocfs2_caching_info *ci,
 				     sector_t block)
@@ -403,7 +416,11 @@ out_free:
  *
  * Note that this function may actually fail to insert the block if
  * memory cannot be allocated. This is not fatal however (but may
- * result in a performance penalty) */
+ * result in a performance penalty)
+ *
+ * Readahead buffers can be passed in here before the I/O request is
+ * completed.
+ */
 void ocfs2_set_buffer_uptodate(struct inode *inode,
 			       struct buffer_head *bh)
 {
diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h
index 01cd32d26b0..2e73206059a 100644
--- a/fs/ocfs2/uptodate.h
+++ b/fs/ocfs2/uptodate.h
@@ -40,5 +40,7 @@ void ocfs2_set_new_buffer_uptodate(struct inode *inode,
 				   struct buffer_head *bh);
 void ocfs2_remove_from_cache(struct inode *inode,
 			     struct buffer_head *bh);
+int ocfs2_buffer_read_ahead(struct inode *inode,
+			    struct buffer_head *bh);
 
 #endif /* OCFS2_UPTODATE_H */
author	Jeff Garzik <jeff@garzik.org>	2006-09-24 01:52:47 -0400
committer	Jeff Garzik <jeff@garzik.org>	2006-09-24 01:52:47 -0400
commit	23930fa1cebfea6f79881c588ccd1b0781e49e3f (patch)
tree	36d29e3f83661c4f5f45b6f74ac0d5f9886867a8 /fs
parent	36b35a5be0e4b406acd816e2122d153e875105be (diff)
parent	4f5537de7c1531398e84e18a24f667e49cc94208 (diff)