Merge Linux 2.6.23

author: David Woodhouse <dwmw2@infradead.org> 2007-10-13 14:43:54 +0100
committer: David Woodhouse <dwmw2@infradead.org> 2007-10-13 14:43:54 +0100
commit: b160292cc216a50fd0cd386b0bda2cd48352c73b (patch)
tree: ef07cf98f91353ee4c9ec1e1ca7a2a5d9d4b538a /fs
parent: b37bde147890c8fea8369a5a4e230dabdea4ebfb (diff)
parent: bbf25010f1a6b761914430f5fca081ec8c7accd1 (diff)
61 files changed, 698 insertions, 521 deletions
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 08fa320b7e6..15e05a15b57 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -92,23 +92,6 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
 	return fid;
 }
 
-struct p9_fid *v9fs_fid_lookup_remove(struct dentry *dentry)
-{
-	struct p9_fid *fid;
-	struct v9fs_dentry *dent;
-
-	dent = dentry->d_fsdata;
-	fid = v9fs_fid_lookup(dentry);
-	if (!IS_ERR(fid)) {
-		spin_lock(&dent->lock);
-		list_del(&fid->dlist);
-		spin_unlock(&dent->lock);
-	}
-
-	return fid;
-}
-
-
 /**
  * v9fs_fid_clone - lookup the fid for a dentry, clone a private copy and
  * 	release it
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
index 47a0ba74287..26e07df783b 100644
--- a/fs/9p/fid.h
+++ b/fs/9p/fid.h
@@ -28,6 +28,5 @@ struct v9fs_dentry {
 };
 
 struct p9_fid *v9fs_fid_lookup(struct dentry *dentry);
-struct p9_fid *v9fs_fid_lookup_remove(struct dentry *dentry);
 struct p9_fid *v9fs_fid_clone(struct dentry *dentry);
 int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid);
diff --git a/fs/Kconfig b/fs/Kconfig
index 84fb8428c02..bb02b39380a 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -441,9 +441,6 @@ config OCFS2_FS
 
 	  Note: Features which OCFS2 does not support yet:
 	          - extended attributes
-		  - shared writeable mmap
-	          - loopback is supported, but data written will not
-	            be cluster coherent.
 	          - quotas
 	          - cluster aware flock
 	          - Directory change notification (F_NOTIFY)
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index a3684dcc76e..6f8c96fb29e 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -235,8 +235,8 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
 	err = do_add_mount(newmnt, nd, MNT_SHRINKABLE, &afs_vfsmounts);
 	switch (err) {
 	case 0:
-		mntput(nd->mnt);
 		dput(nd->dentry);
+		mntput(nd->mnt);
 		nd->mnt = newmnt;
 		nd->dentry = dget(newmnt->mnt_root);
 		schedule_delayed_work(&afs_mntpt_expiry_timer,
diff --git a/fs/aio.c b/fs/aio.c
index dbe699e9828..ea2e1982038 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1562,6 +1562,7 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 		fput(file);
 		return -EAGAIN;
 	}
+	req->ki_filp = file;
 	if (iocb->aio_flags & IOCB_FLAG_RESFD) {
 		/*
 		 * If the IOCB_FLAG_RESFD flag of aio_flags is set, get an
@@ -1576,7 +1577,6 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 		}
 	}
 
-	req->ki_filp = file;
 	ret = put_user(req->ki_key, &user_iocb->aio_key);
 	if (unlikely(ret)) {
 		dprintk("EFAULT: aio_key\n");
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 861141b4f6d..fcb3405bb14 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -742,6 +742,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 	 * __start to address 4 so that is okay).
 	 */
 	if (rev > OLD_FLAT_VERSION) {
+		unsigned long persistent = 0;
 		for (i=0; i < relocs; i++) {
 			unsigned long addr, relval;
 
@@ -749,6 +750,8 @@ static int load_flat_file(struct linux_binprm * bprm,
 			   relocated (of course, the address has to be
 			   relocated first).  */
 			relval = ntohl(reloc[i]);
+			if (flat_set_persistent (relval, &persistent))
+				continue;
 			addr = flat_get_relocate_addr(relval);
 			rp = (unsigned long *) calc_reloc(addr, libinfo, id, 1);
 			if (rp == (unsigned long *)RELOC_FAILED) {
@@ -757,7 +760,8 @@ static int load_flat_file(struct linux_binprm * bprm,
 			}
 
 			/* Get the pointer's value.  */
-			addr = flat_get_addr_from_rp(rp, relval, flags);
+			addr = flat_get_addr_from_rp(rp, relval, flags,
+							&persistent);
 			if (addr != 0) {
 				/*
 				 * Do the relocation.  PIC relocs in the data section are
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index a6c9078af12..37310b0e810 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -2311,8 +2311,10 @@ static int do_wireless_ioctl(unsigned int fd, unsigned int cmd, unsigned long ar
 	struct iwreq __user *iwr_u;
 	struct iw_point __user *iwp;
 	struct compat_iw_point __user *iwp_u;
-	compat_caddr_t pointer;
+	compat_caddr_t pointer_u;
+	void __user *pointer;
 	__u16 length, flags;
+	int ret;
 
 	iwr_u = compat_ptr(arg);
 	iwp_u = (struct compat_iw_point __user *) &iwr_u->u.data;
@@ -2330,17 +2332,29 @@ static int do_wireless_ioctl(unsigned int fd, unsigned int cmd, unsigned long ar
 			   sizeof(iwr->ifr_ifrn.ifrn_name)))
 		return -EFAULT;
 
-	if (__get_user(pointer, &iwp_u->pointer) ||
+	if (__get_user(pointer_u, &iwp_u->pointer) ||
 	    __get_user(length, &iwp_u->length) ||
 	    __get_user(flags, &iwp_u->flags))
 		return -EFAULT;
 
-	if (__put_user(compat_ptr(pointer), &iwp->pointer) ||
+	if (__put_user(compat_ptr(pointer_u), &iwp->pointer) ||
 	    __put_user(length, &iwp->length) ||
 	    __put_user(flags, &iwp->flags))
 		return -EFAULT;
 
-	return sys_ioctl(fd, cmd, (unsigned long) iwr);
+	ret = sys_ioctl(fd, cmd, (unsigned long) iwr);
+
+	if (__get_user(pointer, &iwp->pointer) ||
+	    __get_user(length, &iwp->length) ||
+	    __get_user(flags, &iwp->flags))
+		return -EFAULT;
+
+	if (__put_user(ptr_to_compat(pointer), &iwp_u->pointer) ||
+	    __put_user(length, &iwp_u->length) ||
+	    __put_user(flags, &iwp_u->flags))
+		return -EFAULT;
+
+	return ret;
 }
 
 /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
@@ -3176,6 +3190,8 @@ COMPATIBLE_IOCTL(SIOCSIWRETRY)
 COMPATIBLE_IOCTL(SIOCGIWRETRY)
 COMPATIBLE_IOCTL(SIOCSIWPOWER)
 COMPATIBLE_IOCTL(SIOCGIWPOWER)
+COMPATIBLE_IOCTL(SIOCSIWAUTH)
+COMPATIBLE_IOCTL(SIOCGIWAUTH)
 /* hiddev */
 COMPATIBLE_IOCTL(HIDIOCGVERSION)
 COMPATIBLE_IOCTL(HIDIOCAPPLICATION)
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 5d40ad13ab5..131954b3fb9 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -357,10 +357,6 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
 		ecryptfs_printk(KERN_DEBUG, "Is a special file; returning\n");
 		goto out;
 	}
-	if (special_file(lower_inode->i_mode)) {
-		ecryptfs_printk(KERN_DEBUG, "Is a special file; returning\n");
-		goto out;
-	}
 	if (!nd) {
 		ecryptfs_printk(KERN_DEBUG, "We have a NULL nd, just leave"
 				"as we *think* we are about to unlink\n");
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index e4ab7bc14ef..fd3f94d4a66 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -834,7 +834,8 @@ static void ecryptfs_sync_page(struct page *page)
 		ecryptfs_printk(KERN_DEBUG, "find_lock_page failed\n");
 		return;
 	}
-	lower_page->mapping->a_ops->sync_page(lower_page);
+	if (lower_page->mapping->a_ops->sync_page)
+		lower_page->mapping->a_ops->sync_page(lower_page);
 	ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16x]\n",
 			lower_page->index);
 	unlock_page(lower_page);
diff --git a/fs/exec.c b/fs/exec.c
index c21a8cc0627..073b0b8c6d0 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -50,7 +50,6 @@
 #include <linux/tsacct_kern.h>
 #include <linux/cn_proc.h>
 #include <linux/audit.h>
-#include <linux/signalfd.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -784,7 +783,6 @@ static int de_thread(struct task_struct *tsk)
 	 * and we can just re-use it all.
 	 */
 	if (atomic_read(&oldsighand->count) <= 1) {
-		signalfd_detach(tsk);
 		exit_itimers(sig);
 		return 0;
 	}
@@ -923,7 +921,6 @@ static int de_thread(struct task_struct *tsk)
 	sig->flags = 0;
 
 no_thread_group:
-	signalfd_detach(tsk);
 	exit_itimers(sig);
 	if (leader)
 		release_task(leader);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 1586807b817..c1fa1908dba 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -140,7 +140,8 @@ struct dx_frame
 struct dx_map_entry
 {
 	u32 hash;
-	u32 offs;
+	u16 offs;
+	u16 size;
 };
 
 #ifdef CONFIG_EXT3_INDEX
@@ -379,13 +380,28 @@ dx_probe(struct dentry *dentry, struct inode *dir,
 
 	entries = (struct dx_entry *) (((char *)&root->info) +
 				       root->info.info_length);
-	assert(dx_get_limit(entries) == dx_root_limit(dir,
-						      root->info.info_length));
+
+	if (dx_get_limit(entries) != dx_root_limit(dir,
+						   root->info.info_length)) {
+		ext3_warning(dir->i_sb, __FUNCTION__,
+			     "dx entry: limit != root limit");
+		brelse(bh);
+		*err = ERR_BAD_DX_DIR;
+		goto fail;
+	}
+
 	dxtrace (printk("Look up %x", hash));
 	while (1)
 	{
 		count = dx_get_count(entries);
-		assert (count && count <= dx_get_limit(entries));
+		if (!count || count > dx_get_limit(entries)) {
+			ext3_warning(dir->i_sb, __FUNCTION__,
+				     "dx entry: no count or count > limit");
+			brelse(bh);
+			*err = ERR_BAD_DX_DIR;
+			goto fail2;
+		}
+
 		p = entries + 1;
 		q = entries + count - 1;
 		while (p <= q)
@@ -423,8 +439,15 @@ dx_probe(struct dentry *dentry, struct inode *dir,
 		if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
 			goto fail2;
 		at = entries = ((struct dx_node *) bh->b_data)->entries;
-		assert (dx_get_limit(entries) == dx_node_limit (dir));
+		if (dx_get_limit(entries) != dx_node_limit (dir)) {
+			ext3_warning(dir->i_sb, __FUNCTION__,
+				     "dx entry: limit != node limit");
+			brelse(bh);
+			*err = ERR_BAD_DX_DIR;
+			goto fail2;
+		}
 		frame++;
+		frame->bh = NULL;
 	}
 fail2:
 	while (frame >= frame_in) {
@@ -432,6 +455,10 @@ fail2:
 		frame--;
 	}
 fail:
+	if (*err == ERR_BAD_DX_DIR)
+		ext3_warning(dir->i_sb, __FUNCTION__,
+			     "Corrupt dir inode %ld, running e2fsck is "
+			     "recommended.", dir->i_ino);
 	return NULL;
 }
 
@@ -671,6 +698,10 @@ errout:
  * Directory block splitting, compacting
  */
 
+/*
+ * Create map of hash values, offsets, and sizes, stored at end of block.
+ * Returns number of entries mapped.
+ */
 static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
 			struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
 {
@@ -684,7 +715,8 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
 			ext3fs_dirhash(de->name, de->name_len, &h);
 			map_tail--;
 			map_tail->hash = h.hash;
-			map_tail->offs = (u32) ((char *) de - base);
+			map_tail->offs = (u16) ((char *) de - base);
+			map_tail->size = le16_to_cpu(de->rec_len);
 			count++;
 			cond_resched();
 		}
@@ -694,6 +726,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
 	return count;
 }
 
+/* Sort map by hash value */
 static void dx_sort_map (struct dx_map_entry *map, unsigned count)
 {
         struct dx_map_entry *p, *q, *top = map + count - 1;
@@ -1091,6 +1124,10 @@ static inline void ext3_set_de_type(struct super_block *sb,
 }
 
 #ifdef CONFIG_EXT3_INDEX
+/*
+ * Move count entries from end of map between two memory locations.
+ * Returns pointer to last entry moved.
+ */
 static struct ext3_dir_entry_2 *
 dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
 {
@@ -1109,6 +1146,10 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
 	return (struct ext3_dir_entry_2 *) (to - rec_len);
 }
 
+/*
+ * Compact each dir entry in the range to the minimal rec_len.
+ * Returns pointer to last entry in range.
+ */
 static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
 {
 	struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base;
@@ -1131,6 +1172,11 @@ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
 	return prev;
 }
 
+/*
+ * Split a full leaf block to make room for a new dir entry.
+ * Allocate a new block, and move entries so that they are approx. equally full.
+ * Returns pointer to de in block into which the new entry will be inserted.
+ */
 static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
 			struct buffer_head **bh,struct dx_frame *frame,
 			struct dx_hash_info *hinfo, int *error)
@@ -1142,7 +1188,7 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
 	u32 hash2;
 	struct dx_map_entry *map;
 	char *data1 = (*bh)->b_data, *data2;
-	unsigned split;
+	unsigned split, move, size, i;
 	struct ext3_dir_entry_2 *de = NULL, *de2;
 	int	err = 0;
 
@@ -1170,8 +1216,19 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
 	count = dx_make_map ((struct ext3_dir_entry_2 *) data1,
 			     blocksize, hinfo, map);
 	map -= count;
-	split = count/2; // need to adjust to actual middle
 	dx_sort_map (map, count);
+	/* Split the existing block in the middle, size-wise */
+	size = 0;
+	move = 0;
+	for (i = count-1; i >= 0; i--) {
+		/* is more than half of this entry in 2nd half of the block? */
+		if (size + map[i].size/2 > blocksize/2)
+			break;
+		size += map[i].size;
+		move++;
+	}
+	/* map index at which we will split */
+	split = count - move;
 	hash2 = map[split].hash;
 	continued = hash2 == map[split - 1].hash;
 	dxtrace(printk("Split block %i at %x, %i/%i\n",
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 22cfdd61c06..9537316a071 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2578,8 +2578,11 @@ static int ext3_release_dquot(struct dquot *dquot)
 
 	handle = ext3_journal_start(dquot_to_inode(dquot),
 					EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb));
-	if (IS_ERR(handle))
+	if (IS_ERR(handle)) {
+		/* Release dquot anyway to avoid endless cycle in dqput() */
+		dquot_release(dquot);
 		return PTR_ERR(handle);
+	}
 	ret = dquot_release(dquot);
 	err = ext3_journal_stop(handle);
 	if (!ret)
@@ -2712,6 +2715,12 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
 	struct buffer_head *bh;
 	handle_t *handle = journal_current_handle();
 
+	if (!handle) {
+		printk(KERN_WARNING "EXT3-fs: Quota write (off=%Lu, len=%Lu)"
+			" cancelled because transaction is not started.\n",
+			(unsigned long long)off, (unsigned long long)len);
+		return -EIO;
+	}
 	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
 	while (towrite > 0) {
 		tocopy = sb->s_blocksize - offset < towrite ?
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index da224974af7..5fdb862e71c 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -140,7 +140,8 @@ struct dx_frame
 struct dx_map_entry
 {
 	u32 hash;
-	u32 offs;
+	u16 offs;
+	u16 size;
 };
 
 #ifdef CONFIG_EXT4_INDEX
@@ -379,13 +380,28 @@ dx_probe(struct dentry *dentry, struct inode *dir,
 
 	entries = (struct dx_entry *) (((char *)&root->info) +
 				       root->info.info_length);
-	assert(dx_get_limit(entries) == dx_root_limit(dir,
-						      root->info.info_length));
+
+	if (dx_get_limit(entries) != dx_root_limit(dir,
+						   root->info.info_length)) {
+		ext4_warning(dir->i_sb, __FUNCTION__,
+			     "dx entry: limit != root limit");
+		brelse(bh);
+		*err = ERR_BAD_DX_DIR;
+		goto fail;
+	}
+
 	dxtrace (printk("Look up %x", hash));
 	while (1)
 	{
 		count = dx_get_count(entries);
-		assert (count && count <= dx_get_limit(entries));
+		if (!count || count > dx_get_limit(entries)) {
+			ext4_warning(dir->i_sb, __FUNCTION__,
+				     "dx entry: no count or count > limit");
+			brelse(bh);
+			*err = ERR_BAD_DX_DIR;
+			goto fail2;
+		}
+
 		p = entries + 1;
 		q = entries + count - 1;
 		while (p <= q)
@@ -423,8 +439,15 @@ dx_probe(struct dentry *dentry, struct inode *dir,
 		if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err)))
 			goto fail2;
 		at = entries = ((struct dx_node *) bh->b_data)->entries;
-		assert (dx_get_limit(entries) == dx_node_limit (dir));
+		if (dx_get_limit(entries) != dx_node_limit (dir)) {
+			ext4_warning(dir->i_sb, __FUNCTION__,
+				     "dx entry: limit != node limit");
+			brelse(bh);
+			*err = ERR_BAD_DX_DIR;
+			goto fail2;
+		}
 		frame++;
+		frame->bh = NULL;
 	}
 fail2:
 	while (frame >= frame_in) {
@@ -432,6 +455,10 @@ fail2:
 		frame--;
 	}
 fail:
+	if (*err == ERR_BAD_DX_DIR)
+		ext4_warning(dir->i_sb, __FUNCTION__,
+			     "Corrupt dir inode %ld, running e2fsck is "
+			     "recommended.", dir->i_ino);
 	return NULL;
 }
 
@@ -671,6 +698,10 @@ errout:
  * Directory block splitting, compacting
  */
 
+/*
+ * Create map of hash values, offsets, and sizes, stored at end of block.
+ * Returns number of entries mapped.
+ */
 static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
 			struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
 {
@@ -684,7 +715,8 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
 			ext4fs_dirhash(de->name, de->name_len, &h);
 			map_tail--;
 			map_tail->hash = h.hash;
-			map_tail->offs = (u32) ((char *) de - base);
+			map_tail->offs = (u16) ((char *) de - base);
+			map_tail->size = le16_to_cpu(de->rec_len);
 			count++;
 			cond_resched();
 		}
@@ -694,6 +726,7 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
 	return count;
 }
 
+/* Sort map by hash value */
 static void dx_sort_map (struct dx_map_entry *map, unsigned count)
 {
 	struct dx_map_entry *p, *q, *top = map + count - 1;
@@ -1089,6 +1122,10 @@ static inline void ext4_set_de_type(struct super_block *sb,
 }
 
 #ifdef CONFIG_EXT4_INDEX
+/*
+ * Move count entries from end of map between two memory locations.
+ * Returns pointer to last entry moved.
+ */
 static struct ext4_dir_entry_2 *
 dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
 {
@@ -1107,6 +1144,10 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
 	return (struct ext4_dir_entry_2 *) (to - rec_len);
 }
 
+/*
+ * Compact each dir entry in the range to the minimal rec_len.
+ * Returns pointer to last entry in range.
+ */
 static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size)
 {
 	struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base;
@@ -1129,6 +1170,11 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size)
 	return prev;
 }
 
+/*
+ * Split a full leaf block to make room for a new dir entry.
+ * Allocate a new block, and move entries so that they are approx. equally full.
+ * Returns pointer to de in block into which the new entry will be inserted.
+ */
 static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
 			struct buffer_head **bh,struct dx_frame *frame,
 			struct dx_hash_info *hinfo, int *error)
@@ -1140,7 +1186,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
 	u32 hash2;
 	struct dx_map_entry *map;
 	char *data1 = (*bh)->b_data, *data2;
-	unsigned split;
+	unsigned split, move, size, i;
 	struct ext4_dir_entry_2 *de = NULL, *de2;
 	int	err = 0;
 
@@ -1168,8 +1214,19 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
 	count = dx_make_map ((struct ext4_dir_entry_2 *) data1,
 			     blocksize, hinfo, map);
 	map -= count;
-	split = count/2; // need to adjust to actual middle
 	dx_sort_map (map, count);
+	/* Split the existing block in the middle, size-wise */
+	size = 0;
+	move = 0;
+	for (i = count-1; i >= 0; i--) {
+		/* is more than half of this entry in 2nd half of the block? */
+		if (size + map[i].size/2 > blocksize/2)
+			break;
+		size += map[i].size;
+		move++;
+	}
+	/* map index at which we will split */
+	split = count - move;
 	hash2 = map[split].hash;
 	continued = hash2 == map[split - 1].hash;
 	dxtrace(printk("Split block %i at %x, %i/%i\n",
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4550b83ab1c..3c1397fa83d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2698,8 +2698,11 @@ static int ext4_release_dquot(struct dquot *dquot)
 
 	handle = ext4_journal_start(dquot_to_inode(dquot),
 					EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
-	if (IS_ERR(handle))
+	if (IS_ERR(handle)) {
+		/* Release dquot anyway to avoid endless cycle in dqput() */
+		dquot_release(dquot);
 		return PTR_ERR(handle);
+	}
 	ret = dquot_release(dquot);
 	err = ext4_journal_stop(handle);
 	if (!ret)
@@ -2832,6 +2835,12 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
 	struct buffer_head *bh;
 	handle_t *handle = journal_current_handle();
 
+	if (!handle) {
+		printk(KERN_WARNING "EXT4-fs: Quota write (off=%Lu, len=%Lu)"
+			" cancelled because transaction is not started.\n",
+			(unsigned long long)off, (unsigned long long)len);
+		return -EIO;
+	}
 	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
 	while (towrite > 0) {
 		tocopy = sb->s_blocksize - offset < towrite ?
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index c848a191525..950c2fbb815 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -82,14 +82,19 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	int ret;
 
 	/*
-	 * vma alignment has already been checked by prepare_hugepage_range.
-	 * If you add any error returns here, do so after setting VM_HUGETLB,
-	 * so is_vm_hugetlb_page tests below unmap_region go the right way
-	 * when do_mmap_pgoff unwinds (may be important on powerpc and ia64).
+	 * vma address alignment (but not the pgoff alignment) has
+	 * already been checked by prepare_hugepage_range.  If you add
+	 * any error returns here, do so after setting VM_HUGETLB, so
+	 * is_vm_hugetlb_page tests below unmap_region go the right
+	 * way when do_mmap_pgoff unwinds (may be important on powerpc
+	 * and ia64).
 	 */
 	vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
 	vma->vm_ops = &hugetlb_vm_ops;
 
+	if (vma->vm_pgoff & ~(HPAGE_MASK >> PAGE_SHIFT))
+		return -EINVAL;
+
 	vma_len = (loff_t)(vma->vm_end - vma->vm_start);
 
 	mutex_lock(&inode->i_mutex);
@@ -132,7 +137,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 		return -ENOMEM;
 
 	if (flags & MAP_FIXED) {
-		if (prepare_hugepage_range(addr, len, pgoff))
+		if (prepare_hugepage_range(addr, len))
 			return -EINVAL;
 		return addr;
 	}
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index dd64ddc11d4..ed85f9afdbc 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -647,7 +647,7 @@ unsigned char *jffs2_gc_fetch_page(struct jffs2_sb_info *c,
 	struct inode *inode = OFNI_EDONI_2SFFJ(f);
 	struct page *pg;
 
-	pg = read_cache_page(inode->i_mapping, offset >> PAGE_CACHE_SHIFT,
+	pg = read_cache_page_async(inode->i_mapping, offset >> PAGE_CACHE_SHIFT,
 			     (void *)jffs2_do_readpage_unlock, inode);
 	if (IS_ERR(pg))
 		return (void *)pg;
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index a21e4bc5444..d120ec39bcb 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -171,19 +171,14 @@ found:
  * GRANTED_RES message by cookie, without having to rely on the client's IP
  * address. --okir
  */
-static inline struct nlm_block *
-nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
-		struct nlm_lock *lock, struct nlm_cookie *cookie)
+static struct nlm_block *
+nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host,
+		    struct nlm_file *file, struct nlm_lock *lock,
+		    struct nlm_cookie *cookie)
 {
 	struct nlm_block	*block;
-	struct nlm_host		*host;
 	struct nlm_rqst		*call = NULL;
 
-	/* Create host handle for callback */
-	host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len);
-	if (host == NULL)
-		return NULL;
-
 	call = nlm_alloc_call(host);
 	if (call == NULL)
 		return NULL;
@@ -366,6 +361,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 			struct nlm_lock *lock, int wait, struct nlm_cookie *cookie)
 {
 	struct nlm_block	*block = NULL;
+	struct nlm_host		*host;
 	int			error;
 	__be32			ret;
 
@@ -377,6 +373,10 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 				(long long)lock->fl.fl_end,
 				wait);
 
+	/* Create host handle for callback */
+	host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len);
+	if (host == NULL)
+		return nlm_lck_denied_nolocks;
 
 	/* Lock file against concurrent access */
 	mutex_lock(&file->f_mutex);
@@ -385,7 +385,8 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 	 */
 	block = nlmsvc_lookup_block(file, lock);
 	if (block == NULL) {
-		block = nlmsvc_create_block(rqstp, file, lock, cookie);
+		block = nlmsvc_create_block(rqstp, nlm_get_host(host), file,
+				lock, cookie);
 		ret = nlm_lck_denied_nolocks;
 		if (block == NULL)
 			goto out;
@@ -449,6 +450,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 out:
 	mutex_unlock(&file->f_mutex);
 	nlmsvc_release_block(block);
+	nlm_release_host(host);
 	dprintk("lockd: nlmsvc_lock returned %u\n", ret);
 	return ret;
 }
@@ -477,10 +479,17 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
 
 	if (block == NULL) {
 		struct file_lock *conf = kzalloc(sizeof(*conf), GFP_KERNEL);
+		struct nlm_host	*host;
 
 		if (conf == NULL)
 			return nlm_granted;
-		block = nlmsvc_create_block(rqstp, file, lock, cookie);
+		/* Create host handle for callback */
+		host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len);
+		if (host == NULL) {
+			kfree(conf);
+			return nlm_lck_denied_nolocks;
+		}
+		block = nlmsvc_create_block(rqstp, host, file, lock, cookie);
 		if (block == NULL) {
 			kfree(conf);
 			return nlm_granted;
diff --git a/fs/locks.c b/fs/locks.c
index 50857d2d340..c795eaaf6c4 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -782,7 +782,7 @@ find_conflict:
 	if (request->fl_flags & FL_ACCESS)
 		goto out;
 	locks_copy_lock(new_fl, request);
-	locks_insert_lock(&inode->i_flock, new_fl);
+	locks_insert_lock(before, new_fl);
 	new_fl = NULL;
 	error = 0;
 
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index a49f9feff77..a204484072f 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -588,16 +588,6 @@ static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_dat
 	server->namelen  = data->namlen;
 	/* Create a client RPC handle for the NFSv3 ACL management interface */
 	nfs_init_server_aclclient(server);
-	if (clp->cl_nfsversion == 3) {
-		if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
-			server->namelen = NFS3_MAXNAMLEN;
-		if (!(data->flags & NFS_MOUNT_NORDIRPLUS))
-			server->caps |= NFS_CAP_READDIRPLUS;
-	} else {
-		if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
-			server->namelen = NFS2_MAXNAMLEN;
-	}
-
 	dprintk("<-- nfs_init_server() = 0 [new %p]\n", clp);
 	return 0;
 
@@ -794,6 +784,16 @@ struct nfs_server *nfs_create_server(const struct nfs_mount_data *data,
 	error = nfs_probe_fsinfo(server, mntfh, &fattr);
 	if (error < 0)
 		goto error;
+	if (server->nfs_client->rpc_ops->version == 3) {
+		if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
+			server->namelen = NFS3_MAXNAMLEN;
+		if (!(data->flags & NFS_MOUNT_NORDIRPLUS))
+			server->caps |= NFS_CAP_READDIRPLUS;
+	} else {
+		if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
+			server->namelen = NFS2_MAXNAMLEN;
+	}
+
 	if (!(fattr.valid & NFS_ATTR_FATTR)) {
 		error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr);
 		if (error < 0) {
@@ -984,6 +984,9 @@ struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data,
 	if (error < 0)
 		goto error;
 
+	if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
+		server->namelen = NFS4_MAXNAMLEN;
+
 	BUG_ON(!server->nfs_client);
 	BUG_ON(!server->nfs_client->rpc_ops);
 	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
@@ -1056,6 +1059,9 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
 	if (error < 0)
 		goto error;
 
+	if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
+		server->namelen = NFS4_MAXNAMLEN;
+
 	dprintk("Referral FSID: %llx:%llx\n",
 		(unsigned long long) server->fsid.major,
 		(unsigned long long) server->fsid.minor);
@@ -1115,6 +1121,9 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
 	if (error < 0)
 		goto out_free_server;
 
+	if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
+		server->namelen = NFS4_MAXNAMLEN;
+
 	dprintk("Cloned FSID: %llx:%llx\n",
 		(unsigned long long) server->fsid.major,
 		(unsigned long long) server->fsid.minor);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ea97408e423..e4a04d16b8b 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1162,6 +1162,8 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
 	}
 	if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR))
 		return NULL;
+	if (name.len > NFS_SERVER(dir)->namelen)
+		return NULL;
 	/* Note: caller is already holding the dir->i_mutex! */
 	dentry = d_alloc(parent, &name);
 	if (dentry == NULL)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index c87dc713b5d..579cf8a7d4a 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -316,7 +316,7 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset)
 	if (offset != 0)
 		return;
 	/* Cancel any unstarted writes on this page */
-	nfs_wb_page_priority(page->mapping->host, page, FLUSH_INVALIDATE);
+	nfs_wb_page_cancel(page->mapping->host, page);
 }
 
 static int nfs_release_page(struct page *page, gfp_t gfp)
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index d1cbf0a0fbb..522e5ad4d8a 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -175,6 +175,9 @@ next_component:
 		path++;
 	name.len = path - (const char *) name.name;
 
+	if (name.len > NFS4_MAXNAMLEN)
+		return -ENAMETOOLONG;
+
 eat_dot_dir:
 	while (*path == '/')
 		path++;
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index aea76d0e5fb..acfc56f9edc 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -176,7 +176,7 @@ static void nfs_expire_automounts(struct work_struct *work)
 void nfs_release_automount_timer(void)
 {
 	if (list_empty(&nfs_automount_list))
-		cancel_delayed_work_sync(&nfs_automount_task);
+		cancel_delayed_work(&nfs_automount_task);
 }
 
 /*
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 62b3ae28031..4b90e17555a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -646,7 +646,7 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state
 	rcu_read_lock();
 	delegation = rcu_dereference(NFS_I(state->inode)->delegation);
 	if (delegation != NULL && (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) != 0)
-		delegation_type = delegation->flags;
+		delegation_type = delegation->type;
 	rcu_read_unlock();
 	opendata->o_arg.u.delegation_type = delegation_type;
 	status = nfs4_open_recover(opendata, state);
@@ -1434,7 +1434,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 	}
 	res = d_add_unique(dentry, igrab(state->inode));
 	if (res != NULL)
-		dentry = res;
+		path.dentry = res;
 	nfs4_intent_set_file(nd, &path, state);
 	return res;
 }
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index b2a851c1b8c..b878528b64c 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -345,8 +345,8 @@ void __exit unregister_nfs_fs(void)
 	unregister_shrinker(&acl_shrinker);
 #ifdef CONFIG_NFS_V4
 	unregister_filesystem(&nfs4_fs_type);
-	nfs_unregister_sysctl();
 #endif
+	nfs_unregister_sysctl();
 	unregister_filesystem(&nfs_fs_type);
 }
 
@@ -911,13 +911,13 @@ static int nfs_parse_mount_options(char *raw,
 			kfree(string);
 
 			switch (token) {
-			case Opt_udp:
+			case Opt_xprt_udp:
 				mnt->flags &= ~NFS_MOUNT_TCP;
 				mnt->nfs_server.protocol = IPPROTO_UDP;
 				mnt->timeo = 7;
 				mnt->retrans = 5;
 				break;
-			case Opt_tcp:
+			case Opt_xprt_tcp:
 				mnt->flags |= NFS_MOUNT_TCP;
 				mnt->nfs_server.protocol = IPPROTO_TCP;
 				mnt->timeo = 600;
@@ -936,10 +936,10 @@ static int nfs_parse_mount_options(char *raw,
 			kfree(string);
 
 			switch (token) {
-			case Opt_udp:
+			case Opt_xprt_udp:
 				mnt->mount_server.protocol = IPPROTO_UDP;
 				break;
-			case Opt_tcp:
+			case Opt_xprt_tcp:
 				mnt->mount_server.protocol = IPPROTO_TCP;
 				break;
 			default:
@@ -1153,20 +1153,20 @@ static int nfs_validate_mount_data(struct nfs_mount_data **options,
 		c = strchr(dev_name, ':');
 		if (c == NULL)
 			return -EINVAL;
-		len = c - dev_name - 1;
+		len = c - dev_name;
 		if (len > sizeof(data->hostname))
-			return -EINVAL;
+			return -ENAMETOOLONG;
 		strncpy(data->hostname, dev_name, len);
 		args.nfs_server.hostname = data->hostname;
 
 		c++;
 		if (strlen(c) > NFS_MAXPATHLEN)
-			return -EINVAL;
+			return -ENAMETOOLONG;
 		args.nfs_server.export_path = c;
 
 		status = nfs_try_mount(&args, mntfh);
 		if (status)
-			return -EINVAL;
+			return status;
 
 		/*
 		 * Translate to nfs_mount_data, which nfs_fill_super
@@ -1303,34 +1303,6 @@ static void nfs_clone_super(struct super_block *sb,
  	nfs_initialise_sb(sb);
 }
 
-static int nfs_set_super(struct super_block *s, void *_server)
-{
-	struct nfs_server *server = _server;
-	int ret;
-
-	s->s_fs_info = server;
-	ret = set_anon_super(s, server);
-	if (ret == 0)
-		server->s_dev = s->s_dev;
-	return ret;
-}
-
-static int nfs_compare_super(struct super_block *sb, void *data)
-{
-	struct nfs_server *server = data, *old = NFS_SB(sb);
-
-	if (memcmp(&old->nfs_client->cl_addr,
-				&server->nfs_client->cl_addr,
-				sizeof(old->nfs_client->cl_addr)) != 0)
-		return 0;
-	/* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */
-	if (old->flags & NFS_MOUNT_UNSHARED)
-		return 0;
-	if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0)
-		return 0;
-	return 1;
-}
-
 #define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
 
 static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags)
@@ -1359,9 +1331,46 @@ static int nfs_compare_mount_options(const struct super_block *s, const struct n
 		goto Ebusy;
 	if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor)
 		goto Ebusy;
-	return 0;
+	return 1;
 Ebusy:
-	return -EBUSY;
+	return 0;
+}
+
+struct nfs_sb_mountdata {
+	struct nfs_server *server;
+	int mntflags;
+};
+
+static int nfs_set_super(struct super_block *s, void *data)
+{
+	struct nfs_sb_mountdata *sb_mntdata = data;
+	struct nfs_server *server = sb_mntdata->server;
+	int ret;
+
+	s->s_flags = sb_mntdata->mntflags;
+	s->s_fs_info = server;
+	ret = set_anon_super(s, server);
+	if (ret == 0)
+		server->s_dev = s->s_dev;
+	return ret;
+}
+
+static int nfs_compare_super(struct super_block *sb, void *data)
+{
+	struct nfs_sb_mountdata *sb_mntdata = data;
+	struct nfs_server *server = sb_mntdata->server, *old = NFS_SB(sb);
+	int mntflags = sb_mntdata->mntflags;
+
+	if (memcmp(&old->nfs_client->cl_addr,
+				&server->nfs_client->cl_addr,
+				sizeof(old->nfs_client->cl_addr)) != 0)
+		return 0;
+	/* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */
+	if (old->flags & NFS_MOUNT_UNSHARED)
+		return 0;
+	if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0)
+		return 0;
+	return nfs_compare_mount_options(sb, server, mntflags);
 }
 
 static int nfs_get_sb(struct file_system_type *fs_type,
@@ -1373,6 +1382,9 @@ static int nfs_get_sb(struct file_system_type *fs_type,
 	struct nfs_mount_data *data = raw_data;
 	struct dentry *mntroot;
 	int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
+	struct nfs_sb_mountdata sb_mntdata = {
+		.mntflags = flags,
+	};
 	int error;
 
 	/* Validate the mount data */
@@ -1386,28 +1398,25 @@ static int nfs_get_sb(struct file_system_type *fs_type,
 		error = PTR_ERR(server);
 		goto out;
 	}
+	sb_mntdata.server = server;
 
 	if (server->flags & NFS_MOUNT_UNSHARED)
 		compare_super = NULL;
 
 	/* Get a superblock - note that we may end up sharing one that already exists */
-	s = sget(fs_type, compare_super, nfs_set_super, server);
+	s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
 		goto out_err_nosb;
 	}
 
 	if (s->s_fs_info != server) {
-		error = nfs_compare_mount_options(s, server, flags);
 		nfs_free_server(server);
 		server = NULL;
-		if (error < 0)
-			goto error_splat_super;
 	}
 
 	if (!s->s_root) {
 		/* initial superblock/root creation */
-		s->s_flags = flags;
 		nfs_fill_super(s, data);
 	}
 
@@ -1460,6 +1469,9 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
 	struct nfs_server *server;
 	struct dentry *mntroot;
 	int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
+	struct nfs_sb_mountdata sb_mntdata = {
+		.mntflags = flags,
+	};
 	int error;
 
 	dprintk("--> nfs_xdev_get_sb()\n");
@@ -1470,28 +1482,25 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
 		error = PTR_ERR(server);
 		goto out_err_noserver;
 	}
+	sb_mntdata.server = server;
 
 	if (server->flags & NFS_MOUNT_UNSHARED)
 		compare_super = NULL;
 
 	/* Get a superblock - note that we may end up sharing one that already exists */
-	s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
+	s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
 		goto out_err_nosb;
 	}
 
 	if (s->s_fs_info != server) {
-		error = nfs_compare_mount_options(s, server, flags);
 		nfs_free_server(server);
 		server = NULL;
-		if (error < 0)
-			goto error_splat_super;
 	}
 
 	if (!s->s_root) {
 		/* initial superblock/root creation */
-		s->s_flags = flags;
 		nfs_clone_super(s, data->sb);
 	}
 
@@ -1668,7 +1677,7 @@ static int nfs4_validate_mount_data(struct nfs4_mount_data **options,
 		/* while calculating len, pretend ':' is '\0' */
 		len = c - dev_name;
 		if (len > NFS4_MAXNAMLEN)
-			return -EINVAL;
+			return -ENAMETOOLONG;
 		*hostname = kzalloc(len, GFP_KERNEL);
 		if (*hostname == NULL)
 			return -ENOMEM;
@@ -1677,7 +1686,7 @@ static int nfs4_validate_mount_data(struct nfs4_mount_data **options,
 		c++;			/* step over the ':' */
 		len = strlen(c);
 		if (len > NFS4_MAXPATHLEN)
-			return -EINVAL;
+			return -ENAMETOOLONG;
 		*mntpath = kzalloc(len + 1, GFP_KERNEL);
 		if (*mntpath == NULL)
 			return -ENOMEM;
@@ -1729,6 +1738,9 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 	struct dentry *mntroot;
 	char *mntpath = NULL, *hostname = NULL, *ip_addr = NULL;
 	int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
+	struct nfs_sb_mountdata sb_mntdata = {
+		.mntflags = flags,
+	};
 	int error;
 
 	/* Validate the mount data */
@@ -1744,12 +1756,13 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 		error = PTR_ERR(server);
 		goto out;
 	}
+	sb_mntdata.server = server;
 
 	if (server->flags & NFS4_MOUNT_UNSHARED)
 		compare_super = NULL;
 
 	/* Get a superblock - note that we may end up sharing one that already exists */
-	s = sget(fs_type, compare_super, nfs_set_super, server);
+	s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
 		goto out_free;
@@ -1762,7 +1775,6 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 
 	if (!s->s_root) {
 		/* initial superblock/root creation */
-		s->s_flags = flags;
 		nfs4_fill_super(s);
 	}
 
@@ -1816,6 +1828,9 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
 	struct nfs_server *server;
 	struct dentry *mntroot;
 	int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
+	struct nfs_sb_mountdata sb_mntdata = {
+		.mntflags = flags,
+	};
 	int error;
 
 	dprintk("--> nfs4_xdev_get_sb()\n");
@@ -1826,12 +1841,13 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
 		error = PTR_ERR(server);
 		goto out_err_noserver;
 	}
+	sb_mntdata.server = server;
 
 	if (server->flags & NFS4_MOUNT_UNSHARED)
 		compare_super = NULL;
 
 	/* Get a superblock - note that we may end up sharing one that already exists */
-	s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
+	s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
 		goto out_err_nosb;
@@ -1844,7 +1860,6 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
 
 	if (!s->s_root) {
 		/* initial superblock/root creation */
-		s->s_flags = flags;
 		nfs4_clone_super(s, data->sb);
 	}
 
@@ -1887,6 +1902,9 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
 	struct dentry *mntroot;
 	struct nfs_fh mntfh;
 	int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
+	struct nfs_sb_mountdata sb_mntdata = {
+		.mntflags = flags,
+	};
 	int error;
 
 	dprintk("--> nfs4_referral_get_sb()\n");
@@ -1897,12 +1915,13 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
 		error = PTR_ERR(server);
 		goto out_err_noserver;
 	}
+	sb_mntdata.server = server;
 
 	if (server->flags & NFS4_MOUNT_UNSHARED)
 		compare_super = NULL;
 
 	/* Get a superblock - note that we may end up sharing one that already exists */
-	s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
+	s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
 		goto out_err_nosb;
@@ -1915,7 +1934,6 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
 
 	if (!s->s_root) {
 		/* initial superblock/root creation */
-		s->s_flags = flags;
 		nfs4_fill_super(s);
 	}
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ef97e0c0f5b..0d7a77cc394 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1396,6 +1396,50 @@ out:
 	return ret;
 }
 
+int nfs_wb_page_cancel(struct inode *inode, struct page *page)
+{
+	struct nfs_page *req;
+	loff_t range_start = page_offset(page);
+	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
+	struct writeback_control wbc = {
+		.bdi = page->mapping->backing_dev_info,
+		.sync_mode = WB_SYNC_ALL,
+		.nr_to_write = LONG_MAX,
+		.range_start = range_start,
+		.range_end = range_end,
+	};
+	int ret = 0;
+
+	BUG_ON(!PageLocked(page));
+	for (;;) {
+		req = nfs_page_find_request(page);
+		if (req == NULL)
+			goto out;
+		if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) {
+			nfs_release_request(req);
+			break;
+		}
+		if (nfs_lock_request_dontget(req)) {
+			nfs_inode_remove_request(req);
+			/*
+			 * In case nfs_inode_remove_request has marked the
+			 * page as being dirty
+			 */
+			cancel_dirty_page(page, PAGE_CACHE_SIZE);
+			nfs_unlock_request(req);
+			break;
+		}
+		ret = nfs_wait_on_request(req);
+		if (ret < 0)
+			goto out;
+	}
+	if (!PagePrivate(page))
+		return 0;
+	ret = nfs_sync_mapping_wait(page->mapping, &wbc, FLUSH_INVALIDATE);
+out:
+	return ret;
+}
+
 int nfs_wb_page_priority(struct inode *inode, struct page *page, int how)
 {
 	loff_t range_start = page_offset(page);
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 0eb464a39aa..7011d62acfc 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -566,13 +566,23 @@ enum fsid_source fsid_source(struct svc_fh *fhp)
 	case FSID_DEV:
 	case FSID_ENCODE_DEV:
 	case FSID_MAJOR_MINOR:
-		return FSIDSOURCE_DEV;
+		if (fhp->fh_export->ex_dentry->d_inode->i_sb->s_type->fs_flags
+		    & FS_REQUIRES_DEV)
+			return FSIDSOURCE_DEV;
+		break;
 	case FSID_NUM:
-		return FSIDSOURCE_FSID;
-	default:
 		if (fhp->fh_export->ex_flags & NFSEXP_FSID)
 			return FSIDSOURCE_FSID;
-		else
-			return FSIDSOURCE_UUID;
+		break;
+	default:
+		break;
 	}
+	/* either a UUID type filehandle, or the filehandle doesn't
+	 * match the export.
+	 */
+	if (fhp->fh_export->ex_flags & NFSEXP_FSID)
+		return FSIDSOURCE_FSID;
+	if (fhp->fh_export->ex_uuid)
+		return FSIDSOURCE_UUID;
+	return FSIDSOURCE_DEV;
 }
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index a0c2b253818..7867151ebb8 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -115,7 +115,8 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
 
 	exp2 = rqst_exp_get_by_name(rqstp, mnt, mounts);
 	if (IS_ERR(exp2)) {
-		err = PTR_ERR(exp2);
+		if (PTR_ERR(exp2) != -ENOENT)
+			err = PTR_ERR(exp2);
 		dput(mounts);
 		mntput(mnt);
 		goto out;
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 4f517665c9a..778a850b463 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5602,6 +5602,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
 				      clusters_to_del;
 	spin_unlock(&OCFS2_I(inode)->ip_lock);
 	le32_add_cpu(&fe->i_clusters, -clusters_to_del);
+	inode->i_blocks = ocfs2_inode_sector_count(inode);
 
 	status = ocfs2_trim_tree(inode, path, handle, tc,
 				 clusters_to_del, &delete_blk);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 460d440310f..f37f25c931f 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -855,6 +855,7 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
 				  struct ocfs2_super *osb, loff_t pos,
 				  unsigned len, struct buffer_head *di_bh)
 {
+	u32 cend;
 	struct ocfs2_write_ctxt *wc;
 
 	wc = kzalloc(sizeof(struct ocfs2_write_ctxt), GFP_NOFS);
@@ -862,7 +863,8 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
 		return -ENOMEM;
 
 	wc->w_cpos = pos >> osb->s_clustersize_bits;
-	wc->w_clen = ocfs2_clusters_for_bytes(osb->sb, len);
+	cend = (pos + len - 1) >> osb->s_clustersize_bits;
+	wc->w_clen = cend - wc->w_cpos + 1;
 	get_bh(di_bh);
 	wc->w_di_bh = di_bh;
 
@@ -928,18 +930,11 @@ static void ocfs2_write_failure(struct inode *inode,
 				loff_t user_pos, unsigned user_len)
 {
 	int i;
-	unsigned from, to;
+	unsigned from = user_pos & (PAGE_CACHE_SIZE - 1),
+		to = user_pos + user_len;
 	struct page *tmppage;
 
-	ocfs2_zero_new_buffers(wc->w_target_page, user_pos, user_len);
-
-	if (wc->w_large_pages) {
-		from = wc->w_target_from;
-		to = wc->w_target_to;
-	} else {
-		from = 0;
-		to = PAGE_CACHE_SIZE;
-	}
+	ocfs2_zero_new_buffers(wc->w_target_page, from, to);
 
 	for(i = 0; i < wc->w_num_pages; i++) {
 		tmppage = wc->w_pages[i];
@@ -989,9 +984,6 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno,
 			map_from = cluster_start;
 			map_to = cluster_end;
 		}
-
-		wc->w_target_from = map_from;
-		wc->w_target_to = map_to;
 	} else {
 		/*
 		 * If we haven't allocated the new page yet, we
@@ -1209,18 +1201,33 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
 				       loff_t pos, unsigned len)
 {
 	int ret, i;
+	loff_t cluster_off;
+	unsigned int local_len = len;
 	struct ocfs2_write_cluster_desc *desc;
+	struct ocfs2_super *osb = OCFS2_SB(mapping->host->i_sb);
 
 	for (i = 0; i < wc->w_clen; i++) {
 		desc = &wc->w_desc[i];
 
+		/*
+		 * We have to make sure that the total write passed in
+		 * doesn't extend past a single cluster.
+		 */
+		local_len = len;
+		cluster_off = pos & (osb->s_clustersize - 1);
+		if ((cluster_off + local_len) > osb->s_clustersize)
+			local_len = osb->s_clustersize - cluster_off;
+
 		ret = ocfs2_write_cluster(mapping, desc->c_phys,
 					  desc->c_unwritten, data_ac, meta_ac,
-					  wc, desc->c_cpos, pos, len);
+					  wc, desc->c_cpos, pos, local_len);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
+
+		len -= local_len;
+		pos += local_len;
 	}
 
 	ret = 0;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 4ffa715be09..f3bc3658e7a 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -314,7 +314,6 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
 	}
 
 	i_size_write(inode, new_i_size);
-	inode->i_blocks = ocfs2_align_bytes_to_sectors(new_i_size);
 	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
 
 	di = (struct ocfs2_dinode *) fe_bh->b_data;
@@ -492,8 +491,8 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
 		goto leave;
 	}
 
-	status = ocfs2_claim_clusters(osb, handle, data_ac, 1,
-				      &bit_off, &num_bits);
+	status = __ocfs2_claim_clusters(osb, handle, data_ac, 1,
+					clusters_to_add, &bit_off, &num_bits);
 	if (status < 0) {
 		if (status != -ENOSPC)
 			mlog_errno(status);
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 545f7892cdf..d272847d5a0 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -514,8 +514,10 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
 	ac->ac_bh = osb->local_alloc_bh;
 	status = 0;
 bail:
-	if (status < 0 && local_alloc_inode)
+	if (status < 0 && local_alloc_inode) {
+		mutex_unlock(&local_alloc_inode->i_mutex);
 		iput(local_alloc_inode);
+	}
 
 	mlog_exit(status);
 	return status;
@@ -524,13 +526,12 @@ bail:
 int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
 				 handle_t *handle,
 				 struct ocfs2_alloc_context *ac,
-				 u32 min_bits,
+				 u32 bits_wanted,
 				 u32 *bit_off,
 				 u32 *num_bits)
 {
 	int status, start;
 	struct inode *local_alloc_inode;
-	u32 bits_wanted;
 	void *bitmap;
 	struct ocfs2_dinode *alloc;
 	struct ocfs2_local_alloc *la;
@@ -538,7 +539,6 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
 	mlog_entry_void();
 	BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);
 
-	bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
 	local_alloc_inode = ac->ac_inode;
 	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
 	la = OCFS2_LOCAL_ALLOC(alloc);
diff --git a/fs/ocfs2/localalloc.h b/fs/ocfs2/localalloc.h
index 385a10152f9..3f76631e110 100644
--- a/fs/ocfs2/localalloc.h
+++ b/fs/ocfs2/localalloc.h
@@ -48,7 +48,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
 int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
 				 handle_t *handle,
 				 struct ocfs2_alloc_context *ac,
-				 u32 min_bits,
+				 u32 bits_wanted,
 				 u32 *bit_off,
 				 u32 *num_bits);
 
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index d9c5c9fcb30..8f09f5235e3 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -1486,21 +1486,21 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode,
  * contig. allocation, set to '1' to indicate we can deal with extents
  * of any size.
  */
-int ocfs2_claim_clusters(struct ocfs2_super *osb,
-			 handle_t *handle,
-			 struct ocfs2_alloc_context *ac,
-			 u32 min_clusters,
-			 u32 *cluster_start,
-			 u32 *num_clusters)
+int __ocfs2_claim_clusters(struct ocfs2_super *osb,
+			   handle_t *handle,
+			   struct ocfs2_alloc_context *ac,
+			   u32 min_clusters,
+			   u32 max_clusters,
+			   u32 *cluster_start,
+			   u32 *num_clusters)
 {
 	int status;
-	unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
+	unsigned int bits_wanted = max_clusters;
 	u64 bg_blkno = 0;
 	u16 bg_bit_off;
 
 	mlog_entry_void();
 
-	BUG_ON(!ac);
 	BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
 
 	BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
@@ -1557,6 +1557,19 @@ bail:
 	return status;
 }
 
+int ocfs2_claim_clusters(struct ocfs2_super *osb,
+			 handle_t *handle,
+			 struct ocfs2_alloc_context *ac,
+			 u32 min_clusters,
+			 u32 *cluster_start,
+			 u32 *num_clusters)
+{
+	unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
+
+	return __ocfs2_claim_clusters(osb, handle, ac, min_clusters,
+				      bits_wanted, cluster_start, num_clusters);
+}
+
 static inline int ocfs2_block_group_clear_bits(handle_t *handle,
 					       struct inode *alloc_inode,
 					       struct ocfs2_group_desc *bg,
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index f212dc01a84..cafe9370309 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -85,6 +85,17 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
 			 u32 min_clusters,
 			 u32 *cluster_start,
 			 u32 *num_clusters);
+/*
+ * Use this variant of ocfs2_claim_clusters to specify a maxiumum
+ * number of clusters smaller than the allocation reserved.
+ */
+int __ocfs2_claim_clusters(struct ocfs2_super *osb,
+			   handle_t *handle,
+			   struct ocfs2_alloc_context *ac,
+			   u32 min_clusters,
+			   u32 max_clusters,
+			   u32 *cluster_start,
+			   u32 *num_clusters);
 
 int ocfs2_free_suballoc_bits(handle_t *handle,
 			     struct inode *alloc_inode,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index f2fc9a795de..c034b5129c1 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -81,8 +81,15 @@ static struct dentry *ocfs2_debugfs_root = NULL;
 MODULE_AUTHOR("Oracle");
 MODULE_LICENSE("GPL");
 
+struct mount_options
+{
+	unsigned long	mount_opt;
+	unsigned int	atime_quantum;
+	signed short	slot;
+};
+
 static int ocfs2_parse_options(struct super_block *sb, char *options,
-			       unsigned long *mount_opt, s16 *slot,
+			       struct mount_options *mopt,
 			       int is_remount);
 static void ocfs2_put_super(struct super_block *sb);
 static int ocfs2_mount_volume(struct super_block *sb);
@@ -367,24 +374,23 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
 {
 	int incompat_features;
 	int ret = 0;
-	unsigned long parsed_options;
-	s16 slot;
+	struct mount_options parsed_options;
 	struct ocfs2_super *osb = OCFS2_SB(sb);
 
-	if (!ocfs2_parse_options(sb, data, &parsed_options, &slot, 1)) {
+	if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) {
 		ret = -EINVAL;
 		goto out;
 	}
 
 	if ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) !=
-	    (parsed_options & OCFS2_MOUNT_HB_LOCAL)) {
+	    (parsed_options.mount_opt & OCFS2_MOUNT_HB_LOCAL)) {
 		ret = -EINVAL;
 		mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n");
 		goto out;
 	}
 
 	if ((osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK) !=
-	    (parsed_options & OCFS2_MOUNT_DATA_WRITEBACK)) {
+	    (parsed_options.mount_opt & OCFS2_MOUNT_DATA_WRITEBACK)) {
 		ret = -EINVAL;
 		mlog(ML_ERROR, "Cannot change data mode on remount\n");
 		goto out;
@@ -435,7 +441,9 @@ unlock_osb:
 
 		/* Only save off the new mount options in case of a successful
 		 * remount. */
-		osb->s_mount_opt = parsed_options;
+		osb->s_mount_opt = parsed_options.mount_opt;
+		osb->s_atime_quantum = parsed_options.atime_quantum;
+		osb->preferred_slot = parsed_options.slot;
 	}
 out:
 	return ret;
@@ -547,8 +555,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct dentry *root;
 	int status, sector_size;
-	unsigned long parsed_opt;
-	s16 slot;
+	struct mount_options parsed_options;
 	struct inode *inode = NULL;
 	struct ocfs2_super *osb = NULL;
 	struct buffer_head *bh = NULL;
@@ -556,14 +563,14 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 
 	mlog_entry("%p, %p, %i", sb, data, silent);
 
-	if (!ocfs2_parse_options(sb, data, &parsed_opt, &slot, 0)) {
+	if (!ocfs2_parse_options(sb, data, &parsed_options, 0)) {
 		status = -EINVAL;
 		goto read_super_error;
 	}
 
 	/* for now we only have one cluster/node, make sure we see it
 	 * in the heartbeat universe */
-	if (parsed_opt & OCFS2_MOUNT_HB_LOCAL) {
+	if (parsed_options.mount_opt & OCFS2_MOUNT_HB_LOCAL) {
 		if (!o2hb_check_local_node_heartbeating()) {
 			status = -EINVAL;
 			goto read_super_error;
@@ -585,8 +592,9 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 	}
 	brelse(bh);
 	bh = NULL;
-	osb->s_mount_opt = parsed_opt;
-	osb->preferred_slot = slot;
+	osb->s_mount_opt = parsed_options.mount_opt;
+	osb->s_atime_quantum = parsed_options.atime_quantum;
+	osb->preferred_slot = parsed_options.slot;
 
 	sb->s_magic = OCFS2_SUPER_MAGIC;
 
@@ -728,8 +736,7 @@ static struct file_system_type ocfs2_fs_type = {
 
 static int ocfs2_parse_options(struct super_block *sb,
 			       char *options,
-			       unsigned long *mount_opt,
-			       s16 *slot,
+			       struct mount_options *mopt,
 			       int is_remount)
 {
 	int status;
@@ -738,8 +745,9 @@ static int ocfs2_parse_options(struct super_block *sb,
 	mlog_entry("remount: %d, options: \"%s\"\n", is_remount,
 		   options ? options : "(none)");
 
-	*mount_opt = 0;
-	*slot = OCFS2_INVALID_SLOT;
+	mopt->mount_opt = 0;
+	mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM;
+	mopt->slot = OCFS2_INVALID_SLOT;
 
 	if (!options) {
 		status = 1;
@@ -749,7 +757,6 @@ static int ocfs2_parse_options(struct super_block *sb,
 	while ((p = strsep(&options, ",")) != NULL) {
 		int token, option;
 		substring_t args[MAX_OPT_ARGS];
-		struct ocfs2_super * osb = OCFS2_SB(sb);
 
 		if (!*p)
 			continue;
@@ -757,10 +764,10 @@ static int ocfs2_parse_options(struct super_block *sb,
 		token = match_token(p, tokens, args);
 		switch (token) {
 		case Opt_hb_local:
-			*mount_opt |= OCFS2_MOUNT_HB_LOCAL;
+			mopt->mount_opt |= OCFS2_MOUNT_HB_LOCAL;
 			break;
 		case Opt_hb_none:
-			*mount_opt &= ~OCFS2_MOUNT_HB_LOCAL;
+			mopt->mount_opt &= ~OCFS2_MOUNT_HB_LOCAL;
 			break;
 		case Opt_barrier:
 			if (match_int(&args[0], &option)) {
@@ -768,27 +775,27 @@ static int ocfs2_parse_options(struct super_block *sb,
 				goto bail;
 			}
 			if (option)
-				*mount_opt |= OCFS2_MOUNT_BARRIER;
+				mopt->mount_opt |= OCFS2_MOUNT_BARRIER;
 			else
-				*mount_opt &= ~OCFS2_MOUNT_BARRIER;
+				mopt->mount_opt &= ~OCFS2_MOUNT_BARRIER;
 			break;
 		case Opt_intr:
-			*mount_opt &= ~OCFS2_MOUNT_NOINTR;
+			mopt->mount_opt &= ~OCFS2_MOUNT_NOINTR;
 			break;
 		case Opt_nointr:
-			*mount_opt |= OCFS2_MOUNT_NOINTR;
+			mopt->mount_opt |= OCFS2_MOUNT_NOINTR;
 			break;
 		case Opt_err_panic:
-			*mount_opt |= OCFS2_MOUNT_ERRORS_PANIC;
+			mopt->mount_opt |= OCFS2_MOUNT_ERRORS_PANIC;
 			break;
 		case Opt_err_ro:
-			*mount_opt &= ~OCFS2_MOUNT_ERRORS_PANIC;
+			mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_PANIC;
 			break;
 		case Opt_data_ordered:
-			*mount_opt &= ~OCFS2_MOUNT_DATA_WRITEBACK;
+			mopt->mount_opt &= ~OCFS2_MOUNT_DATA_WRITEBACK;
 			break;
 		case Opt_data_writeback:
-			*mount_opt |= OCFS2_MOUNT_DATA_WRITEBACK;
+			mopt->mount_opt |= OCFS2_MOUNT_DATA_WRITEBACK;
 			break;
 		case Opt_atime_quantum:
 			if (match_int(&args[0], &option)) {
@@ -796,9 +803,7 @@ static int ocfs2_parse_options(struct super_block *sb,
 				goto bail;
 			}
 			if (option >= 0)
-				osb->s_atime_quantum = option;
-			else
-				osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM;
+				mopt->atime_quantum = option;
 			break;
 		case Opt_slot:
 			option = 0;
@@ -807,7 +812,7 @@ static int ocfs2_parse_options(struct super_block *sb,
 				goto bail;
 			}
 			if (option)
-				*slot = (s16)option;
+				mopt->slot = (s16)option;
 			break;
 		default:
 			mlog(ML_ERROR,
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c
index 66a13ee63d4..c05358538f2 100644
--- a/fs/ocfs2/vote.c
+++ b/fs/ocfs2/vote.c
@@ -66,7 +66,7 @@ struct ocfs2_vote_msg
 {
 	struct ocfs2_msg_hdr v_hdr;
 	__be32 v_reserved1;
-};
+} __attribute__ ((packed));
 
 /* Responses are given these values to maintain backwards
  * compatibility with older ocfs2 versions */
@@ -78,7 +78,7 @@ struct ocfs2_response_msg
 {
 	struct ocfs2_msg_hdr r_hdr;
 	__be32 r_response;
-};
+} __attribute__ ((packed));
 
 struct ocfs2_vote_work {
 	struct list_head   w_list;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 965625a0977..ee4814dd98f 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -320,7 +320,21 @@ int proc_pid_status(struct task_struct *task, char *buffer)
 	return buffer - orig;
 }
 
-static clock_t task_utime(struct task_struct *p)
+/*
+ * Use precise platform statistics if available:
+ */
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+static cputime_t task_utime(struct task_struct *p)
+{
+	return p->utime;
+}
+
+static cputime_t task_stime(struct task_struct *p)
+{
+	return p->stime;
+}
+#else
+static cputime_t task_utime(struct task_struct *p)
 {
 	clock_t utime = cputime_to_clock_t(p->utime),
 		total = utime + cputime_to_clock_t(p->stime);
@@ -337,10 +351,10 @@ static clock_t task_utime(struct task_struct *p)
 	}
 	utime = (clock_t)temp;
 
-	return utime;
+	return clock_t_to_cputime(utime);
 }
 
-static clock_t task_stime(struct task_struct *p)
+static cputime_t task_stime(struct task_struct *p)
 {
 	clock_t stime;
 
@@ -349,10 +363,12 @@ static clock_t task_stime(struct task_struct *p)
 	 * the total, to make sure the total observed by userspace
 	 * grows monotonically - apps rely on that):
 	 */
-	stime = nsec_to_clock_t(p->se.sum_exec_runtime) - task_utime(p);
+	stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
+			cputime_to_clock_t(task_utime(p));
 
-	return stime;
+	return clock_t_to_cputime(stime);
 }
+#endif
 
 static int do_task_stat(struct task_struct *task, char *buffer, int whole)
 {
@@ -368,8 +384,7 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
 	unsigned long long start_time;
 	unsigned long cmin_flt = 0, cmaj_flt = 0;
 	unsigned long  min_flt = 0,  maj_flt = 0;
-	cputime_t cutime, cstime;
-	clock_t utime, stime;
+	cputime_t cutime, cstime, utime, stime;
 	unsigned long rsslim = 0;
 	char tcomm[sizeof(task->comm)];
 	unsigned long flags;
@@ -387,8 +402,7 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
 
 	sigemptyset(&sigign);
 	sigemptyset(&sigcatch);
-	cutime = cstime = cputime_zero;
-	utime = stime = 0;
+	cutime = cstime = utime = stime = cputime_zero;
 
 	rcu_read_lock();
 	if (lock_task_sighand(task, &flags)) {
@@ -414,15 +428,15 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
 			do {
 				min_flt += t->min_flt;
 				maj_flt += t->maj_flt;
-				utime += task_utime(t);
-				stime += task_stime(t);
+				utime = cputime_add(utime, task_utime(t));
+				stime = cputime_add(stime, task_stime(t));
 				t = next_thread(t);
 			} while (t != task);
 
 			min_flt += sig->min_flt;
 			maj_flt += sig->maj_flt;
-			utime += cputime_to_clock_t(sig->utime);
-			stime += cputime_to_clock_t(sig->stime);
+			utime = cputime_add(utime, sig->utime);
+			stime = cputime_add(stime, sig->stime);
 		}
 
 		sid = signal_session(sig);
@@ -471,8 +485,8 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
 		cmin_flt,
 		maj_flt,
 		cmaj_flt,
-		utime,
-		stime,
+		cputime_to_clock_t(utime),
+		cputime_to_clock_t(stime),
 		cputime_to_clock_t(cutime),
 		cputime_to_clock_t(cstime),
 		priority,
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index a5b0dfd89a1..0e4d37c93ee 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -11,6 +11,7 @@
 #include <linux/string.h>
 #include <linux/stat.h>
 #include <linux/completion.h>
+#include <linux/poll.h>
 #include <linux/file.h>
 #include <linux/limits.h>
 #include <linux/init.h>
@@ -232,7 +233,7 @@ static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t
 static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *pts)
 {
 	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
-	unsigned int rv = 0;
+	unsigned int rv = DEFAULT_POLLMASK;
 	unsigned int (*poll)(struct file *, struct poll_table_struct *);
 
 	spin_lock(&pde->pde_unload_lock);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 5b68dd3f191..a005451930b 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1915,8 +1915,11 @@ static int reiserfs_release_dquot(struct dquot *dquot)
 	ret =
 	    journal_begin(&th, dquot->dq_sb,
 			  REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
-	if (ret)
+	if (ret) {
+		/* Release dquot anyway to avoid endless cycle in dqput() */
+		dquot_release(dquot);
 		goto out;
+	}
 	ret = dquot_release(dquot);
 	err =
 	    journal_end(&th, dquot->dq_sb,
@@ -2067,6 +2070,12 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
 	size_t towrite = len;
 	struct buffer_head tmp_bh, *bh;
 
+	if (!current->journal_info) {
+		printk(KERN_WARNING "reiserfs: Quota write (off=%Lu, len=%Lu)"
+			" cancelled because transaction is not started.\n",
+			(unsigned long long)off, (unsigned long long)len);
+		return -EIO;
+	}
 	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
 	while (towrite > 0) {
 		tocopy = sb->s_blocksize - offset < towrite ?
@@ -2098,7 +2107,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
 		data += tocopy;
 		blk++;
 	}
-      out:
+out:
 	if (len == towrite)
 		return err;
 	if (inode->i_size < off + len - towrite)
diff --git a/fs/select.c b/fs/select.c
index a974082b082..46dca31c607 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -26,8 +26,6 @@
 
 #include <asm/uaccess.h>
 
-#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
-
 struct poll_table_page {
 	struct poll_table_page * next;
 	struct poll_table_entry * entry;
diff --git a/fs/signalfd.c b/fs/signalfd.c
index a8e293d3003..aefb0be0794 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -11,8 +11,10 @@
  *      Now using anonymous inode source.
  *      Thanks to Oleg Nesterov for useful code review and suggestions.
  *      More comments and suggestions from Arnd Bergmann.
- * Sat May 19, 2007: Davi E. M. Arnaut <davi@haxent.com.br>
+ *  Sat May 19, 2007: Davi E. M. Arnaut <davi@haxent.com.br>
  *      Retrieve multiple signals with one read() call
+ *  Sun Jul 15, 2007: Davide Libenzi <davidel@xmailserver.org>
+ *      Attach to the sighand only during read() and poll().
  */
 
 #include <linux/file.h>
@@ -27,102 +29,12 @@
 #include <linux/signalfd.h>
 
 struct signalfd_ctx {
-	struct list_head lnk;
-	wait_queue_head_t wqh;
 	sigset_t sigmask;
-	struct task_struct *tsk;
 };
 
-struct signalfd_lockctx {
-	struct task_struct *tsk;
-	unsigned long flags;
-};
-
-/*
- * Tries to acquire the sighand lock. We do not increment the sighand
- * use count, and we do not even pin the task struct, so we need to
- * do it inside an RCU read lock, and we must be prepared for the
- * ctx->tsk going to NULL (in signalfd_deliver()), and for the sighand
- * being detached. We return 0 if the sighand has been detached, or
- * 1 if we were able to pin the sighand lock.
- */
-static int signalfd_lock(struct signalfd_ctx *ctx, struct signalfd_lockctx *lk)
-{
-	struct sighand_struct *sighand = NULL;
-
-	rcu_read_lock();
-	lk->tsk = rcu_dereference(ctx->tsk);
-	if (likely(lk->tsk != NULL))
-		sighand = lock_task_sighand(lk->tsk, &lk->flags);
-	rcu_read_unlock();
-
-	if (!sighand)
-		return 0;
-
-	if (!ctx->tsk) {
-		unlock_task_sighand(lk->tsk, &lk->flags);
-		return 0;
-	}
-
-	if (lk->tsk->tgid == current->tgid)
-		lk->tsk = current;
-
-	return 1;
-}
-
-static void signalfd_unlock(struct signalfd_lockctx *lk)
-{
-	unlock_task_sighand(lk->tsk, &lk->flags);
-}
-
-/*
- * This must be called with the sighand lock held.
- */
-void signalfd_deliver(struct task_struct *tsk, int sig)
-{
-	struct sighand_struct *sighand = tsk->sighand;
-	struct signalfd_ctx *ctx, *tmp;
-
-	BUG_ON(!sig);
-	list_for_each_entry_safe(ctx, tmp, &sighand->signalfd_list, lnk) {
-		/*
-		 * We use a negative signal value as a way to broadcast that the
-		 * sighand has been orphaned, so that we can notify all the
-		 * listeners about this. Remember the ctx->sigmask is inverted,
-		 * so if the user is interested in a signal, that corresponding
-		 * bit will be zero.
-		 */
-		if (sig < 0) {
-			if (ctx->tsk == tsk) {
-				ctx->tsk = NULL;
-				list_del_init(&ctx->lnk);
-				wake_up(&ctx->wqh);
-			}
-		} else {
-			if (!sigismember(&ctx->sigmask, sig))
-				wake_up(&ctx->wqh);
-		}
-	}
-}
-
-static void signalfd_cleanup(struct signalfd_ctx *ctx)
-{
-	struct signalfd_lockctx lk;
-
-	/*
-	 * This is tricky. If the sighand is gone, we do not need to remove
-	 * context from the list, the list itself won't be there anymore.
-	 */
-	if (signalfd_lock(ctx, &lk)) {
-		list_del(&ctx->lnk);
-		signalfd_unlock(&lk);
-	}
-	kfree(ctx);
-}
-
 static int signalfd_release(struct inode *inode, struct file *file)
 {
-	signalfd_cleanup(file->private_data);
+	kfree(file->private_data);
 	return 0;
 }
 
@@ -130,23 +42,15 @@ static unsigned int signalfd_poll(struct file *file, poll_table *wait)
 {
 	struct signalfd_ctx *ctx = file->private_data;
 	unsigned int events = 0;
-	struct signalfd_lockctx lk;
 
-	poll_wait(file, &ctx->wqh, wait);
+	poll_wait(file, &current->sighand->signalfd_wqh, wait);
 
-	/*
-	 * Let the caller get a POLLIN in this case, ala socket recv() when
-	 * the peer disconnects.
-	 */
-	if (signalfd_lock(ctx, &lk)) {
-		if ((lk.tsk == current &&
-		     next_signal(&lk.tsk->pending, &ctx->sigmask) > 0) ||
-		    next_signal(&lk.tsk->signal->shared_pending,
-				&ctx->sigmask) > 0)
-			events |= POLLIN;
-		signalfd_unlock(&lk);
-	} else
+	spin_lock_irq(&current->sighand->siglock);
+	if (next_signal(&current->pending, &ctx->sigmask) ||
+	    next_signal(&current->signal->shared_pending,
+			&ctx->sigmask))
 		events |= POLLIN;
+	spin_unlock_irq(&current->sighand->siglock);
 
 	return events;
 }
@@ -219,59 +123,46 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, siginfo_t *info,
 				int nonblock)
 {
 	ssize_t ret;
-	struct signalfd_lockctx lk;
 	DECLARE_WAITQUEUE(wait, current);
 
-	if (!signalfd_lock(ctx, &lk))
-		return 0;
-
-	ret = dequeue_signal(lk.tsk, &ctx->sigmask, info);
+	spin_lock_irq(&current->sighand->siglock);
+	ret = dequeue_signal(current, &ctx->sigmask, info);
 	switch (ret) {
 	case 0:
 		if (!nonblock)
 			break;
 		ret = -EAGAIN;
 	default:
-		signalfd_unlock(&lk);
+		spin_unlock_irq(&current->sighand->siglock);
 		return ret;
 	}
 
-	add_wait_queue(&ctx->wqh, &wait);
+	add_wait_queue(&current->sighand->signalfd_wqh, &wait);
 	for (;;) {
 		set_current_state(TASK_INTERRUPTIBLE);
-		ret = dequeue_signal(lk.tsk, &ctx->sigmask, info);
-		signalfd_unlock(&lk);
+		ret = dequeue_signal(current, &ctx->sigmask, info);
 		if (ret != 0)
 			break;
 		if (signal_pending(current)) {
 			ret = -ERESTARTSYS;
 			break;
 		}
+		spin_unlock_irq(&current->sighand->siglock);
 		schedule();
-		ret = signalfd_lock(ctx, &lk);
-		if (unlikely(!ret)) {
-			/*
-			 * Let the caller read zero byte, ala socket
-			 * recv() when the peer disconnect. This test
-			 * must be done before doing a dequeue_signal(),
-			 * because if the sighand has been orphaned,
-			 * the dequeue_signal() call is going to crash
-			 * because ->sighand will be long gone.
-			 */
-			 break;
-		}
+		spin_lock_irq(&current->sighand->siglock);
 	}
+	spin_unlock_irq(&current->sighand->siglock);
 
-	remove_wait_queue(&ctx->wqh, &wait);
+	remove_wait_queue(&current->sighand->signalfd_wqh, &wait);
 	__set_current_state(TASK_RUNNING);
 
 	return ret;
 }
 
 /*
- * Returns either the size of a "struct signalfd_siginfo", or zero if the
- * sighand we are attached to, has been orphaned. The "count" parameter
- * must be at least the size of a "struct signalfd_siginfo".
+ * Returns a multiple of the size of a "struct signalfd_siginfo", or a negative
+ * error code. The "count" parameter must be at least the size of a
+ * "struct signalfd_siginfo".
  */
 static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count,
 			     loff_t *ppos)
@@ -287,7 +178,6 @@ static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count,
 		return -EINVAL;
 
 	siginfo = (struct signalfd_siginfo __user *) buf;
-
 	do {
 		ret = signalfd_dequeue(ctx, &info, nonblock);
 		if (unlikely(ret <= 0))
@@ -300,7 +190,7 @@ static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count,
 		nonblock = 1;
 	} while (--count);
 
-	return total ? total : ret;
+	return total ? total: ret;
 }
 
 static const struct file_operations signalfd_fops = {
@@ -309,20 +199,13 @@ static const struct file_operations signalfd_fops = {
 	.read		= signalfd_read,
 };
 
-/*
- * Create a file descriptor that is associated with our signal
- * state. We can pass it around to others if we want to, but
- * it will always be _our_ signal state.
- */
 asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask)
 {
 	int error;
 	sigset_t sigmask;
 	struct signalfd_ctx *ctx;
-	struct sighand_struct *sighand;
 	struct file *file;
 	struct inode *inode;
-	struct signalfd_lockctx lk;
 
 	if (sizemask != sizeof(sigset_t) ||
 	    copy_from_user(&sigmask, user_mask, sizeof(sigmask)))
@@ -335,17 +218,7 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
 		if (!ctx)
 			return -ENOMEM;
 
-		init_waitqueue_head(&ctx->wqh);
 		ctx->sigmask = sigmask;
-		ctx->tsk = current->group_leader;
-
-		sighand = current->sighand;
-		/*
-		 * Add this fd to the list of signal listeners.
-		 */
-		spin_lock_irq(&sighand->siglock);
-		list_add_tail(&ctx->lnk, &sighand->signalfd_list);
-		spin_unlock_irq(&sighand->siglock);
 
 		/*
 		 * When we call this, the initialization must be complete, since
@@ -364,23 +237,18 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
 			fput(file);
 			return -EINVAL;
 		}
-		/*
-		 * We need to be prepared of the fact that the sighand this fd
-		 * is attached to, has been detched. In that case signalfd_lock()
-		 * will return 0, and we'll just skip setting the new mask.
-		 */
-		if (signalfd_lock(ctx, &lk)) {
-			ctx->sigmask = sigmask;
-			signalfd_unlock(&lk);
-		}
-		wake_up(&ctx->wqh);
+		spin_lock_irq(&current->sighand->siglock);
+		ctx->sigmask = sigmask;
+		spin_unlock_irq(&current->sighand->siglock);
+
+		wake_up(&current->sighand->signalfd_wqh);
 		fput(file);
 	}
 
 	return ufd;
 
 err_fdalloc:
-	signalfd_cleanup(ctx);
+	kfree(ctx);
 	return error;
 }
 
diff --git a/fs/splice.c b/fs/splice.c
index c010a72ca2d..e95a3622886 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1224,6 +1224,33 @@ static long do_splice(struct file *in, loff_t __user *off_in,
 }
 
 /*
+ * Do a copy-from-user while holding the mmap_semaphore for reading, in a
+ * manner safe from deadlocking with simultaneous mmap() (grabbing mmap_sem
+ * for writing) and page faulting on the user memory pointed to by src.
+ * This assumes that we will very rarely hit the partial != 0 path, or this
+ * will not be a win.
+ */
+static int copy_from_user_mmap_sem(void *dst, const void __user *src, size_t n)
+{
+	int partial;
+
+	pagefault_disable();
+	partial = __copy_from_user_inatomic(dst, src, n);
+	pagefault_enable();
+
+	/*
+	 * Didn't copy everything, drop the mmap_sem and do a faulting copy
+	 */
+	if (unlikely(partial)) {
+		up_read(&current->mm->mmap_sem);
+		partial = copy_from_user(dst, src, n);
+		down_read(&current->mm->mmap_sem);
+	}
+
+	return partial;
+}
+
+/*
  * Map an iov into an array of pages and offset/length tupples. With the
  * partial_page structure, we can map several non-contiguous ranges into
  * our ones pages[] map instead of splitting that operation into pieces.
@@ -1236,31 +1263,26 @@ static int get_iovec_page_array(const struct iovec __user *iov,
 {
 	int buffers = 0, error = 0;
 
-	/*
-	 * It's ok to take the mmap_sem for reading, even
-	 * across a "get_user()".
-	 */
 	down_read(&current->mm->mmap_sem);
 
 	while (nr_vecs) {
 		unsigned long off, npages;
+		struct iovec entry;
 		void __user *base;
 		size_t len;
 		int i;
 
-		/*
-		 * Get user address base and length for this iovec.
-		 */
-		error = get_user(base, &iov->iov_base);
-		if (unlikely(error))
-			break;
-		error = get_user(len, &iov->iov_len);
-		if (unlikely(error))
+		error = -EFAULT;
+		if (copy_from_user_mmap_sem(&entry, iov, sizeof(entry)))
 			break;
 
+		base = entry.iov_base;
+		len = entry.iov_len;
+
 		/*
 		 * Sanity check this iovec. 0 read succeeds.
 		 */
+		error = 0;
 		if (unlikely(!len))
 			break;
 		error = -EFAULT;
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 135353f8a29..5afe2a26f5d 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -248,12 +248,7 @@ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
 
 void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
 {
-	if (sysfs_hash_and_remove(kobj->sd, attr->attr.name) < 0) {
-		printk(KERN_ERR "%s: "
-			"bad dentry or inode or no such file: \"%s\"\n",
-			__FUNCTION__, attr->attr.name);
-		dump_stack();
-	}
+	sysfs_hash_and_remove(kobj->sd, attr->attr.name);
 }
 
 EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 048e6054c2f..83e76b3813c 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -762,12 +762,15 @@ static int sysfs_count_nlink(struct sysfs_dirent *sd)
 static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
 				struct nameidata *nd)
 {
+	struct dentry *ret = NULL;
 	struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
 	struct sysfs_dirent * sd;
 	struct bin_attribute *bin_attr;
 	struct inode *inode;
 	int found = 0;
 
+	mutex_lock(&sysfs_mutex);
+
 	for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) {
 		if (sysfs_type(sd) &&
 		    !strcmp(sd->s_name, dentry->d_name.name)) {
@@ -778,14 +781,14 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
 
 	/* no such entry */
 	if (!found)
-		return NULL;
+		goto out_unlock;
 
 	/* attach dentry and inode */
 	inode = sysfs_get_inode(sd);
-	if (!inode)
-		return ERR_PTR(-ENOMEM);
-
-	mutex_lock(&sysfs_mutex);
+	if (!inode) {
+		ret = ERR_PTR(-ENOMEM);
+		goto out_unlock;
+	}
 
 	if (inode->i_state & I_NEW) {
 		/* initialize inode according to type */
@@ -815,9 +818,9 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
 	sysfs_instantiate(dentry, inode);
 	sysfs_attach_dentry(sd, dentry);
 
+ out_unlock:
 	mutex_unlock(&sysfs_mutex);
-
-	return NULL;
+	return ret;
 }
 
 const struct inode_operations sysfs_dir_inode_operations = {
@@ -942,6 +945,8 @@ int sysfs_rename_dir(struct kobject *kobj, struct sysfs_dirent *new_parent_sd,
 	if (error)
 		goto out_drop;
 
+	mutex_lock(&sysfs_mutex);
+
 	dup_name = sd->s_name;
 	sd->s_name = new_name;
 
@@ -949,8 +954,6 @@ int sysfs_rename_dir(struct kobject *kobj, struct sysfs_dirent *new_parent_sd,
 	d_add(new_dentry, NULL);
 	d_move(sd->s_dentry, new_dentry);
 
-	mutex_lock(&sysfs_mutex);
-
 	sysfs_unlink_sibling(sd);
 	sysfs_get(new_parent_sd);
 	sysfs_put(sd->s_parent);
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 276f7207a56..87e87dcd3f9 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -540,26 +540,24 @@ static void udf_table_free_blocks(struct super_block *sb,
 			if (epos.offset + adsize > sb->s_blocksize) {
 				loffset = epos.offset;
 				aed->lengthAllocDescs = cpu_to_le32(adsize);
-				sptr = UDF_I_DATA(inode) + epos.offset -
-					udf_file_entry_alloc_offset(inode) +
-					UDF_I_LENEATTR(inode) - adsize;
+				sptr = UDF_I_DATA(table) + epos.offset - adsize;
 				dptr = epos.bh->b_data + sizeof(struct allocExtDesc);
 				memcpy(dptr, sptr, adsize);
 				epos.offset = sizeof(struct allocExtDesc) + adsize;
 			} else {
 				loffset = epos.offset + adsize;
 				aed->lengthAllocDescs = cpu_to_le32(0);
-				sptr = oepos.bh->b_data + epos.offset;
-				epos.offset = sizeof(struct allocExtDesc);
-
 				if (oepos.bh) {
+					sptr = oepos.bh->b_data + epos.offset;
 					aed = (struct allocExtDesc *)oepos.bh->b_data;
 					aed->lengthAllocDescs =
 						cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + adsize);
 				} else {
+					sptr = UDF_I_DATA(table) + epos.offset;
 					UDF_I_LENALLOC(table) += adsize;
 					mark_inode_dirty(table);
 				}
+				epos.offset = sizeof(struct allocExtDesc);
 			}
 			if (UDF_SB_UDFREV(sb) >= 0x0200)
 				udf_new_tag(epos.bh->b_data, TAG_IDENT_AED, 3, 1,
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 382be7be5ae..c68a6e730b9 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -89,7 +89,7 @@ static int udf_find_fileset(struct super_block *, kernel_lb_addr *,
 static void udf_load_pvoldesc(struct super_block *, struct buffer_head *);
 static void udf_load_fileset(struct super_block *, struct buffer_head *,
 			     kernel_lb_addr *);
-static void udf_load_partdesc(struct super_block *, struct buffer_head *);
+static int udf_load_partdesc(struct super_block *, struct buffer_head *);
 static void udf_open_lvid(struct super_block *);
 static void udf_close_lvid(struct super_block *);
 static unsigned int udf_count_free(struct super_block *);
@@ -877,7 +877,7 @@ static void udf_load_fileset(struct super_block *sb, struct buffer_head *bh,
 		  root->logicalBlockNum, root->partitionReferenceNum);
 }
 
-static void udf_load_partdesc(struct super_block *sb, struct buffer_head *bh)
+static int udf_load_partdesc(struct super_block *sb, struct buffer_head *bh)
 {
 	struct partitionDesc *p;
 	int i;
@@ -912,6 +912,11 @@ static void udf_load_partdesc(struct super_block *sb, struct buffer_head *bh)
 
 					UDF_SB_PARTMAPS(sb)[i].s_uspace.s_table =
 						udf_iget(sb, loc);
+					if (!UDF_SB_PARTMAPS(sb)[i].s_uspace.s_table) {
+						udf_debug("cannot load unallocSpaceTable (part %d)\n",
+							i);
+						return 1;
+					}
 					UDF_SB_PARTFLAGS(sb,i) |= UDF_PART_FLAG_UNALLOC_TABLE;
 					udf_debug("unallocSpaceTable (part %d) @ %ld\n",
 						  i, UDF_SB_PARTMAPS(sb)[i].s_uspace.s_table->i_ino);
@@ -938,6 +943,11 @@ static void udf_load_partdesc(struct super_block *sb, struct buffer_head *bh)
 
 					UDF_SB_PARTMAPS(sb)[i].s_fspace.s_table =
 						udf_iget(sb, loc);
+					if (!UDF_SB_PARTMAPS(sb)[i].s_fspace.s_table) {
+						udf_debug("cannot load freedSpaceTable (part %d)\n",
+							i);
+						return 1;
+					}
 					UDF_SB_PARTFLAGS(sb,i) |= UDF_PART_FLAG_FREED_TABLE;
 					udf_debug("freedSpaceTable (part %d) @ %ld\n",
 						  i, UDF_SB_PARTMAPS(sb)[i].s_fspace.s_table->i_ino);
@@ -966,6 +976,7 @@ static void udf_load_partdesc(struct super_block *sb, struct buffer_head *bh)
 			  le16_to_cpu(p->partitionNumber), i, UDF_SB_PARTTYPE(sb,i),
 			  UDF_SB_PARTROOT(sb,i), UDF_SB_PARTLEN(sb,i));
 	}
+	return 0;
 }
 
 static int udf_load_logicalvol(struct super_block *sb, struct buffer_head *bh,
@@ -1177,12 +1188,19 @@ static int udf_process_sequence(struct super_block *sb, long block, long lastblo
 				udf_load_logicalvol(sb, bh, fileset);
 			} else if (i == VDS_POS_PARTITION_DESC) {
 				struct buffer_head *bh2 = NULL;
-				udf_load_partdesc(sb, bh);
+				if (udf_load_partdesc(sb, bh)) {
+					brelse(bh);
+					return 1;
+				}
 				for (j = vds[i].block + 1; j <  vds[VDS_POS_TERMINATING_DESC].block; j++) {
 					bh2 = udf_read_tagged(sb, j, j, &ident);
 					gd = (struct generic_desc *)bh2->b_data;
 					if (ident == TAG_IDENT_PD)
-						udf_load_partdesc(sb, bh2);
+						if (udf_load_partdesc(sb, bh2)) {
+							brelse(bh);
+							brelse(bh2);
+							return 1;
+						}
 					brelse(bh2);
 				}
 			}
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 73402c5eeb8..38eb0b7a1f3 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -894,7 +894,7 @@ magic_found:
 		goto again;
 	}
 
-
+	sbi->s_flags = flags;/*after that line some functions use s_flags*/
 	ufs_print_super_stuff(sb, usb1, usb2, usb3);
 
 	/*
@@ -1025,8 +1025,6 @@ magic_found:
 	    UFS_MOUNT_UFSTYPE_44BSD)
 		uspi->s_maxsymlinklen =
 		    fs32_to_cpu(sb, usb3->fs_un2.fs_44.fs_maxsymlinklen);
-	
-	sbi->s_flags = flags;
 
 	inode = iget(sb, UFS_ROOTINO);
 	if (!inode || is_bad_inode(inode))
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index b4acc7f3c37..e6ea293f303 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -103,7 +103,7 @@ extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
 static inline int
 kmem_shake_allow(gfp_t gfp_mask)
 {
-	return (gfp_mask & __GFP_WAIT);
+	return (gfp_mask & __GFP_WAIT) != 0;
 }
 
 #endif /* __XFS_SUPPORT_KMEM_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index fd4105d662e..5f152f60d74 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -181,6 +181,7 @@ xfs_setfilesize(
 		ip->i_d.di_size = isize;
 		ip->i_update_core = 1;
 		ip->i_update_size = 1;
+		mark_inode_dirty_sync(vn_to_inode(ioend->io_vnode));
 	}
 
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -652,7 +653,7 @@ xfs_probe_cluster(
 
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
-			size_t pg_offset, len = 0;
+			size_t pg_offset, pg_len = 0;
 
 			if (tindex == tlast) {
 				pg_offset =
@@ -665,16 +666,16 @@ xfs_probe_cluster(
 				pg_offset = PAGE_CACHE_SIZE;
 
 			if (page->index == tindex && !TestSetPageLocked(page)) {
-				len = xfs_probe_page(page, pg_offset, mapped);
+				pg_len = xfs_probe_page(page, pg_offset, mapped);
 				unlock_page(page);
 			}
 
-			if (!len) {
+			if (!pg_len) {
 				done = 1;
 				break;
 			}
 
-			total += len;
+			total += pg_len;
 			tindex++;
 		}
 
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
index bb72c3d4141..81565dea9af 100644
--- a/fs/xfs/linux-2.6/xfs_globals.c
+++ b/fs/xfs/linux-2.6/xfs_globals.c
@@ -46,7 +46,7 @@ xfs_param_t xfs_params = {
 	.inherit_nosym	= {	0,		0,		1	},
 	.rotorstep	= {	1,		1,		255	},
 	.inherit_nodfrg	= {	0,		1,		1	},
-	.fstrm_timer	= {	1,		50,		3600*100},
+	.fstrm_timer	= {	1,		30*100,		3600*100},
 };
 
 /*
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 4528f9a3f30..491d1f4f202 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -415,8 +415,10 @@ xfs_fs_write_inode(
 
 	if (vp) {
 		vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
-		if (sync)
+		if (sync) {
+			filemap_fdatawait(inode->i_mapping);
 			flags |= FLUSH_SYNC;
+		}
 		error = bhv_vop_iflush(vp, flags);
 		if (error == EAGAIN)
 			error = sync? bhv_vop_iflush(vp, flags | FLUSH_LOG) : 0;
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 2d274b23ade..6ff0f4de163 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -120,7 +120,8 @@ xfs_Gqm_init(void)
 	 * Initialize the dquot hash tables.
 	 */
 	udqhash = kmem_zalloc_greedy(&hsize,
-				     XFS_QM_HASHSIZE_LOW, XFS_QM_HASHSIZE_HIGH,
+				     XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
+				     XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t),
 				     KM_SLEEP | KM_MAYFAIL | KM_LARGE);
 	gdqhash = kmem_zalloc(hsize, KM_SLEEP | KM_LARGE);
 	hsize /= sizeof(xfs_dqhash_t);
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
index a27a7c8c052..855da040864 100644
--- a/fs/xfs/support/debug.h
+++ b/fs/xfs/support/debug.h
@@ -34,10 +34,10 @@ extern void cmn_err(int, char *, ...)
 extern void assfail(char *expr, char *f, int l);
 
 #define ASSERT_ALWAYS(expr)	\
-	(unlikely((expr) != 0) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
+	(unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
 
 #ifndef DEBUG
-# define ASSERT(expr)	((void)0)
+#define ASSERT(expr)	((void)0)
 
 #ifndef STATIC
 # define STATIC static noinline
@@ -49,8 +49,10 @@ extern void assfail(char *expr, char *f, int l);
 
 #else /* DEBUG */
 
-# define ASSERT(expr)	ASSERT_ALWAYS(expr)
-# include <linux/random.h>
+#include <linux/random.h>
+
+#define ASSERT(expr)	\
+	(unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
 
 #ifndef STATIC
 # define STATIC noinline
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index aea37df4aa6..26d09e2e1a7 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1975,7 +1975,6 @@ xfs_da_do_buf(
 		error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED);
 		if (unlikely(error == EFSCORRUPTED)) {
 			if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
-				int	i;
 				cmn_err(CE_ALERT, "xfs_da_do_buf: bno %lld\n",
 					(long long)bno);
 				cmn_err(CE_ALERT, "dir: inode %lld\n",
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index ce2278611bb..36d8f6aa11a 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -350,9 +350,10 @@ _xfs_filestream_update_ag(
 /* xfs_fstrm_free_func(): callback for freeing cached stream items. */
 void
 xfs_fstrm_free_func(
-	xfs_ino_t	ino,
-	fstrm_item_t	*item)
+	unsigned long	ino,
+	void		*data)
 {
+	fstrm_item_t	*item  = (fstrm_item_t *)data;
 	xfs_inode_t	*ip = item->ip;
 	int ref;
 
@@ -438,7 +439,7 @@ xfs_filestream_mount(
 	grp_count = 10;
 
 	err = xfs_mru_cache_create(&mp->m_filestream, lifetime, grp_count,
-	                     (xfs_mru_cache_free_func_t)xfs_fstrm_free_func);
+	                     xfs_fstrm_free_func);
 
 	return err;
 }
@@ -467,8 +468,7 @@ void
 xfs_filestream_flush(
 	xfs_mount_t	*mp)
 {
-	/* point in time flush, so keep the reaper running */
-	xfs_mru_cache_flush(mp->m_filestream, 1);
+	xfs_mru_cache_flush(mp->m_filestream);
 }
 
 /*
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 9d4c4fbeb3e..9bfb69e1e88 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2185,13 +2185,13 @@ xlog_state_do_callback(
 			}
 			cb = iclog->ic_callback;
 
-			while (cb != 0) {
+			while (cb) {
 				iclog->ic_callback_tail = &(iclog->ic_callback);
 				iclog->ic_callback = NULL;
 				LOG_UNLOCK(log, s);
 
 				/* perform callbacks in the order given */
-				for (; cb != 0; cb = cb_next) {
+				for (; cb; cb = cb_next) {
 					cb_next = cb->cb_next;
 					cb->cb_func(cb->cb_arg, aborted);
 				}
@@ -2202,7 +2202,7 @@ xlog_state_do_callback(
 			loopdidcallbacks++;
 			funcdidcallbacks++;
 
-			ASSERT(iclog->ic_callback == 0);
+			ASSERT(iclog->ic_callback == NULL);
 			if (!(iclog->ic_state & XLOG_STATE_IOERROR))
 				iclog->ic_state = XLOG_STATE_DIRTY;
 
@@ -3242,10 +3242,10 @@ xlog_ticket_put(xlog_t		*log,
 #else
 	/* When we debug, it is easier if tickets are cycled */
 	ticket->t_next     = NULL;
-	if (log->l_tail != 0) {
+	if (log->l_tail) {
 		log->l_tail->t_next = ticket;
 	} else {
-		ASSERT(log->l_freelist == 0);
+		ASSERT(log->l_freelist == NULL);
 		log->l_freelist = ticket;
 	}
 	log->l_tail	    = ticket;
@@ -3463,7 +3463,7 @@ xlog_verify_iclog(xlog_t	 *log,
 	s = LOG_LOCK(log);
 	icptr = log->l_iclog;
 	for (i=0; i < log->l_iclog_bufs; i++) {
-		if (icptr == 0)
+		if (icptr == NULL)
 			xlog_panic("xlog_verify_iclog: invalid ptr");
 		icptr = icptr->ic_next;
 	}
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index fddbb091a86..8ae6e8e5f3d 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1366,7 +1366,7 @@ xlog_recover_add_to_cont_trans(
 	int			old_len;
 
 	item = trans->r_itemq;
-	if (item == 0) {
+	if (item == NULL) {
 		/* finish copying rest of trans header */
 		xlog_recover_add_item(&trans->r_itemq);
 		ptr = (xfs_caddr_t) &trans->r_theader +
@@ -1412,7 +1412,7 @@ xlog_recover_add_to_trans(
 	if (!len)
 		return 0;
 	item = trans->r_itemq;
-	if (item == 0) {
+	if (item == NULL) {
 		ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC);
 		if (len == sizeof(xfs_trans_header_t))
 			xlog_recover_add_item(&trans->r_itemq);
@@ -1467,12 +1467,12 @@ xlog_recover_unlink_tid(
 	xlog_recover_t		*tp;
 	int			found = 0;
 
-	ASSERT(trans != 0);
+	ASSERT(trans != NULL);
 	if (trans == *q) {
 		*q = (*q)->r_next;
 	} else {
 		tp = *q;
-		while (tp != 0) {
+		while (tp) {
 			if (tp->r_next == trans) {
 				found = 1;
 				break;
@@ -1495,7 +1495,7 @@ xlog_recover_insert_item_backq(
 	xlog_recover_item_t	**q,
 	xlog_recover_item_t	*item)
 {
-	if (*q == 0) {
+	if (*q == NULL) {
 		item->ri_prev = item->ri_next = item;
 		*q = item;
 	} else {
@@ -1899,7 +1899,7 @@ xlog_recover_do_reg_buffer(
 			break;
 		nbits = xfs_contig_bits(data_map, map_size, bit);
 		ASSERT(nbits > 0);
-		ASSERT(item->ri_buf[i].i_addr != 0);
+		ASSERT(item->ri_buf[i].i_addr != NULL);
 		ASSERT(item->ri_buf[i].i_len % XFS_BLI_CHUNK == 0);
 		ASSERT(XFS_BUF_COUNT(bp) >=
 		       ((uint)bit << XFS_BLI_SHIFT)+(nbits<<XFS_BLI_SHIFT));
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 7deb9e3cbbd..e0b358c1c53 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -206,8 +206,11 @@ _xfs_mru_cache_list_insert(
 	 */
 	if (!_xfs_mru_cache_migrate(mru, now)) {
 		mru->time_zero = now;
-		if (!mru->next_reap)
-			mru->next_reap = mru->grp_count * mru->grp_time;
+		if (!mru->queued) {
+			mru->queued = 1;
+			queue_delayed_work(xfs_mru_reap_wq, &mru->work,
+			                   mru->grp_count * mru->grp_time);
+		}
 	} else {
 		grp = (now - mru->time_zero) / mru->grp_time;
 		grp = (mru->lru_grp + grp) % mru->grp_count;
@@ -271,29 +274,26 @@ _xfs_mru_cache_reap(
 	struct work_struct	*work)
 {
 	xfs_mru_cache_t		*mru = container_of(work, xfs_mru_cache_t, work.work);
-	unsigned long		now;
+	unsigned long		now, next;
 
 	ASSERT(mru && mru->lists);
 	if (!mru || !mru->lists)
 		return;
 
 	mutex_spinlock(&mru->lock);
-	now = jiffies;
-	if (mru->reap_all ||
-	    (mru->next_reap && time_after(now, mru->next_reap))) {
-		if (mru->reap_all)
-			now += mru->grp_count * mru->grp_time * 2;
-		mru->next_reap = _xfs_mru_cache_migrate(mru, now);
-		_xfs_mru_cache_clear_reap_list(mru);
+	next = _xfs_mru_cache_migrate(mru, jiffies);
+	_xfs_mru_cache_clear_reap_list(mru);
+
+	mru->queued = next;
+	if ((mru->queued > 0)) {
+		now = jiffies;
+		if (next <= now)
+			next = 0;
+		else
+			next -= now;
+		queue_delayed_work(xfs_mru_reap_wq, &mru->work, next);
 	}
 
-	/*
-	 * the process that triggered the reap_all is responsible
-	 * for restating the periodic reap if it is required.
-	 */
-	if (!mru->reap_all)
-		queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
-	mru->reap_all = 0;
 	mutex_spinunlock(&mru->lock, 0);
 }
 
@@ -352,7 +352,7 @@ xfs_mru_cache_create(
 
 	/* An extra list is needed to avoid reaping up to a grp_time early. */
 	mru->grp_count = grp_count + 1;
-	mru->lists = kmem_alloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP);
+	mru->lists = kmem_zalloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP);
 
 	if (!mru->lists) {
 		err = ENOMEM;
@@ -374,11 +374,6 @@ xfs_mru_cache_create(
 	mru->grp_time  = grp_time;
 	mru->free_func = free_func;
 
-	/* start up the reaper event */
-	mru->next_reap = 0;
-	mru->reap_all = 0;
-	queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
-
 	*mrup = mru;
 
 exit:
@@ -394,35 +389,25 @@ exit:
  * Call xfs_mru_cache_flush() to flush out all cached entries, calling their
  * free functions as they're deleted.  When this function returns, the caller is
  * guaranteed that all the free functions for all the elements have finished
- * executing.
- *
- * While we are flushing, we stop the periodic reaper event from triggering.
- * Normally, we want to restart this periodic event, but if we are shutting
- * down the cache we do not want it restarted. hence the restart parameter
- * where 0 = do not restart reaper and 1 = restart reaper.
+ * executing and the reaper is not running.
  */
 void
 xfs_mru_cache_flush(
-	xfs_mru_cache_t		*mru,
-	int			restart)
+	xfs_mru_cache_t		*mru)
 {
 	if (!mru || !mru->lists)
 		return;
 
-	cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work);
-
 	mutex_spinlock(&mru->lock);
-	mru->reap_all = 1;
-	mutex_spinunlock(&mru->lock, 0);
+	if (mru->queued) {
+		mutex_spinunlock(&mru->lock, 0);
+		cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work);
+		mutex_spinlock(&mru->lock);
+	}
 
-	queue_work(xfs_mru_reap_wq, &mru->work.work);
-	flush_workqueue(xfs_mru_reap_wq);
+	_xfs_mru_cache_migrate(mru, jiffies + mru->grp_count * mru->grp_time);
+	_xfs_mru_cache_clear_reap_list(mru);
 
-	mutex_spinlock(&mru->lock);
-	WARN_ON_ONCE(mru->reap_all != 0);
-	mru->reap_all = 0;
-	if (restart)
-		queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
 	mutex_spinunlock(&mru->lock, 0);
 }
 
@@ -433,8 +418,7 @@ xfs_mru_cache_destroy(
 	if (!mru || !mru->lists)
 		return;
 
-	/* we don't want the reaper to restart here */
-	xfs_mru_cache_flush(mru, 0);
+	xfs_mru_cache_flush(mru);
 
 	kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists));
 	kmem_free(mru, sizeof(*mru));
diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h
index 624fd10ee8e..dd58ea1bbeb 100644
--- a/fs/xfs/xfs_mru_cache.h
+++ b/fs/xfs/xfs_mru_cache.h
@@ -32,11 +32,9 @@ typedef struct xfs_mru_cache
 	unsigned int		grp_time;  /* Time period spanned by grps.  */
 	unsigned int		lru_grp;   /* Group containing time zero.   */
 	unsigned long		time_zero; /* Time first element was added. */
-	unsigned long		next_reap; /* Time that the reaper should
-					      next do something. */
-	unsigned int		reap_all;  /* if set, reap all lists */
 	xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */
 	struct delayed_work	work;      /* Workqueue data for reaping.   */
+	unsigned int		queued;	   /* work has been queued */
 } xfs_mru_cache_t;
 
 int xfs_mru_cache_init(void);
@@ -44,7 +42,7 @@ void xfs_mru_cache_uninit(void);
 int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms,
 			     unsigned int grp_count,
 			     xfs_mru_cache_free_func_t free_func);
-void xfs_mru_cache_flush(xfs_mru_cache_t *mru, int restart);
+void xfs_mru_cache_flush(xfs_mru_cache_t *mru);
 void xfs_mru_cache_destroy(struct xfs_mru_cache *mru);
 int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key,
 				void *value);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 1a5ad8cd97b..60345922990 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1082,6 +1082,9 @@ xfs_fsync(
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 		return XFS_ERROR(EIO);
 
+	if (flag & FSYNC_DATA)
+		filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping);
+
 	/*
 	 * We always need to make sure that the required inode state
 	 * is safe on disk.  The vnode might be clean but because
@@ -3769,12 +3772,16 @@ xfs_inode_flush(
 			sync_lsn = log->l_last_sync_lsn;
 			GRANT_UNLOCK(log, s);
 
-			if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) <= 0))
-				return 0;
+			if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) > 0)) {
+				if (flags & FLUSH_SYNC)
+					log_flags |= XFS_LOG_SYNC;
+				error = xfs_log_force(mp, iip->ili_last_lsn, log_flags);
+				if (error)
+					return error;
+			}
 
-			if (flags & FLUSH_SYNC)
-				log_flags |= XFS_LOG_SYNC;
-			return xfs_log_force(mp, iip->ili_last_lsn, log_flags);
+			if (ip->i_update_core == 0)
+				return 0;
 		}
 	}
 
@@ -3788,9 +3795,6 @@ xfs_inode_flush(
 	if (flags & FLUSH_INODE) {
 		int	flush_flags;
 
-		if (xfs_ipincount(ip))
-			return EAGAIN;
-
 		if (flags & FLUSH_SYNC) {
 			xfs_ilock(ip, XFS_ILOCK_SHARED);
 			xfs_iflock(ip);
author	David Woodhouse <dwmw2@infradead.org>	2007-10-13 14:43:54 +0100
committer	David Woodhouse <dwmw2@infradead.org>	2007-10-13 14:43:54 +0100
commit	b160292cc216a50fd0cd386b0bda2cd48352c73b (patch)
tree	ef07cf98f91353ee4c9ec1e1ca7a2a5d9d4b538a /fs
parent	b37bde147890c8fea8369a5a4e230dabdea4ebfb (diff)
parent	bbf25010f1a6b761914430f5fca081ec8c7accd1 (diff)