From 0a305e496059a113f93bdd3ad27a5aaa917fe34d Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Wed, 6 Jun 2012 11:17:59 +0100
Subject: GFS2: Extend the life of the reservations

This patch lengthens the lifespan of the reservations structure for
inodes. Before, they were allocated and deallocated for every write
operation. With this patch, they are allocated when the first write
occurs, and deallocated when the last process closes the file.
It's more efficient to do it this way because it saves GFS2 a lot of
unnecessary allocates and frees. It also gives us more flexibility
for the future: (1) we can now fold the qadata structure back into
the structure and save those alloc/frees, (2) we can use this for
multi-block reservations.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/aops.c  |  2 +-
 fs/gfs2/file.c  | 23 ++++++++++++++++---
 fs/gfs2/inode.c | 23 ++++++++++++++++++-
 fs/gfs2/quota.c | 13 +++++++++--
 fs/gfs2/rgrp.c  | 68 +++++++++++++++++++++++++++++++--------------------------
 fs/gfs2/rgrp.h  | 10 +++++++++
 fs/gfs2/super.c |  2 ++
 fs/gfs2/trans.h |  2 +-
 8 files changed, 104 insertions(+), 39 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index e80a464850c..aba77b5720b 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -878,7 +878,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
 	brelse(dibh);
 failed:
 	gfs2_trans_end(sdp);
-	if (ip->i_res)
+	if (gfs2_mb_reserved(ip))
 		gfs2_inplace_release(ip);
 	if (qa) {
 		gfs2_quota_unlock(ip);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 31b199f6efc..37906174d41 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -376,6 +376,10 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	 */
 	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
 
+	ret = gfs2_rs_alloc(ip);
+	if (ret)
+		return ret;
+
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
 	ret = gfs2_glock_nq(&gh);
 	if (ret)
@@ -569,10 +573,15 @@ static int gfs2_release(struct inode *inode, struct file *file)
 {
 	struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
 	struct gfs2_file *fp;
+	struct gfs2_inode *ip = GFS2_I(inode);
 
 	fp = file->private_data;
 	file->private_data = NULL;
 
+	if ((file->f_mode & FMODE_WRITE) && ip->i_res &&
+	    (atomic_read(&inode->i_writecount) == 1))
+		gfs2_rs_delete(ip);
+
 	if (gfs2_assert_warn(sdp, fp))
 		return -EIO;
 
@@ -653,12 +662,16 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 				   unsigned long nr_segs, loff_t pos)
 {
 	struct file *file = iocb->ki_filp;
+	struct dentry *dentry = file->f_dentry;
+	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
+	int ret;
+
+	ret = gfs2_rs_alloc(ip);
+	if (ret)
+		return ret;
 
 	if (file->f_flags & O_APPEND) {
-		struct dentry *dentry = file->f_dentry;
-		struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
 		struct gfs2_holder gh;
-		int ret;
 
 		ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
 		if (ret)
@@ -774,6 +787,10 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
 	if (bytes == 0)
 		bytes = sdp->sd_sb.sb_bsize;
 
+	error = gfs2_rs_alloc(ip);
+	if (error)
+		return error;
+
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
 	error = gfs2_glock_nq(&ip->i_gh);
 	if (unlikely(error))
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index a9ba2444e07..2a1b4b5a648 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -667,6 +667,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	if (!name->len || name->len > GFS2_FNAMESIZE)
 		return -ENAMETOOLONG;
 
+	error = gfs2_rs_alloc(dip);
+	if (error)
+		return error;
+
 	error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
 	if (error)
 		goto fail;
@@ -704,6 +708,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	if (error)
 		goto fail_gunlock2;
 
+	/* the new inode needs a reservation so it can allocate xattrs. */
+	error = gfs2_rs_alloc(GFS2_I(inode));
+	if (error)
+		goto fail_gunlock2;
+
 	error = gfs2_acl_create(dip, inode);
 	if (error)
 		goto fail_gunlock2;
@@ -722,7 +731,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	gfs2_trans_end(sdp);
 	/* Check if we reserved space in the rgrp. Function link_dinode may
 	   not, depending on whether alloc is required. */
-	if (dip->i_res)
+	if (gfs2_mb_reserved(dip))
 		gfs2_inplace_release(dip);
 	gfs2_quota_unlock(dip);
 	gfs2_qadata_put(dip);
@@ -819,6 +828,10 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 	if (S_ISDIR(inode->i_mode))
 		return -EPERM;
 
+	error = gfs2_rs_alloc(dip);
+	if (error)
+		return error;
+
 	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
 
@@ -1234,6 +1247,10 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 	if (error)
 		return error;
 
+	error = gfs2_rs_alloc(ndip);
+	if (error)
+		return error;
+
 	if (odip != ndip) {
 		error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
 					   0, &r_gh);
@@ -1644,6 +1661,10 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
 	struct gfs2_holder i_gh;
 	int error;
 
+	error = gfs2_rs_alloc(ip);
+	if (error)
+		return error;
+
 	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
 	if (error)
 		return error;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index b97178e7d39..197cc2dade7 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -764,6 +764,10 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
 	unsigned int nalloc = 0, blocks;
 	int error;
 
+	error = gfs2_rs_alloc(ip);
+	if (error)
+		return error;
+
 	gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
 			      &data_blocks, &ind_blocks);
 
@@ -1549,10 +1553,14 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
 	if (error)
 		return error;
 
+	error = gfs2_rs_alloc(ip);
+	if (error)
+		goto out_put;
+
 	mutex_lock(&ip->i_inode.i_mutex);
 	error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE, 0, &q_gh);
 	if (error)
-		goto out_put;
+		goto out_unlockput;
 	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
 	if (error)
 		goto out_q;
@@ -1609,8 +1617,9 @@ out_i:
 	gfs2_glock_dq_uninit(&i_gh);
 out_q:
 	gfs2_glock_dq_uninit(&q_gh);
-out_put:
+out_unlockput:
 	mutex_unlock(&ip->i_inode.i_mutex);
+out_put:
 	qd_put(qd);
 	return error;
 }
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index f74fb9bd197..e944fefbc9a 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -417,6 +417,39 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd)
 	}
 }
 
+/**
+ * gfs2_rs_alloc - make sure we have a reservation assigned to the inode
+ * @ip: the inode for this reservation
+ */
+int gfs2_rs_alloc(struct gfs2_inode *ip)
+{
+	int error = 0;
+
+	down_write(&ip->i_rw_mutex);
+	if (!ip->i_res) {
+		ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
+		if (!ip->i_res)
+			error = -ENOMEM;
+	}
+	up_write(&ip->i_rw_mutex);
+	return error;
+}
+
+/**
+ * gfs2_rs_delete - delete a reservation
+ * @ip: The inode for this reservation
+ *
+ */
+void gfs2_rs_delete(struct gfs2_inode *ip)
+{
+	down_write(&ip->i_rw_mutex);
+	if (ip->i_res) {
+		kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
+		ip->i_res = NULL;
+	}
+	up_write(&ip->i_rw_mutex);
+}
+
 void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
 {
 	struct rb_node *n;
@@ -992,22 +1025,6 @@ struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip)
 	return ip->i_qadata;
 }
 
-/**
- * gfs2_blkrsv_get - get the struct gfs2_blkreserv structure for an inode
- * @ip: the incore GFS2 inode structure
- *
- * Returns: the struct gfs2_qadata
- */
-
-static int gfs2_blkrsv_get(struct gfs2_inode *ip)
-{
-	BUG_ON(ip->i_res != NULL);
-	ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
-	if (!ip->i_res)
-		return -ENOMEM;
-	return 0;
-}
-
 /**
  * try_rgrp_fit - See if a given reservation will fit in a given RG
  * @rgd: the RG data
@@ -1162,13 +1179,6 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
 	return -ENOSPC;
 }
 
-static void gfs2_blkrsv_put(struct gfs2_inode *ip)
-{
-	BUG_ON(ip->i_res == NULL);
-	kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
-	ip->i_res = NULL;
-}
-
 /**
  * gfs2_inplace_reserve - Reserve space in the filesystem
  * @ip: the inode to reserve space for
@@ -1181,14 +1191,10 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_blkreserv *rs;
-	int error;
+	int error = 0;
 	u64 last_unlinked = NO_BLOCK;
 	int tries = 0;
 
-	error = gfs2_blkrsv_get(ip);
-	if (error)
-		return error;
-
 	rs = ip->i_res;
 	rs->rs_requested = requested;
 	if (gfs2_assert_warn(sdp, requested)) {
@@ -1213,7 +1219,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
 
 out:
 	if (error)
-		gfs2_blkrsv_put(ip);
+		rs->rs_requested = 0;
 	return error;
 }
 
@@ -1230,7 +1236,7 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
 
 	if (rs->rs_rgd_gh.gh_gl)
 		gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
-	gfs2_blkrsv_put(ip);
+	rs->rs_requested = 0;
 }
 
 /**
@@ -1496,7 +1502,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 	/* Only happens if there is a bug in gfs2, return something distinctive
 	 * to ensure that it is noticed.
 	 */
-	if (ip->i_res == NULL)
+	if (ip->i_res->rs_requested == 0)
 		return -ECANCELED;
 
 	rgd = ip->i_rgd;
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index b4b10f4de25..d9eda5f9ef2 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -43,6 +43,8 @@ extern void gfs2_inplace_release(struct gfs2_inode *ip);
 extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
 			     bool dinode, u64 *generation);
 
+extern int gfs2_rs_alloc(struct gfs2_inode *ip);
+extern void gfs2_rs_delete(struct gfs2_inode *ip);
 extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
 extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
 extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
@@ -68,4 +70,12 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
 				   const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed);
 extern int gfs2_fitrim(struct file *filp, void __user *argp);
 
+/* This is how to tell if a reservation is "inplace" reserved: */
+static inline int gfs2_mb_reserved(struct gfs2_inode *ip)
+{
+	if (ip->i_res && ip->i_res->rs_requested)
+		return 1;
+	return 0;
+}
+
 #endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 713e621c240..65578df2944 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1554,6 +1554,7 @@ out_unlock:
 out:
 	/* Case 3 starts here */
 	truncate_inode_pages(&inode->i_data, 0);
+	gfs2_rs_delete(ip);
 	clear_inode(inode);
 	gfs2_dir_hash_inval(ip);
 	ip->i_gl->gl_object = NULL;
@@ -1576,6 +1577,7 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
 		ip->i_flags = 0;
 		ip->i_gl = NULL;
 		ip->i_rgd = NULL;
+		ip->i_res = NULL;
 	}
 	return &ip->i_inode;
 }
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index 125d4572e1c..41f42cdccbb 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -31,7 +31,7 @@ struct gfs2_glock;
 static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip)
 {
 	const struct gfs2_blkreserv *rs = ip->i_res;
-	if (rs->rs_requested < ip->i_rgd->rd_length)
+	if (rs && rs->rs_requested < ip->i_rgd->rd_length)
 		return rs->rs_requested + 1;
 	return ip->i_rgd->rd_length;
 }
-- 
cgit v1.2.3-70-g09d2


From 5407e24229408d7586ee451a384fc13e4a2332be Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Fri, 18 May 2012 09:28:23 -0400
Subject: GFS2: Fold quota data into the reservations struct

This patch moves the ancillary quota data structures into the
block reservations structure. This saves GFS2 some time and
effort in allocating and deallocating the qadata structure.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/aops.c   | 16 ++-------------
 fs/gfs2/bmap.c   | 18 +++++------------
 fs/gfs2/dir.c    |  9 +--------
 fs/gfs2/file.c   | 22 ++++++---------------
 fs/gfs2/incore.h | 13 +++++--------
 fs/gfs2/inode.c  | 44 ++++--------------------------------------
 fs/gfs2/main.c   |  1 -
 fs/gfs2/quota.c  | 59 ++++++++++++++++++++++++++++----------------------------
 fs/gfs2/rgrp.c   | 19 ------------------
 fs/gfs2/rgrp.h   |  8 --------
 fs/gfs2/super.c  |  9 +--------
 fs/gfs2/xattr.c  | 24 ++---------------------
 12 files changed, 56 insertions(+), 186 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index aba77b5720b..d6526347d38 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -614,7 +614,6 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
 	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
 	int alloc_required;
 	int error = 0;
-	struct gfs2_qadata *qa = NULL;
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
 	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
 	struct page *page;
@@ -638,15 +637,9 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
 		gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks);
 
 	if (alloc_required) {
-		qa = gfs2_qadata_get(ip);
-		if (!qa) {
-			error = -ENOMEM;
-			goto out_unlock;
-		}
-
 		error = gfs2_quota_lock_check(ip);
 		if (error)
-			goto out_alloc_put;
+			goto out_unlock;
 
 		error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
 		if (error)
@@ -708,8 +701,6 @@ out_trans_fail:
 		gfs2_inplace_release(ip);
 out_qunlock:
 		gfs2_quota_unlock(ip);
-out_alloc_put:
-		gfs2_qadata_put(ip);
 	}
 out_unlock:
 	if (&ip->i_inode == sdp->sd_rindex) {
@@ -846,7 +837,6 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
 	struct buffer_head *dibh;
-	struct gfs2_qadata *qa = ip->i_qadata;
 	unsigned int from = pos & (PAGE_CACHE_SIZE - 1);
 	unsigned int to = from + len;
 	int ret;
@@ -880,10 +870,8 @@ failed:
 	gfs2_trans_end(sdp);
 	if (gfs2_mb_reserved(ip))
 		gfs2_inplace_release(ip);
-	if (qa) {
+	if (ip->i_res->rs_qa_qd_num)
 		gfs2_quota_unlock(ip);
-		gfs2_qadata_put(ip);
-	}
 	if (inode == sdp->sd_rindex) {
 		gfs2_glock_dq(&m_ip->i_gh);
 		gfs2_holder_uninit(&m_ip->i_gh);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index dab54099dd9..6d957a86482 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1045,12 +1045,13 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
 		lblock = (size - 1) >> sdp->sd_sb.sb_bsize_shift;
 
 	find_metapath(sdp, lblock, &mp, ip->i_height);
-	if (!gfs2_qadata_get(ip))
-		return -ENOMEM;
+	error = gfs2_rindex_update(sdp);
+	if (error)
+		return error;
 
 	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
 	if (error)
-		goto out;
+		return error;
 
 	while (height--) {
 		struct strip_mine sm;
@@ -1064,8 +1065,6 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
 
 	gfs2_quota_unhold(ip);
 
-out:
-	gfs2_qadata_put(ip);
 	return error;
 }
 
@@ -1167,19 +1166,14 @@ static int do_grow(struct inode *inode, u64 size)
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct buffer_head *dibh;
-	struct gfs2_qadata *qa = NULL;
 	int error;
 	int unstuff = 0;
 
 	if (gfs2_is_stuffed(ip) &&
 	    (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) {
-		qa = gfs2_qadata_get(ip);
-		if (qa == NULL)
-			return -ENOMEM;
-
 		error = gfs2_quota_lock_check(ip);
 		if (error)
-			goto do_grow_alloc_put;
+			return error;
 
 		error = gfs2_inplace_reserve(ip, 1);
 		if (error)
@@ -1214,8 +1208,6 @@ do_grow_release:
 		gfs2_inplace_release(ip);
 do_grow_qunlock:
 		gfs2_quota_unlock(ip);
-do_grow_alloc_put:
-		gfs2_qadata_put(ip);
 	}
 	return error;
 }
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 8aaeb07a07b..259b088cfc4 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1854,14 +1854,9 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
 	if (!ht)
 		return -ENOMEM;
 
-	if (!gfs2_qadata_get(dip)) {
-		error = -ENOMEM;
-		goto out;
-	}
-
 	error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
 	if (error)
-		goto out_put;
+		goto out;
 
 	/*  Count the number of leaves  */
 	bh = leaf_bh;
@@ -1942,8 +1937,6 @@ out_rg_gunlock:
 out_rlist:
 	gfs2_rlist_free(&rlist);
 	gfs2_quota_unhold(dip);
-out_put:
-	gfs2_qadata_put(dip);
 out:
 	kfree(ht);
 	return error;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 37906174d41..26e2905070e 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -366,7 +366,6 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	u64 pos = page->index << PAGE_CACHE_SHIFT;
 	unsigned int data_blocks, ind_blocks, rblocks;
 	struct gfs2_holder gh;
-	struct gfs2_qadata *qa;
 	loff_t size;
 	int ret;
 
@@ -397,14 +396,13 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 		goto out_unlock;
 	}
 
-	ret = -ENOMEM;
-	qa = gfs2_qadata_get(ip);
-	if (qa == NULL)
+	ret = gfs2_rindex_update(sdp);
+	if (ret)
 		goto out_unlock;
 
 	ret = gfs2_quota_lock_check(ip);
 	if (ret)
-		goto out_alloc_put;
+		goto out_unlock;
 	gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
 	ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
 	if (ret)
@@ -451,8 +449,6 @@ out_trans_fail:
 	gfs2_inplace_release(ip);
 out_quota_unlock:
 	gfs2_quota_unlock(ip);
-out_alloc_put:
-	gfs2_qadata_put(ip);
 out_unlock:
 	gfs2_glock_dq(&gh);
 out:
@@ -764,7 +760,6 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
 	struct gfs2_inode *ip = GFS2_I(inode);
 	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
 	loff_t bytes, max_bytes;
-	struct gfs2_qadata *qa;
 	int error;
 	const loff_t pos = offset;
 	const loff_t count = len;
@@ -804,15 +799,13 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
 			offset += bytes;
 			continue;
 		}
-		qa = gfs2_qadata_get(ip);
-		if (!qa) {
-			error = -ENOMEM;
+		error = gfs2_rindex_update(sdp);
+		if (error)
 			goto out_unlock;
-		}
 
 		error = gfs2_quota_lock_check(ip);
 		if (error)
-			goto out_alloc_put;
+			goto out_unlock;
 
 retry:
 		gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
@@ -852,7 +845,6 @@ retry:
 		offset += max_bytes;
 		gfs2_inplace_release(ip);
 		gfs2_quota_unlock(ip);
-		gfs2_qadata_put(ip);
 	}
 
 	if (error == 0)
@@ -863,8 +855,6 @@ out_trans_fail:
 	gfs2_inplace_release(ip);
 out_qunlock:
 	gfs2_quota_unlock(ip);
-out_alloc_put:
-	gfs2_qadata_put(ip);
 out_unlock:
 	gfs2_glock_dq(&ip->i_gh);
 out_uninit:
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 67fd6beffec..5cda51a3e3b 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -289,16 +289,14 @@ struct gfs2_glock {
 
 #define GFS2_MIN_LVB_SIZE 32	/* Min size of LVB that gfs2 supports */
 
-struct gfs2_qadata { /* quota allocation data */
-	/* Quota stuff */
-	struct gfs2_quota_data *qa_qd[2*MAXQUOTAS];
-	struct gfs2_holder qa_qd_ghs[2*MAXQUOTAS];
-	unsigned int qa_qd_num;
-};
-
 struct gfs2_blkreserv {
 	u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */
 	struct gfs2_holder rs_rgd_gh; /* Filled in by gfs2_inplace_reserve() */
+
+	/* ancillary quota stuff */
+	struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS];
+	struct gfs2_holder rs_qa_qd_ghs[2 * MAXQUOTAS];
+	unsigned int rs_qa_qd_num;
 };
 
 enum {
@@ -319,7 +317,6 @@ struct gfs2_inode {
 	struct gfs2_glock *i_gl; /* Move into i_gh? */
 	struct gfs2_holder i_iopen_gh;
 	struct gfs2_holder i_gh; /* for prepare/commit_write only */
-	struct gfs2_qadata *i_qadata; /* quota allocation data */
 	struct gfs2_blkreserv *i_res; /* resource group block reservation */
 	struct gfs2_rgrpd *i_rgd;
 	u64 i_goal;	/* goal block for allocations */
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 2a1b4b5a648..2b035e0959b 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -521,12 +521,10 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 	int error;
 
 	munge_mode_uid_gid(dip, &mode, &uid, &gid);
-	if (!gfs2_qadata_get(dip))
-		return -ENOMEM;
 
 	error = gfs2_quota_lock(dip, uid, gid);
 	if (error)
-		goto out;
+		return error;
 
 	error = gfs2_quota_check(dip, uid, gid);
 	if (error)
@@ -542,8 +540,6 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 
 out_quota:
 	gfs2_quota_unlock(dip);
-out:
-	gfs2_qadata_put(dip);
 	return error;
 }
 
@@ -551,15 +547,10 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
 		       struct gfs2_inode *ip)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
-	struct gfs2_qadata *qa;
 	int alloc_required;
 	struct buffer_head *dibh;
 	int error;
 
-	qa = gfs2_qadata_get(dip);
-	if (!qa)
-		return -ENOMEM;
-
 	error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
 	if (error)
 		goto fail;
@@ -611,7 +602,6 @@ fail_quota_locks:
 	gfs2_quota_unlock(dip);
 
 fail:
-	gfs2_qadata_put(dip);
 	return error;
 }
 
@@ -734,7 +724,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	if (gfs2_mb_reserved(dip))
 		gfs2_inplace_release(dip);
 	gfs2_quota_unlock(dip);
-	gfs2_qadata_put(dip);
 	mark_inode_dirty(inode);
 	gfs2_glock_dq_uninit_m(2, ghs);
 	d_instantiate(dentry, inode);
@@ -883,16 +872,9 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 	error = 0;
 
 	if (alloc_required) {
-		struct gfs2_qadata *qa = gfs2_qadata_get(dip);
-
-		if (!qa) {
-			error = -ENOMEM;
-			goto out_gunlock;
-		}
-
 		error = gfs2_quota_lock_check(dip);
 		if (error)
-			goto out_alloc;
+			goto out_gunlock;
 
 		error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres);
 		if (error)
@@ -935,9 +917,6 @@ out_ipres:
 out_gunlock_q:
 	if (alloc_required)
 		gfs2_quota_unlock(dip);
-out_alloc:
-	if (alloc_required)
-		gfs2_qadata_put(dip);
 out_gunlock:
 	gfs2_glock_dq(ghs + 1);
 out_child:
@@ -1374,16 +1353,9 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 		goto out_gunlock;
 
 	if (alloc_required) {
-		struct gfs2_qadata *qa = gfs2_qadata_get(ndip);
-
-		if (!qa) {
-			error = -ENOMEM;
-			goto out_gunlock;
-		}
-
 		error = gfs2_quota_lock_check(ndip);
 		if (error)
-			goto out_alloc;
+			goto out_gunlock;
 
 		error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres);
 		if (error)
@@ -1444,9 +1416,6 @@ out_ipreserv:
 out_gunlock_q:
 	if (alloc_required)
 		gfs2_quota_unlock(ndip);
-out_alloc:
-	if (alloc_required)
-		gfs2_qadata_put(ndip);
 out_gunlock:
 	while (x--) {
 		gfs2_glock_dq(ghs + x);
@@ -1607,12 +1576,9 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
 	if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
 		ogid = ngid = NO_QUOTA_CHANGE;
 
-	if (!gfs2_qadata_get(ip))
-		return -ENOMEM;
-
 	error = gfs2_quota_lock(ip, nuid, ngid);
 	if (error)
-		goto out_alloc;
+		return error;
 
 	if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
 		error = gfs2_quota_check(ip, nuid, ngid);
@@ -1638,8 +1604,6 @@ out_end_trans:
 	gfs2_trans_end(sdp);
 out_gunlock_q:
 	gfs2_quota_unlock(ip);
-out_alloc:
-	gfs2_qadata_put(ip);
 	return error;
 }
 
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 6cdb0f2a1b0..e04d0e09ee7 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -43,7 +43,6 @@ static void gfs2_init_inode_once(void *foo)
 	inode_init_once(&ip->i_inode);
 	init_rwsem(&ip->i_rw_mutex);
 	INIT_LIST_HEAD(&ip->i_trunc_list);
-	ip->i_qadata = NULL;
 	ip->i_res = NULL;
 	ip->i_hash_cache = NULL;
 }
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 197cc2dade7..7d1ede7b66f 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -494,11 +494,15 @@ static void qdsb_put(struct gfs2_quota_data *qd)
 int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	struct gfs2_qadata *qa = ip->i_qadata;
-	struct gfs2_quota_data **qd = qa->qa_qd;
+	struct gfs2_quota_data **qd;
 	int error;
 
-	if (gfs2_assert_warn(sdp, !qa->qa_qd_num) ||
+	if (ip->i_res == NULL)
+		gfs2_rs_alloc(ip);
+
+	qd = ip->i_res->rs_qa_qd;
+
+	if (gfs2_assert_warn(sdp, !ip->i_res->rs_qa_qd_num) ||
 	    gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)))
 		return -EIO;
 
@@ -508,20 +512,20 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
 	error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, qd);
 	if (error)
 		goto out;
-	qa->qa_qd_num++;
+	ip->i_res->rs_qa_qd_num++;
 	qd++;
 
 	error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, qd);
 	if (error)
 		goto out;
-	qa->qa_qd_num++;
+	ip->i_res->rs_qa_qd_num++;
 	qd++;
 
 	if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) {
 		error = qdsb_get(sdp, QUOTA_USER, uid, qd);
 		if (error)
 			goto out;
-		qa->qa_qd_num++;
+		ip->i_res->rs_qa_qd_num++;
 		qd++;
 	}
 
@@ -529,7 +533,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
 		error = qdsb_get(sdp, QUOTA_GROUP, gid, qd);
 		if (error)
 			goto out;
-		qa->qa_qd_num++;
+		ip->i_res->rs_qa_qd_num++;
 		qd++;
 	}
 
@@ -542,16 +546,17 @@ out:
 void gfs2_quota_unhold(struct gfs2_inode *ip)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	struct gfs2_qadata *qa = ip->i_qadata;
 	unsigned int x;
 
+	if (ip->i_res == NULL)
+		return;
 	gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags));
 
-	for (x = 0; x < qa->qa_qd_num; x++) {
-		qdsb_put(qa->qa_qd[x]);
-		qa->qa_qd[x] = NULL;
+	for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
+		qdsb_put(ip->i_res->rs_qa_qd[x]);
+		ip->i_res->rs_qa_qd[x] = NULL;
 	}
-	qa->qa_qd_num = 0;
+	ip->i_res->rs_qa_qd_num = 0;
 }
 
 static int sort_qd(const void *a, const void *b)
@@ -919,7 +924,6 @@ fail:
 int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	struct gfs2_qadata *qa = ip->i_qadata;
 	struct gfs2_quota_data *qd;
 	unsigned int x;
 	int error = 0;
@@ -932,15 +936,15 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
 	    sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
 		return 0;
 
-	sort(qa->qa_qd, qa->qa_qd_num, sizeof(struct gfs2_quota_data *),
-	     sort_qd, NULL);
+	sort(ip->i_res->rs_qa_qd, ip->i_res->rs_qa_qd_num,
+	     sizeof(struct gfs2_quota_data *), sort_qd, NULL);
 
-	for (x = 0; x < qa->qa_qd_num; x++) {
+	for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
 		int force = NO_FORCE;
-		qd = qa->qa_qd[x];
+		qd = ip->i_res->rs_qa_qd[x];
 		if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
 			force = FORCE;
-		error = do_glock(qd, force, &qa->qa_qd_ghs[x]);
+		error = do_glock(qd, force, &ip->i_res->rs_qa_qd_ghs[x]);
 		if (error)
 			break;
 	}
@@ -949,7 +953,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
 		set_bit(GIF_QD_LOCKED, &ip->i_flags);
 	else {
 		while (x--)
-			gfs2_glock_dq_uninit(&qa->qa_qd_ghs[x]);
+			gfs2_glock_dq_uninit(&ip->i_res->rs_qa_qd_ghs[x]);
 		gfs2_quota_unhold(ip);
 	}
 
@@ -994,7 +998,6 @@ static int need_sync(struct gfs2_quota_data *qd)
 
 void gfs2_quota_unlock(struct gfs2_inode *ip)
 {
-	struct gfs2_qadata *qa = ip->i_qadata;
 	struct gfs2_quota_data *qda[4];
 	unsigned int count = 0;
 	unsigned int x;
@@ -1002,14 +1005,14 @@ void gfs2_quota_unlock(struct gfs2_inode *ip)
 	if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags))
 		goto out;
 
-	for (x = 0; x < qa->qa_qd_num; x++) {
+	for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
 		struct gfs2_quota_data *qd;
 		int sync;
 
-		qd = qa->qa_qd[x];
+		qd = ip->i_res->rs_qa_qd[x];
 		sync = need_sync(qd);
 
-		gfs2_glock_dq_uninit(&qa->qa_qd_ghs[x]);
+		gfs2_glock_dq_uninit(&ip->i_res->rs_qa_qd_ghs[x]);
 
 		if (sync && qd_trylock(qd))
 			qda[count++] = qd;
@@ -1042,7 +1045,6 @@ static int print_message(struct gfs2_quota_data *qd, char *type)
 int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	struct gfs2_qadata *qa = ip->i_qadata;
 	struct gfs2_quota_data *qd;
 	s64 value;
 	unsigned int x;
@@ -1054,8 +1056,8 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
         if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
                 return 0;
 
-	for (x = 0; x < qa->qa_qd_num; x++) {
-		qd = qa->qa_qd[x];
+	for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
+		qd = ip->i_res->rs_qa_qd[x];
 
 		if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
 		      (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))))
@@ -1093,7 +1095,6 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
 void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
 		       u32 uid, u32 gid)
 {
-	struct gfs2_qadata *qa = ip->i_qadata;
 	struct gfs2_quota_data *qd;
 	unsigned int x;
 
@@ -1102,8 +1103,8 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
 	if (ip->i_diskflags & GFS2_DIF_SYSTEM)
 		return;
 
-	for (x = 0; x < qa->qa_qd_num; x++) {
-		qd = qa->qa_qd[x];
+	for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
+		qd = ip->i_res->rs_qa_qd[x];
 
 		if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
 		    (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) {
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index e944fefbc9a..9eca6a9cff8 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1006,25 +1006,6 @@ out:
 	return ret;
 }
 
-/**
- * gfs2_qadata_get - get the struct gfs2_qadata structure for an inode
- * @ip: the incore GFS2 inode structure
- *
- * Returns: the struct gfs2_qadata
- */
-
-struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip)
-{
-	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	int error;
-	BUG_ON(ip->i_qadata != NULL);
-	ip->i_qadata = kzalloc(sizeof(struct gfs2_qadata), GFP_NOFS);
-	error = gfs2_rindex_update(sdp);
-	if (error)
-		fs_warn(sdp, "rindex update returns %d\n", error);
-	return ip->i_qadata;
-}
-
 /**
  * try_rgrp_fit - See if a given reservation will fit in a given RG
  * @rgd: the RG data
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index d9eda5f9ef2..5d8314dbc89 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -29,14 +29,6 @@ extern void gfs2_free_clones(struct gfs2_rgrpd *rgd);
 extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh);
 extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
 
-extern struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip);
-static inline void gfs2_qadata_put(struct gfs2_inode *ip)
-{
-	BUG_ON(ip->i_qadata == NULL);
-	kfree(ip->i_qadata);
-	ip->i_qadata = NULL;
-}
-
 extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested);
 extern void gfs2_inplace_release(struct gfs2_inode *ip);
 
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 65578df2944..81fc76264ed 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1399,7 +1399,6 @@ static void gfs2_final_release_pages(struct gfs2_inode *ip)
 static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	struct gfs2_qadata *qa;
 	struct gfs2_rgrpd *rgd;
 	struct gfs2_holder gh;
 	int error;
@@ -1409,13 +1408,9 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
 		return -EIO;
 	}
 
-	qa = gfs2_qadata_get(ip);
-	if (!qa)
-		return -ENOMEM;
-
 	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
 	if (error)
-		goto out;
+		return error;
 
 	rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
 	if (!rgd) {
@@ -1443,8 +1438,6 @@ out_rg_gunlock:
 	gfs2_glock_dq_uninit(&gh);
 out_qs:
 	gfs2_quota_unhold(ip);
-out:
-	gfs2_qadata_put(ip);
 	return error;
 }
 
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 927f4df874a..523c0de0d80 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -325,13 +325,8 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
 			       struct gfs2_ea_header *ea,
 			       struct gfs2_ea_header *prev, int leave)
 {
-	struct gfs2_qadata *qa;
 	int error;
 
-	qa = gfs2_qadata_get(ip);
-	if (!qa)
-		return -ENOMEM;
-
 	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
 	if (error)
 		goto out_alloc;
@@ -340,7 +335,6 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
 
 	gfs2_quota_unhold(ip);
 out_alloc:
-	gfs2_qadata_put(ip);
 	return error;
 }
 
@@ -713,17 +707,12 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 			     unsigned int blks,
 			     ea_skeleton_call_t skeleton_call, void *private)
 {
-	struct gfs2_qadata *qa;
 	struct buffer_head *dibh;
 	int error;
 
-	qa = gfs2_qadata_get(ip);
-	if (!qa)
-		return -ENOMEM;
-
 	error = gfs2_quota_lock_check(ip);
 	if (error)
-		goto out;
+		return error;
 
 	error = gfs2_inplace_reserve(ip, blks);
 	if (error)
@@ -753,8 +742,6 @@ out_ipres:
 	gfs2_inplace_release(ip);
 out_gunlock_q:
 	gfs2_quota_unlock(ip);
-out:
-	gfs2_qadata_put(ip);
 	return error;
 }
 
@@ -1494,16 +1481,11 @@ out_gunlock:
 
 int gfs2_ea_dealloc(struct gfs2_inode *ip)
 {
-	struct gfs2_qadata *qa;
 	int error;
 
-	qa = gfs2_qadata_get(ip);
-	if (!qa)
-		return -ENOMEM;
-
 	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
 	if (error)
-		goto out_alloc;
+		return error;
 
 	error = ea_foreach(ip, ea_dealloc_unstuffed, NULL);
 	if (error)
@@ -1519,8 +1501,6 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip)
 
 out_quota:
 	gfs2_quota_unhold(ip);
-out_alloc:
-	gfs2_qadata_put(ip);
 	return error;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 23d0bb834e264f38335f19fe601564b8422431e7 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <steve@chygwyn.com>
Date: Mon, 28 May 2012 15:26:56 +0100
Subject: GFS2: Add "top dir" flag support

This patch adds support for the "top dir" flag. Currently this is unused
but a subsequent patch is planned which will add support for the
Orlov allocation policy when allocating subdirectories in a parent
with this flag set.

In order to ensure backward compatible behaviour, mkfs.gfs2 does
not currently tag the root directory with this flag, it must always be
set manually.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/file.c              | 4 ++++
 include/linux/gfs2_ondisk.h | 4 +++-
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 26e2905070e..6fbf3cbd974 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -142,6 +142,7 @@ static const u32 fsflags_to_gfs2[32] = {
 	[7] = GFS2_DIF_NOATIME,
 	[12] = GFS2_DIF_EXHASH,
 	[14] = GFS2_DIF_INHERIT_JDATA,
+	[17] = GFS2_DIF_TOPDIR,
 };
 
 static const u32 gfs2_to_fsflags[32] = {
@@ -150,6 +151,7 @@ static const u32 gfs2_to_fsflags[32] = {
 	[gfs2fl_AppendOnly] = FS_APPEND_FL,
 	[gfs2fl_NoAtime] = FS_NOATIME_FL,
 	[gfs2fl_ExHash] = FS_INDEX_FL,
+	[gfs2fl_TopLevel] = FS_TOPDIR_FL,
 	[gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL,
 };
 
@@ -203,6 +205,7 @@ void gfs2_set_inode_flags(struct inode *inode)
 			     GFS2_DIF_NOATIME|			\
 			     GFS2_DIF_SYNC|			\
 			     GFS2_DIF_SYSTEM|			\
+			     GFS2_DIF_TOPDIR|			\
 			     GFS2_DIF_INHERIT_JDATA)
 
 /**
@@ -298,6 +301,7 @@ static int gfs2_set_flags(struct file *filp, u32 __user *ptr)
 
 	gfsflags = fsflags_cvt(fsflags_to_gfs2, fsflags);
 	if (!S_ISDIR(inode->i_mode)) {
+		gfsflags &= ~GFS2_DIF_TOPDIR;
 		if (gfsflags & GFS2_DIF_INHERIT_JDATA)
 			gfsflags ^= (GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA);
 		return do_gfs2_set_flags(filp, gfsflags, ~0);
diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h
index fa98bdb073b..e8ccf6ff3b4 100644
--- a/include/linux/gfs2_ondisk.h
+++ b/include/linux/gfs2_ondisk.h
@@ -214,6 +214,7 @@ enum {
 	gfs2fl_NoAtime		= 7,
 	gfs2fl_Sync		= 8,
 	gfs2fl_System		= 9,
+	gfs2fl_TopLevel		= 10,
 	gfs2fl_TruncInProg	= 29,
 	gfs2fl_InheritDirectio	= 30,
 	gfs2fl_InheritJdata	= 31,
@@ -230,8 +231,9 @@ enum {
 #define GFS2_DIF_NOATIME		0x00000080
 #define GFS2_DIF_SYNC			0x00000100
 #define GFS2_DIF_SYSTEM			0x00000200 /* New in gfs2 */
+#define GFS2_DIF_TOPDIR			0x00000400 /* New in gfs2 */
 #define GFS2_DIF_TRUNC_IN_PROG		0x20000000 /* New in gfs2 */
-#define GFS2_DIF_INHERIT_DIRECTIO	0x40000000
+#define GFS2_DIF_INHERIT_DIRECTIO	0x40000000 /* only in gfs1 */
 #define GFS2_DIF_INHERIT_JDATA		0x80000000
 
 struct gfs2_dinode {
-- 
cgit v1.2.3-70-g09d2


From 1b8ba31a88c5115687095ca2a01bfcaecb489b5a Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <steve@chygwyn.com>
Date: Tue, 29 May 2012 10:47:51 +0100
Subject: GFS2: Fix error handling when reading an invalid block from the
 journal

When we read an invalid block from the journal, we should not call
withdraw, but simply print a message and return an error. It is
up to the caller to then handle that error. In the case of mount
that means a failed mount, rather than a withdraw (requiring a
reboot). In the case of recovering another nodes journal then
we return an error via the uevent.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/lops.c |  9 +++++++--
 fs/gfs2/util.h | 18 +++++++-----------
 2 files changed, 14 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 852c1be1dd3..8ff95a2d54e 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -401,9 +401,14 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
 		goto out;
 	set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
 	set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
-	gfs2_meta_check(sdp, bd->bd_bh);
-	gfs2_pin(sdp, bd->bd_bh);
 	mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
+	if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) {
+		printk(KERN_ERR
+		       "Attempting to add uninitialised block to journal (inplace block=%lld)\n",
+		       (unsigned long long)bd->bd_bh->b_blocknr);
+		BUG();
+	}
+	gfs2_pin(sdp, bd->bd_bh);
 	mh->__pad0 = cpu_to_be64(0);
 	mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
 	sdp->sd_log_num_buf++;
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 3586b0dd6aa..80535739ac7 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -79,23 +79,19 @@ int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
 		       const char *type, const char *function,
 		       char *file, unsigned int line);
 
-static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp,
-				    struct buffer_head *bh,
-				    const char *function,
-				    char *file, unsigned int line)
+static inline int gfs2_meta_check(struct gfs2_sbd *sdp,
+				    struct buffer_head *bh)
 {
 	struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
 	u32 magic = be32_to_cpu(mh->mh_magic);
-	if (unlikely(magic != GFS2_MAGIC))
-		return gfs2_meta_check_ii(sdp, bh, "magic number", function,
-					  file, line);
+	if (unlikely(magic != GFS2_MAGIC)) {
+		printk(KERN_ERR "GFS2: Magic number missing at %llu\n",
+		       (unsigned long long)bh->b_blocknr);
+		return -EIO;
+	}
 	return 0;
 }
 
-#define gfs2_meta_check(sdp, bh) \
-gfs2_meta_check_i((sdp), (bh), __func__, __FILE__, __LINE__)
-
-
 int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
 			   u16 type, u16 t,
 			   const char *function,
-- 
cgit v1.2.3-70-g09d2


From df5d2f5560a9c33129391a136ed9f0ac26abe69b Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 7 Jun 2012 13:30:16 +0100
Subject: GFS2: Increase buffer size for glocks and glstats debugfs files

As per Al Viro's suggestion, this increases the buffer size used
for these two files. This provides a speed up of slightly less than
8x (i.e. proportional to the buffer size) for cases when we have
large numbers of glocks.

Cc: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glock.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'fs')

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index dab2526071c..1c4cddf42a6 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1972,6 +1972,9 @@ static int gfs2_glocks_open(struct inode *inode, struct file *file)
 		struct seq_file *seq = file->private_data;
 		struct gfs2_glock_iter *gi = seq->private;
 		gi->sdp = inode->i_private;
+		seq->buf = kmalloc(8*PAGE_SIZE, GFP_KERNEL | __GFP_NOWARN);
+		if (seq->buf)
+			seq->size = 8*PAGE_SIZE;
 	}
 	return ret;
 }
@@ -1984,6 +1987,9 @@ static int gfs2_glstats_open(struct inode *inode, struct file *file)
 		struct seq_file *seq = file->private_data;
 		struct gfs2_glock_iter *gi = seq->private;
 		gi->sdp = inode->i_private;
+		seq->buf = kmalloc(8*PAGE_SIZE, GFP_KERNEL | __GFP_NOWARN);
+		if (seq->buf)
+			seq->size = 8*PAGE_SIZE;
 	}
 	return ret;
 }
-- 
cgit v1.2.3-70-g09d2


From ba1ddcb6ca0c46edd275790d1e4e2cfd6219ce19 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Fri, 8 Jun 2012 11:16:22 +0100
Subject: GFS2: Cache last hash bucket for glock seq_files

For the glocks and glstats seq_files, which are exposed via debugfs
we should cache the most recent hash bucket, along with the offset
into that bucket. This allows us to restart from that point, rather
than having to begin at the beginning each time.

This is an idea from Eric Dumazet, however I've slightly extended it
so that if the position from which we are due to start is at any
point beyond the last cached point, we start from the last cached
point, plus whatever is the appropriate offset. I don't really expect
people to be lseeking around these files, but if they did so with only
positive offsets, then we'd still get some of the benefit of using a
cached offset.

With my simple test of around 200k entries in the file, I'm seeing
an approx 10x speed up.

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glock.c | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 1c4cddf42a6..3ad8cb3eeb8 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -46,10 +46,12 @@
 #include "trace_gfs2.h"
 
 struct gfs2_glock_iter {
-	int hash;			/* hash bucket index         */
-	struct gfs2_sbd *sdp;		/* incore superblock         */
-	struct gfs2_glock *gl;		/* current glock struct      */
-	char string[512];		/* scratch space             */
+	int hash;			/* hash bucket index           */
+	unsigned nhash;			/* Index within current bucket */
+	struct gfs2_sbd *sdp;		/* incore superblock           */
+	struct gfs2_glock *gl;		/* current glock struct        */
+	loff_t last_pos;		/* last position               */
+	char string[512];		/* scratch space               */
 };
 
 typedef void (*glock_examiner) (struct gfs2_glock * gl);
@@ -950,7 +952,7 @@ void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
 	if (seq) {
 		struct gfs2_glock_iter *gi = seq->private;
 		vsprintf(gi->string, fmt, args);
-		seq_printf(seq, gi->string);
+		seq_puts(seq, gi->string);
 	} else {
 		vaf.fmt = fmt;
 		vaf.va = &args;
@@ -1854,8 +1856,14 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
 		gl = gi->gl;
 		if (gl) {
 			gi->gl = glock_hash_next(gl);
+			gi->nhash++;
 		} else {
+			if (gi->hash >= GFS2_GL_HASH_SIZE) {
+				rcu_read_unlock();
+				return 1;
+			}
 			gi->gl = glock_hash_chain(gi->hash);
+			gi->nhash = 0;
 		}
 		while (gi->gl == NULL) {
 			gi->hash++;
@@ -1864,6 +1872,7 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
 				return 1;
 			}
 			gi->gl = glock_hash_chain(gi->hash);
+			gi->nhash = 0;
 		}
 	/* Skip entries for other sb and dead entries */
 	} while (gi->sdp != gi->gl->gl_sbd || atomic_read(&gi->gl->gl_ref) == 0);
@@ -1876,7 +1885,12 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
 	struct gfs2_glock_iter *gi = seq->private;
 	loff_t n = *pos;
 
-	gi->hash = 0;
+	if (gi->last_pos <= *pos)
+		n = gi->nhash + (*pos - gi->last_pos);
+	else
+		gi->hash = 0;
+
+	gi->nhash = 0;
 	rcu_read_lock();
 
 	do {
@@ -1884,6 +1898,7 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
 			return NULL;
 	} while (n--);
 
+	gi->last_pos = *pos;
 	return gi->gl;
 }
 
@@ -1893,7 +1908,7 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
 	struct gfs2_glock_iter *gi = seq->private;
 
 	(*pos)++;
-
+	gi->last_pos = *pos;
 	if (gfs2_glock_iter_next(gi))
 		return NULL;
 
-- 
cgit v1.2.3-70-g09d2


From 90306c41dc3d8e5f12ecd0193dae99e0e7f6e896 Mon Sep 17 00:00:00 2001
From: Benjamin Marzinski <bmarzins@redhat.com>
Date: Tue, 29 May 2012 23:01:09 -0500
Subject: GFS2: Use lvbs for storing rgrp information with mount option

Instead of reading in the resource groups when gfs2 is checking
for free space to allocate from, gfs2 can store the necessary infromation
in the resource group's lvb.  Also, instead of searching for unlinked
inodes in every resource group that's checked for free space, gfs2 can
store the number of unlinked but inodes in the lvb, and only check for
unlinked inodes if it will find some.

The first time a resource group is locked, the lvb must initialized.
Since this involves counting the unlinked inodes in the resource group,
this takes a little extra time.  But after that, if the resource group
is locked with GL_SKIP, the buffer head won't be read in unless it's
actually needed.

Enabling the resource groups lvbs is done via the rgrplvb mount option.  If
this option isn't set, the lvbs will still be set and updated, but they won't
be verfied or used by the filesystem.  To safely turn on this option, all of
the nodes mounting the filesystem must be running code with this patch, and
the filesystem must have been completely unmounted since they were updated.

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glock.c             |   1 +
 fs/gfs2/incore.h            |   2 +
 fs/gfs2/rgrp.c              | 147 +++++++++++++++++++++++++++++++++++++++++---
 fs/gfs2/super.c             |  12 ++++
 include/linux/gfs2_ondisk.h |  10 +++
 5 files changed, 163 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 3ad8cb3eeb8..10ae1645d9a 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -769,6 +769,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
 	gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0;
 	gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0;
 	memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
+	memset(gl->gl_lvb, 0, 32 * sizeof(char));
 	gl->gl_lksb.sb_lvbptr = gl->gl_lvb;
 	gl->gl_tchange = jiffies;
 	gl->gl_object = NULL;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 5cda51a3e3b..dc730700b3b 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -89,6 +89,7 @@ struct gfs2_rgrpd {
 	u64 rd_igeneration;
 	struct gfs2_bitmap *rd_bits;
 	struct gfs2_sbd *rd_sbd;
+	struct gfs2_rgrp_lvb *rd_rgl;
 	u32 rd_last_alloc;
 	u32 rd_flags;
 #define GFS2_RDF_CHECK		0x10000000 /* check for unlinked inodes */
@@ -470,6 +471,7 @@ struct gfs2_args {
 	unsigned int ar_discard:1;		/* discard requests */
 	unsigned int ar_errors:2;               /* errors=withdraw | panic */
 	unsigned int ar_nobarrier:1;            /* do not send barriers */
+	unsigned int ar_rgrplvb:1;		/* use lvbs for rgrp info */
 	int ar_commit;				/* Commit interval */
 	int ar_statfs_quantum;			/* The fast statfs interval */
 	int ar_quota_quantum;			/* The quota interval */
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 9eca6a9cff8..3c6f7ed16a3 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -660,6 +660,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
 		goto fail;
 
 	rgd->rd_gl->gl_object = rgd;
+	rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lvb;
 	rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
 	if (rgd->rd_data > sdp->sd_max_rg_data)
 		sdp->sd_max_rg_data = rgd->rd_data;
@@ -769,9 +770,65 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
 	memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
 }
 
+static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
+{
+	struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
+	struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data;
+
+	if (rgl->rl_flags != str->rg_flags || rgl->rl_free != str->rg_free ||
+	    rgl->rl_dinodes != str->rg_dinodes ||
+	    rgl->rl_igeneration != str->rg_igeneration)
+		return 0;
+	return 1;
+}
+
+static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf)
+{
+	const struct gfs2_rgrp *str = buf;
+
+	rgl->rl_magic = cpu_to_be32(GFS2_MAGIC);
+	rgl->rl_flags = str->rg_flags;
+	rgl->rl_free = str->rg_free;
+	rgl->rl_dinodes = str->rg_dinodes;
+	rgl->rl_igeneration = str->rg_igeneration;
+	rgl->__pad = 0UL;
+}
+
+static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change)
+{
+	struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
+	u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change;
+	rgl->rl_unlinked = cpu_to_be32(unlinked);
+}
+
+static u32 count_unlinked(struct gfs2_rgrpd *rgd)
+{
+	struct gfs2_bitmap *bi;
+	const u32 length = rgd->rd_length;
+	const u8 *buffer = NULL;
+	u32 i, goal, count = 0;
+
+	for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) {
+		goal = 0;
+		buffer = bi->bi_bh->b_data + bi->bi_offset;
+		WARN_ON(!buffer_uptodate(bi->bi_bh));
+		while (goal < bi->bi_len * GFS2_NBBY) {
+			goal = gfs2_bitfit(buffer, bi->bi_len, goal,
+					   GFS2_BLKST_UNLINKED);
+			if (goal == BFITNOENT)
+				break;
+			count++;
+			goal++;
+		}
+	}
+
+	return count;
+}
+
+
 /**
- * gfs2_rgrp_go_lock - Read in a RG's header and bitmaps
- * @gh: The glock holder for the resource group
+ * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
+ * @rgd: the struct gfs2_rgrpd describing the RG to read in
  *
  * Read in all of a Resource Group's header and bitmap blocks.
  * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps.
@@ -779,9 +836,8 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
  * Returns: errno
  */
 
-int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
+int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
 {
-	struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
 	struct gfs2_sbd *sdp = rgd->rd_sbd;
 	struct gfs2_glock *gl = rgd->rd_gl;
 	unsigned int length = rgd->rd_length;
@@ -789,6 +845,9 @@ int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
 	unsigned int x, y;
 	int error;
 
+	if (rgd->rd_bits[0].bi_bh != NULL)
+		return 0;
+
 	for (x = 0; x < length; x++) {
 		bi = rgd->rd_bits + x;
 		error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
@@ -815,7 +874,20 @@ int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
 		rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
 		rgd->rd_free_clone = rgd->rd_free;
 	}
-
+	if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
+		rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
+		gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
+				     rgd->rd_bits[0].bi_bh->b_data);
+	}
+	else if (sdp->sd_args.ar_rgrplvb) {
+		if (!gfs2_rgrp_lvb_valid(rgd)){
+			gfs2_consist_rgrpd(rgd);
+			error = -EIO;
+			goto fail;
+		}
+		if (rgd->rd_rgl->rl_unlinked == 0)
+			rgd->rd_flags &= ~GFS2_RDF_CHECK;
+	}
 	return 0;
 
 fail:
@@ -829,6 +901,39 @@ fail:
 	return error;
 }
 
+int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
+{
+	u32 rl_flags;
+
+	if (rgd->rd_flags & GFS2_RDF_UPTODATE)
+		return 0;
+
+	if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
+		return gfs2_rgrp_bh_get(rgd);
+
+	rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
+	rl_flags &= ~GFS2_RDF_MASK;
+	rgd->rd_flags &= GFS2_RDF_MASK;
+	rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
+	if (rgd->rd_rgl->rl_unlinked == 0)
+		rgd->rd_flags &= ~GFS2_RDF_CHECK;
+	rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free);
+	rgd->rd_free_clone = rgd->rd_free;
+	rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes);
+	rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration);
+	return 0;
+}
+
+int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
+{
+	struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
+	struct gfs2_sbd *sdp = rgd->rd_sbd;
+
+	if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb)
+		return 0;
+	return gfs2_rgrp_bh_get((struct gfs2_rgrpd *)gh->gh_gl->gl_object);
+}
+
 /**
  * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get()
  * @gh: The glock holder for the resource group
@@ -842,8 +947,10 @@ void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
 
 	for (x = 0; x < length; x++) {
 		struct gfs2_bitmap *bi = rgd->rd_bits + x;
-		brelse(bi->bi_bh);
-		bi->bi_bh = NULL;
+		if (bi->bi_bh) {
+			brelse(bi->bi_bh);
+			bi->bi_bh = NULL;
+		}
 	}
 
 }
@@ -987,6 +1094,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
 				rgd->rd_flags |= GFS2_RGF_TRIMMED;
 				gfs2_trans_add_bh(rgd->rd_gl, bh, 1);
 				gfs2_rgrp_out(rgd, bh->b_data);
+				gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data);
 				gfs2_trans_end(sdp);
 			}
 		}
@@ -1116,6 +1224,9 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
 	int error, rg_locked, flags = LM_FLAG_TRY;
 	int loops = 0;
 
+	if (sdp->sd_args.ar_rgrplvb)
+		flags |= GL_SKIP;
+
 	if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal))
 		rgd = begin = ip->i_rgd;
 	else
@@ -1133,22 +1244,34 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
 		} else {
 			error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
 						   flags, &rs->rs_rgd_gh);
+			if (!error && sdp->sd_args.ar_rgrplvb) {
+				error = update_rgrp_lvb(rgd);
+				if (error) {
+					gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
+					return error;
+				}
+			}
 		}
 		switch (error) {
 		case 0:
 			if (try_rgrp_fit(rgd, ip)) {
+				if (sdp->sd_args.ar_rgrplvb)
+					gfs2_rgrp_bh_get(rgd);
 				ip->i_rgd = rgd;
 				return 0;
 			}
-			if (rgd->rd_flags & GFS2_RDF_CHECK)
+			if (rgd->rd_flags & GFS2_RDF_CHECK) {
+				if (sdp->sd_args.ar_rgrplvb)
+					gfs2_rgrp_bh_get(rgd);
 				try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
+			}
 			if (!rg_locked)
 				gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
 			/* fall through */
 		case GLR_TRYFAILED:
 			rgd = gfs2_rgrpd_get_next(rgd);
 			if (rgd == begin) {
-				flags = 0;
+				flags &= ~LM_FLAG_TRY;
 				loops++;
 			}
 			break;
@@ -1529,6 +1652,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
+	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
 
 	gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
 	if (dinode)
@@ -1575,6 +1699,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
 	rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
+	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
 
 	/* Directories keep their data in the metadata address space */
 	if (meta || ip->i_depth)
@@ -1611,6 +1736,8 @@ void gfs2_unlink_di(struct inode *inode)
 	trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
+	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
+	update_rgrp_lvb_unlinked(rgd, 1);
 }
 
 static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
@@ -1630,6 +1757,8 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
 
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
+	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
+	update_rgrp_lvb_unlinked(rgd, -1);
 
 	gfs2_statfs_change(sdp, 0, +1, -1);
 }
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 81fc76264ed..788068758f3 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -78,6 +78,8 @@ enum {
 	Opt_quota_quantum,
 	Opt_barrier,
 	Opt_nobarrier,
+	Opt_rgrplvb,
+	Opt_norgrplvb,
 	Opt_error,
 };
 
@@ -115,6 +117,8 @@ static const match_table_t tokens = {
 	{Opt_quota_quantum, "quota_quantum=%d"},
 	{Opt_barrier, "barrier"},
 	{Opt_nobarrier, "nobarrier"},
+	{Opt_rgrplvb, "rgrplvb"},
+	{Opt_norgrplvb, "norgrplvb"},
 	{Opt_error, NULL}
 };
 
@@ -267,6 +271,12 @@ int gfs2_mount_args(struct gfs2_args *args, char *options)
 		case Opt_nobarrier:
 			args->ar_nobarrier = 1;
 			break;
+		case Opt_rgrplvb:
+			args->ar_rgrplvb = 1;
+			break;
+		case Opt_norgrplvb:
+			args->ar_rgrplvb = 0;
+			break;
 		case Opt_error:
 		default:
 			printk(KERN_WARNING "GFS2: invalid mount option: %s\n", o);
@@ -1379,6 +1389,8 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
 		seq_printf(s, ",nobarrier");
 	if (test_bit(SDF_DEMOTE, &sdp->sd_flags))
 		seq_printf(s, ",demote_interface_used");
+	if (args->ar_rgrplvb)
+		seq_printf(s, ",rgrplvb");
 	return 0;
 }
 
diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h
index e8ccf6ff3b4..b2de1f9a88d 100644
--- a/include/linux/gfs2_ondisk.h
+++ b/include/linux/gfs2_ondisk.h
@@ -170,6 +170,16 @@ struct gfs2_rindex {
 #define GFS2_RGF_NOALLOC	0x00000008
 #define GFS2_RGF_TRIMMED	0x00000010
 
+struct gfs2_rgrp_lvb {
+	__be32 rl_magic;
+	__be32 rl_flags;
+	__be32 rl_free;
+	__be32 rl_dinodes;
+	__be64 rl_igeneration;
+	__be32 rl_unlinked;
+	__be32 __pad;
+};
+
 struct gfs2_rgrp {
 	struct gfs2_meta_header rg_header;
 
-- 
cgit v1.2.3-70-g09d2


From a4808147dcf1ecf2f76212a78fd9692b3c112f47 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 11 Jun 2012 13:16:35 +0100
Subject: seq_file: Add seq_vprintf function and export it

The existing seq_printf function is rewritten in terms of the new
seq_vprintf which is also exported to modules. This allows GFS2
(and potentially other seq_file users) to have a vprintf based
interface and to avoid an extra copy into a temporary buffer in
some cases.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Al Viro <viro@ZenIV.linux.org.uk>
---
 fs/seq_file.c            | 18 ++++++++++++++----
 include/linux/seq_file.h |  1 +
 2 files changed, 15 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/seq_file.c b/fs/seq_file.c
index 0cbd0494b79..14cf9de1dbe 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -385,15 +385,12 @@ int seq_escape(struct seq_file *m, const char *s, const char *esc)
 }
 EXPORT_SYMBOL(seq_escape);
 
-int seq_printf(struct seq_file *m, const char *f, ...)
+int seq_vprintf(struct seq_file *m, const char *f, va_list args)
 {
-	va_list args;
 	int len;
 
 	if (m->count < m->size) {
-		va_start(args, f);
 		len = vsnprintf(m->buf + m->count, m->size - m->count, f, args);
-		va_end(args);
 		if (m->count + len < m->size) {
 			m->count += len;
 			return 0;
@@ -402,6 +399,19 @@ int seq_printf(struct seq_file *m, const char *f, ...)
 	seq_set_overflow(m);
 	return -1;
 }
+EXPORT_SYMBOL(seq_vprintf);
+
+int seq_printf(struct seq_file *m, const char *f, ...)
+{
+	int ret;
+	va_list args;
+
+	va_start(args, f);
+	ret = seq_vprintf(m, f, args);
+	va_end(args);
+
+	return ret;
+}
 EXPORT_SYMBOL(seq_printf);
 
 /**
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index fc61854f622..83c44eefe69 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -86,6 +86,7 @@ int seq_puts(struct seq_file *m, const char *s);
 int seq_write(struct seq_file *seq, const void *data, size_t len);
 
 __printf(2, 3) int seq_printf(struct seq_file *, const char *, ...);
+__printf(2, 0) int seq_vprintf(struct seq_file *, const char *, va_list args);
 
 int seq_path(struct seq_file *, const struct path *, const char *);
 int seq_dentry(struct seq_file *, struct dentry *, const char *);
-- 
cgit v1.2.3-70-g09d2


From 1bb49303b7a82eb9bce0595087523343683abdf0 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 11 Jun 2012 13:26:50 +0100
Subject: GFS2: Use seq_vprintf for glocks debugfs file

Make use of the newly added seq_vprintf() function.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Al Viro <viro@ZenIV.linux.org.uk>
---
 fs/gfs2/glock.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 10ae1645d9a..4d5d63d9d2c 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -51,7 +51,6 @@ struct gfs2_glock_iter {
 	struct gfs2_sbd *sdp;		/* incore superblock           */
 	struct gfs2_glock *gl;		/* current glock struct        */
 	loff_t last_pos;		/* last position               */
-	char string[512];		/* scratch space               */
 };
 
 typedef void (*glock_examiner) (struct gfs2_glock * gl);
@@ -951,9 +950,7 @@ void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
 	va_start(args, fmt);
 
 	if (seq) {
-		struct gfs2_glock_iter *gi = seq->private;
-		vsprintf(gi->string, fmt, args);
-		seq_puts(seq, gi->string);
+		seq_vprintf(seq, fmt, args);
 	} else {
 		vaf.fmt = fmt;
 		vaf.va = &args;
-- 
cgit v1.2.3-70-g09d2


From 0fe2f1e929ecabf834f4af2ffd300fe70700f4b3 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 11 Jun 2012 13:49:47 +0100
Subject: GFS2: Size seq_file buffer more carefully

This places a limit on the buffer size for archs with larger
PAGE_SIZE.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 fs/gfs2/glock.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 4d5d63d9d2c..1ed81f40da0 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1977,6 +1977,8 @@ static const struct seq_operations gfs2_sbstats_seq_ops = {
 	.show  = gfs2_sbstats_seq_show,
 };
 
+#define GFS2_SEQ_GOODSIZE min(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER, 65536UL)
+
 static int gfs2_glocks_open(struct inode *inode, struct file *file)
 {
 	int ret = seq_open_private(file, &gfs2_glock_seq_ops,
@@ -1985,9 +1987,9 @@ static int gfs2_glocks_open(struct inode *inode, struct file *file)
 		struct seq_file *seq = file->private_data;
 		struct gfs2_glock_iter *gi = seq->private;
 		gi->sdp = inode->i_private;
-		seq->buf = kmalloc(8*PAGE_SIZE, GFP_KERNEL | __GFP_NOWARN);
+		seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
 		if (seq->buf)
-			seq->size = 8*PAGE_SIZE;
+			seq->size = GFS2_SEQ_GOODSIZE;
 	}
 	return ret;
 }
@@ -2000,9 +2002,9 @@ static int gfs2_glstats_open(struct inode *inode, struct file *file)
 		struct seq_file *seq = file->private_data;
 		struct gfs2_glock_iter *gi = seq->private;
 		gi->sdp = inode->i_private;
-		seq->buf = kmalloc(8*PAGE_SIZE, GFP_KERNEL | __GFP_NOWARN);
+		seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
 		if (seq->buf)
-			seq->size = 8*PAGE_SIZE;
+			seq->size = GFS2_SEQ_GOODSIZE;
 	}
 	return ret;
 }
-- 
cgit v1.2.3-70-g09d2


From 0d515210b6969ecfc161f71a4515831d9a6e58f4 Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Wed, 13 Jun 2012 10:27:41 -0400
Subject: GFS2: Add kobject release method

This patch adds a kobject release function that properly maintains
the kobject use count, so that accesses to the sysfs files do not
cause an access to freed kernel memory after an unmount.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ops_fstype.c | 36 ++++++++++++++++++++++++------------
 fs/gfs2/sys.c        | 21 ++++++++++++++++++---
 2 files changed, 42 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index b8c250fc492..9b2389756ac 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1118,20 +1118,33 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
 	}
 
 	error = init_names(sdp, silent);
-	if (error)
-		goto fail;
+	if (error) {
+		/* In this case, we haven't initialized sysfs, so we have to
+		   manually free the sdp. */
+		free_percpu(sdp->sd_lkstats);
+		kfree(sdp);
+		sb->s_fs_info = NULL;
+		return error;
+	}
 
 	snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s", sdp->sd_table_name);
 
-	gfs2_create_debugfs_file(sdp);
-
 	error = gfs2_sys_fs_add(sdp);
+	/*
+	 * If we hit an error here, gfs2_sys_fs_add will have called function
+	 * kobject_put which causes the sysfs usage count to go to zero, which
+	 * causes sysfs to call function gfs2_sbd_release, which frees sdp.
+	 * Subsequent error paths here will call gfs2_sys_fs_del, which also
+	 * kobject_put to free sdp.
+	 */
 	if (error)
-		goto fail;
+		return error;
+
+	gfs2_create_debugfs_file(sdp);
 
 	error = gfs2_lm_mount(sdp, silent);
 	if (error)
-		goto fail_sys;
+		goto fail_debug;
 
 	error = init_locking(sdp, &mount_gh, DO);
 	if (error)
@@ -1215,12 +1228,12 @@ fail_locking:
 fail_lm:
 	gfs2_gl_hash_clear(sdp);
 	gfs2_lm_unmount(sdp);
-fail_sys:
-	gfs2_sys_fs_del(sdp);
-fail:
+fail_debug:
 	gfs2_delete_debugfs_file(sdp);
 	free_percpu(sdp->sd_lkstats);
-	kfree(sdp);
+	/* gfs2_sys_fs_del must be the last thing we do, since it causes
+	 * sysfs to call function gfs2_sbd_release, which frees sdp. */
+	gfs2_sys_fs_del(sdp);
 	sb->s_fs_info = NULL;
 	return error;
 }
@@ -1390,10 +1403,9 @@ static void gfs2_kill_sb(struct super_block *sb)
 	sdp->sd_root_dir = NULL;
 	sdp->sd_master_dir = NULL;
 	shrink_dcache_sb(sb);
-	kill_block_super(sb);
 	gfs2_delete_debugfs_file(sdp);
 	free_percpu(sdp->sd_lkstats);
-	kfree(sdp);
+	kill_block_super(sb);
 }
 
 struct file_system_type gfs2_fs_type = {
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 9c2592b1d5f..e4bee4bebbf 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -276,7 +276,15 @@ static struct attribute *gfs2_attrs[] = {
 	NULL,
 };
 
+static void gfs2_sbd_release(struct kobject *kobj)
+{
+	struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
+
+	kfree(sdp);
+}
+
 static struct kobj_type gfs2_ktype = {
+	.release = gfs2_sbd_release,
 	.default_attrs = gfs2_attrs,
 	.sysfs_ops     = &gfs2_attr_ops,
 };
@@ -583,6 +591,7 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
 	char ro[20];
 	char spectator[20];
 	char *envp[] = { ro, spectator, NULL };
+	int sysfs_frees_sdp = 0;
 
 	sprintf(ro, "RDONLY=%d", (sb->s_flags & MS_RDONLY) ? 1 : 0);
 	sprintf(spectator, "SPECTATOR=%d", sdp->sd_args.ar_spectator ? 1 : 0);
@@ -591,8 +600,10 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
 	error = kobject_init_and_add(&sdp->sd_kobj, &gfs2_ktype, NULL,
 				     "%s", sdp->sd_table_name);
 	if (error)
-		goto fail;
+		goto fail_reg;
 
+	sysfs_frees_sdp = 1; /* Freeing sdp is now done by sysfs calling
+				function gfs2_sbd_release. */
 	error = sysfs_create_group(&sdp->sd_kobj, &tune_group);
 	if (error)
 		goto fail_reg;
@@ -615,9 +626,13 @@ fail_lock_module:
 fail_tune:
 	sysfs_remove_group(&sdp->sd_kobj, &tune_group);
 fail_reg:
-	kobject_put(&sdp->sd_kobj);
-fail:
+	free_percpu(sdp->sd_lkstats);
 	fs_err(sdp, "error %d adding sysfs files", error);
+	if (sysfs_frees_sdp)
+		kobject_put(&sdp->sd_kobj);
+	else
+		kfree(sdp);
+	sb->s_fs_info = NULL;
 	return error;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 666d1d8ad201803862514317c17695925e61316b Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Wed, 13 Jun 2012 23:03:56 -0400
Subject: GFS2: Combine functions get_local_rgrp and gfs2_inplace_reserve

This function combines rgrp functions get_local_rgrp and
gfs2_inplace_reserve so that the double retry loop is gone.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/rgrp.c | 82 +++++++++++++++++++++-------------------------------------
 1 file changed, 29 insertions(+), 53 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 3c6f7ed16a3..e53d0a1c234 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1207,25 +1207,30 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
 }
 
 /**
- * get_local_rgrp - Choose and lock a rgrp for allocation
+ * gfs2_inplace_reserve - Reserve space in the filesystem
  * @ip: the inode to reserve space for
- * @last_unlinked: the last unlinked block
- *
- * Try to acquire rgrp in way which avoids contending with others.
+ * @requested: the number of blocks to be reserved
  *
  * Returns: errno
  */
 
-static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
+int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_rgrpd *rgd, *begin = NULL;
 	struct gfs2_blkreserv *rs = ip->i_res;
-	int error, rg_locked, flags = LM_FLAG_TRY;
+	int error = 0, rg_locked, flags = LM_FLAG_TRY;
+	u64 last_unlinked = NO_BLOCK;
 	int loops = 0;
 
 	if (sdp->sd_args.ar_rgrplvb)
 		flags |= GL_SKIP;
+	rs = ip->i_res;
+	rs->rs_requested = requested;
+	if (gfs2_assert_warn(sdp, requested)) {
+		error = -EINVAL;
+		goto out;
+	}
 
 	if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal))
 		rgd = begin = ip->i_rgd;
@@ -1263,63 +1268,34 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
 			if (rgd->rd_flags & GFS2_RDF_CHECK) {
 				if (sdp->sd_args.ar_rgrplvb)
 					gfs2_rgrp_bh_get(rgd);
-				try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
+				try_rgrp_unlink(rgd, &last_unlinked,
+						ip->i_no_addr);
 			}
 			if (!rg_locked)
 				gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
 			/* fall through */
 		case GLR_TRYFAILED:
 			rgd = gfs2_rgrpd_get_next(rgd);
-			if (rgd == begin) {
-				flags &= ~LM_FLAG_TRY;
-				loops++;
-			}
+			if (rgd != begin) /* If we didn't wrap */
+				break;
+
+			flags &= ~LM_FLAG_TRY;
+			loops++;
+			/* Check that fs hasn't grown if writing to rindex */
+			if (ip == GFS2_I(sdp->sd_rindex) &&
+			    !sdp->sd_rindex_uptodate) {
+				error = gfs2_ri_update(ip);
+				if (error)
+					goto out;
+			} else if (loops == 2)
+				/* Flushing the log may release space */
+				gfs2_log_flush(sdp, NULL);
 			break;
 		default:
-			return error;
+			goto out;
 		}
 	}
-
-	return -ENOSPC;
-}
-
-/**
- * gfs2_inplace_reserve - Reserve space in the filesystem
- * @ip: the inode to reserve space for
- * @requested: the number of blocks to be reserved
- *
- * Returns: errno
- */
-
-int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
-{
-	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	struct gfs2_blkreserv *rs;
-	int error = 0;
-	u64 last_unlinked = NO_BLOCK;
-	int tries = 0;
-
-	rs = ip->i_res;
-	rs->rs_requested = requested;
-	if (gfs2_assert_warn(sdp, requested)) {
-		error = -EINVAL;
-		goto out;
-	}
-
-	do {
-		error = get_local_rgrp(ip, &last_unlinked);
-		if (error != -ENOSPC)
-			break;
-		/* Check that fs hasn't grown if writing to rindex */
-		if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
-			error = gfs2_ri_update(ip);
-			if (error)
-				break;
-			continue;
-		}
-		/* Flushing the log may release space */
-		gfs2_log_flush(sdp, NULL);
-	} while (tries++ < 3);
+	error = -ENOSPC;
 
 out:
 	if (error)
-- 
cgit v1.2.3-70-g09d2


From 44b8db13860a449b5d85afdc65da654ce56da678 Mon Sep 17 00:00:00 2001
From: Masatake YAMATO <yamato@redhat.com>
Date: Mon, 18 Jun 2012 16:31:31 +0900
Subject: GFS2: Fixing double brelse'ing bh allocated in gfs2_meta_read when
 EIO occurs

This patch fixes buffer_head double free in following code path:

gfs2_block_map
=> gfs2_meta_inode_buffer
 => gfs2_meta_indirect_buffer
  => gfs2_meta_read
=> release_metapath

gfs2_block_map calls gfs2_meta_inode_buffer with &mp.mp_bh[0]
as an argument. mp.mp_bh are filled with zero at the beginning
of gfs2_block_map.

If gfs2_meta_inode_buffer returns non-zero value, gfs2_block_map
calls release_metapath to free buffers chained to mp.mp_bh.
release_metapath checks each slot of mp.mp_bh[i] and
free(with brelse) unless the slot is filled with NULL.

&mp.mp_bh[0] passed to gfs2_meta_inode_buffer is filled at
gfs2_meta_read. gfs2_meta_read is filled a buffer allocated with
gfs2_getbuf even if EIO occurs. When EIO occurs, the allocated buffer
is brelse'ed though the pointer(wrong poiner) points the brelse'ed is
passed back to caller via an argument bhp.

gfs2_meta_indirect_buffer, the caller also pass the wrong pointer
to its caller with EIO. Finally gfs2_block_map gets both EIO and
&mp.mp_bh[0] filled with the wrong pointer. release_metapath
calls brelse again on the wrong pointer.

Signed-off-by: Masatake YAMATO <yamato@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/meta_io.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 6c1e5d1c404..3a56c8d94de 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -213,8 +213,10 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
 	struct gfs2_sbd *sdp = gl->gl_sbd;
 	struct buffer_head *bh;
 
-	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
+		*bhp = NULL;
 		return -EIO;
+	}
 
 	*bhp = bh = gfs2_getbuf(gl, blkno, CREATE);
 
@@ -235,6 +237,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
 		if (tr && tr->tr_touched)
 			gfs2_io_error_bh(sdp, bh);
 		brelse(bh);
+		*bhp = NULL;
 		return -EIO;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From f3da93105b6963a2be2a56dee27fdc88ac4ad769 Mon Sep 17 00:00:00 2001
From: Jeff Liu <jeff.liu@oracle.com>
Date: Mon, 28 May 2012 23:40:17 +0800
Subject: quota: fix checkpatch.pl warning by replacing <asm/uaccess.h> with
 <linux/uaccess.h>

checkpatch.pl warns:

"WARNING: Use #include <linux/uaccess.h> instead of <asm/uaccess.h>"

Below patch fixes it.

Signed-off-by: Jie Liu <jeff.liu@oracle.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c | 2 +-
 fs/quota/quota.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 10cbe841cb7..0c541dcbdf0 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -78,7 +78,7 @@
 #include <linux/quotaops.h>
 #include "../internal.h" /* ugh */
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * There are three quota SMP locks. dq_list_lock protects all lists with quotas
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 9a391204ca2..639782d5a76 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -9,7 +9,7 @@
 #include <linux/namei.h>
 #include <linux/slab.h>
 #include <asm/current.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/kernel.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
-- 
cgit v1.2.3-70-g09d2


From f007dbf8e51f4a0910194eebc2aa119eb861893e Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Thu, 24 May 2012 12:00:37 -0500
Subject: ext3: force ro mount if ext3_setup_super() fails

If ext3_setup_super() fails i.e. due to a too-high revision,
the error is logged in dmesg but the fs is not mounted RO as
indicated.

Tested by:

[164152.114551] EXT3-fs (sdb6): error: revision level too high, forcing read-only mode
/dev/sdb6 /mnt/test2 ext3 rw,seclabel,relatime,errors=continue,user_xattr,acl,barrier=1,data=ordered 0 0
                          ^^

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext3/super.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 8c3a44b7c37..b4e19926f46 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2058,7 +2058,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 		goto failed_mount3;
 	}
 
-	ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
+	if (ext3_setup_super(sb, es, sb->s_flags & MS_RDONLY))
+		sb->s_flags |= MS_RDONLY;
 
 	EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS;
 	ext3_orphan_cleanup(sb, es);
-- 
cgit v1.2.3-70-g09d2


From db8109ef98b5fb7e26e0d265c02f7164b13009d4 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Mon, 4 Jun 2012 14:46:24 +0300
Subject: udf: stop using s_dirt

The UDF file-system does not need the 's_dirt' superblock flag because it does
not define the 'write_super()' method. This flag was set to 1 in few places and
set to 0 in '->sync_fs()' and was basically useless. Stop using it because it
is on its way out.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/udf/super.c   | 6 ------
 fs/udf/udfdecl.h | 1 -
 2 files changed, 7 deletions(-)

(limited to 'fs')

diff --git a/fs/udf/super.c b/fs/udf/super.c
index 8d86a8706c0..e7534fb84c2 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -1974,7 +1974,6 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 	sb->s_op = &udf_sb_ops;
 	sb->s_export_op = &udf_export_ops;
 
-	sb->s_dirt = 0;
 	sb->s_magic = UDF_SUPER_MAGIC;
 	sb->s_time_gran = 1000;
 
@@ -2096,10 +2095,6 @@ void _udf_err(struct super_block *sb, const char *function,
 	struct va_format vaf;
 	va_list args;
 
-	/* mark sb error */
-	if (!(sb->s_flags & MS_RDONLY))
-		sb->s_dirt = 1;
-
 	va_start(args, fmt);
 
 	vaf.fmt = fmt;
@@ -2161,7 +2156,6 @@ static int udf_sync_fs(struct super_block *sb, int wait)
 		 * the buffer for IO
 		 */
 		mark_buffer_dirty(sbi->s_lvid_bh);
-		sb->s_dirt = 0;
 		sbi->s_lvid_dirty = 0;
 	}
 	mutex_unlock(&sbi->s_alloc_mutex);
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index ebe10314e51..de038da6f6b 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -129,7 +129,6 @@ static inline void udf_updated_lvid(struct super_block *sb)
 	WARN_ON_ONCE(((struct logicalVolIntegrityDesc *)
 		     bh->b_data)->integrityType !=
 		     cpu_to_le32(LVID_INTEGRITY_TYPE_OPEN));
-	sb->s_dirt = 1;
 	UDF_SB(sb)->s_lvid_dirty = 1;
 }
 extern u64 lvid_get_unique_id(struct super_block *sb);
-- 
cgit v1.2.3-70-g09d2


From a0e589b485cd5e6a74d40d195b3d7de212b4227d Mon Sep 17 00:00:00 2001
From: Ashish Sangwan <ashishsangwan2@gmail.com>
Date: Tue, 26 Jun 2012 19:33:11 +0530
Subject: UDF: Remove unnecessary variable "offset" from udf_fill_inode

The variable "offset" is not needed. Remove it.

Signed-off-by: Ashish Sangwan <ashish.sangwan2@gmail.com>
Signed-off-by: Namjae Jeon <linkinjeon@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/udf/inode.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'fs')

diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 873e1bab9c4..fafaad795cd 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1247,7 +1247,6 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
 {
 	struct fileEntry *fe;
 	struct extendedFileEntry *efe;
-	int offset;
 	struct udf_sb_info *sbi = UDF_SB(inode->i_sb);
 	struct udf_inode_info *iinfo = UDF_I(inode);
 	unsigned int link_count;
@@ -1359,7 +1358,6 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
 		iinfo->i_lenEAttr = le32_to_cpu(fe->lengthExtendedAttr);
 		iinfo->i_lenAlloc = le32_to_cpu(fe->lengthAllocDescs);
 		iinfo->i_checkpoint = le32_to_cpu(fe->checkpoint);
-		offset = sizeof(struct fileEntry) + iinfo->i_lenEAttr;
 	} else {
 		inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) <<
 		    (inode->i_sb->s_blocksize_bits - 9);
@@ -1381,8 +1379,6 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
 		iinfo->i_lenEAttr = le32_to_cpu(efe->lengthExtendedAttr);
 		iinfo->i_lenAlloc = le32_to_cpu(efe->lengthAllocDescs);
 		iinfo->i_checkpoint = le32_to_cpu(efe->checkpoint);
-		offset = sizeof(struct extendedFileEntry) +
-							iinfo->i_lenEAttr;
 	}
 
 	switch (fe->icbTag.fileType) {
-- 
cgit v1.2.3-70-g09d2


From e124a32043416ddefaec3c54cc945b7667c00628 Mon Sep 17 00:00:00 2001
From: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Date: Thu, 28 Jun 2012 00:49:44 +0800
Subject: ext2: cleanup the confused goto label

Cleanup the confused goto label, since the big lock has been removed.

Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext2/super.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index b3621cb7ea3..c8e49794ab5 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -771,13 +771,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 	err = -ENOMEM;
 	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
 	if (!sbi)
-		goto failed_unlock;
+		goto failed;
 
 	sbi->s_blockgroup_lock =
 		kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
 	if (!sbi->s_blockgroup_lock) {
 		kfree(sbi);
-		goto failed_unlock;
+		goto failed;
 	}
 	sb->s_fs_info = sbi;
 	sbi->s_sb_block = sb_block;
@@ -1130,7 +1130,7 @@ failed_sbi:
 	sb->s_fs_info = NULL;
 	kfree(sbi->s_blockgroup_lock);
 	kfree(sbi);
-failed_unlock:
+failed:
 	return ret;
 }
 
-- 
cgit v1.2.3-70-g09d2


From bff943af6fe3af022c1c7a22cdb2e18a242eaf35 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 27 Jun 2012 22:27:05 +0200
Subject: udf: Fix memory leak when mounting

When we are mounting filesystem, we can load one partition table before
finding out that we cannot complete processing of logical volume descriptor
and trying the reserve descriptor. Free the table properly before trying
the reserve descriptor.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/udf/super.c | 122 ++++++++++++++++++++++++++++++---------------------------
 1 file changed, 64 insertions(+), 58 deletions(-)

(limited to 'fs')

diff --git a/fs/udf/super.c b/fs/udf/super.c
index e7534fb84c2..8a758386781 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -252,6 +252,63 @@ static int udf_sb_alloc_partition_maps(struct super_block *sb, u32 count)
 	return 0;
 }
 
+static void udf_sb_free_bitmap(struct udf_bitmap *bitmap)
+{
+	int i;
+	int nr_groups = bitmap->s_nr_groups;
+	int size = sizeof(struct udf_bitmap) + (sizeof(struct buffer_head *) *
+						nr_groups);
+
+	for (i = 0; i < nr_groups; i++)
+		if (bitmap->s_block_bitmap[i])
+			brelse(bitmap->s_block_bitmap[i]);
+
+	if (size <= PAGE_SIZE)
+		kfree(bitmap);
+	else
+		vfree(bitmap);
+}
+
+static void udf_free_partition(struct udf_part_map *map)
+{
+	int i;
+	struct udf_meta_data *mdata;
+
+	if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE)
+		iput(map->s_uspace.s_table);
+	if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE)
+		iput(map->s_fspace.s_table);
+	if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP)
+		udf_sb_free_bitmap(map->s_uspace.s_bitmap);
+	if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP)
+		udf_sb_free_bitmap(map->s_fspace.s_bitmap);
+	if (map->s_partition_type == UDF_SPARABLE_MAP15)
+		for (i = 0; i < 4; i++)
+			brelse(map->s_type_specific.s_sparing.s_spar_map[i]);
+	else if (map->s_partition_type == UDF_METADATA_MAP25) {
+		mdata = &map->s_type_specific.s_metadata;
+		iput(mdata->s_metadata_fe);
+		mdata->s_metadata_fe = NULL;
+
+		iput(mdata->s_mirror_fe);
+		mdata->s_mirror_fe = NULL;
+
+		iput(mdata->s_bitmap_fe);
+		mdata->s_bitmap_fe = NULL;
+	}
+}
+
+static void udf_sb_free_partitions(struct super_block *sb)
+{
+	struct udf_sb_info *sbi = UDF_SB(sb);
+	int i;
+
+	for (i = 0; i < sbi->s_partitions; i++)
+		udf_free_partition(&sbi->s_partmaps[i]);
+	kfree(sbi->s_partmaps);
+	sbi->s_partmaps = NULL;
+}
+
 static int udf_show_options(struct seq_file *seq, struct dentry *root)
 {
 	struct super_block *sb = root->d_sb;
@@ -1596,7 +1653,11 @@ static int udf_load_sequence(struct super_block *sb, struct buffer_head *bh,
 	/* responsible for finding the PartitionDesc(s) */
 	if (!udf_process_sequence(sb, main_s, main_e, fileset))
 		return 1;
-	return !udf_process_sequence(sb, reserve_s, reserve_e, fileset);
+	udf_sb_free_partitions(sb);
+	if (!udf_process_sequence(sb, reserve_s, reserve_e, fileset))
+		return 1;
+	udf_sb_free_partitions(sb);
+	return 0;
 }
 
 /*
@@ -1861,55 +1922,8 @@ u64 lvid_get_unique_id(struct super_block *sb)
 	return ret;
 }
 
-static void udf_sb_free_bitmap(struct udf_bitmap *bitmap)
-{
-	int i;
-	int nr_groups = bitmap->s_nr_groups;
-	int size = sizeof(struct udf_bitmap) + (sizeof(struct buffer_head *) *
-						nr_groups);
-
-	for (i = 0; i < nr_groups; i++)
-		if (bitmap->s_block_bitmap[i])
-			brelse(bitmap->s_block_bitmap[i]);
-
-	if (size <= PAGE_SIZE)
-		kfree(bitmap);
-	else
-		vfree(bitmap);
-}
-
-static void udf_free_partition(struct udf_part_map *map)
-{
-	int i;
-	struct udf_meta_data *mdata;
-
-	if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE)
-		iput(map->s_uspace.s_table);
-	if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE)
-		iput(map->s_fspace.s_table);
-	if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP)
-		udf_sb_free_bitmap(map->s_uspace.s_bitmap);
-	if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP)
-		udf_sb_free_bitmap(map->s_fspace.s_bitmap);
-	if (map->s_partition_type == UDF_SPARABLE_MAP15)
-		for (i = 0; i < 4; i++)
-			brelse(map->s_type_specific.s_sparing.s_spar_map[i]);
-	else if (map->s_partition_type == UDF_METADATA_MAP25) {
-		mdata = &map->s_type_specific.s_metadata;
-		iput(mdata->s_metadata_fe);
-		mdata->s_metadata_fe = NULL;
-
-		iput(mdata->s_mirror_fe);
-		mdata->s_mirror_fe = NULL;
-
-		iput(mdata->s_bitmap_fe);
-		mdata->s_bitmap_fe = NULL;
-	}
-}
-
 static int udf_fill_super(struct super_block *sb, void *options, int silent)
 {
-	int i;
 	int ret;
 	struct inode *inode = NULL;
 	struct udf_options uopt;
@@ -2071,9 +2085,6 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 error_out:
 	if (sbi->s_vat_inode)
 		iput(sbi->s_vat_inode);
-	if (sbi->s_partitions)
-		for (i = 0; i < sbi->s_partitions; i++)
-			udf_free_partition(&sbi->s_partmaps[i]);
 #ifdef CONFIG_UDF_NLS
 	if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
 		unload_nls(sbi->s_nls_map);
@@ -2081,8 +2092,7 @@ error_out:
 	if (!(sb->s_flags & MS_RDONLY))
 		udf_close_lvid(sb);
 	brelse(sbi->s_lvid_bh);
-
-	kfree(sbi->s_partmaps);
+	udf_sb_free_partitions(sb);
 	kfree(sbi);
 	sb->s_fs_info = NULL;
 
@@ -2123,16 +2133,12 @@ void _udf_warn(struct super_block *sb, const char *function,
 
 static void udf_put_super(struct super_block *sb)
 {
-	int i;
 	struct udf_sb_info *sbi;
 
 	sbi = UDF_SB(sb);
 
 	if (sbi->s_vat_inode)
 		iput(sbi->s_vat_inode);
-	if (sbi->s_partitions)
-		for (i = 0; i < sbi->s_partitions; i++)
-			udf_free_partition(&sbi->s_partmaps[i]);
 #ifdef CONFIG_UDF_NLS
 	if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
 		unload_nls(sbi->s_nls_map);
@@ -2140,7 +2146,7 @@ static void udf_put_super(struct super_block *sb)
 	if (!(sb->s_flags & MS_RDONLY))
 		udf_close_lvid(sb);
 	brelse(sbi->s_lvid_bh);
-	kfree(sbi->s_partmaps);
+	udf_sb_free_partitions(sb);
 	kfree(sb->s_fs_info);
 	sb->s_fs_info = NULL;
 }
-- 
cgit v1.2.3-70-g09d2


From 17dc59ba418c3d6b0675d5b74d280acab2d4e369 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 9 Jul 2012 13:24:21 +0200
Subject: udf: Do not decrement i_blocks when freeing indirect extent block

Indirect extent block is not accounted in i_blocks during allocation
thus we should not decrement i_blocks when we are freeing such block
during truncation.

Reported-by: Steve Nickel <snickel58@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/udf/truncate.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index 4b98fee8e16..8a9657d7f7c 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -248,7 +248,7 @@ void udf_truncate_extents(struct inode *inode)
 				/* We managed to free all extents in the
 				 * indirect extent - free it too */
 				BUG_ON(!epos.bh);
-				udf_free_blocks(sb, inode, &epos.block,
+				udf_free_blocks(sb, NULL, &epos.block,
 						0, indirect_ext_len);
 			} else if (!epos.bh) {
 				iinfo->i_lenAlloc = lenalloc;
@@ -275,7 +275,7 @@ void udf_truncate_extents(struct inode *inode)
 
 	if (indirect_ext_len) {
 		BUG_ON(!epos.bh);
-		udf_free_blocks(sb, inode, &epos.block, 0, indirect_ext_len);
+		udf_free_blocks(sb, NULL, &epos.block, 0, indirect_ext_len);
 	} else if (!epos.bh) {
 		iinfo->i_lenAlloc = lenalloc;
 		mark_inode_dirty(inode);
-- 
cgit v1.2.3-70-g09d2


From 349ecd6a3c0e4f97fa4dc6bd3917455ccc106d23 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 9 Jul 2012 23:38:36 +0200
Subject: jbd: Check return value of blkdev_issue_flush()

blkdev_issue_flush() can fail. Make sure the error gets properly propagated.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/jbd/recovery.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index 008bf062fd2..a748fe21465 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -265,8 +265,11 @@ int journal_recover(journal_t *journal)
 	if (!err)
 		err = err2;
 	/* Flush disk caches to get replayed data on the permanent storage */
-	if (journal->j_flags & JFS_BARRIER)
-		blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
+	if (journal->j_flags & JFS_BARRIER) {
+		err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
+		if (!err)
+			err = err2;
+	}
 
 	return err;
 }
-- 
cgit v1.2.3-70-g09d2


From 44f4f729e7a143b08bd63c33cb78b3181d9f4716 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 9 Jul 2012 23:40:46 +0200
Subject: ext3: Check return value of blkdev_issue_flush()

blkdev_issue_flush() can fail. Make sure the error gets properly propagated.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext3/fsync.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index d4dff278cbd..b31dbd4c46a 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -92,8 +92,13 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	 * disk caches manually so that data really is on persistent
 	 * storage
 	 */
-	if (needs_barrier)
-		blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+	if (needs_barrier) {
+		int err;
+
+		err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+		if (!ret)
+			ret = err;
+	}
 out:
 	trace_ext3_sync_file_exit(inode, ret);
 	return ret;
-- 
cgit v1.2.3-70-g09d2


From 57b9655d01ef057a523e810d29c37ac09b80eead Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 10 Jul 2012 17:58:04 +0200
Subject: udf: Improve table length check to avoid possible overflow

When a partition table length is corrupted to be close to 1 << 32, the
check for its length may overflow on 32-bit systems and we will think
the length is valid. Later on the kernel can crash trying to read beyond
end of buffer. Fix the check to avoid possible overflow.

CC: stable@vger.kernel.org
Reported-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/udf/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/udf/super.c b/fs/udf/super.c
index 8a758386781..dcbf98722af 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -1340,7 +1340,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
 	BUG_ON(ident != TAG_IDENT_LVD);
 	lvd = (struct logicalVolDesc *)bh->b_data;
 	table_len = le32_to_cpu(lvd->mapTableLength);
-	if (sizeof(*lvd) + table_len > sb->s_blocksize) {
+	if (table_len > sb->s_blocksize - sizeof(*lvd)) {
 		udf_err(sb, "error loading logical volume descriptor: "
 			"Partition table too long (%u > %lu)\n", table_len,
 			sb->s_blocksize - sizeof(*lvd));
-- 
cgit v1.2.3-70-g09d2


From 475d0094293b51353e342d1198377967dbc48169 Mon Sep 17 00:00:00 2001
From: Dong Aisheng <dong.aisheng@linaro.org>
Date: Wed, 11 Jul 2012 15:16:37 +1000
Subject: of: Improve prom_update_property() function

prom_update_property() currently fails if the property doesn't
actually exist yet which isn't what we want. Change to add-or-update
instead of update-only, then we can remove a lot duplicated lines.

Suggested-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Dong Aisheng <dong.aisheng@linaro.org>
Acked-by: Rob Herring <rob.herring@calxeda.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/85xx/p1022_ds.c    |  8 +-------
 arch/powerpc/platforms/pseries/mobility.c |  8 +-------
 arch/powerpc/platforms/pseries/reconfig.c | 16 ++++++----------
 drivers/of/base.c                         | 15 +++++++++++----
 fs/proc/proc_devtree.c                    |  5 +++++
 include/linux/of.h                        |  3 +--
 6 files changed, 25 insertions(+), 30 deletions(-)

(limited to 'fs')

diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c
index 74e310b4b46..31d18b964f9 100644
--- a/arch/powerpc/platforms/85xx/p1022_ds.c
+++ b/arch/powerpc/platforms/85xx/p1022_ds.c
@@ -348,13 +348,7 @@ void __init p1022_ds_pic_init(void)
  */
 static void __init disable_one_node(struct device_node *np, struct property *new)
 {
-	struct property *old;
-
-	old = of_find_property(np, new->name, NULL);
-	if (old)
-		prom_update_property(np, new, old);
-	else
-		prom_add_property(np, new);
+	prom_update_property(np, new);
 }
 
 /* TRUE if there is a "video=fslfb" command-line parameter. */
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 029a562af37..dd30b12edfe 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -67,7 +67,6 @@ static int update_dt_property(struct device_node *dn, struct property **prop,
 			      const char *name, u32 vd, char *value)
 {
 	struct property *new_prop = *prop;
-	struct property *old_prop;
 	int more = 0;
 
 	/* A negative 'vd' value indicates that only part of the new property
@@ -117,12 +116,7 @@ static int update_dt_property(struct device_node *dn, struct property **prop,
 	}
 
 	if (!more) {
-		old_prop = of_find_property(dn, new_prop->name, NULL);
-		if (old_prop)
-			prom_update_property(dn, new_prop, old_prop);
-		else
-			prom_add_property(dn, new_prop);
-
+		prom_update_property(dn, new_prop);
 		new_prop = NULL;
 	}
 
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index 7b3bf76ef83..39f71fba9b3 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -432,7 +432,7 @@ static int do_update_property(char *buf, size_t bufsize)
 	unsigned char *value;
 	char *name, *end, *next_prop;
 	int rc, length;
-	struct property *newprop, *oldprop;
+	struct property *newprop;
 	buf = parse_node(buf, bufsize, &np);
 	end = buf + bufsize;
 
@@ -443,6 +443,9 @@ static int do_update_property(char *buf, size_t bufsize)
 	if (!next_prop)
 		return -EINVAL;
 
+	if (!strlen(name))
+		return -ENODEV;
+
 	newprop = new_property(name, length, value, NULL);
 	if (!newprop)
 		return -ENOMEM;
@@ -450,18 +453,11 @@ static int do_update_property(char *buf, size_t bufsize)
 	if (!strcmp(name, "slb-size") || !strcmp(name, "ibm,slb-size"))
 		slb_set_size(*(int *)value);
 
-	oldprop = of_find_property(np, name,NULL);
-	if (!oldprop) {
-		if (strlen(name))
-			return prom_add_property(np, newprop);
-		return -ENODEV;
-	}
-
 	upd_value.node = np;
 	upd_value.property = newprop;
 	pSeries_reconfig_notify(PSERIES_UPDATE_PROPERTY, &upd_value);
 
-	rc = prom_update_property(np, newprop, oldprop);
+	rc = prom_update_property(np, newprop);
 	if (rc)
 		return rc;
 
@@ -486,7 +482,7 @@ static int do_update_property(char *buf, size_t bufsize)
 
 		rc = pSeries_reconfig_notify(action, value);
 		if (rc) {
-			prom_update_property(np, oldprop, newprop);
+			prom_update_property(np, newprop);
 			return rc;
 		}
 	}
diff --git a/drivers/of/base.c b/drivers/of/base.c
index eada3f4ef80..bc86ea2af66 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -1073,7 +1073,8 @@ int prom_remove_property(struct device_node *np, struct property *prop)
 }
 
 /*
- * prom_update_property - Update a property in a node.
+ * prom_update_property - Update a property in a node, if the property does
+ * not exist, add it.
  *
  * Note that we don't actually remove it, since we have given out
  * who-knows-how-many pointers to the data using get-property.
@@ -1081,13 +1082,19 @@ int prom_remove_property(struct device_node *np, struct property *prop)
  * and add the new property to the property list
  */
 int prom_update_property(struct device_node *np,
-			 struct property *newprop,
-			 struct property *oldprop)
+			 struct property *newprop)
 {
-	struct property **next;
+	struct property **next, *oldprop;
 	unsigned long flags;
 	int found = 0;
 
+	if (!newprop->name)
+		return -EINVAL;
+
+	oldprop = of_find_property(np, newprop->name, NULL);
+	if (!oldprop)
+		return prom_add_property(np, newprop);
+
 	write_lock_irqsave(&devtree_lock, flags);
 	next = &np->properties;
 	while (*next) {
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index 927cbd115e5..df7dd08d439 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -101,6 +101,11 @@ void proc_device_tree_update_prop(struct proc_dir_entry *pde,
 {
 	struct proc_dir_entry *ent;
 
+	if (!oldprop) {
+		proc_device_tree_add_prop(pde, newprop);
+		return;
+	}
+
 	for (ent = pde->subdir; ent != NULL; ent = ent->next)
 		if (ent->data == oldprop)
 			break;
diff --git a/include/linux/of.h b/include/linux/of.h
index 2ec1083af7f..b27c87191df 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -260,8 +260,7 @@ extern int of_machine_is_compatible(const char *compat);
 extern int prom_add_property(struct device_node* np, struct property* prop);
 extern int prom_remove_property(struct device_node *np, struct property *prop);
 extern int prom_update_property(struct device_node *np,
-				struct property *newprop,
-				struct property *oldprop);
+				struct property *newprop);
 
 #if defined(CONFIG_OF_DYNAMIC)
 /* For updating the device tree at runtime */
-- 
cgit v1.2.3-70-g09d2


From 10983f2e8dc65d118371681548809109b570b63b Mon Sep 17 00:00:00 2001
From: Liu Bo <liubo2009@cn.fujitsu.com>
Date: Wed, 11 Jul 2012 15:26:19 +0800
Subject: Btrfs: fix typo in convert_extent_bit

It should be convert_extent_bit.

Signed-off-by: Liu Bo <liubo2009@cn.fujitsu.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 fs/btrfs/extent_io.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c9018a05036..97f6703fd49 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -933,7 +933,8 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
 
 
 /**
- * convert_extent - convert all bits in a given range from one bit to another
+ * convert_extent_bit - convert all bits in a given range from one bit to
+ * 			another
  * @tree:	the io tree to search
  * @start:	the start offset in bytes
  * @end:	the end offset in bytes (inclusive)
-- 
cgit v1.2.3-70-g09d2


From fe685aabf7c8c9f138e5ea900954d295bf229175 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Thu, 12 Jul 2012 08:46:54 +0200
Subject: isofs: avoid info leak on export

For type 1 the parent_offset member in struct isofs_fid gets copied
uninitialized to userland. Fix this by initializing it to 0.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/isofs/export.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index aa4356d09ee..1d3804492aa 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -134,6 +134,7 @@ isofs_export_encode_fh(struct inode *inode,
 	len = 3;
 	fh32[0] = ei->i_iget5_block;
  	fh16[2] = (__u16)ei->i_iget5_offset;  /* fh16 [sic] */
+	fh16[3] = 0;  /* avoid leaking uninitialized data */
 	fh32[2] = inode->i_generation;
 	if (parent) {
 		struct iso_inode_info *eparent;
-- 
cgit v1.2.3-70-g09d2


From 0143fc5e9f6f5aad4764801015bc8d4b4a278200 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Thu, 12 Jul 2012 08:46:55 +0200
Subject: udf: avoid info leak on export

For type 0x51 the udf.parent_partref member in struct fid gets copied
uninitialized to userland. Fix this by initializing it to 0.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/udf/namei.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 18024178ac4..c31deb382af 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -1279,6 +1279,7 @@ static int udf_encode_fh(struct inode *inode, __u32 *fh, int *lenp,
 	*lenp = 3;
 	fid->udf.block = location.logicalBlockNum;
 	fid->udf.partref = location.partitionReferenceNum;
+	fid->udf.parent_partref = 0;
 	fid->udf.generation = inode->i_generation;
 
 	if (parent) {
-- 
cgit v1.2.3-70-g09d2


From e3a746f5aab71f2dd0a83116772922fb37ae29d6 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Thu, 12 Jul 2012 07:40:42 +1000
Subject: xfs: really fix the cursor leak in xfs_alloc_ag_vextent_near

The current cursor is reallocated when retrying the allocation, so
the existing cursor needs to be destroyed in both the restart and
the failure cases.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Tested-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_alloc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 9d1aeb7e273..f654f51b0c6 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1074,13 +1074,13 @@ restart:
 	 * If we couldn't get anything, give up.
 	 */
 	if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
+		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+
 		if (!forced++) {
 			trace_xfs_alloc_near_busy(args);
 			xfs_log_force(args->mp, XFS_LOG_SYNC);
 			goto restart;
 		}
-
-		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 		trace_xfs_alloc_size_neither(args);
 		args->agbno = NULLAGBLOCK;
 		return 0;
-- 
cgit v1.2.3-70-g09d2


From aa292847b9fc6e187547110de833a7d3131bbddf Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Thu, 12 Jul 2012 07:40:43 +1000
Subject: xfs: don't defer metadata allocation to the workqueue

Almost all metadata allocations come from shallow stack usage
situations. Avoid the overhead of switching the allocation to a
workqueue as we are not in danger of running out of stack when
making these allocations. Metadata allocations are already marked
through the args that are passed down, so this is trivial to do.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reported-by: Mel Gorman <mgorman@suse.de>
Tested-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_alloc.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index f654f51b0c6..4f33c32affe 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -2434,13 +2434,22 @@ xfs_alloc_vextent_worker(
 	current_restore_flags_nested(&pflags, PF_FSTRANS);
 }
 
-
-int				/* error */
+/*
+ * Data allocation requests often come in with little stack to work on. Push
+ * them off to a worker thread so there is lots of stack to use. Metadata
+ * requests, OTOH, are generally from low stack usage paths, so avoid the
+ * context switch overhead here.
+ */
+int
 xfs_alloc_vextent(
-	xfs_alloc_arg_t	*args)	/* allocation argument structure */
+	struct xfs_alloc_arg	*args)
 {
 	DECLARE_COMPLETION_ONSTACK(done);
 
+	if (!args->userdata)
+		return __xfs_alloc_vextent(args);
+
+
 	args->done = &done;
 	INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker);
 	queue_work(xfs_alloc_wq, &args->work);
-- 
cgit v1.2.3-70-g09d2


From 40a9b7963df32e743c45d79a5f41445fe2476f15 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Mon, 2 Jul 2012 06:00:04 -0400
Subject: xfs: prevent recursion in xfs_buf_iorequest

If the b_iodone handler is run in calling context in xfs_buf_iorequest we
can run into a recursion where xfs_buf_iodone_callbacks keeps calling back
into xfs_buf_iorequest because an I/O error happened, which keeps calling
back into xfs_buf_iorequest.  This chain will usually not take long
because the filesystem gets shut down because of log I/O errors, but even
over a short time it can cause stack overflows if run on the same context.

As a short term workaround make sure we always call the iodone handler in
workqueue context.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_buf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index a4beb421018..687b275f165 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1243,7 +1243,7 @@ xfs_buf_iorequest(
 	 */
 	atomic_set(&bp->b_io_remaining, 1);
 	_xfs_buf_ioapply(bp);
-	_xfs_buf_ioend(bp, 0);
+	_xfs_buf_ioend(bp, 1);
 
 	xfs_buf_rele(bp);
 }
-- 
cgit v1.2.3-70-g09d2


From 1632dcc93f55f9ab407b373da1957a727b1a7fe3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Fri, 13 Jul 2012 02:24:10 -0400
Subject: xfs: do not call xfs_bdstrat_cb in xfs_buf_iodone_callbacks

xfs_bdstrat_cb only adds a check for a shutdown filesystem over
xfs_buf_iorequest, but xfs_buf_iodone_callbacks just checked for a shut down
filesystem a little earlier.  In addition the shutdown handling in
xfs_bdstrat_cb is not very suitable for this caller.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_buf.c      | 51 ++++++++++++++++++++++-----------------------------
 fs/xfs/xfs_buf.h      |  1 -
 fs/xfs/xfs_buf_item.c |  2 +-
 3 files changed, 23 insertions(+), 31 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 687b275f165..269b35c084d 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -989,27 +989,6 @@ xfs_buf_ioerror_alert(
 		(__uint64_t)XFS_BUF_ADDR(bp), func, bp->b_error, bp->b_length);
 }
 
-int
-xfs_bwrite(
-	struct xfs_buf		*bp)
-{
-	int			error;
-
-	ASSERT(xfs_buf_islocked(bp));
-
-	bp->b_flags |= XBF_WRITE;
-	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q);
-
-	xfs_bdstrat_cb(bp);
-
-	error = xfs_buf_iowait(bp);
-	if (error) {
-		xfs_force_shutdown(bp->b_target->bt_mount,
-				   SHUTDOWN_META_IO_ERROR);
-	}
-	return error;
-}
-
 /*
  * Called when we want to stop a buffer from getting written or read.
  * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
@@ -1079,14 +1058,7 @@ xfs_bioerror_relse(
 	return EIO;
 }
 
-
-/*
- * All xfs metadata buffers except log state machine buffers
- * get this attached as their b_bdstrat callback function.
- * This is so that we can catch a buffer
- * after prematurely unpinning it to forcibly shutdown the filesystem.
- */
-int
+STATIC int
 xfs_bdstrat_cb(
 	struct xfs_buf	*bp)
 {
@@ -1107,6 +1079,27 @@ xfs_bdstrat_cb(
 	return 0;
 }
 
+int
+xfs_bwrite(
+	struct xfs_buf		*bp)
+{
+	int			error;
+
+	ASSERT(xfs_buf_islocked(bp));
+
+	bp->b_flags |= XBF_WRITE;
+	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q);
+
+	xfs_bdstrat_cb(bp);
+
+	error = xfs_buf_iowait(bp);
+	if (error) {
+		xfs_force_shutdown(bp->b_target->bt_mount,
+				   SHUTDOWN_META_IO_ERROR);
+	}
+	return error;
+}
+
 /*
  * Wrapper around bdstrat so that we can stop data from going to disk in case
  * we are shutting down the filesystem.  Typically user data goes thru this
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 7f1d1392ce3..79344c48008 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -180,7 +180,6 @@ extern void xfs_buf_unlock(xfs_buf_t *);
 extern int xfs_bwrite(struct xfs_buf *bp);
 
 extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
-extern int xfs_bdstrat_cb(struct xfs_buf *);
 
 extern void xfs_buf_ioend(xfs_buf_t *,	int);
 extern void xfs_buf_ioerror(xfs_buf_t *, int);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 45df2b857d4..d9e451115f9 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -954,7 +954,7 @@ xfs_buf_iodone_callbacks(
 
 		if (!XFS_BUF_ISSTALE(bp)) {
 			bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE;
-			xfs_bdstrat_cb(bp);
+			xfs_buf_iorequest(bp);
 		} else {
 			xfs_buf_relse(bp);
 		}
-- 
cgit v1.2.3-70-g09d2


From bc86256d2e80e6731a2055175d9a32cf96eb71f8 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 6 Jun 2012 18:56:51 +0300
Subject: affs: stop setting bm_flags

AFFS stores values '1' and '2' in 'bm_flags', and I fail to see any logic when
it prefers one or another. AFFS writes '1' only from '->put_super()', while
'->sync_fs()' and '->write_super()' store value '2'.  So on the first glance,
it looks like we want to have '1' if we unmount.  However, this does not really
happen in these cases:
  1. superblock is written via 'write_super()' then we unmount;
  2. we re-mount R/O, then unmount.
which are quite typical.

I could not find good documentation describing this field, except of one random
piece of documentation in the internet which says that -1 means that the root
block is valid, which is not consistent with what we have in the Linux AFFS
driver.

Jan Kara commented on this: "I have some vague recollection that on Amiga
boolean was usually encoded as: 0 == false, ~0 == -1 == true. But it has been
ages..."

Thus, my conclusion is that value of '1' is as good as value of '2' and we can
just always use '2'. An Jan Kara suggested to go further: "generally bm_flags
handling looks strange. If they are 0, we mount fs read only and thus cannot
change them.  If they are != 0, we write 2 there. So IMHO if you just removed
bm_flags setting, nothing will really happen."

So this patch removes the bm_flags setting completely. This makes the "clean"
argument of the 'affs_commit_super()' function unneeded, so it is also removed.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/affs/super.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/affs/super.c b/fs/affs/super.c
index 0782653a05a..1d42e468abd 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -25,13 +25,12 @@ static int affs_statfs(struct dentry *dentry, struct kstatfs *buf);
 static int affs_remount (struct super_block *sb, int *flags, char *data);
 
 static void
-affs_commit_super(struct super_block *sb, int wait, int clean)
+affs_commit_super(struct super_block *sb, int wait)
 {
 	struct affs_sb_info *sbi = AFFS_SB(sb);
 	struct buffer_head *bh = sbi->s_root_bh;
 	struct affs_root_tail *tail = AFFS_ROOT_TAIL(sb, bh);
 
-	tail->bm_flag = cpu_to_be32(clean);
 	secs_to_datestamp(get_seconds(), &tail->disk_change);
 	affs_fix_checksum(sb, bh);
 	mark_buffer_dirty(bh);
@@ -46,7 +45,7 @@ affs_put_super(struct super_block *sb)
 	pr_debug("AFFS: put_super()\n");
 
 	if (!(sb->s_flags & MS_RDONLY) && sb->s_dirt)
-		affs_commit_super(sb, 1, 1);
+		affs_commit_super(sb, 1);
 
 	kfree(sbi->s_prefix);
 	affs_free_bitmap(sb);
@@ -60,7 +59,7 @@ affs_write_super(struct super_block *sb)
 {
 	lock_super(sb);
 	if (!(sb->s_flags & MS_RDONLY))
-		affs_commit_super(sb, 1, 2);
+		affs_commit_super(sb, 1);
 	sb->s_dirt = 0;
 	unlock_super(sb);
 
@@ -71,7 +70,7 @@ static int
 affs_sync_fs(struct super_block *sb, int wait)
 {
 	lock_super(sb);
-	affs_commit_super(sb, wait, 2);
+	affs_commit_super(sb, wait);
 	sb->s_dirt = 0;
 	unlock_super(sb);
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From c9753b1d20e13c94d15a1c8b252a696744bd22a2 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 6 Jun 2012 18:56:52 +0300
Subject: affs: remove useless superblock writeout on unmount

We do not need to write out the superblock from '->put_super()' because VFS has
already called '->sync_fs()' by this time and the superblock has already been
written out. Thus, remove the 'affs_commit_super()' infocation from
'affs_put_super()'.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/affs/super.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'fs')

diff --git a/fs/affs/super.c b/fs/affs/super.c
index 1d42e468abd..12b4f58081b 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -44,9 +44,6 @@ affs_put_super(struct super_block *sb)
 	struct affs_sb_info *sbi = AFFS_SB(sb);
 	pr_debug("AFFS: put_super()\n");
 
-	if (!(sb->s_flags & MS_RDONLY) && sb->s_dirt)
-		affs_commit_super(sb, 1);
-
 	kfree(sbi->s_prefix);
 	affs_free_bitmap(sb);
 	affs_brelse(sbi->s_root_bh);
-- 
cgit v1.2.3-70-g09d2


From 0164b1a32e6849121ea73ef3124a2994951a4713 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 6 Jun 2012 18:56:53 +0300
Subject: affs: remove useless superblock writeout on remount

We do not need to write out the superblock from '->remount_fs()' because
VFS has already called '->sync_fs()' by this time and the superblock has
already been written out. Thus, remove the 'affs_write_super()'
infocation from 'affs_remount()'.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/affs/super.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/affs/super.c b/fs/affs/super.c
index 12b4f58081b..c837e43687a 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -545,10 +545,9 @@ affs_remount(struct super_block *sb, int *flags, char *data)
 	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
 		return 0;
 
-	if (*flags & MS_RDONLY) {
-		affs_write_super(sb);
+	if (*flags & MS_RDONLY)
 		affs_free_bitmap(sb);
-	} else
+	else
 		res = affs_init_bitmap(sb, flags);
 
 	return res;
-- 
cgit v1.2.3-70-g09d2


From e0471c8d8abbc2b07fc82f7b02896d1637909319 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 6 Jun 2012 18:56:54 +0300
Subject: affs: re-structure superblock locking a bit

AFFS wants to serialize the superblock (the root block in AFFS terms) updates
and uses 'lock_super()/unlock_super()' for these purposes. This patch pushes the
locking down to the 'affs_commit_super()' from the callers.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/affs/super.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/affs/super.c b/fs/affs/super.c
index c837e43687a..4ceec563643 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -31,11 +31,13 @@ affs_commit_super(struct super_block *sb, int wait)
 	struct buffer_head *bh = sbi->s_root_bh;
 	struct affs_root_tail *tail = AFFS_ROOT_TAIL(sb, bh);
 
+	lock_super(sb);
 	secs_to_datestamp(get_seconds(), &tail->disk_change);
 	affs_fix_checksum(sb, bh);
 	mark_buffer_dirty(bh);
 	if (wait)
 		sync_dirty_buffer(bh);
+	unlock_super(sb);
 }
 
 static void
@@ -54,22 +56,17 @@ affs_put_super(struct super_block *sb)
 static void
 affs_write_super(struct super_block *sb)
 {
-	lock_super(sb);
 	if (!(sb->s_flags & MS_RDONLY))
 		affs_commit_super(sb, 1);
 	sb->s_dirt = 0;
-	unlock_super(sb);
-
 	pr_debug("AFFS: write_super() at %lu, clean=2\n", get_seconds());
 }
 
 static int
 affs_sync_fs(struct super_block *sb, int wait)
 {
-	lock_super(sb);
 	affs_commit_super(sb, wait);
 	sb->s_dirt = 0;
-	unlock_super(sb);
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From a837107439ea50116e59943556d6902c09e52772 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 6 Jun 2012 18:56:55 +0300
Subject: affs: stop using lock_super

The VFS's 'lock_super()' and 'unlock_super()' calls are deprecated and unwanted
and just wait for a brave knight who'd kill them. This patch makes AFFS stop
using them and use the buffer-head's own lock instead.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/affs/super.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/affs/super.c b/fs/affs/super.c
index 4ceec563643..da7498da80a 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -31,13 +31,14 @@ affs_commit_super(struct super_block *sb, int wait)
 	struct buffer_head *bh = sbi->s_root_bh;
 	struct affs_root_tail *tail = AFFS_ROOT_TAIL(sb, bh);
 
-	lock_super(sb);
+	lock_buffer(bh);
 	secs_to_datestamp(get_seconds(), &tail->disk_change);
 	affs_fix_checksum(sb, bh);
+	unlock_buffer(bh);
+
 	mark_buffer_dirty(bh);
 	if (wait)
 		sync_dirty_buffer(bh);
-	unlock_super(sb);
 }
 
 static void
-- 
cgit v1.2.3-70-g09d2


From a215fef7edfdcd8948037ceb3060b9ae7ebcef8b Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 6 Jun 2012 18:56:56 +0300
Subject: affs: introduce VFS superblock object back-reference

Add an 'sb' VFS superblock back-reference to the 'struct affs_sb_info' data
structure - we will need to find the VFS superblock from a 'struct
affs_sb_info' object in the next patch, so this change is jut a preparation.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/affs/affs.h  | 1 +
 fs/affs/super.c | 1 +
 2 files changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 1fceb320d2f..5a726e99224 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -100,6 +100,7 @@ struct affs_sb_info {
 	char *s_prefix;			/* Prefix for volumes and assigns. */
 	char s_volume[32];		/* Volume prefix for absolute symlinks. */
 	spinlock_t symlink_lock;	/* protects the previous two */
+	struct super_block *sb;		/* the VFS superblock object */
 };
 
 #define SF_INTL		0x0001		/* International filesystem. */
diff --git a/fs/affs/super.c b/fs/affs/super.c
index da7498da80a..0496cbbeda1 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -299,6 +299,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
 		return -ENOMEM;
 
 	sb->s_fs_info = sbi;
+	sbi->sb = sb;
 	mutex_init(&sbi->s_bmlock);
 	spin_lock_init(&sbi->symlink_lock);
 
-- 
cgit v1.2.3-70-g09d2


From 3dd847820d138c9d60764b0e920380373285ff10 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 6 Jun 2012 18:56:57 +0300
Subject: affs: get rid of affs_sync_super

This patch makes affs stop using the VFS '->write_super()' method along with
the 's_dirt' superblock flag, because they are on their way out.

The whole "superblock write-out" VFS infrastructure is served by the
'sync_supers()' kernel thread, which wakes up every 5 (by default) seconds and
writes out all dirty superblocks using the '->write_super()' call-back.  But the
problem with this thread is that it wastes power by waking up the system every
5 seconds, even if there are no diry superblocks, or there are no client
file-systems which would need this (e.g., btrfs does not use
'->write_super()'). So we want to kill it completely and thus, we need to make
file-systems to stop using the '->write_super()' VFS service, and then remove
it together with the kernel thread.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/affs/affs.h   |  6 ++++++
 fs/affs/bitmap.c |  4 ++--
 fs/affs/super.c  | 48 +++++++++++++++++++++++++++++++++++++-----------
 3 files changed, 45 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 5a726e99224..3a130e27eb1 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -3,6 +3,7 @@
 #include <linux/buffer_head.h>
 #include <linux/amigaffs.h>
 #include <linux/mutex.h>
+#include <linux/workqueue.h>
 
 /* AmigaOS allows file names with up to 30 characters length.
  * Names longer than that will be silently truncated. If you
@@ -101,6 +102,9 @@ struct affs_sb_info {
 	char s_volume[32];		/* Volume prefix for absolute symlinks. */
 	spinlock_t symlink_lock;	/* protects the previous two */
 	struct super_block *sb;		/* the VFS superblock object */
+	int work_queued;		/* non-zero delayed work is queued */
+	struct delayed_work sb_work;	/* superblock flush delayed work */
+	spinlock_t work_lock;		/* protects sb_work and work_queued */
 };
 
 #define SF_INTL		0x0001		/* International filesystem. */
@@ -121,6 +125,8 @@ static inline struct affs_sb_info *AFFS_SB(struct super_block *sb)
 	return sb->s_fs_info;
 }
 
+void affs_mark_sb_dirty(struct super_block *sb);
+
 /* amigaffs.c */
 
 extern int	affs_insert_hash(struct inode *inode, struct buffer_head *bh);
diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c
index 3e262711ae0..6e0be43ef6e 100644
--- a/fs/affs/bitmap.c
+++ b/fs/affs/bitmap.c
@@ -103,7 +103,7 @@ affs_free_block(struct super_block *sb, u32 block)
 	*(__be32 *)bh->b_data = cpu_to_be32(tmp - mask);
 
 	mark_buffer_dirty(bh);
-	sb->s_dirt = 1;
+	affs_mark_sb_dirty(sb);
 	bm->bm_free++;
 
 	mutex_unlock(&sbi->s_bmlock);
@@ -248,7 +248,7 @@ find_bit:
 	*(__be32 *)bh->b_data = cpu_to_be32(tmp + mask);
 
 	mark_buffer_dirty(bh);
-	sb->s_dirt = 1;
+	affs_mark_sb_dirty(sb);
 
 	mutex_unlock(&sbi->s_bmlock);
 
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 0496cbbeda1..c70f1e5fc02 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -17,6 +17,7 @@
 #include <linux/magic.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/writeback.h>
 #include "affs.h"
 
 extern struct timezone sys_tz;
@@ -47,6 +48,7 @@ affs_put_super(struct super_block *sb)
 	struct affs_sb_info *sbi = AFFS_SB(sb);
 	pr_debug("AFFS: put_super()\n");
 
+	cancel_delayed_work_sync(&sbi->sb_work);
 	kfree(sbi->s_prefix);
 	affs_free_bitmap(sb);
 	affs_brelse(sbi->s_root_bh);
@@ -54,23 +56,45 @@ affs_put_super(struct super_block *sb)
 	sb->s_fs_info = NULL;
 }
 
-static void
-affs_write_super(struct super_block *sb)
-{
-	if (!(sb->s_flags & MS_RDONLY))
-		affs_commit_super(sb, 1);
-	sb->s_dirt = 0;
-	pr_debug("AFFS: write_super() at %lu, clean=2\n", get_seconds());
-}
-
 static int
 affs_sync_fs(struct super_block *sb, int wait)
 {
 	affs_commit_super(sb, wait);
-	sb->s_dirt = 0;
 	return 0;
 }
 
+static void flush_superblock(struct work_struct *work)
+{
+	struct affs_sb_info *sbi;
+	struct super_block *sb;
+
+	sbi = container_of(work, struct affs_sb_info, sb_work.work);
+	sb = sbi->sb;
+
+	spin_lock(&sbi->work_lock);
+	sbi->work_queued = 0;
+	spin_unlock(&sbi->work_lock);
+
+	affs_commit_super(sb, 1);
+}
+
+void affs_mark_sb_dirty(struct super_block *sb)
+{
+	struct affs_sb_info *sbi = AFFS_SB(sb);
+	unsigned long delay;
+
+	if (sb->s_flags & MS_RDONLY)
+	       return;
+
+	spin_lock(&sbi->work_lock);
+	if (!sbi->work_queued) {
+	       delay = msecs_to_jiffies(dirty_writeback_interval * 10);
+	       queue_delayed_work(system_long_wq, &sbi->sb_work, delay);
+	       sbi->work_queued = 1;
+	}
+	spin_unlock(&sbi->work_lock);
+}
+
 static struct kmem_cache * affs_inode_cachep;
 
 static struct inode *affs_alloc_inode(struct super_block *sb)
@@ -132,7 +156,6 @@ static const struct super_operations affs_sops = {
 	.write_inode	= affs_write_inode,
 	.evict_inode	= affs_evict_inode,
 	.put_super	= affs_put_super,
-	.write_super	= affs_write_super,
 	.sync_fs	= affs_sync_fs,
 	.statfs		= affs_statfs,
 	.remount_fs	= affs_remount,
@@ -302,6 +325,8 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->sb = sb;
 	mutex_init(&sbi->s_bmlock);
 	spin_lock_init(&sbi->symlink_lock);
+	spin_lock_init(&sbi->work_lock);
+	INIT_DELAYED_WORK(&sbi->sb_work, flush_superblock);
 
 	if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block,
 				&blocksize,&sbi->s_prefix,
@@ -526,6 +551,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
 		return -EINVAL;
 	}
 
+	flush_delayed_work_sync(&sbi->sb_work);
 	replace_mount_options(sb, new_opts);
 
 	sbi->s_flags = mount_flags;
-- 
cgit v1.2.3-70-g09d2


From d187663ef24cd3d033f0cbf2867e70b36a3a90b8 Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Thu, 7 Jun 2012 15:45:00 -0700
Subject: fs/direct-io.c: adjust suspicious bit operation

READ is 0, so the result of the bit-and operation is 0.  Rewrite with == as
done elsewhere in the same file.

This problem was found using Coccinelle (http://coccinelle.lip6.fr/).

Signed-off-by: Julia Lawall <julia@diku.dk>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/direct-io.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 0c85fae3766..1faf4cb56f3 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1258,7 +1258,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	 */
 	BUG_ON(retval == -EIOCBQUEUED);
 	if (dio->is_async && retval == 0 && dio->result &&
-	    ((rw & READ) || (dio->result == sdio.size)))
+	    ((rw == READ) || (dio->result == sdio.size)))
 		retval = -EIOCBQUEUED;
 
 	if (retval != -EIOCBQUEUED)
-- 
cgit v1.2.3-70-g09d2


From f7a99c5b7c8bd3d3f533c8b38274e33f3da9096e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 9 Jun 2012 00:59:08 -0400
Subject: get rid of ->mnt_longterm

it's enough to set ->mnt_ns of internal vfsmounts to something
distinct from all struct mnt_namespace out there; then we can
just use the check for ->mnt_ns != NULL in the fast path of
mntput_no_expire()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c    |  2 +-
 fs/fs_struct.c | 32 ++++++++++----------------------
 fs/internal.h  |  2 --
 fs/mount.h     |  9 ++++++++-
 fs/namespace.c | 53 +++++++----------------------------------------------
 5 files changed, 26 insertions(+), 72 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index 40469044088..44acb5b29ae 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2622,7 +2622,7 @@ global_root:
 	if (!slash)
 		error = prepend(buffer, buflen, "/", 1);
 	if (!error)
-		error = real_mount(vfsmnt)->mnt_ns ? 1 : 2;
+		error = is_mounted(vfsmnt) ? 1 : 2;
 	goto out;
 }
 
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index e159e682ad4..5df4775fea0 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -6,18 +6,6 @@
 #include <linux/fs_struct.h>
 #include "internal.h"
 
-static inline void path_get_longterm(struct path *path)
-{
-	path_get(path);
-	mnt_make_longterm(path->mnt);
-}
-
-static inline void path_put_longterm(struct path *path)
-{
-	mnt_make_shortterm(path->mnt);
-	path_put(path);
-}
-
 /*
  * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
  * It can block.
@@ -26,7 +14,7 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
 {
 	struct path old_root;
 
-	path_get_longterm(path);
+	path_get(path);
 	spin_lock(&fs->lock);
 	write_seqcount_begin(&fs->seq);
 	old_root = fs->root;
@@ -34,7 +22,7 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
 	write_seqcount_end(&fs->seq);
 	spin_unlock(&fs->lock);
 	if (old_root.dentry)
-		path_put_longterm(&old_root);
+		path_put(&old_root);
 }
 
 /*
@@ -45,7 +33,7 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
 {
 	struct path old_pwd;
 
-	path_get_longterm(path);
+	path_get(path);
 	spin_lock(&fs->lock);
 	write_seqcount_begin(&fs->seq);
 	old_pwd = fs->pwd;
@@ -54,7 +42,7 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
 	spin_unlock(&fs->lock);
 
 	if (old_pwd.dentry)
-		path_put_longterm(&old_pwd);
+		path_put(&old_pwd);
 }
 
 static inline int replace_path(struct path *p, const struct path *old, const struct path *new)
@@ -84,7 +72,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
 			write_seqcount_end(&fs->seq);
 			while (hits--) {
 				count++;
-				path_get_longterm(new_root);
+				path_get(new_root);
 			}
 			spin_unlock(&fs->lock);
 		}
@@ -92,13 +80,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
 	} while_each_thread(g, p);
 	read_unlock(&tasklist_lock);
 	while (count--)
-		path_put_longterm(old_root);
+		path_put(old_root);
 }
 
 void free_fs_struct(struct fs_struct *fs)
 {
-	path_put_longterm(&fs->root);
-	path_put_longterm(&fs->pwd);
+	path_put(&fs->root);
+	path_put(&fs->pwd);
 	kmem_cache_free(fs_cachep, fs);
 }
 
@@ -132,9 +120,9 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
 
 		spin_lock(&old->lock);
 		fs->root = old->root;
-		path_get_longterm(&fs->root);
+		path_get(&fs->root);
 		fs->pwd = old->pwd;
-		path_get_longterm(&fs->pwd);
+		path_get(&fs->pwd);
 		spin_unlock(&old->lock);
 	}
 	return fs;
diff --git a/fs/internal.h b/fs/internal.h
index 18bc216ea09..d2a23ff61b4 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -50,8 +50,6 @@ extern int copy_mount_string(const void __user *, char **);
 extern struct vfsmount *lookup_mnt(struct path *);
 extern int finish_automount(struct vfsmount *, struct path *);
 
-extern void mnt_make_longterm(struct vfsmount *);
-extern void mnt_make_shortterm(struct vfsmount *);
 extern int sb_prepare_remount_readonly(struct super_block *);
 
 extern void __init mnt_init(void);
diff --git a/fs/mount.h b/fs/mount.h
index 4ef36d93e5a..05a2a1185ef 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -22,7 +22,6 @@ struct mount {
 	struct vfsmount mnt;
 #ifdef CONFIG_SMP
 	struct mnt_pcp __percpu *mnt_pcp;
-	atomic_t mnt_longterm;		/* how many of the refs are longterm */
 #else
 	int mnt_count;
 	int mnt_writers;
@@ -49,6 +48,8 @@ struct mount {
 	int mnt_ghosts;
 };
 
+#define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
+
 static inline struct mount *real_mount(struct vfsmount *mnt)
 {
 	return container_of(mnt, struct mount, mnt);
@@ -59,6 +60,12 @@ static inline int mnt_has_parent(struct mount *mnt)
 	return mnt != mnt->mnt_parent;
 }
 
+static inline int is_mounted(struct vfsmount *mnt)
+{
+	/* neither detached nor internal? */
+	return !IS_ERR_OR_NULL(real_mount(mnt));
+}
+
 extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
 
 static inline void get_mnt_ns(struct mnt_namespace *ns)
diff --git a/fs/namespace.c b/fs/namespace.c
index 1e4a5fe3d7b..a524ea4dbd8 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -621,21 +621,6 @@ static void attach_mnt(struct mount *mnt, struct path *path)
 	list_add_tail(&mnt->mnt_child, &real_mount(path->mnt)->mnt_mounts);
 }
 
-static inline void __mnt_make_longterm(struct mount *mnt)
-{
-#ifdef CONFIG_SMP
-	atomic_inc(&mnt->mnt_longterm);
-#endif
-}
-
-/* needs vfsmount lock for write */
-static inline void __mnt_make_shortterm(struct mount *mnt)
-{
-#ifdef CONFIG_SMP
-	atomic_dec(&mnt->mnt_longterm);
-#endif
-}
-
 /*
  * vfsmount lock must be held for write
  */
@@ -649,10 +634,8 @@ static void commit_tree(struct mount *mnt)
 	BUG_ON(parent == mnt);
 
 	list_add_tail(&head, &mnt->mnt_list);
-	list_for_each_entry(m, &head, mnt_list) {
+	list_for_each_entry(m, &head, mnt_list)
 		m->mnt_ns = n;
-		__mnt_make_longterm(m);
-	}
 
 	list_splice(&head, n->list.prev);
 
@@ -804,7 +787,8 @@ static void mntput_no_expire(struct mount *mnt)
 put_again:
 #ifdef CONFIG_SMP
 	br_read_lock(&vfsmount_lock);
-	if (likely(atomic_read(&mnt->mnt_longterm))) {
+	if (likely(mnt->mnt_ns)) {
+		/* shouldn't be the last one */
 		mnt_add_count(mnt, -1);
 		br_read_unlock(&vfsmount_lock);
 		return;
@@ -1074,8 +1058,6 @@ void umount_tree(struct mount *mnt, int propagate, struct list_head *kill)
 		list_del_init(&p->mnt_expire);
 		list_del_init(&p->mnt_list);
 		__touch_mnt_namespace(p->mnt_ns);
-		if (p->mnt_ns)
-			__mnt_make_shortterm(p);
 		p->mnt_ns = NULL;
 		list_del_init(&p->mnt_child);
 		if (mnt_has_parent(p)) {
@@ -2209,23 +2191,6 @@ static struct mnt_namespace *alloc_mnt_ns(void)
 	return new_ns;
 }
 
-void mnt_make_longterm(struct vfsmount *mnt)
-{
-	__mnt_make_longterm(real_mount(mnt));
-}
-
-void mnt_make_shortterm(struct vfsmount *m)
-{
-#ifdef CONFIG_SMP
-	struct mount *mnt = real_mount(m);
-	if (atomic_add_unless(&mnt->mnt_longterm, -1, 1))
-		return;
-	br_write_lock(&vfsmount_lock);
-	atomic_dec(&mnt->mnt_longterm);
-	br_write_unlock(&vfsmount_lock);
-#endif
-}
-
 /*
  * Allocate a new namespace structure and populate it with contents
  * copied from the namespace of the passed in task structure.
@@ -2265,18 +2230,13 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 	q = new;
 	while (p) {
 		q->mnt_ns = new_ns;
-		__mnt_make_longterm(q);
 		if (fs) {
 			if (&p->mnt == fs->root.mnt) {
 				fs->root.mnt = mntget(&q->mnt);
-				__mnt_make_longterm(q);
-				mnt_make_shortterm(&p->mnt);
 				rootmnt = &p->mnt;
 			}
 			if (&p->mnt == fs->pwd.mnt) {
 				fs->pwd.mnt = mntget(&q->mnt);
-				__mnt_make_longterm(q);
-				mnt_make_shortterm(&p->mnt);
 				pwdmnt = &p->mnt;
 			}
 		}
@@ -2320,7 +2280,6 @@ static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
 	if (!IS_ERR(new_ns)) {
 		struct mount *mnt = real_mount(m);
 		mnt->mnt_ns = new_ns;
-		__mnt_make_longterm(mnt);
 		new_ns->root = mnt;
 		list_add(&new_ns->list, &mnt->mnt_list);
 	} else {
@@ -2615,7 +2574,7 @@ struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
 		 * it is a longterm mount, don't release mnt until
 		 * we unmount before file sys is unregistered
 		*/
-		mnt_make_longterm(mnt);
+		real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL;
 	}
 	return mnt;
 }
@@ -2625,7 +2584,9 @@ void kern_unmount(struct vfsmount *mnt)
 {
 	/* release long term mount so mount point can be released */
 	if (!IS_ERR_OR_NULL(mnt)) {
-		mnt_make_shortterm(mnt);
+		br_write_lock(&vfsmount_lock);
+		real_mount(mnt)->mnt_ns = NULL;
+		br_write_unlock(&vfsmount_lock);
 		mntput(mnt);
 	}
 }
-- 
cgit v1.2.3-70-g09d2


From 6ce6e24e72233073c8ead9419fc5040d44803dae Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 9 Jun 2012 01:16:59 -0400
Subject: get rid of magic in proc_namespace.c

don't rely on proc_mounts->m being the first field; container_of()
is there for purpose.  No need to bother with ->private, while
we are at it - the same container_of will do nicely.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/mount.h          | 4 +++-
 fs/namespace.c      | 6 +++---
 fs/proc_namespace.c | 7 +++----
 3 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/mount.h b/fs/mount.h
index 05a2a1185ef..4f291f9de64 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -74,10 +74,12 @@ static inline void get_mnt_ns(struct mnt_namespace *ns)
 }
 
 struct proc_mounts {
-	struct seq_file m; /* must be the first element */
+	struct seq_file m;
 	struct mnt_namespace *ns;
 	struct path root;
 	int (*show)(struct seq_file *, struct vfsmount *);
 };
 
+#define proc_mounts(p) (container_of((p), struct proc_mounts, m))
+
 extern const struct seq_operations mounts_op;
diff --git a/fs/namespace.c b/fs/namespace.c
index a524ea4dbd8..8f412abcb67 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -923,7 +923,7 @@ EXPORT_SYMBOL(replace_mount_options);
 /* iterator; we want it to have access to namespace_sem, thus here... */
 static void *m_start(struct seq_file *m, loff_t *pos)
 {
-	struct proc_mounts *p = container_of(m, struct proc_mounts, m);
+	struct proc_mounts *p = proc_mounts(m);
 
 	down_read(&namespace_sem);
 	return seq_list_start(&p->ns->list, *pos);
@@ -931,7 +931,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
 
 static void *m_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct proc_mounts *p = container_of(m, struct proc_mounts, m);
+	struct proc_mounts *p = proc_mounts(m);
 
 	return seq_list_next(v, &p->ns->list, pos);
 }
@@ -943,7 +943,7 @@ static void m_stop(struct seq_file *m, void *v)
 
 static int m_show(struct seq_file *m, void *v)
 {
-	struct proc_mounts *p = container_of(m, struct proc_mounts, m);
+	struct proc_mounts *p = proc_mounts(m);
 	struct mount *r = list_entry(v, struct mount, mnt_list);
 	return p->show(m, &r->mnt);
 }
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 5e289a7cbad..5fe34c355e8 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -17,7 +17,7 @@
 
 static unsigned mounts_poll(struct file *file, poll_table *wait)
 {
-	struct proc_mounts *p = file->private_data;
+	struct proc_mounts *p = proc_mounts(file->private_data);
 	struct mnt_namespace *ns = p->ns;
 	unsigned res = POLLIN | POLLRDNORM;
 
@@ -121,7 +121,7 @@ out:
 
 static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
 {
-	struct proc_mounts *p = m->private;
+	struct proc_mounts *p = proc_mounts(m);
 	struct mount *r = real_mount(mnt);
 	struct super_block *sb = mnt->mnt_sb;
 	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
@@ -268,7 +268,6 @@ static int mounts_open_common(struct inode *inode, struct file *file,
 	if (ret)
 		goto err_free;
 
-	p->m.private = p;
 	p->ns = ns;
 	p->root = root;
 	p->m.poll_event = ns->event;
@@ -288,7 +287,7 @@ static int mounts_open_common(struct inode *inode, struct file *file,
 
 static int mounts_release(struct inode *inode, struct file *file)
 {
-	struct proc_mounts *p = file->private_data;
+	struct proc_mounts *p = proc_mounts(file->private_data);
 	path_put(&p->root);
 	put_mnt_ns(p->ns);
 	return seq_release(inode, file);
-- 
cgit v1.2.3-70-g09d2


From 63a44583f3a4408b902a3d7ba18b4ab13d1309ab Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 9 Jun 2012 11:49:04 -0400
Subject: qnx6: don't bother with ->i_dentry in inode-freeing callback

we'll initialize it in inode_init_always() when we allocate that
object again.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/qnx6/inode.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index e44012dc564..2049c814bda 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -622,7 +622,6 @@ static struct inode *qnx6_alloc_inode(struct super_block *sb)
 static void qnx6_i_callback(struct rcu_head *head)
 {
 	struct inode *inode = container_of(head, struct inode, i_rcu);
-	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(qnx6_inode_cachep, QNX6_I(inode));
 }
 
-- 
cgit v1.2.3-70-g09d2


From e6f9f8d0296aad7fbaf01de38ccaa1bf654bbda4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 9 Jun 2012 11:50:36 -0400
Subject: cifs: don't bother with ->i_dentry in ->destroy_inode()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/cifs/cifsfs.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 8b6e344eb0b..bcab12c8714 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -257,7 +257,6 @@ cifs_alloc_inode(struct super_block *sb)
 static void cifs_i_callback(struct rcu_head *head)
 {
 	struct inode *inode = container_of(head, struct inode, i_rcu);
-	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(cifs_inode_cachep, CIFS_I(inode));
 }
 
-- 
cgit v1.2.3-70-g09d2


From 7968ce12e9645c5eb5bb3f4320e43c2e402d580c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 9 Jun 2012 11:51:12 -0400
Subject: adfs: don't bother with ->i_dentry in ->destroy_inode()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/adfs/super.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 06fdcc9382c..bdaec92353c 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -246,7 +246,6 @@ static struct inode *adfs_alloc_inode(struct super_block *sb)
 static void adfs_i_callback(struct rcu_head *head)
 {
 	struct inode *inode = container_of(head, struct inode, i_rcu);
-	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(adfs_inode_cachep, ADFS_I(inode));
 }
 
-- 
cgit v1.2.3-70-g09d2


From 3084ee95f08ce353ae26c18c7627c4e9786983ca Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 9 Jun 2012 13:03:04 -0400
Subject: affs: get rid of open-coded list_for_each_entry()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/affs/amigaffs.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 52a6407682e..1c7fd7928d1 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -126,18 +126,13 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino)
 {
 	struct inode *inode = dentry->d_inode;
 	void *data = dentry->d_fsdata;
-	struct list_head *head, *next;
 
 	spin_lock(&inode->i_lock);
-	head = &inode->i_dentry;
-	next = head->next;
-	while (next != head) {
-		dentry = list_entry(next, struct dentry, d_alias);
+	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
 		if (entry_ino == (u32)(long)dentry->d_fsdata) {
 			dentry->d_fsdata = data;
 			break;
 		}
-		next = next->next;
 	}
 	spin_unlock(&inode->i_lock);
 }
-- 
cgit v1.2.3-70-g09d2


From 12447c40394695c9a19920c65fea124bdf3ea034 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 9 Jun 2012 13:06:09 -0400
Subject: affs: unobfuscate affs_fix_dcache()

and add a comment on what it's doing

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/affs/amigaffs.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 1c7fd7928d1..843cdc99480 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -122,15 +122,13 @@ affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh)
 }
 
 static void
-affs_fix_dcache(struct dentry *dentry, u32 entry_ino)
+affs_fix_dcache(struct inode *inode, u32 entry_ino)
 {
-	struct inode *inode = dentry->d_inode;
-	void *data = dentry->d_fsdata;
-
+	struct dentry *dentry;
 	spin_lock(&inode->i_lock);
 	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
 		if (entry_ino == (u32)(long)dentry->d_fsdata) {
-			dentry->d_fsdata = data;
+			dentry->d_fsdata = (void *)inode->i_ino;
 			break;
 		}
 	}
@@ -172,7 +170,11 @@ affs_remove_link(struct dentry *dentry)
 		}
 
 		affs_lock_dir(dir);
-		affs_fix_dcache(dentry, link_ino);
+		/*
+		 * if there's a dentry for that block, make it
+		 * refer to inode itself.
+		 */
+		affs_fix_dcache(inode, link_ino);
 		retval = affs_remove_hash(dir, link_bh);
 		if (retval) {
 			affs_unlock_dir(dir);
-- 
cgit v1.2.3-70-g09d2


From a614a092bf28d58c742b9ec43209f3f78c3d9fb3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 9 Jun 2012 13:09:15 -0400
Subject: ocfs2: use list_for_each_entry in ocfs2_find_local_alias()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ocfs2/dcache.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index e5ba3481833..a40edc1e1d8 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -170,13 +170,10 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
 				      u64 parent_blkno,
 				      int skip_unhashed)
 {
-	struct list_head *p;
-	struct dentry *dentry = NULL;
+	struct dentry *dentry;
 
 	spin_lock(&inode->i_lock);
-	list_for_each(p, &inode->i_dentry) {
-		dentry = list_entry(p, struct dentry, d_alias);
-
+	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
 		spin_lock(&dentry->d_lock);
 		if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
 			trace_ocfs2_find_local_alias(dentry->d_name.len,
@@ -184,16 +181,13 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
 
 			dget_dlock(dentry);
 			spin_unlock(&dentry->d_lock);
-			break;
+			spin_unlock(&inode->i_lock);
+			return dentry;
 		}
 		spin_unlock(&dentry->d_lock);
-
-		dentry = NULL;
 	}
-
 	spin_unlock(&inode->i_lock);
-
-	return dentry;
+	return NULL;
 }
 
 DEFINE_SPINLOCK(dentry_attach_lock);
-- 
cgit v1.2.3-70-g09d2


From 9f713878f22e0b2d34d62df0ca55f65166375634 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 9 Jun 2012 13:19:12 -0400
Subject: ext4: get rid of open-coded d_find_any_alias()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext4/fsync.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index bb6c7d81131..4359a4d3006 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -135,14 +135,7 @@ static int ext4_sync_parent(struct inode *inode)
 	inode = igrab(inode);
 	while (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
 		ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
-		dentry = NULL;
-		spin_lock(&inode->i_lock);
-		if (!list_empty(&inode->i_dentry)) {
-			dentry = list_first_entry(&inode->i_dentry,
-						  struct dentry, d_alias);
-			dget(dentry);
-		}
-		spin_unlock(&inode->i_lock);
+		dentry = d_find_any_alias(inode);
 		if (!dentry)
 			break;
 		next = igrab(dentry->d_parent->d_inode);
-- 
cgit v1.2.3-70-g09d2


From b3d9b7a3c752dc4b6976a4ff7b8298887a5b734d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 9 Jun 2012 13:51:19 -0400
Subject: vfs: switch i_dentry/d_alias to hlist

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/affs/amigaffs.c     |  3 ++-
 fs/btrfs/inode.c       |  2 +-
 fs/cifs/inode.c        |  5 +++--
 fs/dcache.c            | 33 ++++++++++++++++++---------------
 fs/exportfs/expfs.c    |  3 ++-
 fs/ext4/fsync.c        |  2 +-
 fs/fuse/dir.c          |  2 +-
 fs/inode.c             |  2 +-
 fs/nfs/getroot.c       |  2 +-
 fs/notify/fsnotify.c   |  3 ++-
 fs/ocfs2/dcache.c      |  3 ++-
 include/linux/dcache.h |  2 +-
 include/linux/fs.h     |  2 +-
 13 files changed, 36 insertions(+), 28 deletions(-)

(limited to 'fs')

diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 843cdc99480..eb82ee53ee0 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -125,8 +125,9 @@ static void
 affs_fix_dcache(struct inode *inode, u32 entry_ino)
 {
 	struct dentry *dentry;
+	struct hlist_node *p;
 	spin_lock(&inode->i_lock);
-	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+	hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
 		if (entry_ino == (u32)(long)dentry->d_fsdata) {
 			dentry->d_fsdata = (void *)inode->i_ino;
 			break;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a7d1921ac76..a101572f1ce 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6987,7 +6987,7 @@ void btrfs_destroy_inode(struct inode *inode)
 	struct btrfs_ordered_extent *ordered;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 
-	WARN_ON(!list_empty(&inode->i_dentry));
+	WARN_ON(!hlist_empty(&inode->i_dentry));
 	WARN_ON(inode->i_data.nrpages);
 	WARN_ON(BTRFS_I(inode)->outstanding_extents);
 	WARN_ON(BTRFS_I(inode)->reserved_extents);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 745da3d0653..8e8bb49112f 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -800,7 +800,7 @@ cifs_find_inode(struct inode *inode, void *opaque)
 		return 0;
 
 	/* if it's not a directory or has no dentries, then flag it */
-	if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry))
+	if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry))
 		fattr->cf_flags |= CIFS_FATTR_INO_COLLISION;
 
 	return 1;
@@ -825,9 +825,10 @@ static bool
 inode_has_hashed_dentries(struct inode *inode)
 {
 	struct dentry *dentry;
+	struct hlist_node *p;
 
 	spin_lock(&inode->i_lock);
-	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+	hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
 		if (!d_unhashed(dentry) || IS_ROOT(dentry)) {
 			spin_unlock(&inode->i_lock);
 			return true;
diff --git a/fs/dcache.c b/fs/dcache.c
index 44acb5b29ae..015586f1ffc 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -218,7 +218,7 @@ static void __d_free(struct rcu_head *head)
 {
 	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
 
-	WARN_ON(!list_empty(&dentry->d_alias));
+	WARN_ON(!hlist_unhashed(&dentry->d_alias));
 	if (dname_external(dentry))
 		kfree(dentry->d_name.name);
 	kmem_cache_free(dentry_cache, dentry); 
@@ -267,7 +267,7 @@ static void dentry_iput(struct dentry * dentry)
 	struct inode *inode = dentry->d_inode;
 	if (inode) {
 		dentry->d_inode = NULL;
-		list_del_init(&dentry->d_alias);
+		hlist_del_init(&dentry->d_alias);
 		spin_unlock(&dentry->d_lock);
 		spin_unlock(&inode->i_lock);
 		if (!inode->i_nlink)
@@ -291,7 +291,7 @@ static void dentry_unlink_inode(struct dentry * dentry)
 {
 	struct inode *inode = dentry->d_inode;
 	dentry->d_inode = NULL;
-	list_del_init(&dentry->d_alias);
+	hlist_del_init(&dentry->d_alias);
 	dentry_rcuwalk_barrier(dentry);
 	spin_unlock(&dentry->d_lock);
 	spin_unlock(&inode->i_lock);
@@ -699,10 +699,11 @@ EXPORT_SYMBOL(dget_parent);
 static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
 {
 	struct dentry *alias, *discon_alias;
+	struct hlist_node *p;
 
 again:
 	discon_alias = NULL;
-	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
+	hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) {
 		spin_lock(&alias->d_lock);
  		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
 			if (IS_ROOT(alias) &&
@@ -737,7 +738,7 @@ struct dentry *d_find_alias(struct inode *inode)
 {
 	struct dentry *de = NULL;
 
-	if (!list_empty(&inode->i_dentry)) {
+	if (!hlist_empty(&inode->i_dentry)) {
 		spin_lock(&inode->i_lock);
 		de = __d_find_alias(inode, 0);
 		spin_unlock(&inode->i_lock);
@@ -753,9 +754,10 @@ EXPORT_SYMBOL(d_find_alias);
 void d_prune_aliases(struct inode *inode)
 {
 	struct dentry *dentry;
+	struct hlist_node *p;
 restart:
 	spin_lock(&inode->i_lock);
-	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+	hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
 		spin_lock(&dentry->d_lock);
 		if (!dentry->d_count) {
 			__dget_dlock(dentry);
@@ -977,7 +979,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 			inode = dentry->d_inode;
 			if (inode) {
 				dentry->d_inode = NULL;
-				list_del_init(&dentry->d_alias);
+				hlist_del_init(&dentry->d_alias);
 				if (dentry->d_op && dentry->d_op->d_iput)
 					dentry->d_op->d_iput(dentry, inode);
 				else
@@ -1312,7 +1314,7 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 	INIT_HLIST_BL_NODE(&dentry->d_hash);
 	INIT_LIST_HEAD(&dentry->d_lru);
 	INIT_LIST_HEAD(&dentry->d_subdirs);
-	INIT_LIST_HEAD(&dentry->d_alias);
+	INIT_HLIST_NODE(&dentry->d_alias);
 	INIT_LIST_HEAD(&dentry->d_u.d_child);
 	d_set_d_op(dentry, dentry->d_sb->s_d_op);
 
@@ -1400,7 +1402,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
 	if (inode) {
 		if (unlikely(IS_AUTOMOUNT(inode)))
 			dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
-		list_add(&dentry->d_alias, &inode->i_dentry);
+		hlist_add_head(&dentry->d_alias, &inode->i_dentry);
 	}
 	dentry->d_inode = inode;
 	dentry_rcuwalk_barrier(dentry);
@@ -1425,7 +1427,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
  
 void d_instantiate(struct dentry *entry, struct inode * inode)
 {
-	BUG_ON(!list_empty(&entry->d_alias));
+	BUG_ON(!hlist_unhashed(&entry->d_alias));
 	if (inode)
 		spin_lock(&inode->i_lock);
 	__d_instantiate(entry, inode);
@@ -1458,13 +1460,14 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
 	int len = entry->d_name.len;
 	const char *name = entry->d_name.name;
 	unsigned int hash = entry->d_name.hash;
+	struct hlist_node *p;
 
 	if (!inode) {
 		__d_instantiate(entry, NULL);
 		return NULL;
 	}
 
-	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
+	hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) {
 		/*
 		 * Don't need alias->d_lock here, because aliases with
 		 * d_parent == entry->d_parent are not subject to name or
@@ -1490,7 +1493,7 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
 {
 	struct dentry *result;
 
-	BUG_ON(!list_empty(&entry->d_alias));
+	BUG_ON(!hlist_unhashed(&entry->d_alias));
 
 	if (inode)
 		spin_lock(&inode->i_lock);
@@ -1531,9 +1534,9 @@ static struct dentry * __d_find_any_alias(struct inode *inode)
 {
 	struct dentry *alias;
 
-	if (list_empty(&inode->i_dentry))
+	if (hlist_empty(&inode->i_dentry))
 		return NULL;
-	alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
+	alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
 	__dget(alias);
 	return alias;
 }
@@ -1607,7 +1610,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
 	spin_lock(&tmp->d_lock);
 	tmp->d_inode = inode;
 	tmp->d_flags |= DCACHE_DISCONNECTED;
-	list_add(&tmp->d_alias, &inode->i_dentry);
+	hlist_add_head(&tmp->d_alias, &inode->i_dentry);
 	hlist_bl_lock(&tmp->d_sb->s_anon);
 	hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
 	hlist_bl_unlock(&tmp->d_sb->s_anon);
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index b0201ca6e9c..b42063cf1b2 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -44,13 +44,14 @@ find_acceptable_alias(struct dentry *result,
 {
 	struct dentry *dentry, *toput = NULL;
 	struct inode *inode;
+	struct hlist_node *p;
 
 	if (acceptable(context, result))
 		return result;
 
 	inode = result->d_inode;
 	spin_lock(&inode->i_lock);
-	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+	hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
 		dget(dentry);
 		spin_unlock(&inode->i_lock);
 		if (toput)
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 4359a4d3006..2a1dcea4f12 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -225,7 +225,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 
 	if (!journal) {
 		ret = __sync_inode(inode, datasync);
-		if (!ret && !list_empty(&inode->i_dentry))
+		if (!ret && !hlist_empty(&inode->i_dentry))
 			ret = ext4_sync_parent(inode);
 		goto out;
 	}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 334e0b18a01..f7543f72897 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -249,7 +249,7 @@ static struct dentry *fuse_d_add_directory(struct dentry *entry,
 		/* This tries to shrink the subtree below alias */
 		fuse_invalidate_entry(alias);
 		dput(alias);
-		if (!list_empty(&inode->i_dentry))
+		if (!hlist_empty(&inode->i_dentry))
 			return ERR_PTR(-EBUSY);
 	} else {
 		dput(alias);
diff --git a/fs/inode.c b/fs/inode.c
index c99163b1b31..775cbabd4fa 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -182,7 +182,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 	}
 	inode->i_private = NULL;
 	inode->i_mapping = mapping;
-	INIT_LIST_HEAD(&inode->i_dentry);	/* buggered by rcu freeing */
+	INIT_HLIST_HEAD(&inode->i_dentry);	/* buggered by rcu freeing */
 #ifdef CONFIG_FS_POSIX_ACL
 	inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
 #endif
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 8abfb19bd3a..a67990f90bd 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -62,7 +62,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
 		 */
 		spin_lock(&sb->s_root->d_inode->i_lock);
 		spin_lock(&sb->s_root->d_lock);
-		list_del_init(&sb->s_root->d_alias);
+		hlist_del_init(&sb->s_root->d_alias);
 		spin_unlock(&sb->s_root->d_lock);
 		spin_unlock(&sb->s_root->d_inode->i_lock);
 	}
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index b39c5c161ad..6baadb5a843 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -52,6 +52,7 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
 void __fsnotify_update_child_dentry_flags(struct inode *inode)
 {
 	struct dentry *alias;
+	struct hlist_node *p;
 	int watched;
 
 	if (!S_ISDIR(inode->i_mode))
@@ -63,7 +64,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
 	spin_lock(&inode->i_lock);
 	/* run all of the dentries associated with this inode.  Since this is a
 	 * directory, there damn well better only be one item on this list */
-	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
+	hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) {
 		struct dentry *child;
 
 		/* run all of the children of the original inode and fix their
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index a40edc1e1d8..af4488268e4 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -170,10 +170,11 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
 				      u64 parent_blkno,
 				      int skip_unhashed)
 {
+	struct hlist_node *p;
 	struct dentry *dentry;
 
 	spin_lock(&inode->i_lock);
-	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+	hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
 		spin_lock(&dentry->d_lock);
 		if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
 			trace_ocfs2_find_local_alias(dentry->d_name.len,
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 094789ff3e9..8ca25551820 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -128,7 +128,7 @@ struct dentry {
 	 	struct rcu_head d_rcu;
 	} d_u;
 	struct list_head d_subdirs;	/* our children */
-	struct list_head d_alias;	/* inode alias list */
+	struct hlist_node d_alias;	/* inode alias list */
 };
 
 /*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 17fd887c798..f06db6bd5a7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -826,7 +826,7 @@ struct inode {
 	struct list_head	i_lru;		/* inode LRU list */
 	struct list_head	i_sb_list;
 	union {
-		struct list_head	i_dentry;
+		struct hlist_head	i_dentry;
 		struct rcu_head		i_rcu;
 	};
 	u64			i_version;
-- 
cgit v1.2.3-70-g09d2


From 1d674107ea4b68669e012e654d64369b7f2bb250 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 9 Jun 2012 19:52:19 -0400
Subject: coda: use list_for_each_entry

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/coda/cache.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 69015787618..958ae0e0ff8 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -89,17 +89,13 @@ int coda_cache_check(struct inode *inode, int mask)
 /* this won't do any harm: just flag all children */
 static void coda_flag_children(struct dentry *parent, int flag)
 {
-	struct list_head *child;
 	struct dentry *de;
 
 	spin_lock(&parent->d_lock);
-	list_for_each(child, &parent->d_subdirs)
-	{
-		de = list_entry(child, struct dentry, d_u.d_child);
+	list_for_each_entry(de, &parent->d_subdirs, d_u.d_child) {
 		/* don't know what to do with negative dentries */
-		if ( ! de->d_inode ) 
-			continue;
-		coda_flag_inode(de->d_inode, flag);
+		if (de->d_inode ) 
+			coda_flag_inode(de->d_inode, flag);
 	}
 	spin_unlock(&parent->d_lock);
 	return; 
-- 
cgit v1.2.3-70-g09d2


From 6d7b5aaed7d887b34f29f900244cdbd17a86637c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 10 Jun 2012 04:15:17 -0400
Subject: namei.c: let follow_link() do put_link() on failure

no need for kludgy "set cookie to ERR_PTR(...) because we failed
before we did actual ->follow_link() and want to suppress put_link()",
no pointless check in put_link() itself.

Callers checked if follow_link() has failed anyway; might as well
break out of their loops if that happened, without bothering
to call put_link() first.

[AV: folded fixes from hch]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 74 ++++++++++++++++++++++++++++++++++----------------------------
 1 file changed, 41 insertions(+), 33 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 7d694194024..6135a14d5a8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -605,7 +605,7 @@ static inline void path_to_nameidata(const struct path *path,
 static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
 {
 	struct inode *inode = link->dentry->d_inode;
-	if (!IS_ERR(cookie) && inode->i_op->put_link)
+	if (inode->i_op->put_link)
 		inode->i_op->put_link(link->dentry, nd, cookie);
 	path_put(link);
 }
@@ -613,19 +613,19 @@ static inline void put_link(struct nameidata *nd, struct path *link, void *cooki
 static __always_inline int
 follow_link(struct path *link, struct nameidata *nd, void **p)
 {
-	int error;
 	struct dentry *dentry = link->dentry;
+	int error;
+	char *s;
 
 	BUG_ON(nd->flags & LOOKUP_RCU);
 
 	if (link->mnt == nd->path.mnt)
 		mntget(link->mnt);
 
-	if (unlikely(current->total_link_count >= 40)) {
-		*p = ERR_PTR(-ELOOP); /* no ->put_link(), please */
-		path_put(&nd->path);
-		return -ELOOP;
-	}
+	error = -ELOOP;
+	if (unlikely(current->total_link_count >= 40))
+		goto out_put_nd_path;
+
 	cond_resched();
 	current->total_link_count++;
 
@@ -633,30 +633,37 @@ follow_link(struct path *link, struct nameidata *nd, void **p)
 	nd_set_link(nd, NULL);
 
 	error = security_inode_follow_link(link->dentry, nd);
-	if (error) {
-		*p = ERR_PTR(error); /* no ->put_link(), please */
-		path_put(&nd->path);
-		return error;
-	}
+	if (error)
+		goto out_put_nd_path;
 
 	nd->last_type = LAST_BIND;
 	*p = dentry->d_inode->i_op->follow_link(dentry, nd);
 	error = PTR_ERR(*p);
-	if (!IS_ERR(*p)) {
-		char *s = nd_get_link(nd);
-		error = 0;
-		if (s)
-			error = __vfs_follow_link(nd, s);
-		else if (nd->last_type == LAST_BIND) {
-			nd->flags |= LOOKUP_JUMPED;
-			nd->inode = nd->path.dentry->d_inode;
-			if (nd->inode->i_op->follow_link) {
-				/* stepped on a _really_ weird one */
-				path_put(&nd->path);
-				error = -ELOOP;
-			}
+	if (IS_ERR(*p))
+		goto out_put_link;
+
+	error = 0;
+	s = nd_get_link(nd);
+	if (s) {
+		error = __vfs_follow_link(nd, s);
+	} else if (nd->last_type == LAST_BIND) {
+		nd->flags |= LOOKUP_JUMPED;
+		nd->inode = nd->path.dentry->d_inode;
+		if (nd->inode->i_op->follow_link) {
+			/* stepped on a _really_ weird one */
+			path_put(&nd->path);
+			error = -ELOOP;
 		}
 	}
+	if (unlikely(error))
+		put_link(nd, link, *p);
+
+	return error;
+
+out_put_nd_path:
+	path_put(&nd->path);
+out_put_link:
+	path_put(link);
 	return error;
 }
 
@@ -1383,9 +1390,10 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
 		void *cookie;
 
 		res = follow_link(&link, nd, &cookie);
-		if (!res)
-			res = walk_component(nd, path, &nd->last,
-					     nd->last_type, LOOKUP_FOLLOW);
+		if (res)
+			break;
+		res = walk_component(nd, path, &nd->last,
+				     nd->last_type, LOOKUP_FOLLOW);
 		put_link(nd, &link, cookie);
 	} while (res > 0);
 
@@ -1777,8 +1785,9 @@ static int path_lookupat(int dfd, const char *name,
 			struct path link = path;
 			nd->flags |= LOOKUP_PARENT;
 			err = follow_link(&link, nd, &cookie);
-			if (!err)
-				err = lookup_last(nd, &path);
+			if (err)
+				break;
+			err = lookup_last(nd, &path);
 			put_link(nd, &link, cookie);
 		}
 	}
@@ -2475,9 +2484,8 @@ static struct file *path_openat(int dfd, const char *pathname,
 		nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
 		error = follow_link(&link, nd, &cookie);
 		if (unlikely(error))
-			filp = ERR_PTR(error);
-		else
-			filp = do_last(nd, &path, op, pathname);
+			goto out_filp;
+		filp = do_last(nd, &path, op, pathname);
 		put_link(nd, &link, cookie);
 	}
 out:
-- 
cgit v1.2.3-70-g09d2


From 37d7fffc9cafe75ded8a840fa30ba625f99ed7ae Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 5 Jun 2012 15:10:12 +0200
Subject: vfs: do_last(): inline lookup_slow()

Copy lookup_slow() into do_last().

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 6135a14d5a8..68742e3cb98 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2254,9 +2254,22 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 			if (error < 0)
 				goto exit;
 
-			error = lookup_slow(nd, &nd->last, path);
-			if (error < 0)
+			BUG_ON(nd->inode != dir->d_inode);
+
+			mutex_lock(&dir->d_inode->i_mutex);
+			dentry = __lookup_hash(&nd->last, dir, nd);
+			mutex_unlock(&dir->d_inode->i_mutex);
+			error = PTR_ERR(dentry);
+			if (IS_ERR(dentry))
 				goto exit;
+			path->mnt = nd->path.mnt;
+			path->dentry = dentry;
+			error = follow_managed(path, nd->flags);
+			if (unlikely(error < 0))
+				goto exit_dput;
+
+			if (error)
+				nd->flags |= LOOKUP_JUMPED;
 
 			inode = path->dentry->d_inode;
 		}
-- 
cgit v1.2.3-70-g09d2


From b6183df7b294997a748eeb9991daa126986ead12 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 5 Jun 2012 15:10:13 +0200
Subject: vfs: do_last(): separate O_CREAT specific code

Check O_CREAT on the slow lookup paths where necessary.  This allows the rest to
be shared with plain open.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 68742e3cb98..12ed29712b4 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2274,22 +2274,23 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 			inode = path->dentry->d_inode;
 		}
 		goto finish_lookup;
-	}
-
-	/* create side of things */
-	/*
-	 * This will *only* deal with leaving RCU mode - LOOKUP_JUMPED has been
-	 * cleared when we got to the last component we are about to look up
-	 */
-	error = complete_walk(nd);
-	if (error)
-		return ERR_PTR(error);
+	} else {
+		/* create side of things */
+		/*
+		 * This will *only* deal with leaving RCU mode - LOOKUP_JUMPED
+		 * has been cleared when we got to the last component we are
+		 * about to look up
+		 */
+		error = complete_walk(nd);
+		if (error)
+			return ERR_PTR(error);
 
-	audit_inode(pathname, dir);
-	error = -EISDIR;
-	/* trailing slashes? */
-	if (nd->last.name[nd->last.len])
-		goto exit;
+		audit_inode(pathname, dir);
+		error = -EISDIR;
+		/* trailing slashes? */
+		if (nd->last.name[nd->last.len])
+			goto exit;
+	}
 
 retry_lookup:
 	mutex_lock(&dir->d_inode->i_mutex);
@@ -2305,7 +2306,7 @@ retry_lookup:
 	path->mnt = nd->path.mnt;
 
 	/* Negative dentry, just create the file */
-	if (!dentry->d_inode) {
+	if (!dentry->d_inode && (open_flag & O_CREAT)) {
 		umode_t mode = op->mode;
 		if (!IS_POSIXACL(dir->d_inode))
 			mode &= ~current_umask();
-- 
cgit v1.2.3-70-g09d2


From 7157486541bffc0dfec912e21ae639b029dae3d3 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 5 Jun 2012 15:10:14 +0200
Subject: vfs: do_last(): common slow lookup

Make the slow lookup part of O_CREAT and non-O_CREAT opens common.

This allows atomic_open to be hooked into the slow lookup part.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 27 +++++----------------------
 1 file changed, 5 insertions(+), 22 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 12ed29712b4..285e62e925f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2250,30 +2250,13 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 			symlink_ok = 1;
 		/* we _can_ be in RCU mode here */
 		error = lookup_fast(nd, &nd->last, path, &inode);
-		if (unlikely(error)) {
-			if (error < 0)
-				goto exit;
-
-			BUG_ON(nd->inode != dir->d_inode);
-
-			mutex_lock(&dir->d_inode->i_mutex);
-			dentry = __lookup_hash(&nd->last, dir, nd);
-			mutex_unlock(&dir->d_inode->i_mutex);
-			error = PTR_ERR(dentry);
-			if (IS_ERR(dentry))
-				goto exit;
-			path->mnt = nd->path.mnt;
-			path->dentry = dentry;
-			error = follow_managed(path, nd->flags);
-			if (unlikely(error < 0))
-				goto exit_dput;
+		if (likely(!error))
+			goto finish_lookup;
 
-			if (error)
-				nd->flags |= LOOKUP_JUMPED;
+		if (error < 0)
+			goto exit;
 
-			inode = path->dentry->d_inode;
-		}
-		goto finish_lookup;
+		BUG_ON(nd->inode != dir->d_inode);
 	} else {
 		/* create side of things */
 		/*
-- 
cgit v1.2.3-70-g09d2


From d58ffd35c1e595df2cf8ac4803f178c8be95ca7a Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 5 Jun 2012 15:10:15 +0200
Subject: vfs: add lookup_open()

Split out lookup + maybe create from do_last().  This is the part under i_mutex
protection.

The function is called lookup_open() and returns a filp even though the open
part is not used yet.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 99 ++++++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 61 insertions(+), 38 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 285e62e925f..fad7117dbb2 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2196,6 +2196,60 @@ static inline int open_to_namei_flags(int flag)
 	return flag;
 }
 
+/*
+ * Lookup, maybe create and open the last component
+ *
+ * Must be called with i_mutex held on parent.
+ *
+ * Returns open file or NULL on success, error otherwise.  NULL means no open
+ * was performed, only lookup.
+ */
+static struct file *lookup_open(struct nameidata *nd, struct path *path,
+				const struct open_flags *op,
+				int *want_write, bool *created)
+{
+	struct dentry *dir = nd->path.dentry;
+	struct dentry *dentry;
+	int error;
+
+	*created = false;
+	dentry = lookup_hash(nd);
+	if (IS_ERR(dentry))
+		return ERR_CAST(dentry);
+
+	/* Negative dentry, just create the file */
+	if (!dentry->d_inode && (op->open_flag & O_CREAT)) {
+		umode_t mode = op->mode;
+		if (!IS_POSIXACL(dir->d_inode))
+			mode &= ~current_umask();
+		/*
+		 * This write is needed to ensure that a
+		 * rw->ro transition does not occur between
+		 * the time when the file is created and when
+		 * a permanent write count is taken through
+		 * the 'struct file' in nameidata_to_filp().
+		 */
+		error = mnt_want_write(nd->path.mnt);
+		if (error)
+			goto out_dput;
+		*want_write = 1;
+		*created = true;
+		error = security_path_mknod(&nd->path, dentry, mode, 0);
+		if (error)
+			goto out_dput;
+		error = vfs_create(dir->d_inode, dentry, mode, nd);
+		if (error)
+			goto out_dput;
+	}
+	path->dentry = dentry;
+	path->mnt = nd->path.mnt;
+	return NULL;
+
+out_dput:
+	dput(dentry);
+	return ERR_PTR(error);
+}
+
 /*
  * Handle the last step of open()
  */
@@ -2203,13 +2257,13 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 			    const struct open_flags *op, const char *pathname)
 {
 	struct dentry *dir = nd->path.dentry;
-	struct dentry *dentry;
 	int open_flag = op->open_flag;
 	int will_truncate = open_flag & O_TRUNC;
 	int want_write = 0;
 	int acc_mode = op->acc_mode;
 	struct file *filp;
 	struct inode *inode;
+	bool created;
 	int symlink_ok = 0;
 	struct path save_parent = { .dentry = NULL, .mnt = NULL };
 	bool retried = false;
@@ -2277,53 +2331,24 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 
 retry_lookup:
 	mutex_lock(&dir->d_inode->i_mutex);
+	filp = lookup_open(nd, path, op, &want_write, &created);
+	mutex_unlock(&dir->d_inode->i_mutex);
 
-	dentry = lookup_hash(nd);
-	error = PTR_ERR(dentry);
-	if (IS_ERR(dentry)) {
-		mutex_unlock(&dir->d_inode->i_mutex);
-		goto exit;
-	}
-
-	path->dentry = dentry;
-	path->mnt = nd->path.mnt;
+	if (IS_ERR(filp))
+		goto out;
 
-	/* Negative dentry, just create the file */
-	if (!dentry->d_inode && (open_flag & O_CREAT)) {
-		umode_t mode = op->mode;
-		if (!IS_POSIXACL(dir->d_inode))
-			mode &= ~current_umask();
-		/*
-		 * This write is needed to ensure that a
-		 * rw->ro transition does not occur between
-		 * the time when the file is created and when
-		 * a permanent write count is taken through
-		 * the 'struct file' in nameidata_to_filp().
-		 */
-		error = mnt_want_write(nd->path.mnt);
-		if (error)
-			goto exit_mutex_unlock;
-		want_write = 1;
+	if (created) {
 		/* Don't check for write permission, don't truncate */
 		open_flag &= ~O_TRUNC;
 		will_truncate = 0;
 		acc_mode = MAY_OPEN;
-		error = security_path_mknod(&nd->path, dentry, mode, 0);
-		if (error)
-			goto exit_mutex_unlock;
-		error = vfs_create(dir->d_inode, dentry, mode, nd);
-		if (error)
-			goto exit_mutex_unlock;
-		mutex_unlock(&dir->d_inode->i_mutex);
-		dput(nd->path.dentry);
-		nd->path.dentry = dentry;
+		path_to_nameidata(path, nd);
 		goto common;
 	}
 
 	/*
 	 * It already exists.
 	 */
-	mutex_unlock(&dir->d_inode->i_mutex);
 	audit_inode(pathname, path->dentry);
 
 	error = -EEXIST;
@@ -2432,8 +2457,6 @@ out:
 	terminate_walk(nd);
 	return filp;
 
-exit_mutex_unlock:
-	mutex_unlock(&dir->d_inode->i_mutex);
 exit_dput:
 	path_put_conditional(path, nd);
 exit:
-- 
cgit v1.2.3-70-g09d2


From 54ef487241e863a6046536ac5b1fcd5d7cde86e5 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 5 Jun 2012 15:10:16 +0200
Subject: vfs: lookup_open(): expand lookup_hash()

Copy __lookup_hash() into lookup_open().  The next patch will insert the atomic
open call just before the real lookup.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index fad7117dbb2..ccb0eb17f52 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2209,14 +2209,24 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path,
 				int *want_write, bool *created)
 {
 	struct dentry *dir = nd->path.dentry;
+	struct inode *dir_inode = dir->d_inode;
 	struct dentry *dentry;
 	int error;
+	bool need_lookup;
 
 	*created = false;
-	dentry = lookup_hash(nd);
+	dentry = lookup_dcache(&nd->last, dir, nd, &need_lookup);
 	if (IS_ERR(dentry))
 		return ERR_CAST(dentry);
 
+	if (need_lookup) {
+		BUG_ON(dentry->d_inode);
+
+		dentry = lookup_real(dir_inode, dentry, nd);
+		if (IS_ERR(dentry))
+			return ERR_CAST(dentry);
+	}
+
 	/* Negative dentry, just create the file */
 	if (!dentry->d_inode && (op->open_flag & O_CREAT)) {
 		umode_t mode = op->mode;
-- 
cgit v1.2.3-70-g09d2


From d18e9008c377dc6a6d2166a6840bf3a23a5867fd Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 5 Jun 2012 15:10:17 +0200
Subject: vfs: add i_op->atomic_open()

Add a new inode operation which is called on the last component of an open.
Using this the filesystem can look up, possibly create and open the file in one
atomic operation.  If it cannot perform this (e.g. the file type turned out to
be wrong) it may signal this by returning NULL instead of an open struct file
pointer.

i_op->atomic_open() is only called if the last component is negative or needs
lookup.  Handling cached positive dentries here doesn't add much value: these
can be opened using f_op->open().  If the cached file turns out to be invalid,
the open can be retried, this time using ->atomic_open() with a fresh dentry.

For now leave the old way of using open intents in lookup and revalidate in
place.  This will be removed once all the users are converted.

David Howells noticed that if ->atomic_open() opens the file but does not create
it, handle_truncate() will be called on it even if it is not a regular file.
Fix this by checking the file type in this case too.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/Locking |   4 +
 Documentation/filesystems/vfs.txt |  11 +++
 fs/internal.h                     |   5 +
 fs/namei.c                        | 203 +++++++++++++++++++++++++++++++++++++-
 fs/open.c                         |  42 ++++++++
 include/linux/fs.h                |   7 ++
 6 files changed, 270 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 8e2da1e06e3..8157488c346 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -62,6 +62,9 @@ ata *);
 	int (*removexattr) (struct dentry *, const char *);
 	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
 	void (*update_time)(struct inode *, struct timespec *, int);
+	struct file * (*atomic_open)(struct inode *, struct dentry *,
+				struct opendata *, unsigned open_flag,
+				umode_t create_mode, bool *created);
 
 locking rules:
 	all may block
@@ -89,6 +92,7 @@ listxattr:	no
 removexattr:	yes
 fiemap:		no
 update_time:	no
+atomic_open:	yes
 
 	Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
 victim.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index efd23f48170..beb6e691f70 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -364,6 +364,9 @@ struct inode_operations {
 	ssize_t (*listxattr) (struct dentry *, char *, size_t);
 	int (*removexattr) (struct dentry *, const char *);
 	void (*update_time)(struct inode *, struct timespec *, int);
+	struct file * (*atomic_open)(struct inode *, struct dentry *,
+				struct opendata *, unsigned open_flag,
+				umode_t create_mode, bool *created);
 };
 
 Again, all methods are called without any locks being held, unless
@@ -476,6 +479,14 @@ otherwise noted.
   	an inode.  If this is not defined the VFS will update the inode itself
   	and call mark_inode_dirty_sync.
 
+  atomic_open: called on the last component of an open.  Using this optional
+  	method the filesystem can look up, possibly create and open the file in
+  	one atomic operation.  If it cannot perform this (e.g. the file type
+  	turned out to be wrong) it may signal this by returning NULL instead of
+  	an open struct file pointer.  This method is only called if the last
+  	component is negative or needs lookup.  Cached positive dentries are
+  	still handled by f_op->open().
+
 The Address Space Object
 ========================
 
diff --git a/fs/internal.h b/fs/internal.h
index d2a23ff61b4..70067775df2 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -85,6 +85,11 @@ extern struct super_block *user_get_super(dev_t);
 struct nameidata;
 extern struct file *nameidata_to_filp(struct nameidata *);
 extern void release_open_intent(struct nameidata *);
+struct opendata {
+	struct dentry *dentry;
+	struct vfsmount *mnt;
+	struct file **filp;
+};
 struct open_flags {
 	int open_flag;
 	umode_t mode;
diff --git a/fs/namei.c b/fs/namei.c
index ccb0eb17f52..9e11ae83bff 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2196,6 +2196,176 @@ static inline int open_to_namei_flags(int flag)
 	return flag;
 }
 
+static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode)
+{
+	int error = security_path_mknod(dir, dentry, mode, 0);
+	if (error)
+		return error;
+
+	error = inode_permission(dir->dentry->d_inode, MAY_WRITE | MAY_EXEC);
+	if (error)
+		return error;
+
+	return security_inode_create(dir->dentry->d_inode, dentry, mode);
+}
+
+static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
+				struct path *path, const struct open_flags *op,
+				int *want_write, bool need_lookup,
+				bool *created)
+{
+	struct inode *dir =  nd->path.dentry->d_inode;
+	unsigned open_flag = open_to_namei_flags(op->open_flag);
+	umode_t mode;
+	int error;
+	int acc_mode;
+	struct opendata od;
+	struct file *filp;
+	int create_error = 0;
+	struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
+
+	BUG_ON(dentry->d_inode);
+
+	/* Don't create child dentry for a dead directory. */
+	if (unlikely(IS_DEADDIR(dir))) {
+		filp = ERR_PTR(-ENOENT);
+		goto out;
+	}
+
+	mode = op->mode & S_IALLUGO;
+	if ((open_flag & O_CREAT) && !IS_POSIXACL(dir))
+		mode &= ~current_umask();
+
+	if (open_flag & O_EXCL) {
+		open_flag &= ~O_TRUNC;
+		*created = true;
+	}
+
+	/*
+	 * Checking write permission is tricky, bacuse we don't know if we are
+	 * going to actually need it: O_CREAT opens should work as long as the
+	 * file exists.  But checking existence breaks atomicity.  The trick is
+	 * to check access and if not granted clear O_CREAT from the flags.
+	 *
+	 * Another problem is returing the "right" error value (e.g. for an
+	 * O_EXCL open we want to return EEXIST not EROFS).
+	 */
+	if ((open_flag & (O_CREAT | O_TRUNC)) ||
+	    (open_flag & O_ACCMODE) != O_RDONLY) {
+		error = mnt_want_write(nd->path.mnt);
+		if (!error) {
+			*want_write = 1;
+		} else if (!(open_flag & O_CREAT)) {
+			/*
+			 * No O_CREATE -> atomicity not a requirement -> fall
+			 * back to lookup + open
+			 */
+			goto no_open;
+		} else if (open_flag & (O_EXCL | O_TRUNC)) {
+			/* Fall back and fail with the right error */
+			create_error = error;
+			goto no_open;
+		} else {
+			/* No side effects, safe to clear O_CREAT */
+			create_error = error;
+			open_flag &= ~O_CREAT;
+		}
+	}
+
+	if (open_flag & O_CREAT) {
+		error = may_o_create(&nd->path, dentry, op->mode);
+		if (error) {
+			create_error = error;
+			if (open_flag & O_EXCL)
+				goto no_open;
+			open_flag &= ~O_CREAT;
+		}
+	}
+
+	if (nd->flags & LOOKUP_DIRECTORY)
+		open_flag |= O_DIRECTORY;
+
+	od.dentry = DENTRY_NOT_SET;
+	od.mnt = nd->path.mnt;
+	od.filp = &nd->intent.open.file;
+	filp = dir->i_op->atomic_open(dir, dentry, &od, open_flag, mode,
+				      created);
+	if (IS_ERR(filp)) {
+		if (WARN_ON(od.dentry != DENTRY_NOT_SET))
+			dput(od.dentry);
+
+		if (create_error && PTR_ERR(filp) == -ENOENT)
+			filp = ERR_PTR(create_error);
+		goto out;
+	}
+
+	acc_mode = op->acc_mode;
+	if (*created) {
+		fsnotify_create(dir, dentry);
+		acc_mode = MAY_OPEN;
+	}
+
+	if (!filp) {
+		if (WARN_ON(od.dentry == DENTRY_NOT_SET)) {
+			filp = ERR_PTR(-EIO);
+			goto out;
+		}
+		if (od.dentry) {
+			dput(dentry);
+			dentry = od.dentry;
+		}
+		goto looked_up;
+	}
+
+	/*
+	 * We didn't have the inode before the open, so check open permission
+	 * here.
+	 */
+	error = may_open(&filp->f_path, acc_mode, open_flag);
+	if (error)
+		goto out_fput;
+
+	error = open_check_o_direct(filp);
+	if (error)
+		goto out_fput;
+
+out:
+	dput(dentry);
+	return filp;
+
+out_fput:
+	fput(filp);
+	filp = ERR_PTR(error);
+	goto out;
+
+no_open:
+	if (need_lookup) {
+		dentry = lookup_real(dir, dentry, nd);
+		if (IS_ERR(dentry))
+			return ERR_CAST(dentry);
+
+		if (create_error) {
+			int open_flag = op->open_flag;
+
+			filp = ERR_PTR(create_error);
+			if ((open_flag & O_EXCL)) {
+				if (!dentry->d_inode)
+					goto out;
+			} else if (!dentry->d_inode) {
+				goto out;
+			} else if ((open_flag & O_TRUNC) &&
+				   S_ISREG(dentry->d_inode->i_mode)) {
+				goto out;
+			}
+			/* will fail later, go on to get the right error */
+		}
+	}
+looked_up:
+	path->dentry = dentry;
+	path->mnt = nd->path.mnt;
+	return NULL;
+}
+
 /*
  * Lookup, maybe create and open the last component
  *
@@ -2219,6 +2389,15 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path,
 	if (IS_ERR(dentry))
 		return ERR_CAST(dentry);
 
+	/* Cached positive dentry: will open in f_op->open */
+	if (!need_lookup && dentry->d_inode)
+		goto out_no_open;
+
+	if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) {
+		return atomic_open(nd, dentry, path, op, want_write,
+				   need_lookup, created);
+	}
+
 	if (need_lookup) {
 		BUG_ON(dentry->d_inode);
 
@@ -2251,6 +2430,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path,
 		if (error)
 			goto out_dput;
 	}
+out_no_open:
 	path->dentry = dentry;
 	path->mnt = nd->path.mnt;
 	return NULL;
@@ -2344,8 +2524,16 @@ retry_lookup:
 	filp = lookup_open(nd, path, op, &want_write, &created);
 	mutex_unlock(&dir->d_inode->i_mutex);
 
-	if (IS_ERR(filp))
-		goto out;
+	if (filp) {
+		if (IS_ERR(filp))
+			goto out;
+
+		if (created || !S_ISREG(filp->f_path.dentry->d_inode->i_mode))
+			will_truncate = 0;
+
+		audit_inode(pathname, filp->f_path.dentry);
+		goto opened;
+	}
 
 	if (created) {
 		/* Don't check for write permission, don't truncate */
@@ -2361,6 +2549,16 @@ retry_lookup:
 	 */
 	audit_inode(pathname, path->dentry);
 
+	/*
+	 * If atomic_open() acquired write access it is dropped now due to
+	 * possible mount and symlink following (this might be optimized away if
+	 * necessary...)
+	 */
+	if (want_write) {
+		mnt_drop_write(nd->path.mnt);
+		want_write = 0;
+	}
+
 	error = -EEXIST;
 	if (open_flag & O_EXCL)
 		goto exit_dput;
@@ -2444,6 +2642,7 @@ common:
 		retried = true;
 		goto retry_lookup;
 	}
+opened:
 	if (!IS_ERR(filp)) {
 		error = ima_file_check(filp, op->acc_mode);
 		if (error) {
diff --git a/fs/open.c b/fs/open.c
index 1540632d838..13bece4f36a 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -810,6 +810,48 @@ out_err:
 }
 EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
 
+/**
+ * finish_open - finish opening a file
+ * @od: opaque open data
+ * @dentry: pointer to dentry
+ * @open: open callback
+ *
+ * This can be used to finish opening a file passed to i_op->atomic_open().
+ *
+ * If the open callback is set to NULL, then the standard f_op->open()
+ * filesystem callback is substituted.
+ */
+struct file *finish_open(struct opendata *od, struct dentry *dentry,
+			 int (*open)(struct inode *, struct file *))
+{
+	struct file *res;
+
+	mntget(od->mnt);
+	dget(dentry);
+
+	res = do_dentry_open(dentry, od->mnt, *od->filp, open, current_cred());
+	if (!IS_ERR(res))
+		*od->filp = NULL;
+
+	return res;
+}
+EXPORT_SYMBOL(finish_open);
+
+/**
+ * finish_no_open - finish ->atomic_open() without opening the file
+ *
+ * @od: opaque open data
+ * @dentry: dentry or NULL (as returned from ->lookup())
+ *
+ * This can be used to set the result of a successful lookup in ->atomic_open().
+ * The filesystem's atomic_open() method shall return NULL after calling this.
+ */
+void finish_no_open(struct opendata *od, struct dentry *dentry)
+{
+	od->dentry = dentry;
+}
+EXPORT_SYMBOL(finish_no_open);
+
 /**
  * nameidata_to_filp - convert a nameidata to an open filp.
  * @nd: pointer to nameidata
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f06db6bd5a7..0314635cf83 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -427,6 +427,7 @@ struct kstatfs;
 struct vm_area_struct;
 struct vfsmount;
 struct cred;
+struct opendata;
 
 extern void __init inode_init(void);
 extern void __init inode_init_early(void);
@@ -1693,6 +1694,9 @@ struct inode_operations {
 	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
 		      u64 len);
 	int (*update_time)(struct inode *, struct timespec *, int);
+	struct file * (*atomic_open)(struct inode *, struct dentry *,
+				     struct opendata *, unsigned open_flag,
+				     umode_t create_mode, bool *created);
 } ____cacheline_aligned;
 
 struct seq_file;
@@ -2061,6 +2065,9 @@ extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
 				 const struct cred *);
 extern int filp_close(struct file *, fl_owner_t id);
 extern char * getname(const char __user *);
+extern struct file *finish_open(struct opendata *od, struct dentry *dentry,
+				int (*open)(struct inode *, struct file *));
+extern void finish_no_open(struct opendata *od, struct dentry *dentry);
 
 /* fs/ioctl.c */
 
-- 
cgit v1.2.3-70-g09d2


From 0dd2b474d0b69d58859399b1df7fdc699ea005d4 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 5 Jun 2012 15:10:18 +0200
Subject: nfs: implement i_op->atomic_open()

Replace NFS4 specific ->lookup implementation with ->atomic_open impelementation
and use the generic nfs_lookup for other lookups.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfs/dir.c | 183 +++++++++++++++++++++++++++++++----------------------------
 1 file changed, 97 insertions(+), 86 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index f430057ff3b..0d8c71271d1 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -111,11 +111,15 @@ const struct inode_operations nfs3_dir_inode_operations = {
 
 #ifdef CONFIG_NFS_V4
 
-static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *);
-static int nfs_open_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd);
+static struct file *nfs_atomic_open(struct inode *, struct dentry *,
+				    struct opendata *, unsigned, umode_t,
+				    bool *);
+static int nfs4_create(struct inode *dir, struct dentry *dentry,
+		       umode_t mode, struct nameidata *nd);
 const struct inode_operations nfs4_dir_inode_operations = {
-	.create		= nfs_open_create,
-	.lookup		= nfs_atomic_lookup,
+	.create		= nfs4_create,
+	.lookup		= nfs_lookup,
+	.atomic_open	= nfs_atomic_open,
 	.link		= nfs_link,
 	.unlink		= nfs_unlink,
 	.symlink	= nfs_symlink,
@@ -1403,120 +1407,132 @@ static int do_open(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-static int nfs_intent_set_file(struct nameidata *nd, struct nfs_open_context *ctx)
+static struct file *nfs_finish_open(struct nfs_open_context *ctx,
+				    struct dentry *dentry,
+				    struct opendata *od, unsigned open_flags)
 {
 	struct file *filp;
-	int ret = 0;
+	int err;
+
+	if (ctx->dentry != dentry) {
+		dput(ctx->dentry);
+		ctx->dentry = dget(dentry);
+	}
 
 	/* If the open_intent is for execute, we have an extra check to make */
 	if (ctx->mode & FMODE_EXEC) {
-		ret = nfs_may_open(ctx->dentry->d_inode,
-				ctx->cred,
-				nd->intent.open.flags);
-		if (ret < 0)
+		err = nfs_may_open(dentry->d_inode, ctx->cred, open_flags);
+		if (err < 0) {
+			filp = ERR_PTR(err);
 			goto out;
+		}
 	}
-	filp = lookup_instantiate_filp(nd, ctx->dentry, do_open);
-	if (IS_ERR(filp))
-		ret = PTR_ERR(filp);
-	else
+
+	filp = finish_open(od, dentry, do_open);
+	if (!IS_ERR(filp))
 		nfs_file_set_open_context(filp, ctx);
+
 out:
 	put_nfs_open_context(ctx);
-	return ret;
+	return filp;
 }
 
-static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry,
+				    struct opendata *od, unsigned open_flags,
+				    umode_t mode, bool *created)
 {
 	struct nfs_open_context *ctx;
-	struct iattr attr;
-	struct dentry *res = NULL;
+	struct dentry *res;
+	struct iattr attr = { .ia_valid = ATTR_OPEN };
 	struct inode *inode;
-	int open_flags;
+	struct file *filp;
 	int err;
 
-	dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n",
+	/* Expect a negative dentry */
+	BUG_ON(dentry->d_inode);
+
+	dfprintk(VFS, "NFS: atomic_open(%s/%ld), %s\n",
 			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
 
-	/* Check that we are indeed trying to open this file */
-	if (!is_atomic_open(nd))
+	/* NFS only supports OPEN on regular files */
+	if ((open_flags & O_DIRECTORY)) {
+		err = -ENOENT;
+		if (!d_unhashed(dentry)) {
+			/*
+			 * Hashed negative dentry with O_DIRECTORY: dentry was
+			 * revalidated and is fine, no need to perform lookup
+			 * again
+			 */
+			goto out_err;
+		}
 		goto no_open;
-
-	if (dentry->d_name.len > NFS_SERVER(dir)->namelen) {
-		res = ERR_PTR(-ENAMETOOLONG);
-		goto out;
 	}
 
-	/* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash
-	 * the dentry. */
-	if (nd->flags & LOOKUP_EXCL) {
-		d_instantiate(dentry, NULL);
-		goto out;
-	}
-
-	open_flags = nd->intent.open.flags;
-	attr.ia_valid = ATTR_OPEN;
-
-	ctx = create_nfs_open_context(dentry, open_flags);
-	res = ERR_CAST(ctx);
-	if (IS_ERR(ctx))
-		goto out;
+	err = -ENAMETOOLONG;
+	if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
+		goto out_err;
 
-	if (nd->flags & LOOKUP_CREATE) {
-		attr.ia_mode = nd->intent.open.create_mode;
+	if (open_flags & O_CREAT) {
 		attr.ia_valid |= ATTR_MODE;
-		attr.ia_mode &= ~current_umask();
-	} else
-		open_flags &= ~(O_EXCL | O_CREAT);
-
+		attr.ia_mode = mode & ~current_umask();
+	}
 	if (open_flags & O_TRUNC) {
 		attr.ia_valid |= ATTR_SIZE;
 		attr.ia_size = 0;
 	}
 
-	/* Open the file on the server */
+	ctx = create_nfs_open_context(dentry, open_flags);
+	err = PTR_ERR(ctx);
+	if (IS_ERR(ctx))
+		goto out_err;
+
 	nfs_block_sillyrename(dentry->d_parent);
 	inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr);
+	d_drop(dentry);
 	if (IS_ERR(inode)) {
 		nfs_unblock_sillyrename(dentry->d_parent);
 		put_nfs_open_context(ctx);
-		switch (PTR_ERR(inode)) {
-			/* Make a negative dentry */
-			case -ENOENT:
-				d_add(dentry, NULL);
-				res = NULL;
-				goto out;
-			/* This turned out not to be a regular file */
-			case -EISDIR:
-			case -ENOTDIR:
+		err = PTR_ERR(inode);
+		switch (err) {
+		case -ENOENT:
+			d_add(dentry, NULL);
+			break;
+		case -EISDIR:
+		case -ENOTDIR:
+			goto no_open;
+		case -ELOOP:
+			if (!(open_flags & O_NOFOLLOW))
 				goto no_open;
-			case -ELOOP:
-				if (!(nd->intent.open.flags & O_NOFOLLOW))
-					goto no_open;
+			break;
 			/* case -EINVAL: */
-			default:
-				res = ERR_CAST(inode);
-				goto out;
+		default:
+			break;
 		}
+		goto out_err;
 	}
 	res = d_add_unique(dentry, inode);
-	nfs_unblock_sillyrename(dentry->d_parent);
-	if (res != NULL) {
-		dput(ctx->dentry);
-		ctx->dentry = dget(res);
+	if (res != NULL)
 		dentry = res;
-	}
-	err = nfs_intent_set_file(nd, ctx);
-	if (err < 0) {
-		if (res != NULL)
-			dput(res);
-		return ERR_PTR(err);
-	}
-out:
+
+	nfs_unblock_sillyrename(dentry->d_parent);
 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
-	return res;
+
+	filp = nfs_finish_open(ctx, dentry, od, open_flags);
+
+	dput(res);
+	return filp;
+
+out_err:
+	return ERR_PTR(err);
+
 no_open:
-	return nfs_lookup(dir, dentry, nd);
+	res = nfs_lookup(dir, dentry, NULL);
+	err = PTR_ERR(res);
+	if (IS_ERR(res))
+		goto out_err;
+
+	finish_no_open(od, res);
+	return NULL;
 }
 
 static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
@@ -1566,8 +1582,8 @@ no_open:
 	return nfs_lookup_revalidate(dentry, nd);
 }
 
-static int nfs_open_create(struct inode *dir, struct dentry *dentry,
-		umode_t mode, struct nameidata *nd)
+static int nfs4_create(struct inode *dir, struct dentry *dentry,
+		       umode_t mode, struct nameidata *nd)
 {
 	struct nfs_open_context *ctx = NULL;
 	struct iattr attr;
@@ -1591,19 +1607,14 @@ static int nfs_open_create(struct inode *dir, struct dentry *dentry,
 	error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx);
 	if (error != 0)
 		goto out_put_ctx;
-	if (nd) {
-		error = nfs_intent_set_file(nd, ctx);
-		if (error < 0)
-			goto out_err;
-	} else {
-		put_nfs_open_context(ctx);
-	}
+
+	put_nfs_open_context(ctx);
+
 	return 0;
 out_put_ctx:
 	put_nfs_open_context(ctx);
 out_err_drop:
 	d_drop(dentry);
-out_err:
 	return error;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 8867fe5899010a0c0ac36dadfdacf1072b1c990c Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 5 Jun 2012 15:10:19 +0200
Subject: nfs: clean up ->create in nfs_rpc_ops

Don't pass nfs_open_context() to ->create().  Only the NFS4 implementation
needed that and only because it wanted to return an open file using open
intents.  That task has been replaced by ->atomic_open so it is not necessary
anymore to pass the context to the create rpc operation.

Despite nfs4_proc_create apparently being okay with a NULL context it Oopses
somewhere down the call chain.  So allocate a context here.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfs/dir.c            | 42 ++----------------------------------------
 fs/nfs/nfs3proc.c       |  2 +-
 fs/nfs/nfs4proc.c       | 37 ++++++++++---------------------------
 fs/nfs/proc.c           |  2 +-
 include/linux/nfs_xdr.h |  2 +-
 5 files changed, 15 insertions(+), 70 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 0d8c71271d1..45015d32a86 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -114,10 +114,8 @@ const struct inode_operations nfs3_dir_inode_operations = {
 static struct file *nfs_atomic_open(struct inode *, struct dentry *,
 				    struct opendata *, unsigned, umode_t,
 				    bool *);
-static int nfs4_create(struct inode *dir, struct dentry *dentry,
-		       umode_t mode, struct nameidata *nd);
 const struct inode_operations nfs4_dir_inode_operations = {
-	.create		= nfs4_create,
+	.create		= nfs_create,
 	.lookup		= nfs_lookup,
 	.atomic_open	= nfs_atomic_open,
 	.link		= nfs_link,
@@ -1582,42 +1580,6 @@ no_open:
 	return nfs_lookup_revalidate(dentry, nd);
 }
 
-static int nfs4_create(struct inode *dir, struct dentry *dentry,
-		       umode_t mode, struct nameidata *nd)
-{
-	struct nfs_open_context *ctx = NULL;
-	struct iattr attr;
-	int error;
-	int open_flags = O_CREAT|O_EXCL;
-
-	dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
-			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
-
-	attr.ia_mode = mode;
-	attr.ia_valid = ATTR_MODE;
-
-	if (nd)
-		open_flags = nd->intent.open.flags;
-
-	ctx = create_nfs_open_context(dentry, open_flags);
-	error = PTR_ERR(ctx);
-	if (IS_ERR(ctx))
-		goto out_err_drop;
-
-	error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx);
-	if (error != 0)
-		goto out_put_ctx;
-
-	put_nfs_open_context(ctx);
-
-	return 0;
-out_put_ctx:
-	put_nfs_open_context(ctx);
-out_err_drop:
-	d_drop(dentry);
-	return error;
-}
-
 #endif /* CONFIG_NFSV4 */
 
 /*
@@ -1684,7 +1646,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry,
 	if (nd)
 		open_flags = nd->intent.open.flags;
 
-	error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, NULL);
+	error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
 	if (error != 0)
 		goto out_err;
 	return 0;
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 2292a0fd2bf..3187e24e8f7 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -314,7 +314,7 @@ static void nfs3_free_createdata(struct nfs3_createdata *data)
  */
 static int
 nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
-		 int flags, struct nfs_open_context *ctx)
+		 int flags)
 {
 	struct nfs3_createdata *data;
 	umode_t mode = sattr->ia_mode;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 15fc7e4664e..c157b2089b4 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2806,37 +2806,22 @@ static int nfs4_proc_readlink(struct inode *inode, struct page *page,
 }
 
 /*
- * Got race?
- * We will need to arrange for the VFS layer to provide an atomic open.
- * Until then, this create/open method is prone to inefficiency and race
- * conditions due to the lookup, create, and open VFS calls from sys_open()
- * placed on the wire.
- *
- * Given the above sorry state of affairs, I'm simply sending an OPEN.
- * The file will be opened again in the subsequent VFS open call
- * (nfs4_proc_file_open).
- *
- * The open for read will just hang around to be used by any process that
- * opens the file O_RDONLY. This will all be resolved with the VFS changes.
+ * This is just for mknod.  open(O_CREAT) will always do ->open_context().
  */
-
 static int
 nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
-                 int flags, struct nfs_open_context *ctx)
+		 int flags)
 {
-	struct dentry *de = dentry;
+	struct nfs_open_context *ctx;
 	struct nfs4_state *state;
-	struct rpc_cred *cred = NULL;
-	fmode_t fmode = 0;
 	int status = 0;
 
-	if (ctx != NULL) {
-		cred = ctx->cred;
-		de = ctx->dentry;
-		fmode = ctx->mode;
-	}
+	ctx = alloc_nfs_open_context(dentry, FMODE_READ);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
 	sattr->ia_mode &= ~current_umask();
-	state = nfs4_do_open(dir, de, fmode, flags, sattr, cred, NULL);
+	state = nfs4_do_open(dir, dentry, ctx->mode, flags, sattr, ctx->cred, NULL);
 	d_drop(dentry);
 	if (IS_ERR(state)) {
 		status = PTR_ERR(state);
@@ -2844,11 +2829,9 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 	}
 	d_add(dentry, igrab(state->inode));
 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
-	if (ctx != NULL)
-		ctx->state = state;
-	else
-		nfs4_close_sync(state, fmode);
+	ctx->state = state;
 out:
+	put_nfs_open_context(ctx);
 	return status;
 }
 
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 617c7419a08..4433806e116 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -259,7 +259,7 @@ static void nfs_free_createdata(const struct nfs_createdata *data)
 
 static int
 nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
-		int flags, struct nfs_open_context *ctx)
+		int flags)
 {
 	struct nfs_createdata *data;
 	struct rpc_message msg = {
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 8aadd90b808..d3b7c18b18f 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1374,7 +1374,7 @@ struct nfs_rpc_ops {
 	int	(*readlink)(struct inode *, struct page *, unsigned int,
 			    unsigned int);
 	int	(*create)  (struct inode *, struct dentry *,
-			    struct iattr *, int, struct nfs_open_context *);
+			    struct iattr *, int);
 	int	(*remove)  (struct inode *, struct qstr *);
 	void	(*unlink_setup)  (struct rpc_message *, struct inode *dir);
 	void	(*unlink_rpc_prepare) (struct rpc_task *, struct nfs_unlinkdata *);
-- 
cgit v1.2.3-70-g09d2


From 50de348c3604f7684a89ce64180666d4dd74623f Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 5 Jun 2012 15:10:20 +0200
Subject: nfs: don't use nd->intent.open.flags

Instead check LOOKUP_EXCL in nd->flags, which is basically what the open intent
flags were used for.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfs/dir.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 45015d32a86..0432f474771 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1538,7 +1538,7 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
 	struct dentry *parent = NULL;
 	struct inode *inode;
 	struct inode *dir;
-	int openflags, ret = 0;
+	int ret = 0;
 
 	if (nd->flags & LOOKUP_RCU)
 		return -ECHILD;
@@ -1562,9 +1562,8 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
 	/* NFS only supports OPEN on regular files */
 	if (!S_ISREG(inode->i_mode))
 		goto no_open_dput;
-	openflags = nd->intent.open.flags;
 	/* We cannot do exclusive creation on a positive dentry */
-	if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
+	if (nd && nd->flags & LOOKUP_EXCL)
 		goto no_open_dput;
 
 	/* Let f_op->open() actually open (and revalidate) the file */
@@ -1643,8 +1642,8 @@ static int nfs_create(struct inode *dir, struct dentry *dentry,
 	attr.ia_mode = mode;
 	attr.ia_valid = ATTR_MODE;
 
-	if (nd)
-		open_flags = nd->intent.open.flags;
+	if (nd && !(nd->flags & LOOKUP_EXCL))
+		open_flags = O_CREAT;
 
 	error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
 	if (error != 0)
-- 
cgit v1.2.3-70-g09d2


From eda72afb9ef9f45941fb09260c0f268ff81ec40d Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 5 Jun 2012 15:10:21 +0200
Subject: nfs: don't use intents for checking atomic open

is_atomic_open() is now only used by nfs4_lookup_revalidate() to check whether
it's okay to skip normal revalidation.

It does a racy check for mount read-onlyness and falls back to normal
revalidation if the open would fail.  This makes little sense now that this
function isn't used for determining whether to actually open the file or not.

The d_mountpoint() check still makes sense since it is an indication that we
might be following a mount and so open may not revalidate the dentry.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfs/dir.c | 24 ++++--------------------
 1 file changed, 4 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 0432f474771..e6d55dc93ff 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1366,24 +1366,6 @@ const struct dentry_operations nfs4_dentry_operations = {
 	.d_release	= nfs_d_release,
 };
 
-/*
- * Use intent information to determine whether we need to substitute
- * the NFSv4-style stateful OPEN for the LOOKUP call
- */
-static int is_atomic_open(struct nameidata *nd)
-{
-	if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_OPEN) == 0)
-		return 0;
-	/* NFS does not (yet) have a stateful open for directories */
-	if (nd->flags & LOOKUP_DIRECTORY)
-		return 0;
-	/* Are we trying to write to a read only partition? */
-	if (__mnt_is_readonly(nd->path.mnt) &&
-	    (nd->intent.open.flags & (O_CREAT|O_TRUNC|O_ACCMODE)))
-		return 0;
-	return 1;
-}
-
 static fmode_t flags_to_mode(int flags)
 {
 	fmode_t res = (__force fmode_t)flags & FMODE_EXEC;
@@ -1543,10 +1525,12 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
 	if (nd->flags & LOOKUP_RCU)
 		return -ECHILD;
 
-	inode = dentry->d_inode;
-	if (!is_atomic_open(nd) || d_mountpoint(dentry))
+	if (!(nd->flags & LOOKUP_OPEN) || (nd->flags & LOOKUP_DIRECTORY))
+		goto no_open;
+	if (d_mountpoint(dentry))
 		goto no_open;
 
+	inode = dentry->d_inode;
 	parent = dget_parent(dentry);
 	dir = parent->d_inode;
 
-- 
cgit v1.2.3-70-g09d2


From c8ccbe032feb127a977c66865cb63d72d9a6e08b Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 5 Jun 2012 15:10:22 +0200
Subject: fuse: implement i_op->atomic_open()

Add an ->atomic_open implementation which replaces the atomic open+create
operation implemented via ->create.  No functionality is changed.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fuse/dir.c | 94 ++++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 67 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index f7543f72897..e42442f1da1 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -369,8 +369,9 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
  * If the filesystem doesn't support this, then fall back to separate
  * 'mknod' + 'open' requests.
  */
-static int fuse_create_open(struct inode *dir, struct dentry *entry,
-			    umode_t mode, struct nameidata *nd)
+static struct file *fuse_create_open(struct inode *dir, struct dentry *entry,
+				     struct opendata *od, unsigned flags,
+				     umode_t mode)
 {
 	int err;
 	struct inode *inode;
@@ -382,14 +383,11 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
 	struct fuse_entry_out outentry;
 	struct fuse_file *ff;
 	struct file *file;
-	int flags = nd->intent.open.flags;
-
-	if (fc->no_create)
-		return -ENOSYS;
 
 	forget = fuse_alloc_forget();
+	err = -ENOMEM;
 	if (!forget)
-		return -ENOMEM;
+		goto out_err;
 
 	req = fuse_get_req(fc);
 	err = PTR_ERR(req);
@@ -428,11 +426,8 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
 	req->out.args[1].value = &outopen;
 	fuse_request_send(fc, req);
 	err = req->out.h.error;
-	if (err) {
-		if (err == -ENOSYS)
-			fc->no_create = 1;
+	if (err)
 		goto out_free_ff;
-	}
 
 	err = -EIO;
 	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
@@ -448,28 +443,78 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
 		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
 		fuse_sync_release(ff, flags);
 		fuse_queue_forget(fc, forget, outentry.nodeid, 1);
-		return -ENOMEM;
+		err = -ENOMEM;
+		goto out_err;
 	}
 	kfree(forget);
 	d_instantiate(entry, inode);
 	fuse_change_entry_timeout(entry, &outentry);
 	fuse_invalidate_attr(dir);
-	file = lookup_instantiate_filp(nd, entry, generic_file_open);
+	file = finish_open(od, entry, generic_file_open);
 	if (IS_ERR(file)) {
 		fuse_sync_release(ff, flags);
-		return PTR_ERR(file);
+	} else {
+		file->private_data = fuse_file_get(ff);
+		fuse_finish_open(inode, file);
 	}
-	file->private_data = fuse_file_get(ff);
-	fuse_finish_open(inode, file);
-	return 0;
+	return file;
 
- out_free_ff:
+out_free_ff:
 	fuse_file_free(ff);
- out_put_request:
+out_put_request:
 	fuse_put_request(fc, req);
- out_put_forget_req:
+out_put_forget_req:
 	kfree(forget);
-	return err;
+out_err:
+	return ERR_PTR(err);
+}
+
+static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
+static struct file *fuse_atomic_open(struct inode *dir, struct dentry *entry,
+				     struct opendata *od, unsigned flags,
+				     umode_t mode, bool *created)
+{
+	int err;
+	struct fuse_conn *fc = get_fuse_conn(dir);
+	struct file *file;
+	struct dentry *res = NULL;
+
+	if (d_unhashed(entry)) {
+		res = fuse_lookup(dir, entry, NULL);
+		if (IS_ERR(res))
+			return ERR_CAST(res);
+
+		if (res)
+			entry = res;
+	}
+
+	if (!(flags & O_CREAT) || entry->d_inode)
+		goto no_open;
+
+	/* Only creates */
+	*created = true;
+
+	if (fc->no_create)
+		goto mknod;
+
+	file = fuse_create_open(dir, entry, od, flags, mode);
+	if (PTR_ERR(file) == -ENOSYS) {
+		fc->no_create = 1;
+		goto mknod;
+	}
+out_dput:
+	dput(res);
+	return file;
+
+mknod:
+	err = fuse_mknod(dir, entry, mode, 0);
+	if (err) {
+		file = ERR_PTR(err);
+		goto out_dput;
+	}
+no_open:
+	finish_no_open(od, res);
+	return NULL;
 }
 
 /*
@@ -573,12 +618,6 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
 static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
 		       struct nameidata *nd)
 {
-	if (nd) {
-		int err = fuse_create_open(dir, entry, mode, nd);
-		if (err != -ENOSYS)
-			return err;
-		/* Fall back on mknod */
-	}
 	return fuse_mknod(dir, entry, mode, 0);
 }
 
@@ -1646,6 +1685,7 @@ static const struct inode_operations fuse_dir_inode_operations = {
 	.link		= fuse_link,
 	.setattr	= fuse_setattr,
 	.create		= fuse_create,
+	.atomic_open	= fuse_atomic_open,
 	.mknod		= fuse_mknod,
 	.permission	= fuse_permission,
 	.getattr	= fuse_getattr,
-- 
cgit v1.2.3-70-g09d2


From d2c127197dfc0b2bae62a52e1e0d3e3ff493919e Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 5 Jun 2012 15:10:23 +0200
Subject: cifs: implement i_op->atomic_open()

Add an ->atomic_open implementation which replaces the atomic lookup+open+create
operation implemented via ->lookup and ->create operations.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: Steve French <sfrench@samba.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/cifs/cifsfs.c |   1 +
 fs/cifs/cifsfs.h |   3 +
 fs/cifs/dir.c    | 441 ++++++++++++++++++++++++++++++-------------------------
 3 files changed, 247 insertions(+), 198 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index bcab12c8714..c0c2751a757 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -777,6 +777,7 @@ struct file_system_type cifs_fs_type = {
 };
 const struct inode_operations cifs_dir_inode_ops = {
 	.create = cifs_create,
+	.atomic_open = cifs_atomic_open,
 	.lookup = cifs_lookup,
 	.getattr = cifs_getattr,
 	.unlink = cifs_unlink,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 65365358c97..3a572bf5947 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -46,6 +46,9 @@ extern const struct inode_operations cifs_dir_inode_ops;
 extern struct inode *cifs_root_iget(struct super_block *);
 extern int cifs_create(struct inode *, struct dentry *, umode_t,
 		       struct nameidata *);
+extern struct file *cifs_atomic_open(struct inode *, struct dentry *,
+				     struct opendata *, unsigned, umode_t,
+				     bool *);
 extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
 				  struct nameidata *);
 extern int cifs_unlink(struct inode *dir, struct dentry *dentry);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index ec4e9a2a12f..7a3dcd15d68 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -133,108 +133,141 @@ cifs_bp_rename_retry:
 	return full_path;
 }
 
+/*
+ * Don't allow the separator character in a path component.
+ * The VFS will not allow "/", but "\" is allowed by posix.
+ */
+static int
+check_name(struct dentry *direntry)
+{
+	struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb);
+	int i;
+
+	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) {
+		for (i = 0; i < direntry->d_name.len; i++) {
+			if (direntry->d_name.name[i] == '\\') {
+				cFYI(1, "Invalid file name");
+				return -EINVAL;
+			}
+		}
+	}
+	return 0;
+}
+
+
 /* Inode operations in similar order to how they appear in Linux file fs.h */
 
-int
-cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
-		struct nameidata *nd)
+static int cifs_do_create(struct inode *inode, struct dentry *direntry,
+			  int xid, struct tcon_link *tlink, unsigned oflags,
+			  umode_t mode, __u32 *oplock, __u16 *fileHandle,
+			  bool *created)
 {
 	int rc = -ENOENT;
-	int xid;
 	int create_options = CREATE_NOT_DIR;
-	__u32 oplock = 0;
-	int oflags;
-	/*
-	 * BB below access is probably too much for mknod to request
-	 *    but we have to do query and setpathinfo so requesting
-	 *    less could fail (unless we want to request getatr and setatr
-	 *    permissions (only).  At least for POSIX we do not have to
-	 *    request so much.
-	 */
-	int desiredAccess = GENERIC_READ | GENERIC_WRITE;
-	__u16 fileHandle;
-	struct cifs_sb_info *cifs_sb;
-	struct tcon_link *tlink;
-	struct cifs_tcon *tcon;
+	int desiredAccess;
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+	struct cifs_tcon *tcon = tlink_tcon(tlink);
 	char *full_path = NULL;
 	FILE_ALL_INFO *buf = NULL;
 	struct inode *newinode = NULL;
-	int disposition = FILE_OVERWRITE_IF;
-
-	xid = GetXid();
-
-	cifs_sb = CIFS_SB(inode->i_sb);
-	tlink = cifs_sb_tlink(cifs_sb);
-	if (IS_ERR(tlink)) {
-		FreeXid(xid);
-		return PTR_ERR(tlink);
-	}
-	tcon = tlink_tcon(tlink);
+	int disposition;
 
+	*oplock = 0;
 	if (tcon->ses->server->oplocks)
-		oplock = REQ_OPLOCK;
-
-	if (nd)
-		oflags = nd->intent.open.file->f_flags;
-	else
-		oflags = O_RDONLY | O_CREAT;
+		*oplock = REQ_OPLOCK;
 
 	full_path = build_path_from_dentry(direntry);
 	if (full_path == NULL) {
 		rc = -ENOMEM;
-		goto cifs_create_out;
+		goto out;
 	}
 
 	if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
+	    !tcon->broken_posix_open &&
 	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
 			le64_to_cpu(tcon->fsUnixInfo.Capability))) {
 		rc = cifs_posix_open(full_path, &newinode,
-			inode->i_sb, mode, oflags, &oplock, &fileHandle, xid);
-		/* EIO could indicate that (posix open) operation is not
-		   supported, despite what server claimed in capability
-		   negotiation.  EREMOTE indicates DFS junction, which is not
-		   handled in posix open */
-
-		if (rc == 0) {
-			if (newinode == NULL) /* query inode info */
+			inode->i_sb, mode, oflags, oplock, fileHandle, xid);
+		switch (rc) {
+		case 0:
+			if (newinode == NULL) {
+				/* query inode info */
 				goto cifs_create_get_file_info;
-			else /* success, no need to query */
-				goto cifs_create_set_dentry;
-		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
-			 (rc != -EOPNOTSUPP) && (rc != -EINVAL))
-			goto cifs_create_out;
-		/* else fallthrough to retry, using older open call, this is
-		   case where server does not support this SMB level, and
-		   falsely claims capability (also get here for DFS case
-		   which should be rare for path not covered on files) */
-	}
+			}
 
-	if (nd) {
-		/* if the file is going to stay open, then we
-		   need to set the desired access properly */
-		desiredAccess = 0;
-		if (OPEN_FMODE(oflags) & FMODE_READ)
-			desiredAccess |= GENERIC_READ; /* is this too little? */
-		if (OPEN_FMODE(oflags) & FMODE_WRITE)
-			desiredAccess |= GENERIC_WRITE;
-
-		if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
-			disposition = FILE_CREATE;
-		else if ((oflags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
-			disposition = FILE_OVERWRITE_IF;
-		else if ((oflags & O_CREAT) == O_CREAT)
-			disposition = FILE_OPEN_IF;
-		else
-			cFYI(1, "Create flag not set in create function");
+			if (!S_ISREG(newinode->i_mode)) {
+				/*
+				 * The server may allow us to open things like
+				 * FIFOs, but the client isn't set up to deal
+				 * with that. If it's not a regular file, just
+				 * close it and proceed as if it were a normal
+				 * lookup.
+				 */
+				CIFSSMBClose(xid, tcon, *fileHandle);
+				goto cifs_create_get_file_info;
+			}
+			/* success, no need to query */
+			goto cifs_create_set_dentry;
+
+		case -ENOENT:
+			goto cifs_create_get_file_info;
+
+		case -EIO:
+		case -EINVAL:
+			/*
+			 * EIO could indicate that (posix open) operation is not
+			 * supported, despite what server claimed in capability
+			 * negotiation.
+			 *
+			 * POSIX open in samba versions 3.3.1 and earlier could
+			 * incorrectly fail with invalid parameter.
+			 */
+			tcon->broken_posix_open = true;
+			break;
+
+		case -EREMOTE:
+		case -EOPNOTSUPP:
+			/*
+			 * EREMOTE indicates DFS junction, which is not handled
+			 * in posix open.  If either that or op not supported
+			 * returned, follow the normal lookup.
+			 */
+			break;
+
+		default:
+			goto out;
+		}
+		/*
+		 * fallthrough to retry, using older open call, this is case
+		 * where server does not support this SMB level, and falsely
+		 * claims capability (also get here for DFS case which should be
+		 * rare for path not covered on files)
+		 */
 	}
 
+	desiredAccess = 0;
+	if (OPEN_FMODE(oflags) & FMODE_READ)
+		desiredAccess |= GENERIC_READ; /* is this too little? */
+	if (OPEN_FMODE(oflags) & FMODE_WRITE)
+		desiredAccess |= GENERIC_WRITE;
+
+	disposition = FILE_OVERWRITE_IF;
+	if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
+		disposition = FILE_CREATE;
+	else if ((oflags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
+		disposition = FILE_OVERWRITE_IF;
+	else if ((oflags & O_CREAT) == O_CREAT)
+		disposition = FILE_OPEN_IF;
+	else
+		cFYI(1, "Create flag not set in create function");
+
 	/* BB add processing to set equivalent of mode - e.g. via CreateX with
 	   ACLs */
 
 	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
 	if (buf == NULL) {
 		rc = -ENOMEM;
-		goto cifs_create_out;
+		goto out;
 	}
 
 	/*
@@ -250,7 +283,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
 	if (tcon->ses->capabilities & CAP_NT_SMBS)
 		rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
 			 desiredAccess, create_options,
-			 &fileHandle, &oplock, buf, cifs_sb->local_nls,
+			 fileHandle, oplock, buf, cifs_sb->local_nls,
 			 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
 	else
 		rc = -EIO; /* no NT SMB support fall into legacy open below */
@@ -259,17 +292,17 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
 		/* old server, retry the open legacy style */
 		rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
 			desiredAccess, create_options,
-			&fileHandle, &oplock, buf, cifs_sb->local_nls,
+			fileHandle, oplock, buf, cifs_sb->local_nls,
 			cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
 	}
 	if (rc) {
 		cFYI(1, "cifs_create returned 0x%x", rc);
-		goto cifs_create_out;
+		goto out;
 	}
 
 	/* If Open reported that we actually created a file
 	   then we now have to set the mode if possible */
-	if ((tcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) {
+	if ((tcon->unix_ext) && (*oplock & CIFS_CREATE_ACTION)) {
 		struct cifs_unix_set_info_args args = {
 				.mode	= mode,
 				.ctime	= NO_CHANGE_64,
@@ -278,6 +311,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
 				.device	= 0,
 		};
 
+		*created = true;
 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
 			args.uid = (__u64) current_fsuid();
 			if (inode->i_mode & S_ISGID)
@@ -288,7 +322,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
 			args.uid = NO_CHANGE_64;
 			args.gid = NO_CHANGE_64;
 		}
-		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fileHandle,
+		CIFSSMBUnixSetFileInfo(xid, tcon, &args, *fileHandle,
 					current->tgid);
 	} else {
 		/* BB implement mode setting via Windows security
@@ -305,11 +339,11 @@ cifs_create_get_file_info:
 					      inode->i_sb, xid);
 	else {
 		rc = cifs_get_inode_info(&newinode, full_path, buf,
-					 inode->i_sb, xid, &fileHandle);
+					 inode->i_sb, xid, fileHandle);
 		if (newinode) {
 			if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
 				newinode->i_mode = mode;
-			if ((oplock & CIFS_CREATE_ACTION) &&
+			if ((*oplock & CIFS_CREATE_ACTION) &&
 			    (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) {
 				newinode->i_uid = current_fsuid();
 				if (inode->i_mode & S_ISGID)
@@ -321,37 +355,139 @@ cifs_create_get_file_info:
 	}
 
 cifs_create_set_dentry:
-	if (rc == 0)
-		d_instantiate(direntry, newinode);
-	else
+	if (rc != 0) {
 		cFYI(1, "Create worked, get_inode_info failed rc = %d", rc);
+		goto out;
+	}
+	d_drop(direntry);
+	d_add(direntry, newinode);
 
-	if (newinode && nd) {
-		struct cifsFileInfo *pfile_info;
-		struct file *filp;
+	/* ENOENT for create?  How weird... */
+	rc = -ENOENT;
+	if (!newinode) {
+		CIFSSMBClose(xid, tcon, *fileHandle);
+		goto out;
+	}
+	rc = 0;
 
-		filp = lookup_instantiate_filp(nd, direntry, generic_file_open);
-		if (IS_ERR(filp)) {
-			rc = PTR_ERR(filp);
-			CIFSSMBClose(xid, tcon, fileHandle);
-			goto cifs_create_out;
-		}
+out:
+	kfree(buf);
+	kfree(full_path);
+	return rc;
+}
 
-		pfile_info = cifs_new_fileinfo(fileHandle, filp, tlink, oplock);
-		if (pfile_info == NULL) {
-			fput(filp);
-			CIFSSMBClose(xid, tcon, fileHandle);
-			rc = -ENOMEM;
-		}
-	} else {
+struct file *
+cifs_atomic_open(struct inode *inode, struct dentry *direntry,
+		 struct opendata *od, unsigned oflags, umode_t mode,
+		 bool *created)
+{
+	int rc;
+	int xid;
+	struct tcon_link *tlink;
+	struct cifs_tcon *tcon;
+	__u16 fileHandle;
+	__u32 oplock;
+	struct file *filp;
+	struct cifsFileInfo *pfile_info;
+
+	/* Posix open is only called (at lookup time) for file create now.  For
+	 * opens (rather than creates), because we do not know if it is a file
+	 * or directory yet, and current Samba no longer allows us to do posix
+	 * open on dirs, we could end up wasting an open call on what turns out
+	 * to be a dir. For file opens, we wait to call posix open till
+	 * cifs_open.  It could be added to atomic_open in the future but the
+	 * performance tradeoff of the extra network request when EISDIR or
+	 * EACCES is returned would have to be weighed against the 50% reduction
+	 * in network traffic in the other paths.
+	 */
+	if (!(oflags & O_CREAT)) {
+		struct dentry *res = cifs_lookup(inode, direntry, NULL);
+		if (IS_ERR(res))
+			return ERR_CAST(res);
+
+		finish_no_open(od, res);
+		return NULL;
+	}
+
+	rc = check_name(direntry);
+	if (rc)
+		return ERR_PTR(rc);
+
+	xid = GetXid();
+
+	cFYI(1, "parent inode = 0x%p name is: %s and dentry = 0x%p",
+	     inode, direntry->d_name.name, direntry);
+
+	tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb));
+	filp = ERR_CAST(tlink);
+	if (IS_ERR(tlink))
+		goto free_xid;
+
+	tcon = tlink_tcon(tlink);
+
+	rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
+			    &oplock, &fileHandle, created);
+
+	if (rc) {
+		filp = ERR_PTR(rc);
+		goto out;
+	}
+
+	filp = finish_open(od, direntry, generic_file_open);
+	if (IS_ERR(filp)) {
 		CIFSSMBClose(xid, tcon, fileHandle);
+		goto out;
 	}
 
-cifs_create_out:
-	kfree(buf);
-	kfree(full_path);
+	pfile_info = cifs_new_fileinfo(fileHandle, filp, tlink, oplock);
+	if (pfile_info == NULL) {
+		CIFSSMBClose(xid, tcon, fileHandle);
+		fput(filp);
+		filp = ERR_PTR(-ENOMEM);
+	}
+
+out:
+	cifs_put_tlink(tlink);
+free_xid:
+	FreeXid(xid);
+	return filp;
+}
+
+int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
+		struct nameidata *nd)
+{
+	int rc;
+	int xid = GetXid();
+	/*
+	 * BB below access is probably too much for mknod to request
+	 *    but we have to do query and setpathinfo so requesting
+	 *    less could fail (unless we want to request getatr and setatr
+	 *    permissions (only).  At least for POSIX we do not have to
+	 *    request so much.
+	 */
+	unsigned oflags = O_EXCL | O_CREAT | O_RDWR;
+	struct tcon_link *tlink;
+	__u16 fileHandle;
+	__u32 oplock;
+	bool created = true;
+
+	cFYI(1, "cifs_create parent inode = 0x%p name is: %s and dentry = 0x%p",
+	     inode, direntry->d_name.name, direntry);
+
+	tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb));
+	rc = PTR_ERR(tlink);
+	if (IS_ERR(tlink))
+		goto free_xid;
+
+	rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
+			    &oplock, &fileHandle, &created);
+	if (!rc)
+		CIFSSMBClose(xid, tlink_tcon(tlink), fileHandle);
+
 	cifs_put_tlink(tlink);
+free_xid:
 	FreeXid(xid);
+
 	return rc;
 }
 
@@ -492,16 +628,11 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 {
 	int xid;
 	int rc = 0; /* to get around spurious gcc warning, set to zero here */
-	__u32 oplock;
-	__u16 fileHandle = 0;
-	bool posix_open = false;
 	struct cifs_sb_info *cifs_sb;
 	struct tcon_link *tlink;
 	struct cifs_tcon *pTcon;
-	struct cifsFileInfo *cfile;
 	struct inode *newInode = NULL;
 	char *full_path = NULL;
-	struct file *filp;
 
 	xid = GetXid();
 
@@ -518,31 +649,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 	}
 	pTcon = tlink_tcon(tlink);
 
-	oplock = pTcon->ses->server->oplocks ? REQ_OPLOCK : 0;
-
-	/*
-	 * Don't allow the separator character in a path component.
-	 * The VFS will not allow "/", but "\" is allowed by posix.
-	 */
-	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) {
-		int i;
-		for (i = 0; i < direntry->d_name.len; i++)
-			if (direntry->d_name.name[i] == '\\') {
-				cFYI(1, "Invalid file name");
-				rc = -EINVAL;
-				goto lookup_out;
-			}
-	}
-
-	/*
-	 * O_EXCL: optimize away the lookup, but don't hash the dentry. Let
-	 * the VFS handle the create.
-	 */
-	if (nd && (nd->flags & LOOKUP_EXCL)) {
-		d_instantiate(direntry, NULL);
-		rc = 0;
+	rc = check_name(direntry);
+	if (rc)
 		goto lookup_out;
-	}
 
 	/* can not grab the rename sem here since it would
 	deadlock in the cases (beginning of sys_rename itself)
@@ -560,80 +669,16 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 	}
 	cFYI(1, "Full path: %s inode = 0x%p", full_path, direntry->d_inode);
 
-	/* Posix open is only called (at lookup time) for file create now.
-	 * For opens (rather than creates), because we do not know if it
-	 * is a file or directory yet, and current Samba no longer allows
-	 * us to do posix open on dirs, we could end up wasting an open call
-	 * on what turns out to be a dir. For file opens, we wait to call posix
-	 * open till cifs_open.  It could be added here (lookup) in the future
-	 * but the performance tradeoff of the extra network request when EISDIR
-	 * or EACCES is returned would have to be weighed against the 50%
-	 * reduction in network traffic in the other paths.
-	 */
 	if (pTcon->unix_ext) {
-		if (nd && !(nd->flags & LOOKUP_DIRECTORY) &&
-		     (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open &&
-		     (nd->intent.open.file->f_flags & O_CREAT)) {
-			rc = cifs_posix_open(full_path, &newInode,
-					parent_dir_inode->i_sb,
-					nd->intent.open.create_mode,
-					nd->intent.open.file->f_flags, &oplock,
-					&fileHandle, xid);
-			/*
-			 * The check below works around a bug in POSIX
-			 * open in samba versions 3.3.1 and earlier where
-			 * open could incorrectly fail with invalid parameter.
-			 * If either that or op not supported returned, follow
-			 * the normal lookup.
-			 */
-			switch (rc) {
-			case 0:
-				/*
-				 * The server may allow us to open things like
-				 * FIFOs, but the client isn't set up to deal
-				 * with that. If it's not a regular file, just
-				 * close it and proceed as if it were a normal
-				 * lookup.
-				 */
-				if (newInode && !S_ISREG(newInode->i_mode)) {
-					CIFSSMBClose(xid, pTcon, fileHandle);
-					break;
-				}
-			case -ENOENT:
-				posix_open = true;
-			case -EOPNOTSUPP:
-				break;
-			default:
-				pTcon->broken_posix_open = true;
-			}
-		}
-		if (!posix_open)
-			rc = cifs_get_inode_info_unix(&newInode, full_path,
-						parent_dir_inode->i_sb, xid);
-	} else
+		rc = cifs_get_inode_info_unix(&newInode, full_path,
+					      parent_dir_inode->i_sb, xid);
+	} else {
 		rc = cifs_get_inode_info(&newInode, full_path, NULL,
 				parent_dir_inode->i_sb, xid, NULL);
+	}
 
 	if ((rc == 0) && (newInode != NULL)) {
 		d_add(direntry, newInode);
-		if (posix_open) {
-			filp = lookup_instantiate_filp(nd, direntry,
-						       generic_file_open);
-			if (IS_ERR(filp)) {
-				rc = PTR_ERR(filp);
-				CIFSSMBClose(xid, pTcon, fileHandle);
-				goto lookup_out;
-			}
-
-			cfile = cifs_new_fileinfo(fileHandle, filp, tlink,
-						  oplock);
-			if (cfile == NULL) {
-				fput(filp);
-				CIFSSMBClose(xid, pTcon, fileHandle);
-				rc = -ENOMEM;
-				goto lookup_out;
-			}
-		}
 		/* since paths are not looked up by component - the parent
 		   directories are presumed to be good here */
 		renew_parental_timestamps(direntry);
-- 
cgit v1.2.3-70-g09d2


From 3819219b592159725069eb16a7a46f58e4ecef32 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 5 Jun 2012 15:10:24 +0200
Subject: ceph: remove unused arg from ceph_lookup_open()

What was the purpose of this?

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: Sage Weil <sage@newdream.net>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ceph/dir.c   | 4 ++--
 fs/ceph/file.c  | 3 +--
 fs/ceph/super.h | 3 +--
 3 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 3e8094be460..c4b7832c38b 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -599,7 +599,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
 	    (nd->flags & LOOKUP_OPEN) &&
 	    !(nd->intent.open.flags & O_CREAT)) {
 		int mode = nd->intent.open.create_mode & ~current->fs->umask;
-		return ceph_lookup_open(dir, dentry, nd, mode, 1);
+		return ceph_lookup_open(dir, dentry, nd, mode);
 	}
 
 	/* can we conclude ENOENT locally? */
@@ -710,7 +710,7 @@ static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 
 	if (nd) {
 		BUG_ON((nd->flags & LOOKUP_OPEN) == 0);
-		dentry = ceph_lookup_open(dir, dentry, nd, mode, 0);
+		dentry = ceph_lookup_open(dir, dentry, nd, mode);
 		/* hrm, what should i do here if we get aliased? */
 		if (IS_ERR(dentry))
 			return PTR_ERR(dentry);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 988d4f302e4..4bf9773e6a3 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -219,8 +219,7 @@ out:
  *  path_lookup_create -> LOOKUP_OPEN|LOOKUP_CREATE
  */
 struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
-				struct nameidata *nd, int mode,
-				int locked_dir)
+				struct nameidata *nd, int mode)
 {
 	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
 	struct ceph_mds_client *mdsc = fsc->mdsc;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index fc35036d258..8471db98b62 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -807,8 +807,7 @@ extern int ceph_copy_from_page_vector(struct page **pages,
 extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
 extern int ceph_open(struct inode *inode, struct file *file);
 extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
-				       struct nameidata *nd, int mode,
-				       int locked_dir);
+				       struct nameidata *nd, int mode);
 extern int ceph_release(struct inode *inode, struct file *filp);
 
 /* dir.c */
-- 
cgit v1.2.3-70-g09d2


From 2d83bde9a16e18eafdc73a3a1f4a8eb110e49672 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 5 Jun 2012 15:10:25 +0200
Subject: ceph: implement i_op->atomic_open()

Add an ->atomic_open implementation which replaces the atomic lookup+open+create
operation implemented via ->lookup and ->create operations.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: Sage Weil <sage@newdream.net>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ceph/dir.c   | 68 ++++++++++++++++++++++++++++++++++++---------------------
 fs/ceph/file.c  | 21 +++++++++---------
 fs/ceph/super.h |  5 +++--
 3 files changed, 56 insertions(+), 38 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index c4b7832c38b..75df600ec9b 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -594,14 +594,6 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
 	if (err < 0)
 		return ERR_PTR(err);
 
-	/* open (but not create!) intent? */
-	if (nd &&
-	    (nd->flags & LOOKUP_OPEN) &&
-	    !(nd->intent.open.flags & O_CREAT)) {
-		int mode = nd->intent.open.create_mode & ~current->fs->umask;
-		return ceph_lookup_open(dir, dentry, nd, mode);
-	}
-
 	/* can we conclude ENOENT locally? */
 	if (dentry->d_inode == NULL) {
 		struct ceph_inode_info *ci = ceph_inode(dir);
@@ -642,6 +634,47 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
 	return dentry;
 }
 
+struct file *ceph_atomic_open(struct inode *dir, struct dentry *dentry,
+			      struct opendata *od, unsigned flags, umode_t mode,
+			      bool *created)
+{
+	int err;
+	struct dentry *res = NULL;
+	struct file *filp;
+
+	if (!(flags & O_CREAT)) {
+		if (dentry->d_name.len > NAME_MAX)
+			return ERR_PTR(-ENAMETOOLONG);
+
+		err = ceph_init_dentry(dentry);
+		if (err < 0)
+			return ERR_PTR(err);
+
+		return ceph_lookup_open(dir, dentry, od, flags, mode);
+	}
+
+	if (d_unhashed(dentry)) {
+		res = ceph_lookup(dir, dentry, NULL);
+		if (IS_ERR(res))
+			return ERR_CAST(res);
+
+		if (res)
+			dentry = res;
+	}
+
+	/* We don't deal with positive dentries here */
+	if (dentry->d_inode) {
+		finish_no_open(od, res);
+		return NULL;
+	}
+
+	*created = true;
+	filp = ceph_lookup_open(dir, dentry, od, flags, mode);
+	dput(res);
+
+	return filp;
+}
+
 /*
  * If we do a create but get no trace back from the MDS, follow up with
  * a lookup (the VFS expects us to link up the provided dentry).
@@ -702,23 +735,7 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
 static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 		       struct nameidata *nd)
 {
-	dout("create in dir %p dentry %p name '%.*s'\n",
-	     dir, dentry, dentry->d_name.len, dentry->d_name.name);
-
-	if (ceph_snap(dir) != CEPH_NOSNAP)
-		return -EROFS;
-
-	if (nd) {
-		BUG_ON((nd->flags & LOOKUP_OPEN) == 0);
-		dentry = ceph_lookup_open(dir, dentry, nd, mode);
-		/* hrm, what should i do here if we get aliased? */
-		if (IS_ERR(dentry))
-			return PTR_ERR(dentry);
-		return 0;
-	}
-
-	/* fall back to mknod */
-	return ceph_mknod(dir, dentry, (mode & ~S_IFMT) | S_IFREG, 0);
+	return ceph_mknod(dir, dentry, mode, 0);
 }
 
 static int ceph_symlink(struct inode *dir, struct dentry *dentry,
@@ -1357,6 +1374,7 @@ const struct inode_operations ceph_dir_iops = {
 	.rmdir = ceph_unlink,
 	.rename = ceph_rename,
 	.create = ceph_create,
+	.atomic_open = ceph_atomic_open,
 };
 
 const struct dentry_operations ceph_dentry_ops = {
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 4bf9773e6a3..e34dc22e75a 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -213,21 +213,15 @@ out:
  * may_open() fails, the struct *file gets cleaned up (i.e.
  * ceph_release gets called).  So fear not!
  */
-/*
- * flags
- *  path_lookup_open   -> LOOKUP_OPEN
- *  path_lookup_create -> LOOKUP_OPEN|LOOKUP_CREATE
- */
-struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
-				struct nameidata *nd, int mode)
+struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
+			      struct opendata *od, unsigned flags, umode_t mode)
 {
 	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
 	struct ceph_mds_client *mdsc = fsc->mdsc;
-	struct file *file;
+	struct file *file = NULL;
 	struct ceph_mds_request *req;
 	struct dentry *ret;
 	int err;
-	int flags = nd->intent.open.flags;
 
 	dout("ceph_lookup_open dentry %p '%.*s' flags %d mode 0%o\n",
 	     dentry, dentry->d_name.len, dentry->d_name.name, flags, mode);
@@ -253,14 +247,19 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
 		err = ceph_handle_notrace_create(dir, dentry);
 	if (err)
 		goto out;
-	file = lookup_instantiate_filp(nd, req->r_dentry, ceph_open);
+	file = finish_open(od, req->r_dentry, ceph_open);
 	if (IS_ERR(file))
 		err = PTR_ERR(file);
 out:
 	ret = ceph_finish_lookup(req, dentry, err);
 	ceph_mdsc_put_request(req);
 	dout("ceph_lookup_open result=%p\n", ret);
-	return ret;
+
+	if (IS_ERR(ret))
+		return ERR_CAST(ret);
+
+	dput(ret);
+	return err ? ERR_PTR(err) : file;
 }
 
 int ceph_release(struct inode *inode, struct file *file)
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 8471db98b62..e61e54673e5 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -806,8 +806,9 @@ extern int ceph_copy_from_page_vector(struct page **pages,
 				    loff_t off, size_t len);
 extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
 extern int ceph_open(struct inode *inode, struct file *file);
-extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
-				       struct nameidata *nd, int mode);
+extern struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
+				     struct opendata *od, unsigned flags,
+				     umode_t mode);
 extern int ceph_release(struct inode *inode, struct file *filp);
 
 /* dir.c */
-- 
cgit v1.2.3-70-g09d2


From e43ae79c540270865918dab5ac914c74f43101e2 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 5 Jun 2012 15:10:26 +0200
Subject: 9p: implement i_op->atomic_open()

Add an ->atomic_open implementation which replaces the atomic open+create
operation implemented via ->create.  No functionality is changed.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: Eric Van Hensbergen <ericvh@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/9p/vfs_inode.c      | 169 +++++++++++++++++++++++++++++--------------------
 fs/9p/vfs_inode_dotl.c |  52 ++++++++++-----
 2 files changed, 137 insertions(+), 84 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 57ccb7537da..e8c42ceb89b 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -712,11 +712,14 @@ error:
 }
 
 /**
- * v9fs_vfs_create - VFS hook to create files
+ * v9fs_vfs_create - VFS hook to create a regular file
+ *
+ * open(.., O_CREAT) is handled in v9fs_vfs_atomic_open().  This is only called
+ * for mknod(2).
+ *
  * @dir: directory inode that is being created
  * @dentry:  dentry that is being deleted
  * @mode: create permissions
- * @nd: path information
  *
  */
 
@@ -724,76 +727,19 @@ static int
 v9fs_vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 		struct nameidata *nd)
 {
-	int err;
-	u32 perm;
-	int flags;
-	struct file *filp;
-	struct v9fs_inode *v9inode;
-	struct v9fs_session_info *v9ses;
-	struct p9_fid *fid, *inode_fid;
-
-	err = 0;
-	fid = NULL;
-	v9ses = v9fs_inode2v9ses(dir);
-	perm = unixmode2p9mode(v9ses, mode);
-	if (nd)
-		flags = nd->intent.open.flags;
-	else
-		flags = O_RDWR;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
+	u32 perm = unixmode2p9mode(v9ses, mode);
+	struct p9_fid *fid;
 
-	fid = v9fs_create(v9ses, dir, dentry, NULL, perm,
-				v9fs_uflags2omode(flags,
-						v9fs_proto_dotu(v9ses)));
-	if (IS_ERR(fid)) {
-		err = PTR_ERR(fid);
-		fid = NULL;
-		goto error;
-	}
+	/* P9_OEXCL? */
+	fid = v9fs_create(v9ses, dir, dentry, NULL, perm, P9_ORDWR);
+	if (IS_ERR(fid))
+		return PTR_ERR(fid);
 
 	v9fs_invalidate_inode_attr(dir);
-	/* if we are opening a file, assign the open fid to the file */
-	if (nd) {
-		v9inode = V9FS_I(dentry->d_inode);
-		mutex_lock(&v9inode->v_mutex);
-		if (v9ses->cache && !v9inode->writeback_fid &&
-		    ((flags & O_ACCMODE) != O_RDONLY)) {
-			/*
-			 * clone a fid and add it to writeback_fid
-			 * we do it during open time instead of
-			 * page dirty time via write_begin/page_mkwrite
-			 * because we want write after unlink usecase
-			 * to work.
-			 */
-			inode_fid = v9fs_writeback_fid(dentry);
-			if (IS_ERR(inode_fid)) {
-				err = PTR_ERR(inode_fid);
-				mutex_unlock(&v9inode->v_mutex);
-				goto error;
-			}
-			v9inode->writeback_fid = (void *) inode_fid;
-		}
-		mutex_unlock(&v9inode->v_mutex);
-		filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
-		if (IS_ERR(filp)) {
-			err = PTR_ERR(filp);
-			goto error;
-		}
-
-		filp->private_data = fid;
-#ifdef CONFIG_9P_FSCACHE
-		if (v9ses->cache)
-			v9fs_cache_inode_set_cookie(dentry->d_inode, filp);
-#endif
-	} else
-		p9_client_clunk(fid);
+	p9_client_clunk(fid);
 
 	return 0;
-
-error:
-	if (fid)
-		p9_client_clunk(fid);
-
-	return err;
 }
 
 /**
@@ -910,6 +856,93 @@ error:
 	return ERR_PTR(result);
 }
 
+static struct file *
+v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
+		     struct opendata *od, unsigned flags, umode_t mode,
+		     bool *created)
+{
+	int err;
+	u32 perm;
+	struct file *filp;
+	struct v9fs_inode *v9inode;
+	struct v9fs_session_info *v9ses;
+	struct p9_fid *fid, *inode_fid;
+	struct dentry *res = NULL;
+
+	if (d_unhashed(dentry)) {
+		res = v9fs_vfs_lookup(dir, dentry, NULL);
+		if (IS_ERR(res))
+			return ERR_CAST(res);
+
+		if (res)
+			dentry = res;
+	}
+
+	/* Only creates */
+	if (!(flags & O_CREAT) || dentry->d_inode) {
+		finish_no_open(od, res);
+		return NULL;
+	}
+
+	err = 0;
+	fid = NULL;
+	v9ses = v9fs_inode2v9ses(dir);
+	perm = unixmode2p9mode(v9ses, mode);
+	fid = v9fs_create(v9ses, dir, dentry, NULL, perm,
+				v9fs_uflags2omode(flags,
+						v9fs_proto_dotu(v9ses)));
+	if (IS_ERR(fid)) {
+		err = PTR_ERR(fid);
+		fid = NULL;
+		goto error;
+	}
+
+	v9fs_invalidate_inode_attr(dir);
+	v9inode = V9FS_I(dentry->d_inode);
+	mutex_lock(&v9inode->v_mutex);
+	if (v9ses->cache && !v9inode->writeback_fid &&
+	    ((flags & O_ACCMODE) != O_RDONLY)) {
+		/*
+		 * clone a fid and add it to writeback_fid
+		 * we do it during open time instead of
+		 * page dirty time via write_begin/page_mkwrite
+		 * because we want write after unlink usecase
+		 * to work.
+		 */
+		inode_fid = v9fs_writeback_fid(dentry);
+		if (IS_ERR(inode_fid)) {
+			err = PTR_ERR(inode_fid);
+			mutex_unlock(&v9inode->v_mutex);
+			goto error;
+		}
+		v9inode->writeback_fid = (void *) inode_fid;
+	}
+	mutex_unlock(&v9inode->v_mutex);
+	filp = finish_open(od, dentry, generic_file_open);
+	if (IS_ERR(filp)) {
+		err = PTR_ERR(filp);
+		goto error;
+	}
+
+	filp->private_data = fid;
+#ifdef CONFIG_9P_FSCACHE
+	if (v9ses->cache)
+		v9fs_cache_inode_set_cookie(dentry->d_inode, filp);
+#endif
+
+	*created = true;
+out:
+	dput(res);
+	return filp;
+
+error:
+	if (fid)
+		p9_client_clunk(fid);
+
+	filp = ERR_PTR(err);
+	goto out;
+}
+
 /**
  * v9fs_vfs_unlink - VFS unlink hook to delete an inode
  * @i:  inode that is being unlinked
@@ -1488,6 +1521,7 @@ out:
 static const struct inode_operations v9fs_dir_inode_operations_dotu = {
 	.create = v9fs_vfs_create,
 	.lookup = v9fs_vfs_lookup,
+	.atomic_open = v9fs_vfs_atomic_open,
 	.symlink = v9fs_vfs_symlink,
 	.link = v9fs_vfs_link,
 	.unlink = v9fs_vfs_unlink,
@@ -1502,6 +1536,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = {
 static const struct inode_operations v9fs_dir_inode_operations = {
 	.create = v9fs_vfs_create,
 	.lookup = v9fs_vfs_lookup,
+	.atomic_open = v9fs_vfs_atomic_open,
 	.unlink = v9fs_vfs_unlink,
 	.mkdir = v9fs_vfs_mkdir,
 	.rmdir = v9fs_vfs_rmdir,
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index e3dd2a1e2bf..a354fe2cb23 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -230,17 +230,23 @@ int v9fs_open_to_dotl_flags(int flags)
  * @dir: directory inode that is being created
  * @dentry:  dentry that is being deleted
  * @mode: create permissions
- * @nd: path information
  *
  */
 
 static int
 v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
 		struct nameidata *nd)
+{
+	return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0);
+}
+
+static struct file *
+v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
+			  struct opendata *od, unsigned flags, umode_t omode,
+			  bool *created)
 {
 	int err = 0;
 	gid_t gid;
-	int flags;
 	umode_t mode;
 	char *name = NULL;
 	struct file *filp;
@@ -251,19 +257,25 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
 	struct p9_fid *dfid, *ofid, *inode_fid;
 	struct v9fs_session_info *v9ses;
 	struct posix_acl *pacl = NULL, *dacl = NULL;
+	struct dentry *res = NULL;
 
-	v9ses = v9fs_inode2v9ses(dir);
-	if (nd)
-		flags = nd->intent.open.flags;
-	else {
-		/*
-		 * create call without LOOKUP_OPEN is due
-		 * to mknod of regular files. So use mknod
-		 * operation.
-		 */
-		return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0);
+	if (d_unhashed(dentry)) {
+		res = v9fs_vfs_lookup(dir, dentry, NULL);
+		if (IS_ERR(res))
+			return ERR_CAST(res);
+
+		if (res)
+			dentry = res;
+	}
+
+	/* Only creates */
+	if (!(flags & O_CREAT) || dentry->d_inode) {
+		finish_no_open(od, res);
+		return NULL;
 	}
 
+	v9ses = v9fs_inode2v9ses(dir);
+
 	name = (char *) dentry->d_name.name;
 	p9_debug(P9_DEBUG_VFS, "name:%s flags:0x%x mode:0x%hx\n",
 		 name, flags, omode);
@@ -272,7 +284,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
 	if (IS_ERR(dfid)) {
 		err = PTR_ERR(dfid);
 		p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
-		return err;
+		goto err_return;
 	}
 
 	/* clone a fid to use for creation */
@@ -280,7 +292,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
 	if (IS_ERR(ofid)) {
 		err = PTR_ERR(ofid);
 		p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
-		return err;
+		goto err_return;
 	}
 
 	gid = v9fs_get_fsgid_for_create(dir);
@@ -345,7 +357,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
 	}
 	mutex_unlock(&v9inode->v_mutex);
 	/* Since we are opening a file, assign the open fid to the file */
-	filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
+	filp = finish_open(od, dentry, generic_file_open);
 	if (IS_ERR(filp)) {
 		err = PTR_ERR(filp);
 		goto err_clunk_old_fid;
@@ -355,7 +367,10 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
 	if (v9ses->cache)
 		v9fs_cache_inode_set_cookie(inode, filp);
 #endif
-	return 0;
+	*created = true;
+out:
+	dput(res);
+	return filp;
 
 error:
 	if (fid)
@@ -364,7 +379,9 @@ err_clunk_old_fid:
 	if (ofid)
 		p9_client_clunk(ofid);
 	v9fs_set_create_acl(NULL, &dacl, &pacl);
-	return err;
+err_return:
+	filp = ERR_PTR(err);
+	goto out;
 }
 
 /**
@@ -982,6 +999,7 @@ out:
 
 const struct inode_operations v9fs_dir_inode_operations_dotl = {
 	.create = v9fs_vfs_create_dotl,
+	.atomic_open = v9fs_vfs_atomic_open_dotl,
 	.lookup = v9fs_vfs_lookup,
 	.link = v9fs_vfs_link_dotl,
 	.symlink = v9fs_vfs_symlink_dotl,
-- 
cgit v1.2.3-70-g09d2


From 015c3bbcd88df2305aae5b017a9c91c08b380aa1 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 5 Jun 2012 15:10:27 +0200
Subject: vfs: remove open intents from nameidata

All users of open intents have been converted to use ->atomic_{open,create}.

This patch gets rid of nd->intent.open and related infrastructure.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/internal.h         |  5 +--
 fs/namei.c            | 95 ++++++++++++++++++++++++---------------------------
 fs/open.c             | 87 ++--------------------------------------------
 include/linux/namei.h | 14 --------
 4 files changed, 48 insertions(+), 153 deletions(-)

(limited to 'fs')

diff --git a/fs/internal.h b/fs/internal.h
index 70067775df2..ae69a3b150d 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -82,13 +82,10 @@ extern struct super_block *user_get_super(dev_t);
 /*
  * open.c
  */
-struct nameidata;
-extern struct file *nameidata_to_filp(struct nameidata *);
-extern void release_open_intent(struct nameidata *);
 struct opendata {
 	struct dentry *dentry;
 	struct vfsmount *mnt;
-	struct file **filp;
+	struct file *filp;
 };
 struct open_flags {
 	int open_flag;
diff --git a/fs/namei.c b/fs/namei.c
index 9e11ae83bff..0ed876259f8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -463,22 +463,6 @@ err_root:
 	return -ECHILD;
 }
 
-/**
- * release_open_intent - free up open intent resources
- * @nd: pointer to nameidata
- */
-void release_open_intent(struct nameidata *nd)
-{
-	struct file *file = nd->intent.open.file;
-
-	if (file && !IS_ERR(file)) {
-		if (file->f_path.dentry == NULL)
-			put_filp(file);
-		else
-			fput(file);
-	}
-}
-
 static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
 	return dentry->d_op->d_revalidate(dentry, nd);
@@ -2210,7 +2194,8 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode)
 }
 
 static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
-				struct path *path, const struct open_flags *op,
+				struct path *path, struct opendata *od,
+				const struct open_flags *op,
 				int *want_write, bool need_lookup,
 				bool *created)
 {
@@ -2219,7 +2204,6 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
 	umode_t mode;
 	int error;
 	int acc_mode;
-	struct opendata od;
 	struct file *filp;
 	int create_error = 0;
 	struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
@@ -2285,14 +2269,13 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
 	if (nd->flags & LOOKUP_DIRECTORY)
 		open_flag |= O_DIRECTORY;
 
-	od.dentry = DENTRY_NOT_SET;
-	od.mnt = nd->path.mnt;
-	od.filp = &nd->intent.open.file;
-	filp = dir->i_op->atomic_open(dir, dentry, &od, open_flag, mode,
+	od->dentry = DENTRY_NOT_SET;
+	od->mnt = nd->path.mnt;
+	filp = dir->i_op->atomic_open(dir, dentry, od, open_flag, mode,
 				      created);
 	if (IS_ERR(filp)) {
-		if (WARN_ON(od.dentry != DENTRY_NOT_SET))
-			dput(od.dentry);
+		if (WARN_ON(od->dentry != DENTRY_NOT_SET))
+			dput(od->dentry);
 
 		if (create_error && PTR_ERR(filp) == -ENOENT)
 			filp = ERR_PTR(create_error);
@@ -2306,13 +2289,13 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
 	}
 
 	if (!filp) {
-		if (WARN_ON(od.dentry == DENTRY_NOT_SET)) {
+		if (WARN_ON(od->dentry == DENTRY_NOT_SET)) {
 			filp = ERR_PTR(-EIO);
 			goto out;
 		}
-		if (od.dentry) {
+		if (od->dentry) {
 			dput(dentry);
-			dentry = od.dentry;
+			dentry = od->dentry;
 		}
 		goto looked_up;
 	}
@@ -2375,6 +2358,7 @@ looked_up:
  * was performed, only lookup.
  */
 static struct file *lookup_open(struct nameidata *nd, struct path *path,
+				struct opendata *od,
 				const struct open_flags *op,
 				int *want_write, bool *created)
 {
@@ -2394,7 +2378,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path,
 		goto out_no_open;
 
 	if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) {
-		return atomic_open(nd, dentry, path, op, want_write,
+		return atomic_open(nd, dentry, path, od, op, want_write,
 				   need_lookup, created);
 	}
 
@@ -2416,7 +2400,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path,
 		 * rw->ro transition does not occur between
 		 * the time when the file is created and when
 		 * a permanent write count is taken through
-		 * the 'struct file' in nameidata_to_filp().
+		 * the 'struct file' in finish_open().
 		 */
 		error = mnt_want_write(nd->path.mnt);
 		if (error)
@@ -2444,7 +2428,8 @@ out_dput:
  * Handle the last step of open()
  */
 static struct file *do_last(struct nameidata *nd, struct path *path,
-			    const struct open_flags *op, const char *pathname)
+			    struct opendata *od, const struct open_flags *op,
+			    const char *pathname)
 {
 	struct dentry *dir = nd->path.dentry;
 	int open_flag = op->open_flag;
@@ -2521,7 +2506,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 
 retry_lookup:
 	mutex_lock(&dir->d_inode->i_mutex);
-	filp = lookup_open(nd, path, op, &want_write, &created);
+	filp = lookup_open(nd, path, od, op, &want_write, &created);
 	mutex_unlock(&dir->d_inode->i_mutex);
 
 	if (filp) {
@@ -2627,7 +2612,8 @@ common:
 	error = may_open(&nd->path, acc_mode, open_flag);
 	if (error)
 		goto exit;
-	filp = nameidata_to_filp(nd);
+	od->mnt = nd->path.mnt;
+	filp = finish_open(od, nd->path.dentry, NULL);
 	if (filp == ERR_PTR(-EOPENSTALE) && save_parent.dentry && !retried) {
 		BUG_ON(save_parent.dentry != dir);
 		path_put(&nd->path);
@@ -2642,6 +2628,11 @@ common:
 		retried = true;
 		goto retry_lookup;
 	}
+	if (IS_ERR(filp))
+		goto out;
+	error = open_check_o_direct(filp);
+	if (error)
+		goto exit_fput;
 opened:
 	if (!IS_ERR(filp)) {
 		error = ima_file_check(filp, op->acc_mode);
@@ -2671,24 +2662,26 @@ exit_dput:
 exit:
 	filp = ERR_PTR(error);
 	goto out;
+exit_fput:
+	fput(filp);
+	goto exit;
+
 }
 
 static struct file *path_openat(int dfd, const char *pathname,
 		struct nameidata *nd, const struct open_flags *op, int flags)
 {
 	struct file *base = NULL;
-	struct file *filp;
+	struct opendata od;
+	struct file *res;
 	struct path path;
 	int error;
 
-	filp = get_empty_filp();
-	if (!filp)
+	od.filp = get_empty_filp();
+	if (!od.filp)
 		return ERR_PTR(-ENFILE);
 
-	filp->f_flags = op->open_flag;
-	nd->intent.open.file = filp;
-	nd->intent.open.flags = open_to_namei_flags(op->open_flag);
-	nd->intent.open.create_mode = op->mode;
+	od.filp->f_flags = op->open_flag;
 
 	error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base);
 	if (unlikely(error))
@@ -2699,14 +2692,14 @@ static struct file *path_openat(int dfd, const char *pathname,
 	if (unlikely(error))
 		goto out_filp;
 
-	filp = do_last(nd, &path, op, pathname);
-	while (unlikely(!filp)) { /* trailing symlink */
+	res = do_last(nd, &path, &od, op, pathname);
+	while (unlikely(!res)) { /* trailing symlink */
 		struct path link = path;
 		void *cookie;
 		if (!(nd->flags & LOOKUP_FOLLOW)) {
 			path_put_conditional(&path, nd);
 			path_put(&nd->path);
-			filp = ERR_PTR(-ELOOP);
+			res = ERR_PTR(-ELOOP);
 			break;
 		}
 		nd->flags |= LOOKUP_PARENT;
@@ -2714,7 +2707,7 @@ static struct file *path_openat(int dfd, const char *pathname,
 		error = follow_link(&link, nd, &cookie);
 		if (unlikely(error))
 			goto out_filp;
-		filp = do_last(nd, &path, op, pathname);
+		res = do_last(nd, &path, &od, op, pathname);
 		put_link(nd, &link, cookie);
 	}
 out:
@@ -2722,17 +2715,20 @@ out:
 		path_put(&nd->root);
 	if (base)
 		fput(base);
-	release_open_intent(nd);
-	if (filp == ERR_PTR(-EOPENSTALE)) {
+	if (od.filp) {
+		BUG_ON(od.filp->f_path.dentry);
+		put_filp(od.filp);
+	}
+	if (res == ERR_PTR(-EOPENSTALE)) {
 		if (flags & LOOKUP_RCU)
-			filp = ERR_PTR(-ECHILD);
+			res = ERR_PTR(-ECHILD);
 		else
-			filp = ERR_PTR(-ESTALE);
+			res = ERR_PTR(-ESTALE);
 	}
-	return filp;
+	return res;
 
 out_filp:
-	filp = ERR_PTR(error);
+	res = ERR_PTR(error);
 	goto out;
 }
 
@@ -2788,7 +2784,6 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path
 		goto out;
 	nd.flags &= ~LOOKUP_PARENT;
 	nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL;
-	nd.intent.open.flags = O_EXCL;
 
 	/*
 	 * Do the final lookup.
diff --git a/fs/open.c b/fs/open.c
index 13bece4f36a..937f4ec2018 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -770,46 +770,6 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
 	return res;
 }
 
-/**
- * lookup_instantiate_filp - instantiates the open intent filp
- * @nd: pointer to nameidata
- * @dentry: pointer to dentry
- * @open: open callback
- *
- * Helper for filesystems that want to use lookup open intents and pass back
- * a fully instantiated struct file to the caller.
- * This function is meant to be called from within a filesystem's
- * lookup method.
- * Beware of calling it for non-regular files! Those ->open methods might block
- * (e.g. in fifo_open), leaving you with parent locked (and in case of fifo,
- * leading to a deadlock, as nobody can open that fifo anymore, because
- * another process to open fifo will block on locked parent when doing lookup).
- * Note that in case of error, nd->intent.open.file is destroyed, but the
- * path information remains valid.
- * If the open callback is set to NULL, then the standard f_op->open()
- * filesystem callback is substituted.
- */
-struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
-		int (*open)(struct inode *, struct file *))
-{
-	const struct cred *cred = current_cred();
-
-	if (IS_ERR(nd->intent.open.file))
-		goto out;
-	if (IS_ERR(dentry))
-		goto out_err;
-	nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt),
-					     nd->intent.open.file,
-					     open, cred);
-out:
-	return nd->intent.open.file;
-out_err:
-	release_open_intent(nd);
-	nd->intent.open.file = ERR_CAST(dentry);
-	goto out;
-}
-EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
-
 /**
  * finish_open - finish opening a file
  * @od: opaque open data
@@ -829,9 +789,9 @@ struct file *finish_open(struct opendata *od, struct dentry *dentry,
 	mntget(od->mnt);
 	dget(dentry);
 
-	res = do_dentry_open(dentry, od->mnt, *od->filp, open, current_cred());
+	res = do_dentry_open(dentry, od->mnt, od->filp, open, current_cred());
 	if (!IS_ERR(res))
-		*od->filp = NULL;
+		od->filp = NULL;
 
 	return res;
 }
@@ -852,49 +812,6 @@ void finish_no_open(struct opendata *od, struct dentry *dentry)
 }
 EXPORT_SYMBOL(finish_no_open);
 
-/**
- * nameidata_to_filp - convert a nameidata to an open filp.
- * @nd: pointer to nameidata
- * @flags: open flags
- *
- * Note that this function destroys the original nameidata
- */
-struct file *nameidata_to_filp(struct nameidata *nd)
-{
-	const struct cred *cred = current_cred();
-	struct file *filp;
-
-	/* Pick up the filp from the open intent */
-	filp = nd->intent.open.file;
-
-	/* Has the filesystem initialised the file for us? */
-	if (filp->f_path.dentry != NULL) {
-		nd->intent.open.file = NULL;
-	} else {
-		struct file *res;
-
-		path_get(&nd->path);
-		res = do_dentry_open(nd->path.dentry, nd->path.mnt,
-				     filp, NULL, cred);
-		if (!IS_ERR(res)) {
-			int error;
-
-			nd->intent.open.file = NULL;
-			BUG_ON(res != filp);
-
-			error = open_check_o_direct(filp);
-			if (error) {
-				fput(filp);
-				filp = ERR_PTR(error);
-			}
-		} else {
-			/* Allow nd->intent.open.file to be recycled */
-			filp = res;
-		}
-	}
-	return filp;
-}
-
 /*
  * dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an
  * error.
diff --git a/include/linux/namei.h b/include/linux/namei.h
index ffc02135c48..23d85987921 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -7,12 +7,6 @@
 
 struct vfsmount;
 
-struct open_intent {
-	int	flags;
-	int	create_mode;
-	struct file *file;
-};
-
 enum { MAX_NESTED_LINKS = 8 };
 
 struct nameidata {
@@ -25,11 +19,6 @@ struct nameidata {
 	int		last_type;
 	unsigned	depth;
 	char *saved_names[MAX_NESTED_LINKS + 1];
-
-	/* Intent data */
-	union {
-		struct open_intent open;
-	} intent;
 };
 
 /*
@@ -82,9 +71,6 @@ extern int kern_path_parent(const char *, struct nameidata *);
 extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
 			   const char *, unsigned int, struct path *);
 
-extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
-		int (*open)(struct inode *, struct file *));
-
 extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
 
 extern int follow_down_one(struct path *);
-- 
cgit v1.2.3-70-g09d2


From aa4caadb70b782999ce5d150ac2f4b1d18e2fc75 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 5 Jun 2012 15:10:28 +0200
Subject: vfs: do_last(): clean up error handling

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 0ed876259f8..044215a7bb0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2634,21 +2634,14 @@ common:
 	if (error)
 		goto exit_fput;
 opened:
-	if (!IS_ERR(filp)) {
-		error = ima_file_check(filp, op->acc_mode);
-		if (error) {
-			fput(filp);
-			filp = ERR_PTR(error);
-		}
-	}
-	if (!IS_ERR(filp)) {
-		if (will_truncate) {
-			error = handle_truncate(filp);
-			if (error) {
-				fput(filp);
-				filp = ERR_PTR(error);
-			}
-		}
+	error = ima_file_check(filp, op->acc_mode);
+	if (error)
+		goto exit_fput;
+
+	if (will_truncate) {
+		error = handle_truncate(filp);
+		if (error)
+			goto exit_fput;
 	}
 out:
 	if (want_write)
-- 
cgit v1.2.3-70-g09d2


From e83db167229702da0f48957641e0dbf36b2644aa Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 5 Jun 2012 15:10:29 +0200
Subject: vfs: do_last(): clean up labels

Reported-by: David Howells <dhowells@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 044215a7bb0..ea24376cfa9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2463,13 +2463,13 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 			error = -EISDIR;
 			goto exit;
 		}
-		goto ok;
+		goto finish_open;
 	case LAST_BIND:
 		error = complete_walk(nd);
 		if (error)
 			return ERR_PTR(error);
 		audit_inode(pathname, dir);
-		goto ok;
+		goto finish_open;
 	}
 
 	if (!(open_flag & O_CREAT)) {
@@ -2526,7 +2526,7 @@ retry_lookup:
 		will_truncate = 0;
 		acc_mode = MAY_OPEN;
 		path_to_nameidata(path, nd);
-		goto common;
+		goto finish_open_created;
 	}
 
 	/*
@@ -2598,7 +2598,7 @@ finish_lookup:
 	if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup)
 		goto exit;
 	audit_inode(pathname, nd->path.dentry);
-ok:
+finish_open:
 	if (!S_ISREG(nd->inode->i_mode))
 		will_truncate = 0;
 
@@ -2608,7 +2608,7 @@ ok:
 			goto exit;
 		want_write = 1;
 	}
-common:
+finish_open_created:
 	error = may_open(&nd->path, acc_mode, open_flag);
 	if (error)
 		goto exit;
-- 
cgit v1.2.3-70-g09d2


From 77d660a8a83036432dc33f092a367d06563d233e Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 5 Jun 2012 15:10:30 +0200
Subject: vfs: do_last(): clean up bool

Consistently use bool for boolean values in do_last().

Reported-by: David Howells <dhowells@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index ea24376cfa9..6bdb8d73253 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2196,7 +2196,7 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode)
 static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
 				struct path *path, struct opendata *od,
 				const struct open_flags *op,
-				int *want_write, bool need_lookup,
+				bool *want_write, bool need_lookup,
 				bool *created)
 {
 	struct inode *dir =  nd->path.dentry->d_inode;
@@ -2238,7 +2238,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
 	    (open_flag & O_ACCMODE) != O_RDONLY) {
 		error = mnt_want_write(nd->path.mnt);
 		if (!error) {
-			*want_write = 1;
+			*want_write = true;
 		} else if (!(open_flag & O_CREAT)) {
 			/*
 			 * No O_CREATE -> atomicity not a requirement -> fall
@@ -2360,7 +2360,7 @@ looked_up:
 static struct file *lookup_open(struct nameidata *nd, struct path *path,
 				struct opendata *od,
 				const struct open_flags *op,
-				int *want_write, bool *created)
+				bool *want_write, bool *created)
 {
 	struct dentry *dir = nd->path.dentry;
 	struct inode *dir_inode = dir->d_inode;
@@ -2405,7 +2405,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path,
 		error = mnt_want_write(nd->path.mnt);
 		if (error)
 			goto out_dput;
-		*want_write = 1;
+		*want_write = true;
 		*created = true;
 		error = security_path_mknod(&nd->path, dentry, mode, 0);
 		if (error)
@@ -2433,13 +2433,13 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 {
 	struct dentry *dir = nd->path.dentry;
 	int open_flag = op->open_flag;
-	int will_truncate = open_flag & O_TRUNC;
-	int want_write = 0;
+	bool will_truncate = (open_flag & O_TRUNC) != 0;
+	bool want_write = false;
 	int acc_mode = op->acc_mode;
 	struct file *filp;
 	struct inode *inode;
 	bool created;
-	int symlink_ok = 0;
+	bool symlink_ok = false;
 	struct path save_parent = { .dentry = NULL, .mnt = NULL };
 	bool retried = false;
 	int error;
@@ -2476,7 +2476,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 		if (nd->last.name[nd->last.len])
 			nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
 		if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
-			symlink_ok = 1;
+			symlink_ok = true;
 		/* we _can_ be in RCU mode here */
 		error = lookup_fast(nd, &nd->last, path, &inode);
 		if (likely(!error))
@@ -2514,7 +2514,7 @@ retry_lookup:
 			goto out;
 
 		if (created || !S_ISREG(filp->f_path.dentry->d_inode->i_mode))
-			will_truncate = 0;
+			will_truncate = false;
 
 		audit_inode(pathname, filp->f_path.dentry);
 		goto opened;
@@ -2523,7 +2523,7 @@ retry_lookup:
 	if (created) {
 		/* Don't check for write permission, don't truncate */
 		open_flag &= ~O_TRUNC;
-		will_truncate = 0;
+		will_truncate = false;
 		acc_mode = MAY_OPEN;
 		path_to_nameidata(path, nd);
 		goto finish_open_created;
@@ -2541,7 +2541,7 @@ retry_lookup:
 	 */
 	if (want_write) {
 		mnt_drop_write(nd->path.mnt);
-		want_write = 0;
+		want_write = false;
 	}
 
 	error = -EEXIST;
@@ -2600,13 +2600,13 @@ finish_lookup:
 	audit_inode(pathname, nd->path.dentry);
 finish_open:
 	if (!S_ISREG(nd->inode->i_mode))
-		will_truncate = 0;
+		will_truncate = false;
 
 	if (will_truncate) {
 		error = mnt_want_write(nd->path.mnt);
 		if (error)
 			goto exit;
-		want_write = 1;
+		want_write = true;
 	}
 finish_open_created:
 	error = may_open(&nd->path, acc_mode, open_flag);
@@ -2623,7 +2623,7 @@ finish_open_created:
 		save_parent.dentry = NULL;
 		if (want_write) {
 			mnt_drop_write(nd->path.mnt);
-			want_write = 0;
+			want_write = false;
 		}
 		retried = true;
 		goto retry_lookup;
-- 
cgit v1.2.3-70-g09d2


From f60dc3db6e24b7c36445cf1feb56b34c799074b3 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 5 Jun 2012 15:10:31 +0200
Subject: vfs: do_last(): clean up retry

Move the lookup retry logic to the bottom of the function to make the normal
case simpler to read.

Reported-by: David Howells <dhowells@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 36 +++++++++++++++++++++---------------
 1 file changed, 21 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 6bdb8d73253..183a769537f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2614,22 +2614,11 @@ finish_open_created:
 		goto exit;
 	od->mnt = nd->path.mnt;
 	filp = finish_open(od, nd->path.dentry, NULL);
-	if (filp == ERR_PTR(-EOPENSTALE) && save_parent.dentry && !retried) {
-		BUG_ON(save_parent.dentry != dir);
-		path_put(&nd->path);
-		nd->path = save_parent;
-		nd->inode = dir->d_inode;
-		save_parent.mnt = NULL;
-		save_parent.dentry = NULL;
-		if (want_write) {
-			mnt_drop_write(nd->path.mnt);
-			want_write = false;
-		}
-		retried = true;
-		goto retry_lookup;
-	}
-	if (IS_ERR(filp))
+	if (IS_ERR(filp)) {
+		if (filp == ERR_PTR(-EOPENSTALE))
+			goto stale_open;
 		goto out;
+	}
 	error = open_check_o_direct(filp);
 	if (error)
 		goto exit_fput;
@@ -2659,6 +2648,23 @@ exit_fput:
 	fput(filp);
 	goto exit;
 
+stale_open:
+	/* If no saved parent or already retried then can't retry */
+	if (!save_parent.dentry || retried)
+		goto out;
+
+	BUG_ON(save_parent.dentry != dir);
+	path_put(&nd->path);
+	nd->path = save_parent;
+	nd->inode = dir->d_inode;
+	save_parent.mnt = NULL;
+	save_parent.dentry = NULL;
+	if (want_write) {
+		mnt_drop_write(nd->path.mnt);
+		want_write = false;
+	}
+	retried = true;
+	goto retry_lookup;
 }
 
 static struct file *path_openat(int dfd, const char *pathname,
-- 
cgit v1.2.3-70-g09d2


From a8277b9baa6268de386529a33061775bc716198b Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 5 Jun 2012 15:10:32 +0200
Subject: vfs: move O_DIRECT check to common code

Perform open_check_o_direct() in a common place in do_last after opening the
file.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 183a769537f..4bc4bc6a693 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2305,22 +2305,15 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
 	 * here.
 	 */
 	error = may_open(&filp->f_path, acc_mode, open_flag);
-	if (error)
-		goto out_fput;
-
-	error = open_check_o_direct(filp);
-	if (error)
-		goto out_fput;
+	if (error) {
+		fput(filp);
+		filp = ERR_PTR(error);
+	}
 
 out:
 	dput(dentry);
 	return filp;
 
-out_fput:
-	fput(filp);
-	filp = ERR_PTR(error);
-	goto out;
-
 no_open:
 	if (need_lookup) {
 		dentry = lookup_real(dir, dentry, nd);
@@ -2619,10 +2612,10 @@ finish_open_created:
 			goto stale_open;
 		goto out;
 	}
+opened:
 	error = open_check_o_direct(filp);
 	if (error)
 		goto exit_fput;
-opened:
 	error = ima_file_check(filp, op->acc_mode);
 	if (error)
 		goto exit_fput;
-- 
cgit v1.2.3-70-g09d2


From 47237687d73cbeae1dd7a133c3fc3d7239094568 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 10 Jun 2012 05:01:45 -0400
Subject: ->atomic_open() prototype change - pass int * instead of bool *

... and let finish_open() report having opened the file via that sucker.
Next step: don't modify od->filp at all.

[AV: FILE_CREATE was already used by cifs; Miklos' fix folded]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/Locking |  2 +-
 Documentation/filesystems/vfs.txt |  2 +-
 fs/9p/vfs_inode.c                 |  6 +++---
 fs/9p/vfs_inode_dotl.c            |  6 +++---
 fs/ceph/dir.c                     |  8 ++++----
 fs/ceph/file.c                    |  5 +++--
 fs/ceph/super.h                   |  2 +-
 fs/cifs/cifsfs.h                  |  2 +-
 fs/cifs/dir.c                     | 12 ++++++------
 fs/fuse/dir.c                     | 10 +++++-----
 fs/namei.c                        | 33 +++++++++++++++++----------------
 fs/nfs/dir.c                      | 11 ++++++-----
 fs/open.c                         |  7 +++++--
 include/linux/fs.h                |  9 +++++++--
 14 files changed, 63 insertions(+), 52 deletions(-)

(limited to 'fs')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 8157488c346..af4e45bd6cf 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -64,7 +64,7 @@ ata *);
 	void (*update_time)(struct inode *, struct timespec *, int);
 	struct file * (*atomic_open)(struct inode *, struct dentry *,
 				struct opendata *, unsigned open_flag,
-				umode_t create_mode, bool *created);
+				umode_t create_mode, int *opened);
 
 locking rules:
 	all may block
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index beb6e691f70..d7121051afc 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -366,7 +366,7 @@ struct inode_operations {
 	void (*update_time)(struct inode *, struct timespec *, int);
 	struct file * (*atomic_open)(struct inode *, struct dentry *,
 				struct opendata *, unsigned open_flag,
-				umode_t create_mode, bool *created);
+				umode_t create_mode, int *opened);
 };
 
 Again, all methods are called without any locks being held, unless
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index e8c42ceb89b..de626b3b342 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -859,7 +859,7 @@ error:
 static struct file *
 v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
 		     struct opendata *od, unsigned flags, umode_t mode,
-		     bool *created)
+		     int *opened)
 {
 	int err;
 	u32 perm;
@@ -918,7 +918,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
 		v9inode->writeback_fid = (void *) inode_fid;
 	}
 	mutex_unlock(&v9inode->v_mutex);
-	filp = finish_open(od, dentry, generic_file_open);
+	filp = finish_open(od, dentry, generic_file_open, opened);
 	if (IS_ERR(filp)) {
 		err = PTR_ERR(filp);
 		goto error;
@@ -930,7 +930,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
 		v9fs_cache_inode_set_cookie(dentry->d_inode, filp);
 #endif
 
-	*created = true;
+	*opened |= FILE_CREATED;
 out:
 	dput(res);
 	return filp;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index a354fe2cb23..3db55471bc9 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -243,7 +243,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
 static struct file *
 v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
 			  struct opendata *od, unsigned flags, umode_t omode,
-			  bool *created)
+			  int *opened)
 {
 	int err = 0;
 	gid_t gid;
@@ -357,7 +357,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
 	}
 	mutex_unlock(&v9inode->v_mutex);
 	/* Since we are opening a file, assign the open fid to the file */
-	filp = finish_open(od, dentry, generic_file_open);
+	filp = finish_open(od, dentry, generic_file_open, opened);
 	if (IS_ERR(filp)) {
 		err = PTR_ERR(filp);
 		goto err_clunk_old_fid;
@@ -367,7 +367,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
 	if (v9ses->cache)
 		v9fs_cache_inode_set_cookie(inode, filp);
 #endif
-	*created = true;
+	*opened |= FILE_CREATED;
 out:
 	dput(res);
 	return filp;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 75df600ec9b..81e5e908df9 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -636,7 +636,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
 
 struct file *ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 			      struct opendata *od, unsigned flags, umode_t mode,
-			      bool *created)
+			      int *opened)
 {
 	int err;
 	struct dentry *res = NULL;
@@ -650,7 +650,7 @@ struct file *ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 		if (err < 0)
 			return ERR_PTR(err);
 
-		return ceph_lookup_open(dir, dentry, od, flags, mode);
+		return ceph_lookup_open(dir, dentry, od, flags, mode, opened);
 	}
 
 	if (d_unhashed(dentry)) {
@@ -668,8 +668,8 @@ struct file *ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 		return NULL;
 	}
 
-	*created = true;
-	filp = ceph_lookup_open(dir, dentry, od, flags, mode);
+	*opened |= FILE_CREATED;
+	filp = ceph_lookup_open(dir, dentry, od, flags, mode, opened);
 	dput(res);
 
 	return filp;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index e34dc22e75a..4c304a90d04 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -214,7 +214,8 @@ out:
  * ceph_release gets called).  So fear not!
  */
 struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
-			      struct opendata *od, unsigned flags, umode_t mode)
+			      struct opendata *od, unsigned flags, umode_t mode,
+			      int *opened)
 {
 	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
 	struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -247,7 +248,7 @@ struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
 		err = ceph_handle_notrace_create(dir, dentry);
 	if (err)
 		goto out;
-	file = finish_open(od, req->r_dentry, ceph_open);
+	file = finish_open(od, req->r_dentry, ceph_open, opened);
 	if (IS_ERR(file))
 		err = PTR_ERR(file);
 out:
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index e61e54673e5..f9a325108b4 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -808,7 +808,7 @@ extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
 extern int ceph_open(struct inode *inode, struct file *file);
 extern struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
 				     struct opendata *od, unsigned flags,
-				     umode_t mode);
+				     umode_t mode, int *opened);
 extern int ceph_release(struct inode *inode, struct file *filp);
 
 /* dir.c */
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 3a572bf5947..92a7c3d8a03 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -48,7 +48,7 @@ extern int cifs_create(struct inode *, struct dentry *, umode_t,
 		       struct nameidata *);
 extern struct file *cifs_atomic_open(struct inode *, struct dentry *,
 				     struct opendata *, unsigned, umode_t,
-				     bool *);
+				     int *);
 extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
 				  struct nameidata *);
 extern int cifs_unlink(struct inode *dir, struct dentry *dentry);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 7a3dcd15d68..6cdf23fd70e 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -160,7 +160,7 @@ check_name(struct dentry *direntry)
 static int cifs_do_create(struct inode *inode, struct dentry *direntry,
 			  int xid, struct tcon_link *tlink, unsigned oflags,
 			  umode_t mode, __u32 *oplock, __u16 *fileHandle,
-			  bool *created)
+			  int *created)
 {
 	int rc = -ENOENT;
 	int create_options = CREATE_NOT_DIR;
@@ -311,7 +311,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry,
 				.device	= 0,
 		};
 
-		*created = true;
+		*created |= FILE_CREATED;
 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
 			args.uid = (__u64) current_fsuid();
 			if (inode->i_mode & S_ISGID)
@@ -379,7 +379,7 @@ out:
 struct file *
 cifs_atomic_open(struct inode *inode, struct dentry *direntry,
 		 struct opendata *od, unsigned oflags, umode_t mode,
-		 bool *created)
+		 int *opened)
 {
 	int rc;
 	int xid;
@@ -426,14 +426,14 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
 	tcon = tlink_tcon(tlink);
 
 	rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
-			    &oplock, &fileHandle, created);
+			    &oplock, &fileHandle, opened);
 
 	if (rc) {
 		filp = ERR_PTR(rc);
 		goto out;
 	}
 
-	filp = finish_open(od, direntry, generic_file_open);
+	filp = finish_open(od, direntry, generic_file_open, opened);
 	if (IS_ERR(filp)) {
 		CIFSSMBClose(xid, tcon, fileHandle);
 		goto out;
@@ -469,7 +469,7 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
 	struct tcon_link *tlink;
 	__u16 fileHandle;
 	__u32 oplock;
-	bool created = true;
+	int created = FILE_CREATED;
 
 	cFYI(1, "cifs_create parent inode = 0x%p name is: %s and dentry = 0x%p",
 	     inode, direntry->d_name.name, direntry);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index e42442f1da1..345f78ee5c9 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -371,7 +371,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
  */
 static struct file *fuse_create_open(struct inode *dir, struct dentry *entry,
 				     struct opendata *od, unsigned flags,
-				     umode_t mode)
+				     umode_t mode, int *opened)
 {
 	int err;
 	struct inode *inode;
@@ -450,7 +450,7 @@ static struct file *fuse_create_open(struct inode *dir, struct dentry *entry,
 	d_instantiate(entry, inode);
 	fuse_change_entry_timeout(entry, &outentry);
 	fuse_invalidate_attr(dir);
-	file = finish_open(od, entry, generic_file_open);
+	file = finish_open(od, entry, generic_file_open, opened);
 	if (IS_ERR(file)) {
 		fuse_sync_release(ff, flags);
 	} else {
@@ -472,7 +472,7 @@ out_err:
 static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
 static struct file *fuse_atomic_open(struct inode *dir, struct dentry *entry,
 				     struct opendata *od, unsigned flags,
-				     umode_t mode, bool *created)
+				     umode_t mode, int *opened)
 {
 	int err;
 	struct fuse_conn *fc = get_fuse_conn(dir);
@@ -492,12 +492,12 @@ static struct file *fuse_atomic_open(struct inode *dir, struct dentry *entry,
 		goto no_open;
 
 	/* Only creates */
-	*created = true;
+	*opened |= FILE_CREATED;
 
 	if (fc->no_create)
 		goto mknod;
 
-	file = fuse_create_open(dir, entry, od, flags, mode);
+	file = fuse_create_open(dir, entry, od, flags, mode, opened);
 	if (PTR_ERR(file) == -ENOSYS) {
 		fc->no_create = 1;
 		goto mknod;
diff --git a/fs/namei.c b/fs/namei.c
index 4bc4bc6a693..7a33f074e5b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2197,7 +2197,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
 				struct path *path, struct opendata *od,
 				const struct open_flags *op,
 				bool *want_write, bool need_lookup,
-				bool *created)
+				int *opened)
 {
 	struct inode *dir =  nd->path.dentry->d_inode;
 	unsigned open_flag = open_to_namei_flags(op->open_flag);
@@ -2222,7 +2222,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
 
 	if (open_flag & O_EXCL) {
 		open_flag &= ~O_TRUNC;
-		*created = true;
+		*opened |= FILE_CREATED;
 	}
 
 	/*
@@ -2272,7 +2272,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
 	od->dentry = DENTRY_NOT_SET;
 	od->mnt = nd->path.mnt;
 	filp = dir->i_op->atomic_open(dir, dentry, od, open_flag, mode,
-				      created);
+				      opened);
 	if (IS_ERR(filp)) {
 		if (WARN_ON(od->dentry != DENTRY_NOT_SET))
 			dput(od->dentry);
@@ -2283,7 +2283,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
 	}
 
 	acc_mode = op->acc_mode;
-	if (*created) {
+	if (*opened & FILE_CREATED) {
 		fsnotify_create(dir, dentry);
 		acc_mode = MAY_OPEN;
 	}
@@ -2353,7 +2353,7 @@ looked_up:
 static struct file *lookup_open(struct nameidata *nd, struct path *path,
 				struct opendata *od,
 				const struct open_flags *op,
-				bool *want_write, bool *created)
+				bool *want_write, int *opened)
 {
 	struct dentry *dir = nd->path.dentry;
 	struct inode *dir_inode = dir->d_inode;
@@ -2361,7 +2361,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path,
 	int error;
 	bool need_lookup;
 
-	*created = false;
+	*opened &= ~FILE_CREATED;
 	dentry = lookup_dcache(&nd->last, dir, nd, &need_lookup);
 	if (IS_ERR(dentry))
 		return ERR_CAST(dentry);
@@ -2372,7 +2372,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path,
 
 	if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) {
 		return atomic_open(nd, dentry, path, od, op, want_write,
-				   need_lookup, created);
+				   need_lookup, opened);
 	}
 
 	if (need_lookup) {
@@ -2399,7 +2399,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path,
 		if (error)
 			goto out_dput;
 		*want_write = true;
-		*created = true;
+		*opened |= FILE_CREATED;
 		error = security_path_mknod(&nd->path, dentry, mode, 0);
 		if (error)
 			goto out_dput;
@@ -2422,7 +2422,7 @@ out_dput:
  */
 static struct file *do_last(struct nameidata *nd, struct path *path,
 			    struct opendata *od, const struct open_flags *op,
-			    const char *pathname)
+			    int *opened, const char *pathname)
 {
 	struct dentry *dir = nd->path.dentry;
 	int open_flag = op->open_flag;
@@ -2431,7 +2431,6 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 	int acc_mode = op->acc_mode;
 	struct file *filp;
 	struct inode *inode;
-	bool created;
 	bool symlink_ok = false;
 	struct path save_parent = { .dentry = NULL, .mnt = NULL };
 	bool retried = false;
@@ -2499,21 +2498,22 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 
 retry_lookup:
 	mutex_lock(&dir->d_inode->i_mutex);
-	filp = lookup_open(nd, path, od, op, &want_write, &created);
+	filp = lookup_open(nd, path, od, op, &want_write, opened);
 	mutex_unlock(&dir->d_inode->i_mutex);
 
 	if (filp) {
 		if (IS_ERR(filp))
 			goto out;
 
-		if (created || !S_ISREG(filp->f_path.dentry->d_inode->i_mode))
+		if ((*opened & FILE_CREATED) ||
+		    !S_ISREG(filp->f_path.dentry->d_inode->i_mode))
 			will_truncate = false;
 
 		audit_inode(pathname, filp->f_path.dentry);
 		goto opened;
 	}
 
-	if (created) {
+	if (*opened & FILE_CREATED) {
 		/* Don't check for write permission, don't truncate */
 		open_flag &= ~O_TRUNC;
 		will_truncate = false;
@@ -2606,7 +2606,7 @@ finish_open_created:
 	if (error)
 		goto exit;
 	od->mnt = nd->path.mnt;
-	filp = finish_open(od, nd->path.dentry, NULL);
+	filp = finish_open(od, nd->path.dentry, NULL, opened);
 	if (IS_ERR(filp)) {
 		if (filp == ERR_PTR(-EOPENSTALE))
 			goto stale_open;
@@ -2667,6 +2667,7 @@ static struct file *path_openat(int dfd, const char *pathname,
 	struct opendata od;
 	struct file *res;
 	struct path path;
+	int opened = 0;
 	int error;
 
 	od.filp = get_empty_filp();
@@ -2684,7 +2685,7 @@ static struct file *path_openat(int dfd, const char *pathname,
 	if (unlikely(error))
 		goto out_filp;
 
-	res = do_last(nd, &path, &od, op, pathname);
+	res = do_last(nd, &path, &od, op, &opened, pathname);
 	while (unlikely(!res)) { /* trailing symlink */
 		struct path link = path;
 		void *cookie;
@@ -2699,7 +2700,7 @@ static struct file *path_openat(int dfd, const char *pathname,
 		error = follow_link(&link, nd, &cookie);
 		if (unlikely(error))
 			goto out_filp;
-		res = do_last(nd, &path, &od, op, pathname);
+		res = do_last(nd, &path, &od, op, &opened, pathname);
 		put_link(nd, &link, cookie);
 	}
 out:
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e6d55dc93ff..6deb2549ead 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -113,7 +113,7 @@ const struct inode_operations nfs3_dir_inode_operations = {
 
 static struct file *nfs_atomic_open(struct inode *, struct dentry *,
 				    struct opendata *, unsigned, umode_t,
-				    bool *);
+				    int *);
 const struct inode_operations nfs4_dir_inode_operations = {
 	.create		= nfs_create,
 	.lookup		= nfs_lookup,
@@ -1389,7 +1389,8 @@ static int do_open(struct inode *inode, struct file *filp)
 
 static struct file *nfs_finish_open(struct nfs_open_context *ctx,
 				    struct dentry *dentry,
-				    struct opendata *od, unsigned open_flags)
+				    struct opendata *od, unsigned open_flags,
+				    int *opened)
 {
 	struct file *filp;
 	int err;
@@ -1408,7 +1409,7 @@ static struct file *nfs_finish_open(struct nfs_open_context *ctx,
 		}
 	}
 
-	filp = finish_open(od, dentry, do_open);
+	filp = finish_open(od, dentry, do_open, opened);
 	if (!IS_ERR(filp))
 		nfs_file_set_open_context(filp, ctx);
 
@@ -1419,7 +1420,7 @@ out:
 
 static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 				    struct opendata *od, unsigned open_flags,
-				    umode_t mode, bool *created)
+				    umode_t mode, int *opened)
 {
 	struct nfs_open_context *ctx;
 	struct dentry *res;
@@ -1497,7 +1498,7 @@ static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 	nfs_unblock_sillyrename(dentry->d_parent);
 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 
-	filp = nfs_finish_open(ctx, dentry, od, open_flags);
+	filp = nfs_finish_open(ctx, dentry, od, open_flags, opened);
 
 	dput(res);
 	return filp;
diff --git a/fs/open.c b/fs/open.c
index 937f4ec2018..89589bd3993 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -782,7 +782,8 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
  * filesystem callback is substituted.
  */
 struct file *finish_open(struct opendata *od, struct dentry *dentry,
-			 int (*open)(struct inode *, struct file *))
+			 int (*open)(struct inode *, struct file *),
+			 int *opened)
 {
 	struct file *res;
 
@@ -790,8 +791,10 @@ struct file *finish_open(struct opendata *od, struct dentry *dentry,
 	dget(dentry);
 
 	res = do_dentry_open(dentry, od->mnt, od->filp, open, current_cred());
-	if (!IS_ERR(res))
+	if (!IS_ERR(res)) {
+		*opened |= FILE_OPENED;
 		od->filp = NULL;
+	}
 
 	return res;
 }
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0314635cf83..a7618cf28d0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1696,7 +1696,7 @@ struct inode_operations {
 	int (*update_time)(struct inode *, struct timespec *, int);
 	struct file * (*atomic_open)(struct inode *, struct dentry *,
 				     struct opendata *, unsigned open_flag,
-				     umode_t create_mode, bool *created);
+				     umode_t create_mode, int *opened);
 } ____cacheline_aligned;
 
 struct seq_file;
@@ -2065,8 +2065,13 @@ extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
 				 const struct cred *);
 extern int filp_close(struct file *, fl_owner_t id);
 extern char * getname(const char __user *);
+enum {
+	FILE_CREATED = 1,
+	FILE_OPENED = 2
+};
 extern struct file *finish_open(struct opendata *od, struct dentry *dentry,
-				int (*open)(struct inode *, struct file *));
+				int (*open)(struct inode *, struct file *),
+				int *opened);
 extern void finish_no_open(struct opendata *od, struct dentry *dentry);
 
 /* fs/ioctl.c */
-- 
cgit v1.2.3-70-g09d2


From 3d8a00d2099ebc6d5a6e95fadaf861709d9919a8 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 10 Jun 2012 05:04:43 -0400
Subject: don't modify od->filp at all

make put_filp() conditional on flag set by finish_open()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 4 +---
 fs/open.c  | 5 ++---
 2 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 7a33f074e5b..18b9326d951 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2708,10 +2708,8 @@ out:
 		path_put(&nd->root);
 	if (base)
 		fput(base);
-	if (od.filp) {
-		BUG_ON(od.filp->f_path.dentry);
+	if (!(opened & FILE_OPENED))
 		put_filp(od.filp);
-	}
 	if (res == ERR_PTR(-EOPENSTALE)) {
 		if (flags & LOOKUP_RCU)
 			res = ERR_PTR(-ECHILD);
diff --git a/fs/open.c b/fs/open.c
index 89589bd3993..c87f98201c2 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -786,15 +786,14 @@ struct file *finish_open(struct opendata *od, struct dentry *dentry,
 			 int *opened)
 {
 	struct file *res;
+	BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
 
 	mntget(od->mnt);
 	dget(dentry);
 
 	res = do_dentry_open(dentry, od->mnt, od->filp, open, current_cred());
-	if (!IS_ERR(res)) {
+	if (!IS_ERR(res))
 		*opened |= FILE_OPENED;
-		od->filp = NULL;
-	}
 
 	return res;
 }
-- 
cgit v1.2.3-70-g09d2


From d95852777bc8ba6b3ad3397d495c5f9dd8ca8383 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 22 Jun 2012 12:39:14 +0400
Subject: make ->atomic_open() return int

Change of calling conventions:
old		new
NULL		1
file		0
ERR_PTR(-ve)	-ve

Caller *knows* that struct file *; no need to return it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/Locking |  2 +-
 Documentation/filesystems/vfs.txt |  6 ++---
 fs/9p/vfs_inode.c                 | 10 +++----
 fs/9p/vfs_inode_dotl.c            | 14 +++++-----
 fs/ceph/dir.c                     | 19 +++++++------
 fs/ceph/file.c                    | 12 ++++-----
 fs/ceph/super.h                   |  6 ++---
 fs/cifs/cifsfs.h                  |  6 ++---
 fs/cifs/dir.c                     | 17 ++++++------
 fs/fuse/dir.c                     | 33 +++++++++++------------
 fs/namei.c                        | 14 +++++-----
 fs/nfs/dir.c                      | 57 +++++++++++++++++++--------------------
 include/linux/fs.h                |  6 ++---
 13 files changed, 97 insertions(+), 105 deletions(-)

(limited to 'fs')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index af4e45bd6cf..46a24a6ed09 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -62,7 +62,7 @@ ata *);
 	int (*removexattr) (struct dentry *, const char *);
 	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
 	void (*update_time)(struct inode *, struct timespec *, int);
-	struct file * (*atomic_open)(struct inode *, struct dentry *,
+	int (*atomic_open)(struct inode *, struct dentry *,
 				struct opendata *, unsigned open_flag,
 				umode_t create_mode, int *opened);
 
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index d7121051afc..d0d690bbc4c 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -364,7 +364,7 @@ struct inode_operations {
 	ssize_t (*listxattr) (struct dentry *, char *, size_t);
 	int (*removexattr) (struct dentry *, const char *);
 	void (*update_time)(struct inode *, struct timespec *, int);
-	struct file * (*atomic_open)(struct inode *, struct dentry *,
+	int (*atomic_open)(struct inode *, struct dentry *,
 				struct opendata *, unsigned open_flag,
 				umode_t create_mode, int *opened);
 };
@@ -482,8 +482,8 @@ otherwise noted.
   atomic_open: called on the last component of an open.  Using this optional
   	method the filesystem can look up, possibly create and open the file in
   	one atomic operation.  If it cannot perform this (e.g. the file type
-  	turned out to be wrong) it may signal this by returning NULL instead of
-  	an open struct file pointer.  This method is only called if the last
+  	turned out to be wrong) it may signal this by returning 1 instead of
+  	usual 0 or -ve .  This method is only called if the last
   	component is negative or needs lookup.  Cached positive dentries are
   	still handled by f_op->open().
 
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index de626b3b342..62ce8daefa9 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -856,7 +856,7 @@ error:
 	return ERR_PTR(result);
 }
 
-static struct file *
+static int
 v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
 		     struct opendata *od, unsigned flags, umode_t mode,
 		     int *opened)
@@ -872,7 +872,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
 	if (d_unhashed(dentry)) {
 		res = v9fs_vfs_lookup(dir, dentry, NULL);
 		if (IS_ERR(res))
-			return ERR_CAST(res);
+			return PTR_ERR(res);
 
 		if (res)
 			dentry = res;
@@ -881,7 +881,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
 	/* Only creates */
 	if (!(flags & O_CREAT) || dentry->d_inode) {
 		finish_no_open(od, res);
-		return NULL;
+		return 1;
 	}
 
 	err = 0;
@@ -933,13 +933,11 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
 	*opened |= FILE_CREATED;
 out:
 	dput(res);
-	return filp;
+	return err;
 
 error:
 	if (fid)
 		p9_client_clunk(fid);
-
-	filp = ERR_PTR(err);
 	goto out;
 }
 
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 3db55471bc9..69f05109f75 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -240,7 +240,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
 	return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0);
 }
 
-static struct file *
+static int
 v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
 			  struct opendata *od, unsigned flags, umode_t omode,
 			  int *opened)
@@ -262,7 +262,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
 	if (d_unhashed(dentry)) {
 		res = v9fs_vfs_lookup(dir, dentry, NULL);
 		if (IS_ERR(res))
-			return ERR_CAST(res);
+			return PTR_ERR(res);
 
 		if (res)
 			dentry = res;
@@ -271,7 +271,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
 	/* Only creates */
 	if (!(flags & O_CREAT) || dentry->d_inode) {
 		finish_no_open(od, res);
-		return NULL;
+		return 1;
 	}
 
 	v9ses = v9fs_inode2v9ses(dir);
@@ -284,7 +284,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
 	if (IS_ERR(dfid)) {
 		err = PTR_ERR(dfid);
 		p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
-		goto err_return;
+		goto out;
 	}
 
 	/* clone a fid to use for creation */
@@ -292,7 +292,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
 	if (IS_ERR(ofid)) {
 		err = PTR_ERR(ofid);
 		p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
-		goto err_return;
+		goto out;
 	}
 
 	gid = v9fs_get_fsgid_for_create(dir);
@@ -370,7 +370,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
 	*opened |= FILE_CREATED;
 out:
 	dput(res);
-	return filp;
+	return err;
 
 error:
 	if (fid)
@@ -379,8 +379,6 @@ err_clunk_old_fid:
 	if (ofid)
 		p9_client_clunk(ofid);
 	v9fs_set_create_acl(NULL, &dacl, &pacl);
-err_return:
-	filp = ERR_PTR(err);
 	goto out;
 }
 
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 81e5e908df9..d8bfabeeaa2 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -634,21 +634,20 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
 	return dentry;
 }
 
-struct file *ceph_atomic_open(struct inode *dir, struct dentry *dentry,
-			      struct opendata *od, unsigned flags, umode_t mode,
-			      int *opened)
+int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
+		     struct opendata *od, unsigned flags, umode_t mode,
+		     int *opened)
 {
 	int err;
 	struct dentry *res = NULL;
-	struct file *filp;
 
 	if (!(flags & O_CREAT)) {
 		if (dentry->d_name.len > NAME_MAX)
-			return ERR_PTR(-ENAMETOOLONG);
+			return -ENAMETOOLONG;
 
 		err = ceph_init_dentry(dentry);
 		if (err < 0)
-			return ERR_PTR(err);
+			return err;
 
 		return ceph_lookup_open(dir, dentry, od, flags, mode, opened);
 	}
@@ -656,7 +655,7 @@ struct file *ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 	if (d_unhashed(dentry)) {
 		res = ceph_lookup(dir, dentry, NULL);
 		if (IS_ERR(res))
-			return ERR_CAST(res);
+			return PTR_ERR(res);
 
 		if (res)
 			dentry = res;
@@ -665,14 +664,14 @@ struct file *ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 	/* We don't deal with positive dentries here */
 	if (dentry->d_inode) {
 		finish_no_open(od, res);
-		return NULL;
+		return 1;
 	}
 
 	*opened |= FILE_CREATED;
-	filp = ceph_lookup_open(dir, dentry, od, flags, mode, opened);
+	err = ceph_lookup_open(dir, dentry, od, flags, mode, opened);
 	dput(res);
 
-	return filp;
+	return err;
 }
 
 /*
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 4c304a90d04..b8cc3ee5401 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -213,9 +213,9 @@ out:
  * may_open() fails, the struct *file gets cleaned up (i.e.
  * ceph_release gets called).  So fear not!
  */
-struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
-			      struct opendata *od, unsigned flags, umode_t mode,
-			      int *opened)
+int ceph_lookup_open(struct inode *dir, struct dentry *dentry,
+		     struct opendata *od, unsigned flags, umode_t mode,
+		     int *opened)
 {
 	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
 	struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -230,7 +230,7 @@ struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
 	/* do the open */
 	req = prepare_open_request(dir->i_sb, flags, mode);
 	if (IS_ERR(req))
-		return ERR_CAST(req);
+		return PTR_ERR(req);
 	req->r_dentry = dget(dentry);
 	req->r_num_caps = 2;
 	if (flags & O_CREAT) {
@@ -257,10 +257,10 @@ out:
 	dout("ceph_lookup_open result=%p\n", ret);
 
 	if (IS_ERR(ret))
-		return ERR_CAST(ret);
+		return PTR_ERR(ret);
 
 	dput(ret);
-	return err ? ERR_PTR(err) : file;
+	return err;
 }
 
 int ceph_release(struct inode *inode, struct file *file)
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index f9a325108b4..f7e8e82ec47 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -806,9 +806,9 @@ extern int ceph_copy_from_page_vector(struct page **pages,
 				    loff_t off, size_t len);
 extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
 extern int ceph_open(struct inode *inode, struct file *file);
-extern struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
-				     struct opendata *od, unsigned flags,
-				     umode_t mode, int *opened);
+extern int ceph_lookup_open(struct inode *dir, struct dentry *dentry,
+			     struct opendata *od, unsigned flags,
+			     umode_t mode, int *opened);
 extern int ceph_release(struct inode *inode, struct file *filp);
 
 /* dir.c */
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 92a7c3d8a03..58d9aca46a4 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -46,9 +46,9 @@ extern const struct inode_operations cifs_dir_inode_ops;
 extern struct inode *cifs_root_iget(struct super_block *);
 extern int cifs_create(struct inode *, struct dentry *, umode_t,
 		       struct nameidata *);
-extern struct file *cifs_atomic_open(struct inode *, struct dentry *,
-				     struct opendata *, unsigned, umode_t,
-				     int *);
+extern int cifs_atomic_open(struct inode *, struct dentry *,
+			    struct opendata *, unsigned, umode_t,
+			    int *);
 extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
 				  struct nameidata *);
 extern int cifs_unlink(struct inode *dir, struct dentry *dentry);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 6cdf23fd70e..8ca70b102b9 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -376,7 +376,7 @@ out:
 	return rc;
 }
 
-struct file *
+int
 cifs_atomic_open(struct inode *inode, struct dentry *direntry,
 		 struct opendata *od, unsigned oflags, umode_t mode,
 		 int *opened)
@@ -403,15 +403,15 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
 	if (!(oflags & O_CREAT)) {
 		struct dentry *res = cifs_lookup(inode, direntry, NULL);
 		if (IS_ERR(res))
-			return ERR_CAST(res);
+			return PTR_ERR(res);
 
 		finish_no_open(od, res);
-		return NULL;
+		return 1;
 	}
 
 	rc = check_name(direntry);
 	if (rc)
-		return ERR_PTR(rc);
+		return rc;
 
 	xid = GetXid();
 
@@ -428,13 +428,12 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
 	rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
 			    &oplock, &fileHandle, opened);
 
-	if (rc) {
-		filp = ERR_PTR(rc);
+	if (rc)
 		goto out;
-	}
 
 	filp = finish_open(od, direntry, generic_file_open, opened);
 	if (IS_ERR(filp)) {
+		rc = PTR_ERR(filp);
 		CIFSSMBClose(xid, tcon, fileHandle);
 		goto out;
 	}
@@ -443,14 +442,14 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
 	if (pfile_info == NULL) {
 		CIFSSMBClose(xid, tcon, fileHandle);
 		fput(filp);
-		filp = ERR_PTR(-ENOMEM);
+		rc = -ENOMEM;
 	}
 
 out:
 	cifs_put_tlink(tlink);
 free_xid:
 	FreeXid(xid);
-	return filp;
+	return rc;
 }
 
 int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 345f78ee5c9..8a9ca09e87d 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -369,9 +369,9 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
  * If the filesystem doesn't support this, then fall back to separate
  * 'mknod' + 'open' requests.
  */
-static struct file *fuse_create_open(struct inode *dir, struct dentry *entry,
-				     struct opendata *od, unsigned flags,
-				     umode_t mode, int *opened)
+static int fuse_create_open(struct inode *dir, struct dentry *entry,
+			    struct opendata *od, unsigned flags,
+			    umode_t mode, int *opened)
 {
 	int err;
 	struct inode *inode;
@@ -452,12 +452,14 @@ static struct file *fuse_create_open(struct inode *dir, struct dentry *entry,
 	fuse_invalidate_attr(dir);
 	file = finish_open(od, entry, generic_file_open, opened);
 	if (IS_ERR(file)) {
+		err = PTR_ERR(file);
 		fuse_sync_release(ff, flags);
 	} else {
 		file->private_data = fuse_file_get(ff);
 		fuse_finish_open(inode, file);
+		err = 0;
 	}
-	return file;
+	return err;
 
 out_free_ff:
 	fuse_file_free(ff);
@@ -466,23 +468,22 @@ out_put_request:
 out_put_forget_req:
 	kfree(forget);
 out_err:
-	return ERR_PTR(err);
+	return err;
 }
 
 static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
-static struct file *fuse_atomic_open(struct inode *dir, struct dentry *entry,
-				     struct opendata *od, unsigned flags,
-				     umode_t mode, int *opened)
+static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
+			    struct opendata *od, unsigned flags,
+			    umode_t mode, int *opened)
 {
 	int err;
 	struct fuse_conn *fc = get_fuse_conn(dir);
-	struct file *file;
 	struct dentry *res = NULL;
 
 	if (d_unhashed(entry)) {
 		res = fuse_lookup(dir, entry, NULL);
 		if (IS_ERR(res))
-			return ERR_CAST(res);
+			return PTR_ERR(res);
 
 		if (res)
 			entry = res;
@@ -497,24 +498,22 @@ static struct file *fuse_atomic_open(struct inode *dir, struct dentry *entry,
 	if (fc->no_create)
 		goto mknod;
 
-	file = fuse_create_open(dir, entry, od, flags, mode, opened);
-	if (PTR_ERR(file) == -ENOSYS) {
+	err = fuse_create_open(dir, entry, od, flags, mode, opened);
+	if (err == -ENOSYS) {
 		fc->no_create = 1;
 		goto mknod;
 	}
 out_dput:
 	dput(res);
-	return file;
+	return err;
 
 mknod:
 	err = fuse_mknod(dir, entry, mode, 0);
-	if (err) {
-		file = ERR_PTR(err);
+	if (err)
 		goto out_dput;
-	}
 no_open:
 	finish_no_open(od, res);
-	return NULL;
+	return 1;
 }
 
 /*
diff --git a/fs/namei.c b/fs/namei.c
index 18b9326d951..f0dae0057ec 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2204,7 +2204,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
 	umode_t mode;
 	int error;
 	int acc_mode;
-	struct file *filp;
+	struct file *filp = NULL;
 	int create_error = 0;
 	struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
 
@@ -2271,14 +2271,15 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
 
 	od->dentry = DENTRY_NOT_SET;
 	od->mnt = nd->path.mnt;
-	filp = dir->i_op->atomic_open(dir, dentry, od, open_flag, mode,
+	error = dir->i_op->atomic_open(dir, dentry, od, open_flag, mode,
 				      opened);
-	if (IS_ERR(filp)) {
+	if (error < 0) {
 		if (WARN_ON(od->dentry != DENTRY_NOT_SET))
 			dput(od->dentry);
 
-		if (create_error && PTR_ERR(filp) == -ENOENT)
-			filp = ERR_PTR(create_error);
+		if (create_error && error == -ENOENT)
+			error = create_error;
+		filp = ERR_PTR(error);
 		goto out;
 	}
 
@@ -2288,7 +2289,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
 		acc_mode = MAY_OPEN;
 	}
 
-	if (!filp) {
+	if (error) {	/* returned 1, that is */
 		if (WARN_ON(od->dentry == DENTRY_NOT_SET)) {
 			filp = ERR_PTR(-EIO);
 			goto out;
@@ -2304,6 +2305,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
 	 * We didn't have the inode before the open, so check open permission
 	 * here.
 	 */
+	filp = od->filp;
 	error = may_open(&filp->f_path, acc_mode, open_flag);
 	if (error) {
 		fput(filp);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 6deb2549ead..b56f4b36ed4 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -111,9 +111,9 @@ const struct inode_operations nfs3_dir_inode_operations = {
 
 #ifdef CONFIG_NFS_V4
 
-static struct file *nfs_atomic_open(struct inode *, struct dentry *,
-				    struct opendata *, unsigned, umode_t,
-				    int *);
+static int nfs_atomic_open(struct inode *, struct dentry *,
+			   struct opendata *, unsigned, umode_t,
+			   int *);
 const struct inode_operations nfs4_dir_inode_operations = {
 	.create		= nfs_create,
 	.lookup		= nfs_lookup,
@@ -1387,10 +1387,10 @@ static int do_open(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-static struct file *nfs_finish_open(struct nfs_open_context *ctx,
-				    struct dentry *dentry,
-				    struct opendata *od, unsigned open_flags,
-				    int *opened)
+static int nfs_finish_open(struct nfs_open_context *ctx,
+			   struct dentry *dentry,
+			   struct opendata *od, unsigned open_flags,
+			   int *opened)
 {
 	struct file *filp;
 	int err;
@@ -1403,30 +1403,31 @@ static struct file *nfs_finish_open(struct nfs_open_context *ctx,
 	/* If the open_intent is for execute, we have an extra check to make */
 	if (ctx->mode & FMODE_EXEC) {
 		err = nfs_may_open(dentry->d_inode, ctx->cred, open_flags);
-		if (err < 0) {
-			filp = ERR_PTR(err);
+		if (err < 0)
 			goto out;
-		}
 	}
 
 	filp = finish_open(od, dentry, do_open, opened);
-	if (!IS_ERR(filp))
-		nfs_file_set_open_context(filp, ctx);
+	if (IS_ERR(filp)) {
+		err = PTR_ERR(filp);
+		goto out;
+	}
+	nfs_file_set_open_context(filp, ctx);
+	err = 0;
 
 out:
 	put_nfs_open_context(ctx);
-	return filp;
+	return err;
 }
 
-static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry,
-				    struct opendata *od, unsigned open_flags,
-				    umode_t mode, int *opened)
+static int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
+			    struct opendata *od, unsigned open_flags,
+			    umode_t mode, int *opened)
 {
 	struct nfs_open_context *ctx;
 	struct dentry *res;
 	struct iattr attr = { .ia_valid = ATTR_OPEN };
 	struct inode *inode;
-	struct file *filp;
 	int err;
 
 	/* Expect a negative dentry */
@@ -1437,21 +1438,19 @@ static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 
 	/* NFS only supports OPEN on regular files */
 	if ((open_flags & O_DIRECTORY)) {
-		err = -ENOENT;
 		if (!d_unhashed(dentry)) {
 			/*
 			 * Hashed negative dentry with O_DIRECTORY: dentry was
 			 * revalidated and is fine, no need to perform lookup
 			 * again
 			 */
-			goto out_err;
+			return -ENOENT;
 		}
 		goto no_open;
 	}
 
-	err = -ENAMETOOLONG;
 	if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
-		goto out_err;
+		return -ENAMETOOLONG;
 
 	if (open_flags & O_CREAT) {
 		attr.ia_valid |= ATTR_MODE;
@@ -1465,7 +1464,7 @@ static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 	ctx = create_nfs_open_context(dentry, open_flags);
 	err = PTR_ERR(ctx);
 	if (IS_ERR(ctx))
-		goto out_err;
+		goto out;
 
 	nfs_block_sillyrename(dentry->d_parent);
 	inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr);
@@ -1489,7 +1488,7 @@ static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 		default:
 			break;
 		}
-		goto out_err;
+		goto out;
 	}
 	res = d_add_unique(dentry, inode);
 	if (res != NULL)
@@ -1498,22 +1497,20 @@ static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 	nfs_unblock_sillyrename(dentry->d_parent);
 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 
-	filp = nfs_finish_open(ctx, dentry, od, open_flags, opened);
+	err = nfs_finish_open(ctx, dentry, od, open_flags, opened);
 
 	dput(res);
-	return filp;
-
-out_err:
-	return ERR_PTR(err);
+out:
+	return err;
 
 no_open:
 	res = nfs_lookup(dir, dentry, NULL);
 	err = PTR_ERR(res);
 	if (IS_ERR(res))
-		goto out_err;
+		goto out;
 
 	finish_no_open(od, res);
-	return NULL;
+	return 1;
 }
 
 static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a7618cf28d0..33bda922988 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1694,9 +1694,9 @@ struct inode_operations {
 	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
 		      u64 len);
 	int (*update_time)(struct inode *, struct timespec *, int);
-	struct file * (*atomic_open)(struct inode *, struct dentry *,
-				     struct opendata *, unsigned open_flag,
-				     umode_t create_mode, int *opened);
+	int (*atomic_open)(struct inode *, struct dentry *,
+			   struct opendata *, unsigned open_flag,
+			   umode_t create_mode, int *opened);
 } ____cacheline_aligned;
 
 struct seq_file;
-- 
cgit v1.2.3-70-g09d2


From a4a3bdd778715999ddfeefdc52ab76254580fa76 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 10 Jun 2012 05:55:37 -0400
Subject: kill opendata->{mnt,dentry}

->filp->f_path is there for purpose...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/internal.h |  2 --
 fs/namei.c    | 15 ++++++---------
 fs/open.c     |  6 +++---
 3 files changed, 9 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/internal.h b/fs/internal.h
index ae69a3b150d..09003a02292 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -83,8 +83,6 @@ extern struct super_block *user_get_super(dev_t);
  * open.c
  */
 struct opendata {
-	struct dentry *dentry;
-	struct vfsmount *mnt;
 	struct file *filp;
 };
 struct open_flags {
diff --git a/fs/namei.c b/fs/namei.c
index f0dae0057ec..af83ede92a4 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2269,14 +2269,11 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
 	if (nd->flags & LOOKUP_DIRECTORY)
 		open_flag |= O_DIRECTORY;
 
-	od->dentry = DENTRY_NOT_SET;
-	od->mnt = nd->path.mnt;
+	od->filp->f_path.dentry = DENTRY_NOT_SET;
+	od->filp->f_path.mnt = nd->path.mnt;
 	error = dir->i_op->atomic_open(dir, dentry, od, open_flag, mode,
 				      opened);
 	if (error < 0) {
-		if (WARN_ON(od->dentry != DENTRY_NOT_SET))
-			dput(od->dentry);
-
 		if (create_error && error == -ENOENT)
 			error = create_error;
 		filp = ERR_PTR(error);
@@ -2290,13 +2287,13 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
 	}
 
 	if (error) {	/* returned 1, that is */
-		if (WARN_ON(od->dentry == DENTRY_NOT_SET)) {
+		if (WARN_ON(od->filp->f_path.dentry == DENTRY_NOT_SET)) {
 			filp = ERR_PTR(-EIO);
 			goto out;
 		}
-		if (od->dentry) {
+		if (od->filp->f_path.dentry) {
 			dput(dentry);
-			dentry = od->dentry;
+			dentry = od->filp->f_path.dentry;
 		}
 		goto looked_up;
 	}
@@ -2607,7 +2604,7 @@ finish_open_created:
 	error = may_open(&nd->path, acc_mode, open_flag);
 	if (error)
 		goto exit;
-	od->mnt = nd->path.mnt;
+	od->filp->f_path.mnt = nd->path.mnt;
 	filp = finish_open(od, nd->path.dentry, NULL, opened);
 	if (IS_ERR(filp)) {
 		if (filp == ERR_PTR(-EOPENSTALE))
diff --git a/fs/open.c b/fs/open.c
index c87f98201c2..2b1654d8bfb 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -788,10 +788,10 @@ struct file *finish_open(struct opendata *od, struct dentry *dentry,
 	struct file *res;
 	BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
 
-	mntget(od->mnt);
+	mntget(od->filp->f_path.mnt);
 	dget(dentry);
 
-	res = do_dentry_open(dentry, od->mnt, od->filp, open, current_cred());
+	res = do_dentry_open(dentry, od->filp->f_path.mnt, od->filp, open, current_cred());
 	if (!IS_ERR(res))
 		*opened |= FILE_OPENED;
 
@@ -810,7 +810,7 @@ EXPORT_SYMBOL(finish_open);
  */
 void finish_no_open(struct opendata *od, struct dentry *dentry)
 {
-	od->dentry = dentry;
+	od->filp->f_path.dentry = dentry;
 }
 EXPORT_SYMBOL(finish_no_open);
 
-- 
cgit v1.2.3-70-g09d2


From 30d904947459cca2beb69e0110716f5248b31f2a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 22 Jun 2012 12:40:19 +0400
Subject: kill struct opendata

Just pass struct file *.  Methods are happier that way...
There's no need to return struct file * from finish_open() now,
so let it return int.  Next: saner prototypes for parts in
namei.c

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/Locking |  2 +-
 Documentation/filesystems/vfs.txt |  2 +-
 fs/9p/vfs_inode.c                 | 15 +++++-------
 fs/9p/vfs_inode_dotl.c            | 15 +++++-------
 fs/ceph/dir.c                     |  8 +++----
 fs/ceph/file.c                    |  7 ++----
 fs/ceph/super.h                   |  2 +-
 fs/cifs/cifsfs.h                  |  2 +-
 fs/cifs/dir.c                     |  9 ++++----
 fs/fuse/dir.c                     | 15 +++++-------
 fs/internal.h                     |  3 ---
 fs/namei.c                        | 48 ++++++++++++++++++++-------------------
 fs/nfs/dir.c                      | 20 +++++++---------
 fs/open.c                         | 20 ++++++++--------
 include/linux/fs.h                | 11 ++++-----
 15 files changed, 81 insertions(+), 98 deletions(-)

(limited to 'fs')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 46a24a6ed09..33e5243948f 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -63,7 +63,7 @@ ata *);
 	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
 	void (*update_time)(struct inode *, struct timespec *, int);
 	int (*atomic_open)(struct inode *, struct dentry *,
-				struct opendata *, unsigned open_flag,
+				struct file *, unsigned open_flag,
 				umode_t create_mode, int *opened);
 
 locking rules:
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index d0d690bbc4c..279de219036 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -365,7 +365,7 @@ struct inode_operations {
 	int (*removexattr) (struct dentry *, const char *);
 	void (*update_time)(struct inode *, struct timespec *, int);
 	int (*atomic_open)(struct inode *, struct dentry *,
-				struct opendata *, unsigned open_flag,
+				struct file *, unsigned open_flag,
 				umode_t create_mode, int *opened);
 };
 
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 62ce8daefa9..2b05651e0c3 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -858,12 +858,11 @@ error:
 
 static int
 v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
-		     struct opendata *od, unsigned flags, umode_t mode,
+		     struct file *file, unsigned flags, umode_t mode,
 		     int *opened)
 {
 	int err;
 	u32 perm;
-	struct file *filp;
 	struct v9fs_inode *v9inode;
 	struct v9fs_session_info *v9ses;
 	struct p9_fid *fid, *inode_fid;
@@ -880,7 +879,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
 
 	/* Only creates */
 	if (!(flags & O_CREAT) || dentry->d_inode) {
-		finish_no_open(od, res);
+		finish_no_open(file, res);
 		return 1;
 	}
 
@@ -918,16 +917,14 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
 		v9inode->writeback_fid = (void *) inode_fid;
 	}
 	mutex_unlock(&v9inode->v_mutex);
-	filp = finish_open(od, dentry, generic_file_open, opened);
-	if (IS_ERR(filp)) {
-		err = PTR_ERR(filp);
+	err = finish_open(file, dentry, generic_file_open, opened);
+	if (err)
 		goto error;
-	}
 
-	filp->private_data = fid;
+	file->private_data = fid;
 #ifdef CONFIG_9P_FSCACHE
 	if (v9ses->cache)
-		v9fs_cache_inode_set_cookie(dentry->d_inode, filp);
+		v9fs_cache_inode_set_cookie(dentry->d_inode, file);
 #endif
 
 	*opened |= FILE_CREATED;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 69f05109f75..cfaebdef974 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -242,14 +242,13 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
 
 static int
 v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
-			  struct opendata *od, unsigned flags, umode_t omode,
+			  struct file *file, unsigned flags, umode_t omode,
 			  int *opened)
 {
 	int err = 0;
 	gid_t gid;
 	umode_t mode;
 	char *name = NULL;
-	struct file *filp;
 	struct p9_qid qid;
 	struct inode *inode;
 	struct p9_fid *fid = NULL;
@@ -270,7 +269,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
 
 	/* Only creates */
 	if (!(flags & O_CREAT) || dentry->d_inode) {
-		finish_no_open(od, res);
+		finish_no_open(file, res);
 		return 1;
 	}
 
@@ -357,15 +356,13 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
 	}
 	mutex_unlock(&v9inode->v_mutex);
 	/* Since we are opening a file, assign the open fid to the file */
-	filp = finish_open(od, dentry, generic_file_open, opened);
-	if (IS_ERR(filp)) {
-		err = PTR_ERR(filp);
+	err = finish_open(file, dentry, generic_file_open, opened);
+	if (err)
 		goto err_clunk_old_fid;
-	}
-	filp->private_data = ofid;
+	file->private_data = ofid;
 #ifdef CONFIG_9P_FSCACHE
 	if (v9ses->cache)
-		v9fs_cache_inode_set_cookie(inode, filp);
+		v9fs_cache_inode_set_cookie(inode, file);
 #endif
 	*opened |= FILE_CREATED;
 out:
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index d8bfabeeaa2..80c848e0539 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -635,7 +635,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
 }
 
 int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
-		     struct opendata *od, unsigned flags, umode_t mode,
+		     struct file *file, unsigned flags, umode_t mode,
 		     int *opened)
 {
 	int err;
@@ -649,7 +649,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 		if (err < 0)
 			return err;
 
-		return ceph_lookup_open(dir, dentry, od, flags, mode, opened);
+		return ceph_lookup_open(dir, dentry, file, flags, mode, opened);
 	}
 
 	if (d_unhashed(dentry)) {
@@ -663,12 +663,12 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 
 	/* We don't deal with positive dentries here */
 	if (dentry->d_inode) {
-		finish_no_open(od, res);
+		finish_no_open(file, res);
 		return 1;
 	}
 
 	*opened |= FILE_CREATED;
-	err = ceph_lookup_open(dir, dentry, od, flags, mode, opened);
+	err = ceph_lookup_open(dir, dentry, file, flags, mode, opened);
 	dput(res);
 
 	return err;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index b8cc3ee5401..1b81d6c3187 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -214,12 +214,11 @@ out:
  * ceph_release gets called).  So fear not!
  */
 int ceph_lookup_open(struct inode *dir, struct dentry *dentry,
-		     struct opendata *od, unsigned flags, umode_t mode,
+		     struct file *file, unsigned flags, umode_t mode,
 		     int *opened)
 {
 	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
 	struct ceph_mds_client *mdsc = fsc->mdsc;
-	struct file *file = NULL;
 	struct ceph_mds_request *req;
 	struct dentry *ret;
 	int err;
@@ -248,9 +247,7 @@ int ceph_lookup_open(struct inode *dir, struct dentry *dentry,
 		err = ceph_handle_notrace_create(dir, dentry);
 	if (err)
 		goto out;
-	file = finish_open(od, req->r_dentry, ceph_open, opened);
-	if (IS_ERR(file))
-		err = PTR_ERR(file);
+	err = finish_open(file, req->r_dentry, ceph_open, opened);
 out:
 	ret = ceph_finish_lookup(req, dentry, err);
 	ceph_mdsc_put_request(req);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index f7e8e82ec47..f4d5522cb61 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -807,7 +807,7 @@ extern int ceph_copy_from_page_vector(struct page **pages,
 extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
 extern int ceph_open(struct inode *inode, struct file *file);
 extern int ceph_lookup_open(struct inode *dir, struct dentry *dentry,
-			     struct opendata *od, unsigned flags,
+			     struct file *od, unsigned flags,
 			     umode_t mode, int *opened);
 extern int ceph_release(struct inode *inode, struct file *filp);
 
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 58d9aca46a4..48bb474ce29 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -47,7 +47,7 @@ extern struct inode *cifs_root_iget(struct super_block *);
 extern int cifs_create(struct inode *, struct dentry *, umode_t,
 		       struct nameidata *);
 extern int cifs_atomic_open(struct inode *, struct dentry *,
-			    struct opendata *, unsigned, umode_t,
+			    struct file *, unsigned, umode_t,
 			    int *);
 extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
 				  struct nameidata *);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 8ca70b102b9..c00c192f17e 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -378,7 +378,7 @@ out:
 
 int
 cifs_atomic_open(struct inode *inode, struct dentry *direntry,
-		 struct opendata *od, unsigned oflags, umode_t mode,
+		 struct file *file, unsigned oflags, umode_t mode,
 		 int *opened)
 {
 	int rc;
@@ -405,7 +405,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
 		if (IS_ERR(res))
 			return PTR_ERR(res);
 
-		finish_no_open(od, res);
+		finish_no_open(file, res);
 		return 1;
 	}
 
@@ -431,9 +431,8 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
 	if (rc)
 		goto out;
 
-	filp = finish_open(od, direntry, generic_file_open, opened);
-	if (IS_ERR(filp)) {
-		rc = PTR_ERR(filp);
+	rc = finish_open(file, direntry, generic_file_open, opened);
+	if (rc) {
 		CIFSSMBClose(xid, tcon, fileHandle);
 		goto out;
 	}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 8a9ca09e87d..110db5425dc 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -370,7 +370,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
  * 'mknod' + 'open' requests.
  */
 static int fuse_create_open(struct inode *dir, struct dentry *entry,
-			    struct opendata *od, unsigned flags,
+			    struct file *file, unsigned flags,
 			    umode_t mode, int *opened)
 {
 	int err;
@@ -382,7 +382,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
 	struct fuse_open_out outopen;
 	struct fuse_entry_out outentry;
 	struct fuse_file *ff;
-	struct file *file;
 
 	forget = fuse_alloc_forget();
 	err = -ENOMEM;
@@ -450,14 +449,12 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
 	d_instantiate(entry, inode);
 	fuse_change_entry_timeout(entry, &outentry);
 	fuse_invalidate_attr(dir);
-	file = finish_open(od, entry, generic_file_open, opened);
-	if (IS_ERR(file)) {
-		err = PTR_ERR(file);
+	err = finish_open(file, entry, generic_file_open, opened);
+	if (err) {
 		fuse_sync_release(ff, flags);
 	} else {
 		file->private_data = fuse_file_get(ff);
 		fuse_finish_open(inode, file);
-		err = 0;
 	}
 	return err;
 
@@ -473,7 +470,7 @@ out_err:
 
 static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
 static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
-			    struct opendata *od, unsigned flags,
+			    struct file *file, unsigned flags,
 			    umode_t mode, int *opened)
 {
 	int err;
@@ -498,7 +495,7 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
 	if (fc->no_create)
 		goto mknod;
 
-	err = fuse_create_open(dir, entry, od, flags, mode, opened);
+	err = fuse_create_open(dir, entry, file, flags, mode, opened);
 	if (err == -ENOSYS) {
 		fc->no_create = 1;
 		goto mknod;
@@ -512,7 +509,7 @@ mknod:
 	if (err)
 		goto out_dput;
 no_open:
-	finish_no_open(od, res);
+	finish_no_open(file, res);
 	return 1;
 }
 
diff --git a/fs/internal.h b/fs/internal.h
index 09003a02292..8a9f5fa840f 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -82,9 +82,6 @@ extern struct super_block *user_get_super(dev_t);
 /*
  * open.c
  */
-struct opendata {
-	struct file *filp;
-};
 struct open_flags {
 	int open_flag;
 	umode_t mode;
diff --git a/fs/namei.c b/fs/namei.c
index af83ede92a4..aaff8a86215 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2194,7 +2194,7 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode)
 }
 
 static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
-				struct path *path, struct opendata *od,
+				struct path *path, struct file *file,
 				const struct open_flags *op,
 				bool *want_write, bool need_lookup,
 				int *opened)
@@ -2269,9 +2269,9 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
 	if (nd->flags & LOOKUP_DIRECTORY)
 		open_flag |= O_DIRECTORY;
 
-	od->filp->f_path.dentry = DENTRY_NOT_SET;
-	od->filp->f_path.mnt = nd->path.mnt;
-	error = dir->i_op->atomic_open(dir, dentry, od, open_flag, mode,
+	file->f_path.dentry = DENTRY_NOT_SET;
+	file->f_path.mnt = nd->path.mnt;
+	error = dir->i_op->atomic_open(dir, dentry, file, open_flag, mode,
 				      opened);
 	if (error < 0) {
 		if (create_error && error == -ENOENT)
@@ -2287,13 +2287,13 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
 	}
 
 	if (error) {	/* returned 1, that is */
-		if (WARN_ON(od->filp->f_path.dentry == DENTRY_NOT_SET)) {
+		if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) {
 			filp = ERR_PTR(-EIO);
 			goto out;
 		}
-		if (od->filp->f_path.dentry) {
+		if (file->f_path.dentry) {
 			dput(dentry);
-			dentry = od->filp->f_path.dentry;
+			dentry = file->f_path.dentry;
 		}
 		goto looked_up;
 	}
@@ -2302,7 +2302,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
 	 * We didn't have the inode before the open, so check open permission
 	 * here.
 	 */
-	filp = od->filp;
+	filp = file;
 	error = may_open(&filp->f_path, acc_mode, open_flag);
 	if (error) {
 		fput(filp);
@@ -2350,7 +2350,7 @@ looked_up:
  * was performed, only lookup.
  */
 static struct file *lookup_open(struct nameidata *nd, struct path *path,
-				struct opendata *od,
+				struct file *file,
 				const struct open_flags *op,
 				bool *want_write, int *opened)
 {
@@ -2370,7 +2370,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path,
 		goto out_no_open;
 
 	if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) {
-		return atomic_open(nd, dentry, path, od, op, want_write,
+		return atomic_open(nd, dentry, path, file, op, want_write,
 				   need_lookup, opened);
 	}
 
@@ -2420,7 +2420,7 @@ out_dput:
  * Handle the last step of open()
  */
 static struct file *do_last(struct nameidata *nd, struct path *path,
-			    struct opendata *od, const struct open_flags *op,
+			    struct file *file, const struct open_flags *op,
 			    int *opened, const char *pathname)
 {
 	struct dentry *dir = nd->path.dentry;
@@ -2497,7 +2497,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 
 retry_lookup:
 	mutex_lock(&dir->d_inode->i_mutex);
-	filp = lookup_open(nd, path, od, op, &want_write, opened);
+	filp = lookup_open(nd, path, file, op, &want_write, opened);
 	mutex_unlock(&dir->d_inode->i_mutex);
 
 	if (filp) {
@@ -2604,13 +2604,15 @@ finish_open_created:
 	error = may_open(&nd->path, acc_mode, open_flag);
 	if (error)
 		goto exit;
-	od->filp->f_path.mnt = nd->path.mnt;
-	filp = finish_open(od, nd->path.dentry, NULL, opened);
-	if (IS_ERR(filp)) {
-		if (filp == ERR_PTR(-EOPENSTALE))
+	file->f_path.mnt = nd->path.mnt;
+	error = finish_open(file, nd->path.dentry, NULL, opened);
+	if (error) {
+		filp = ERR_PTR(error);
+		if (error == -EOPENSTALE)
 			goto stale_open;
 		goto out;
 	}
+	filp = file;
 opened:
 	error = open_check_o_direct(filp);
 	if (error)
@@ -2663,17 +2665,17 @@ static struct file *path_openat(int dfd, const char *pathname,
 		struct nameidata *nd, const struct open_flags *op, int flags)
 {
 	struct file *base = NULL;
-	struct opendata od;
+	struct file *file;
 	struct file *res;
 	struct path path;
 	int opened = 0;
 	int error;
 
-	od.filp = get_empty_filp();
-	if (!od.filp)
+	file = get_empty_filp();
+	if (!file)
 		return ERR_PTR(-ENFILE);
 
-	od.filp->f_flags = op->open_flag;
+	file->f_flags = op->open_flag;
 
 	error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base);
 	if (unlikely(error))
@@ -2684,7 +2686,7 @@ static struct file *path_openat(int dfd, const char *pathname,
 	if (unlikely(error))
 		goto out_filp;
 
-	res = do_last(nd, &path, &od, op, &opened, pathname);
+	res = do_last(nd, &path, file, op, &opened, pathname);
 	while (unlikely(!res)) { /* trailing symlink */
 		struct path link = path;
 		void *cookie;
@@ -2699,7 +2701,7 @@ static struct file *path_openat(int dfd, const char *pathname,
 		error = follow_link(&link, nd, &cookie);
 		if (unlikely(error))
 			goto out_filp;
-		res = do_last(nd, &path, &od, op, &opened, pathname);
+		res = do_last(nd, &path, file, op, &opened, pathname);
 		put_link(nd, &link, cookie);
 	}
 out:
@@ -2708,7 +2710,7 @@ out:
 	if (base)
 		fput(base);
 	if (!(opened & FILE_OPENED))
-		put_filp(od.filp);
+		put_filp(file);
 	if (res == ERR_PTR(-EOPENSTALE)) {
 		if (flags & LOOKUP_RCU)
 			res = ERR_PTR(-ECHILD);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index b56f4b36ed4..dafc86c1c35 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -112,7 +112,7 @@ const struct inode_operations nfs3_dir_inode_operations = {
 #ifdef CONFIG_NFS_V4
 
 static int nfs_atomic_open(struct inode *, struct dentry *,
-			   struct opendata *, unsigned, umode_t,
+			   struct file *, unsigned, umode_t,
 			   int *);
 const struct inode_operations nfs4_dir_inode_operations = {
 	.create		= nfs_create,
@@ -1389,10 +1389,9 @@ static int do_open(struct inode *inode, struct file *filp)
 
 static int nfs_finish_open(struct nfs_open_context *ctx,
 			   struct dentry *dentry,
-			   struct opendata *od, unsigned open_flags,
+			   struct file *file, unsigned open_flags,
 			   int *opened)
 {
-	struct file *filp;
 	int err;
 
 	if (ctx->dentry != dentry) {
@@ -1407,13 +1406,10 @@ static int nfs_finish_open(struct nfs_open_context *ctx,
 			goto out;
 	}
 
-	filp = finish_open(od, dentry, do_open, opened);
-	if (IS_ERR(filp)) {
-		err = PTR_ERR(filp);
+	err = finish_open(file, dentry, do_open, opened);
+	if (err)
 		goto out;
-	}
-	nfs_file_set_open_context(filp, ctx);
-	err = 0;
+	nfs_file_set_open_context(file, ctx);
 
 out:
 	put_nfs_open_context(ctx);
@@ -1421,7 +1417,7 @@ out:
 }
 
 static int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
-			    struct opendata *od, unsigned open_flags,
+			    struct file *file, unsigned open_flags,
 			    umode_t mode, int *opened)
 {
 	struct nfs_open_context *ctx;
@@ -1497,7 +1493,7 @@ static int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 	nfs_unblock_sillyrename(dentry->d_parent);
 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 
-	err = nfs_finish_open(ctx, dentry, od, open_flags, opened);
+	err = nfs_finish_open(ctx, dentry, file, open_flags, opened);
 
 	dput(res);
 out:
@@ -1509,7 +1505,7 @@ no_open:
 	if (IS_ERR(res))
 		goto out;
 
-	finish_no_open(od, res);
+	finish_no_open(file, res);
 	return 1;
 }
 
diff --git a/fs/open.c b/fs/open.c
index 2b1654d8bfb..fc829d6c3a4 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -781,21 +781,23 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
  * If the open callback is set to NULL, then the standard f_op->open()
  * filesystem callback is substituted.
  */
-struct file *finish_open(struct opendata *od, struct dentry *dentry,
-			 int (*open)(struct inode *, struct file *),
-			 int *opened)
+int finish_open(struct file *file, struct dentry *dentry,
+		int (*open)(struct inode *, struct file *),
+		int *opened)
 {
 	struct file *res;
 	BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
 
-	mntget(od->filp->f_path.mnt);
+	mntget(file->f_path.mnt);
 	dget(dentry);
 
-	res = do_dentry_open(dentry, od->filp->f_path.mnt, od->filp, open, current_cred());
-	if (!IS_ERR(res))
+	res = do_dentry_open(dentry, file->f_path.mnt, file, open, current_cred());
+	if (!IS_ERR(res)) {
 		*opened |= FILE_OPENED;
+		return 0;
+	}
 
-	return res;
+	return PTR_ERR(res);
 }
 EXPORT_SYMBOL(finish_open);
 
@@ -808,9 +810,9 @@ EXPORT_SYMBOL(finish_open);
  * This can be used to set the result of a successful lookup in ->atomic_open().
  * The filesystem's atomic_open() method shall return NULL after calling this.
  */
-void finish_no_open(struct opendata *od, struct dentry *dentry)
+void finish_no_open(struct file *file, struct dentry *dentry)
 {
-	od->filp->f_path.dentry = dentry;
+	file->f_path.dentry = dentry;
 }
 EXPORT_SYMBOL(finish_no_open);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 33bda922988..1dcc75c9576 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -427,7 +427,6 @@ struct kstatfs;
 struct vm_area_struct;
 struct vfsmount;
 struct cred;
-struct opendata;
 
 extern void __init inode_init(void);
 extern void __init inode_init_early(void);
@@ -1695,7 +1694,7 @@ struct inode_operations {
 		      u64 len);
 	int (*update_time)(struct inode *, struct timespec *, int);
 	int (*atomic_open)(struct inode *, struct dentry *,
-			   struct opendata *, unsigned open_flag,
+			   struct file *, unsigned open_flag,
 			   umode_t create_mode, int *opened);
 } ____cacheline_aligned;
 
@@ -2069,10 +2068,10 @@ enum {
 	FILE_CREATED = 1,
 	FILE_OPENED = 2
 };
-extern struct file *finish_open(struct opendata *od, struct dentry *dentry,
-				int (*open)(struct inode *, struct file *),
-				int *opened);
-extern void finish_no_open(struct opendata *od, struct dentry *dentry);
+extern int finish_open(struct file *file, struct dentry *dentry,
+			int (*open)(struct inode *, struct file *),
+			int *opened);
+extern void finish_no_open(struct file *file, struct dentry *dentry);
 
 /* fs/ioctl.c */
 
-- 
cgit v1.2.3-70-g09d2


From 2675a4eb6a9f1240098721c8a84ede28abd9d7b3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 22 Jun 2012 12:41:10 +0400
Subject: fs/namei.c: get do_last() and friends return int

Same conventions as for ->atomic_open().  Trimmed the
forest of labels a bit, while we are at it...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 150 +++++++++++++++++++++++++++++--------------------------------
 1 file changed, 70 insertions(+), 80 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index aaff8a86215..16256d915cb 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2193,18 +2193,17 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode)
 	return security_inode_create(dir->dentry->d_inode, dentry, mode);
 }
 
-static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
-				struct path *path, struct file *file,
-				const struct open_flags *op,
-				bool *want_write, bool need_lookup,
-				int *opened)
+static int atomic_open(struct nameidata *nd, struct dentry *dentry,
+			struct path *path, struct file *file,
+			const struct open_flags *op,
+			bool *want_write, bool need_lookup,
+			int *opened)
 {
 	struct inode *dir =  nd->path.dentry->d_inode;
 	unsigned open_flag = open_to_namei_flags(op->open_flag);
 	umode_t mode;
 	int error;
 	int acc_mode;
-	struct file *filp = NULL;
 	int create_error = 0;
 	struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
 
@@ -2212,7 +2211,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
 
 	/* Don't create child dentry for a dead directory. */
 	if (unlikely(IS_DEADDIR(dir))) {
-		filp = ERR_PTR(-ENOENT);
+		error = -ENOENT;
 		goto out;
 	}
 
@@ -2276,7 +2275,6 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
 	if (error < 0) {
 		if (create_error && error == -ENOENT)
 			error = create_error;
-		filp = ERR_PTR(error);
 		goto out;
 	}
 
@@ -2288,7 +2286,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
 
 	if (error) {	/* returned 1, that is */
 		if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) {
-			filp = ERR_PTR(-EIO);
+			error = -EIO;
 			goto out;
 		}
 		if (file->f_path.dentry) {
@@ -2302,27 +2300,24 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
 	 * We didn't have the inode before the open, so check open permission
 	 * here.
 	 */
-	filp = file;
-	error = may_open(&filp->f_path, acc_mode, open_flag);
-	if (error) {
-		fput(filp);
-		filp = ERR_PTR(error);
-	}
+	error = may_open(&file->f_path, acc_mode, open_flag);
+	if (error)
+		fput(file);
 
 out:
 	dput(dentry);
-	return filp;
+	return error;
 
 no_open:
 	if (need_lookup) {
 		dentry = lookup_real(dir, dentry, nd);
 		if (IS_ERR(dentry))
-			return ERR_CAST(dentry);
+			return PTR_ERR(dentry);
 
 		if (create_error) {
 			int open_flag = op->open_flag;
 
-			filp = ERR_PTR(create_error);
+			error = create_error;
 			if ((open_flag & O_EXCL)) {
 				if (!dentry->d_inode)
 					goto out;
@@ -2338,7 +2333,7 @@ no_open:
 looked_up:
 	path->dentry = dentry;
 	path->mnt = nd->path.mnt;
-	return NULL;
+	return 1;
 }
 
 /*
@@ -2349,10 +2344,10 @@ looked_up:
  * Returns open file or NULL on success, error otherwise.  NULL means no open
  * was performed, only lookup.
  */
-static struct file *lookup_open(struct nameidata *nd, struct path *path,
-				struct file *file,
-				const struct open_flags *op,
-				bool *want_write, int *opened)
+static int lookup_open(struct nameidata *nd, struct path *path,
+			struct file *file,
+			const struct open_flags *op,
+			bool *want_write, int *opened)
 {
 	struct dentry *dir = nd->path.dentry;
 	struct inode *dir_inode = dir->d_inode;
@@ -2363,7 +2358,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path,
 	*opened &= ~FILE_CREATED;
 	dentry = lookup_dcache(&nd->last, dir, nd, &need_lookup);
 	if (IS_ERR(dentry))
-		return ERR_CAST(dentry);
+		return PTR_ERR(dentry);
 
 	/* Cached positive dentry: will open in f_op->open */
 	if (!need_lookup && dentry->d_inode)
@@ -2379,7 +2374,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path,
 
 		dentry = lookup_real(dir_inode, dentry, nd);
 		if (IS_ERR(dentry))
-			return ERR_CAST(dentry);
+			return PTR_ERR(dentry);
 	}
 
 	/* Negative dentry, just create the file */
@@ -2409,26 +2404,25 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path,
 out_no_open:
 	path->dentry = dentry;
 	path->mnt = nd->path.mnt;
-	return NULL;
+	return 1;
 
 out_dput:
 	dput(dentry);
-	return ERR_PTR(error);
+	return error;
 }
 
 /*
  * Handle the last step of open()
  */
-static struct file *do_last(struct nameidata *nd, struct path *path,
-			    struct file *file, const struct open_flags *op,
-			    int *opened, const char *pathname)
+static int do_last(struct nameidata *nd, struct path *path,
+		   struct file *file, const struct open_flags *op,
+		   int *opened, const char *pathname)
 {
 	struct dentry *dir = nd->path.dentry;
 	int open_flag = op->open_flag;
 	bool will_truncate = (open_flag & O_TRUNC) != 0;
 	bool want_write = false;
 	int acc_mode = op->acc_mode;
-	struct file *filp;
 	struct inode *inode;
 	bool symlink_ok = false;
 	struct path save_parent = { .dentry = NULL, .mnt = NULL };
@@ -2443,22 +2437,22 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 	case LAST_DOT:
 		error = handle_dots(nd, nd->last_type);
 		if (error)
-			return ERR_PTR(error);
+			return error;
 		/* fallthrough */
 	case LAST_ROOT:
 		error = complete_walk(nd);
 		if (error)
-			return ERR_PTR(error);
+			return error;
 		audit_inode(pathname, nd->path.dentry);
 		if (open_flag & O_CREAT) {
 			error = -EISDIR;
-			goto exit;
+			goto out;
 		}
 		goto finish_open;
 	case LAST_BIND:
 		error = complete_walk(nd);
 		if (error)
-			return ERR_PTR(error);
+			return error;
 		audit_inode(pathname, dir);
 		goto finish_open;
 	}
@@ -2474,7 +2468,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 			goto finish_lookup;
 
 		if (error < 0)
-			goto exit;
+			goto out;
 
 		BUG_ON(nd->inode != dir->d_inode);
 	} else {
@@ -2486,29 +2480,29 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 		 */
 		error = complete_walk(nd);
 		if (error)
-			return ERR_PTR(error);
+			return error;
 
 		audit_inode(pathname, dir);
 		error = -EISDIR;
 		/* trailing slashes? */
 		if (nd->last.name[nd->last.len])
-			goto exit;
+			goto out;
 	}
 
 retry_lookup:
 	mutex_lock(&dir->d_inode->i_mutex);
-	filp = lookup_open(nd, path, file, op, &want_write, opened);
+	error = lookup_open(nd, path, file, op, &want_write, opened);
 	mutex_unlock(&dir->d_inode->i_mutex);
 
-	if (filp) {
-		if (IS_ERR(filp))
+	if (error <= 0) {
+		if (error)
 			goto out;
 
 		if ((*opened & FILE_CREATED) ||
-		    !S_ISREG(filp->f_path.dentry->d_inode->i_mode))
+		    !S_ISREG(file->f_path.dentry->d_inode->i_mode))
 			will_truncate = false;
 
-		audit_inode(pathname, filp->f_path.dentry);
+		audit_inode(pathname, file->f_path.dentry);
 		goto opened;
 	}
 
@@ -2554,18 +2548,18 @@ finish_lookup:
 	error = -ENOENT;
 	if (!inode) {
 		path_to_nameidata(path, nd);
-		goto exit;
+		goto out;
 	}
 
 	if (should_follow_link(inode, !symlink_ok)) {
 		if (nd->flags & LOOKUP_RCU) {
 			if (unlikely(unlazy_walk(nd, path->dentry))) {
 				error = -ECHILD;
-				goto exit;
+				goto out;
 			}
 		}
 		BUG_ON(inode != path->dentry->d_inode);
-		return NULL;
+		return 1;
 	}
 
 	if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path->mnt) {
@@ -2581,14 +2575,14 @@ finish_lookup:
 	error = complete_walk(nd);
 	if (error) {
 		path_put(&save_parent);
-		return ERR_PTR(error);
+		return error;
 	}
 	error = -EISDIR;
 	if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode))
-		goto exit;
+		goto out;
 	error = -ENOTDIR;
 	if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup)
-		goto exit;
+		goto out;
 	audit_inode(pathname, nd->path.dentry);
 finish_open:
 	if (!S_ISREG(nd->inode->i_mode))
@@ -2597,32 +2591,30 @@ finish_open:
 	if (will_truncate) {
 		error = mnt_want_write(nd->path.mnt);
 		if (error)
-			goto exit;
+			goto out;
 		want_write = true;
 	}
 finish_open_created:
 	error = may_open(&nd->path, acc_mode, open_flag);
 	if (error)
-		goto exit;
+		goto out;
 	file->f_path.mnt = nd->path.mnt;
 	error = finish_open(file, nd->path.dentry, NULL, opened);
 	if (error) {
-		filp = ERR_PTR(error);
 		if (error == -EOPENSTALE)
 			goto stale_open;
 		goto out;
 	}
-	filp = file;
 opened:
-	error = open_check_o_direct(filp);
+	error = open_check_o_direct(file);
 	if (error)
 		goto exit_fput;
-	error = ima_file_check(filp, op->acc_mode);
+	error = ima_file_check(file, op->acc_mode);
 	if (error)
 		goto exit_fput;
 
 	if (will_truncate) {
-		error = handle_truncate(filp);
+		error = handle_truncate(file);
 		if (error)
 			goto exit_fput;
 	}
@@ -2631,16 +2623,14 @@ out:
 		mnt_drop_write(nd->path.mnt);
 	path_put(&save_parent);
 	terminate_walk(nd);
-	return filp;
+	return error;
 
 exit_dput:
 	path_put_conditional(path, nd);
-exit:
-	filp = ERR_PTR(error);
 	goto out;
 exit_fput:
-	fput(filp);
-	goto exit;
+	fput(file);
+	goto out;
 
 stale_open:
 	/* If no saved parent or already retried then can't retry */
@@ -2666,7 +2656,6 @@ static struct file *path_openat(int dfd, const char *pathname,
 {
 	struct file *base = NULL;
 	struct file *file;
-	struct file *res;
 	struct path path;
 	int opened = 0;
 	int error;
@@ -2679,29 +2668,29 @@ static struct file *path_openat(int dfd, const char *pathname,
 
 	error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base);
 	if (unlikely(error))
-		goto out_filp;
+		goto out;
 
 	current->total_link_count = 0;
 	error = link_path_walk(pathname, nd);
 	if (unlikely(error))
-		goto out_filp;
+		goto out;
 
-	res = do_last(nd, &path, file, op, &opened, pathname);
-	while (unlikely(!res)) { /* trailing symlink */
+	error = do_last(nd, &path, file, op, &opened, pathname);
+	while (unlikely(error > 0)) { /* trailing symlink */
 		struct path link = path;
 		void *cookie;
 		if (!(nd->flags & LOOKUP_FOLLOW)) {
 			path_put_conditional(&path, nd);
 			path_put(&nd->path);
-			res = ERR_PTR(-ELOOP);
+			error = -ELOOP;
 			break;
 		}
 		nd->flags |= LOOKUP_PARENT;
 		nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
 		error = follow_link(&link, nd, &cookie);
 		if (unlikely(error))
-			goto out_filp;
-		res = do_last(nd, &path, file, op, &opened, pathname);
+			break;
+		error = do_last(nd, &path, file, op, &opened, pathname);
 		put_link(nd, &link, cookie);
 	}
 out:
@@ -2709,19 +2698,20 @@ out:
 		path_put(&nd->root);
 	if (base)
 		fput(base);
-	if (!(opened & FILE_OPENED))
+	if (!(opened & FILE_OPENED)) {
+		BUG_ON(!error);
 		put_filp(file);
-	if (res == ERR_PTR(-EOPENSTALE)) {
-		if (flags & LOOKUP_RCU)
-			res = ERR_PTR(-ECHILD);
-		else
-			res = ERR_PTR(-ESTALE);
 	}
-	return res;
-
-out_filp:
-	res = ERR_PTR(error);
-	goto out;
+	if (unlikely(error)) {
+		if (error == -EOPENSTALE) {
+			if (flags & LOOKUP_RCU)
+				error = -ECHILD;
+			else
+				error = -ESTALE;
+		}
+		file = ERR_PTR(error);
+	}
+	return file;
 }
 
 struct file *do_filp_open(int dfd, const char *pathname,
-- 
cgit v1.2.3-70-g09d2


From e45198a6ac24bd2c4ad4a43b670c2f1a23dd2df3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 10 Jun 2012 06:48:09 -0400
Subject: make finish_no_open() return int

namely, 1 ;-)  That's what we want to return from ->atomic_open()
instances after finish_no_open().

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/9p/vfs_inode.c      | 6 ++----
 fs/9p/vfs_inode_dotl.c | 6 ++----
 fs/ceph/dir.c          | 6 ++----
 fs/cifs/dir.c          | 3 +--
 fs/fuse/dir.c          | 3 +--
 fs/nfs/dir.c           | 3 +--
 fs/open.c              | 3 ++-
 include/linux/fs.h     | 2 +-
 8 files changed, 12 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 2b05651e0c3..eae476fb401 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -878,10 +878,8 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
 	}
 
 	/* Only creates */
-	if (!(flags & O_CREAT) || dentry->d_inode) {
-		finish_no_open(file, res);
-		return 1;
-	}
+	if (!(flags & O_CREAT) || dentry->d_inode)
+		return finish_no_open(file, res);
 
 	err = 0;
 	fid = NULL;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index cfaebdef974..1ee10c89df9 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -268,10 +268,8 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
 	}
 
 	/* Only creates */
-	if (!(flags & O_CREAT) || dentry->d_inode) {
-		finish_no_open(file, res);
-		return 1;
-	}
+	if (!(flags & O_CREAT) || dentry->d_inode)
+		return finish_no_open(file, res);
 
 	v9ses = v9fs_inode2v9ses(dir);
 
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 80c848e0539..d42eee1c5de 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -662,10 +662,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 	}
 
 	/* We don't deal with positive dentries here */
-	if (dentry->d_inode) {
-		finish_no_open(file, res);
-		return 1;
-	}
+	if (dentry->d_inode)
+		return finish_no_open(file, res);
 
 	*opened |= FILE_CREATED;
 	err = ceph_lookup_open(dir, dentry, file, flags, mode, opened);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index c00c192f17e..e8c53c80dbd 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -405,8 +405,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
 		if (IS_ERR(res))
 			return PTR_ERR(res);
 
-		finish_no_open(file, res);
-		return 1;
+		return finish_no_open(file, res);
 	}
 
 	rc = check_name(direntry);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 110db5425dc..ccdab3ac422 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -509,8 +509,7 @@ mknod:
 	if (err)
 		goto out_dput;
 no_open:
-	finish_no_open(file, res);
-	return 1;
+	return finish_no_open(file, res);
 }
 
 /*
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index dafc86c1c35..f167c7a1d67 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1505,8 +1505,7 @@ no_open:
 	if (IS_ERR(res))
 		goto out;
 
-	finish_no_open(file, res);
-	return 1;
+	return finish_no_open(file, res);
 }
 
 static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
diff --git a/fs/open.c b/fs/open.c
index fc829d6c3a4..d51c1b71b06 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -810,9 +810,10 @@ EXPORT_SYMBOL(finish_open);
  * This can be used to set the result of a successful lookup in ->atomic_open().
  * The filesystem's atomic_open() method shall return NULL after calling this.
  */
-void finish_no_open(struct file *file, struct dentry *dentry)
+int finish_no_open(struct file *file, struct dentry *dentry)
 {
 	file->f_path.dentry = dentry;
+	return 1;
 }
 EXPORT_SYMBOL(finish_no_open);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1dcc75c9576..17ee20dba86 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2071,7 +2071,7 @@ enum {
 extern int finish_open(struct file *file, struct dentry *dentry,
 			int (*open)(struct inode *, struct file *),
 			int *opened);
-extern void finish_no_open(struct file *file, struct dentry *dentry);
+extern int finish_no_open(struct file *file, struct dentry *dentry);
 
 /* fs/ioctl.c */
 
-- 
cgit v1.2.3-70-g09d2


From 96b7e579addd3cdc806c1667bf5b6b126070827c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 10 Jun 2012 14:22:04 -0400
Subject: switch do_dentry_open() to returning int

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/open.c | 40 ++++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/open.c b/fs/open.c
index d51c1b71b06..1241c597d31 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -667,10 +667,10 @@ int open_check_o_direct(struct file *f)
 	return 0;
 }
 
-static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt,
-				   struct file *f,
-				   int (*open)(struct inode *, struct file *),
-				   const struct cred *cred)
+static int do_dentry_open(struct dentry *dentry, struct vfsmount *mnt,
+			  struct file *f,
+			  int (*open)(struct inode *, struct file *),
+			  const struct cred *cred)
 {
 	static const struct file_operations empty_fops = {};
 	struct inode *inode;
@@ -699,7 +699,7 @@ static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt,
 
 	if (unlikely(f->f_mode & FMODE_PATH)) {
 		f->f_op = &empty_fops;
-		return f;
+		return 0;
 	}
 
 	f->f_op = fops_get(inode->i_fop);
@@ -726,7 +726,7 @@ static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt,
 
 	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
 
-	return f;
+	return 0;
 
 cleanup_all:
 	fops_put(f->f_op);
@@ -749,7 +749,7 @@ cleanup_all:
 cleanup_file:
 	dput(dentry);
 	mntput(mnt);
-	return ERR_PTR(error);
+	return error;
 }
 
 static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
@@ -757,17 +757,19 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
 				int (*open)(struct inode *, struct file *),
 				const struct cred *cred)
 {
-	struct file *res = do_dentry_open(dentry, mnt, f, open, cred);
-	if (!IS_ERR(res)) {
-		int error = open_check_o_direct(f);
+	int error;
+	error = do_dentry_open(dentry, mnt, f, open, cred);
+	if (!error) {
+		error = open_check_o_direct(f);
 		if (error) {
-			fput(res);
-			res = ERR_PTR(error);
+			fput(f);
+			f = ERR_PTR(error);
 		}
-	} else {
+	} else { 
 		put_filp(f);
+		f = ERR_PTR(error);
 	}
-	return res;
+	return f;
 }
 
 /**
@@ -785,19 +787,17 @@ int finish_open(struct file *file, struct dentry *dentry,
 		int (*open)(struct inode *, struct file *),
 		int *opened)
 {
-	struct file *res;
+	int error;
 	BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
 
 	mntget(file->f_path.mnt);
 	dget(dentry);
 
-	res = do_dentry_open(dentry, file->f_path.mnt, file, open, current_cred());
-	if (!IS_ERR(res)) {
+	error = do_dentry_open(dentry, file->f_path.mnt, file, open, current_cred());
+	if (!error)
 		*opened |= FILE_OPENED;
-		return 0;
-	}
 
-	return PTR_ERR(res);
+	return error;
 }
 EXPORT_SYMBOL(finish_open);
 
-- 
cgit v1.2.3-70-g09d2


From 2a027e7a1873812240cbdac0f55c4734ff0042a5 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 10 Jun 2012 14:24:38 -0400
Subject: fold __dentry_open() into its sole caller

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/open.c | 33 ++++++++++++---------------------
 1 file changed, 12 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/open.c b/fs/open.c
index 1241c597d31..28fbacbd5e3 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -752,26 +752,6 @@ cleanup_file:
 	return error;
 }
 
-static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
-				struct file *f,
-				int (*open)(struct inode *, struct file *),
-				const struct cred *cred)
-{
-	int error;
-	error = do_dentry_open(dentry, mnt, f, open, cred);
-	if (!error) {
-		error = open_check_o_direct(f);
-		if (error) {
-			fput(f);
-			f = ERR_PTR(error);
-		}
-	} else { 
-		put_filp(f);
-		f = ERR_PTR(error);
-	}
-	return f;
-}
-
 /**
  * finish_open - finish opening a file
  * @od: opaque open data
@@ -841,7 +821,18 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
 	}
 
 	f->f_flags = flags;
-	return __dentry_open(dentry, mnt, f, NULL, cred);
+	error = do_dentry_open(dentry, mnt, f, NULL, cred);
+	if (!error) {
+		error = open_check_o_direct(f);
+		if (error) {
+			fput(f);
+			f = ERR_PTR(error);
+		}
+	} else { 
+		put_filp(f);
+		f = ERR_PTR(error);
+	}
+	return f;
 }
 EXPORT_SYMBOL(dentry_open);
 
-- 
cgit v1.2.3-70-g09d2


From 02e5180d991f203441687cecd0b7e6a2ba0a34d3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 10 Jun 2012 14:32:45 -0400
Subject: do_dentry_open(): take initialization of file->f_path to caller

... and get rid of a couple of arguments and a pointless reassignment
in finish_open() case.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/open.c | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/open.c b/fs/open.c
index 28fbacbd5e3..124ccb1d38a 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -667,8 +667,7 @@ int open_check_o_direct(struct file *f)
 	return 0;
 }
 
-static int do_dentry_open(struct dentry *dentry, struct vfsmount *mnt,
-			  struct file *f,
+static int do_dentry_open(struct file *f,
 			  int (*open)(struct inode *, struct file *),
 			  const struct cred *cred)
 {
@@ -682,9 +681,9 @@ static int do_dentry_open(struct dentry *dentry, struct vfsmount *mnt,
 	if (unlikely(f->f_flags & O_PATH))
 		f->f_mode = FMODE_PATH;
 
-	inode = dentry->d_inode;
+	inode = f->f_path.dentry->d_inode;
 	if (f->f_mode & FMODE_WRITE) {
-		error = __get_file_write_access(inode, mnt);
+		error = __get_file_write_access(inode, f->f_path.mnt);
 		if (error)
 			goto cleanup_file;
 		if (!special_file(inode->i_mode))
@@ -692,8 +691,6 @@ static int do_dentry_open(struct dentry *dentry, struct vfsmount *mnt,
 	}
 
 	f->f_mapping = inode->i_mapping;
-	f->f_path.dentry = dentry;
-	f->f_path.mnt = mnt;
 	f->f_pos = 0;
 	file_sb_list_add(f, inode->i_sb);
 
@@ -740,15 +737,14 @@ cleanup_all:
 			 * here, so just reset the state.
 			 */
 			file_reset_write(f);
-			mnt_drop_write(mnt);
+			mnt_drop_write(f->f_path.mnt);
 		}
 	}
 	file_sb_list_del(f);
-	f->f_path.dentry = NULL;
-	f->f_path.mnt = NULL;
 cleanup_file:
-	dput(dentry);
-	mntput(mnt);
+	path_put(&f->f_path);
+	f->f_path.mnt = NULL;
+	f->f_path.dentry = NULL;
 	return error;
 }
 
@@ -771,9 +767,9 @@ int finish_open(struct file *file, struct dentry *dentry,
 	BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
 
 	mntget(file->f_path.mnt);
-	dget(dentry);
+	file->f_path.dentry = dget(dentry);
 
-	error = do_dentry_open(dentry, file->f_path.mnt, file, open, current_cred());
+	error = do_dentry_open(file, open, current_cred());
 	if (!error)
 		*opened |= FILE_OPENED;
 
@@ -821,7 +817,9 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
 	}
 
 	f->f_flags = flags;
-	error = do_dentry_open(dentry, mnt, f, NULL, cred);
+	f->f_path.mnt = mnt;
+	f->f_path.dentry = dentry;
+	error = do_dentry_open(f, NULL, cred);
 	if (!error) {
 		error = open_check_o_direct(f);
 		if (error) {
-- 
cgit v1.2.3-70-g09d2


From 93420b40bb19433c3bc01c37c6c908ae7ce13228 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 10 Jun 2012 15:18:15 -0400
Subject: switch nfs_lookup_check_intent() away from nameidata

just pass the flags

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfs/dir.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index f167c7a1d67..48485f1f0bd 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1037,10 +1037,10 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
  * component of the path and none of them is set before that last
  * component.
  */
-static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd,
+static inline unsigned int nfs_lookup_check_intent(unsigned int flags,
 						unsigned int mask)
 {
-	return nd->flags & mask;
+	return flags & mask;
 }
 
 /*
@@ -1051,7 +1051,7 @@ static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
 {
 	if (NFS_PROTO(dir)->version == 2)
 		return 0;
-	return nd && nfs_lookup_check_intent(nd, LOOKUP_EXCL);
+	return nd && nfs_lookup_check_intent(nd->flags, LOOKUP_EXCL);
 }
 
 /*
@@ -1074,7 +1074,7 @@ int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd)
 		if (nd->flags & LOOKUP_REVAL)
 			goto out_force;
 		/* This is an open(2) */
-		if (nfs_lookup_check_intent(nd, LOOKUP_OPEN) != 0 &&
+		if (nfs_lookup_check_intent(nd->flags, LOOKUP_OPEN) != 0 &&
 				!(server->flags & NFS_MOUNT_NOCTO) &&
 				(S_ISREG(inode->i_mode) ||
 				 S_ISDIR(inode->i_mode)))
@@ -1098,7 +1098,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
 		       struct nameidata *nd)
 {
 	/* Don't revalidate a negative dentry if we're creating a new file */
-	if (nd != NULL && nfs_lookup_check_intent(nd, LOOKUP_CREATE) != 0)
+	if (nd != NULL && nfs_lookup_check_intent(nd->flags, LOOKUP_CREATE) != 0)
 		return 0;
 	if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
 		return 1;
-- 
cgit v1.2.3-70-g09d2


From facc3530fb5c89a40bc83045422add392b8db4a1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 10 Jun 2012 15:33:51 -0400
Subject: nfs_lookup_verify_inode() - nd is *always* non-NULL here

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfs/dir.c | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 48485f1f0bd..ad5aef4995a 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1069,19 +1069,16 @@ int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd)
 
 	if (IS_AUTOMOUNT(inode))
 		return 0;
-	if (nd != NULL) {
-		/* VFS wants an on-the-wire revalidation */
-		if (nd->flags & LOOKUP_REVAL)
-			goto out_force;
-		/* This is an open(2) */
-		if (nfs_lookup_check_intent(nd->flags, LOOKUP_OPEN) != 0 &&
-				!(server->flags & NFS_MOUNT_NOCTO) &&
-				(S_ISREG(inode->i_mode) ||
-				 S_ISDIR(inode->i_mode)))
-			goto out_force;
-		return 0;
-	}
-	return nfs_revalidate_inode(server, inode);
+	/* VFS wants an on-the-wire revalidation */
+	if (nd->flags & LOOKUP_REVAL)
+		goto out_force;
+	/* This is an open(2) */
+	if (nfs_lookup_check_intent(nd->flags, LOOKUP_OPEN) != 0 &&
+			!(server->flags & NFS_MOUNT_NOCTO) &&
+			(S_ISREG(inode->i_mode) ||
+			 S_ISDIR(inode->i_mode)))
+		goto out_force;
+	return 0;
 out_force:
 	return __nfs_revalidate_inode(server, inode);
 }
-- 
cgit v1.2.3-70-g09d2


From fa3c56bbda6c2ac2a57d96ba501dbe85cccd312b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 10 Jun 2012 15:36:40 -0400
Subject: fs/nfs/dir.c: switch to passing nd->flags instead of nd wherever
 possible

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfs/dir.c | 51 +++++++++++++++++++--------------------------------
 1 file changed, 19 insertions(+), 32 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ad5aef4995a..71a199435ca 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1030,28 +1030,15 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
 	return 1;
 }
 
-/*
- * Return the intent data that applies to this particular path component
- *
- * Note that the current set of intents only apply to the very last
- * component of the path and none of them is set before that last
- * component.
- */
-static inline unsigned int nfs_lookup_check_intent(unsigned int flags,
-						unsigned int mask)
-{
-	return flags & mask;
-}
-
 /*
  * Use intent information to check whether or not we're going to do
  * an O_EXCL create using this path component.
  */
-static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
+static int nfs_is_exclusive_create(struct inode *dir, unsigned int flags)
 {
 	if (NFS_PROTO(dir)->version == 2)
 		return 0;
-	return nd && nfs_lookup_check_intent(nd->flags, LOOKUP_EXCL);
+	return flags & LOOKUP_EXCL;
 }
 
 /*
@@ -1063,20 +1050,18 @@ static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
  *
  */
 static inline
-int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd)
+int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
 
 	if (IS_AUTOMOUNT(inode))
 		return 0;
 	/* VFS wants an on-the-wire revalidation */
-	if (nd->flags & LOOKUP_REVAL)
+	if (flags & LOOKUP_REVAL)
 		goto out_force;
 	/* This is an open(2) */
-	if (nfs_lookup_check_intent(nd->flags, LOOKUP_OPEN) != 0 &&
-			!(server->flags & NFS_MOUNT_NOCTO) &&
-			(S_ISREG(inode->i_mode) ||
-			 S_ISDIR(inode->i_mode)))
+	if ((flags & LOOKUP_OPEN) && !(server->flags & NFS_MOUNT_NOCTO) &&
+	    (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
 		goto out_force;
 	return 0;
 out_force:
@@ -1092,10 +1077,10 @@ out_force:
  */
 static inline
 int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
-		       struct nameidata *nd)
+		       unsigned int flags)
 {
 	/* Don't revalidate a negative dentry if we're creating a new file */
-	if (nd != NULL && nfs_lookup_check_intent(nd->flags, LOOKUP_CREATE) != 0)
+	if (flags & LOOKUP_CREATE)
 		return 0;
 	if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
 		return 1;
@@ -1115,6 +1100,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
  */
 static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
+	unsigned int flags = nd->flags;
 	struct inode *dir;
 	struct inode *inode;
 	struct dentry *parent;
@@ -1122,7 +1108,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
 	struct nfs_fattr *fattr = NULL;
 	int error;
 
-	if (nd->flags & LOOKUP_RCU)
+	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 
 	parent = dget_parent(dentry);
@@ -1131,7 +1117,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
 	inode = dentry->d_inode;
 
 	if (!inode) {
-		if (nfs_neg_need_reval(dir, dentry, nd))
+		if (nfs_neg_need_reval(dir, dentry, flags))
 			goto out_bad;
 		goto out_valid_noent;
 	}
@@ -1147,8 +1133,8 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
 		goto out_set_verifier;
 
 	/* Force a full look up iff the parent directory has changed */
-	if (!nfs_is_exclusive_create(dir, nd) && nfs_check_verifier(dir, dentry)) {
-		if (nfs_lookup_verify_inode(inode, nd))
+	if (!nfs_is_exclusive_create(dir, flags) && nfs_check_verifier(dir, dentry)) {
+		if (nfs_lookup_verify_inode(inode, flags))
 			goto out_zap_parent;
 		goto out_valid;
 	}
@@ -1306,7 +1292,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 	 * If we're doing an exclusive create, optimize away the lookup
 	 * but don't hash the dentry.
 	 */
-	if (nfs_is_exclusive_create(dir, nd)) {
+	if (nd && nfs_is_exclusive_create(dir, nd->flags)) {
 		d_instantiate(dentry, NULL);
 		res = NULL;
 		goto out;
@@ -1507,15 +1493,16 @@ no_open:
 
 static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
+	unsigned int flags = nd->flags;
 	struct dentry *parent = NULL;
 	struct inode *inode;
 	struct inode *dir;
 	int ret = 0;
 
-	if (nd->flags & LOOKUP_RCU)
+	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 
-	if (!(nd->flags & LOOKUP_OPEN) || (nd->flags & LOOKUP_DIRECTORY))
+	if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
 		goto no_open;
 	if (d_mountpoint(dentry))
 		goto no_open;
@@ -1528,7 +1515,7 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
 	 * optimize away revalidation of negative dentries.
 	 */
 	if (inode == NULL) {
-		if (!nfs_neg_need_reval(dir, dentry, nd))
+		if (!nfs_neg_need_reval(dir, dentry, flags))
 			ret = 1;
 		goto out;
 	}
@@ -1537,7 +1524,7 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
 	if (!S_ISREG(inode->i_mode))
 		goto no_open_dput;
 	/* We cannot do exclusive creation on a positive dentry */
-	if (nd && nd->flags & LOOKUP_EXCL)
+	if (flags & LOOKUP_EXCL)
 		goto no_open_dput;
 
 	/* Let f_op->open() actually open (and revalidate) the file */
-- 
cgit v1.2.3-70-g09d2


From 0b728e1911cbe6e24020727c3870628b9653f32a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 10 Jun 2012 16:03:43 -0400
Subject: stop passing nameidata * to ->d_revalidate()

Just the lookup flags.  Die, bastard, die...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/Locking |  2 +-
 Documentation/filesystems/porting |  5 +++++
 Documentation/filesystems/vfs.txt |  8 ++++----
 fs/9p/vfs_dentry.c                |  4 ++--
 fs/afs/dir.c                      |  6 +++---
 fs/ceph/dir.c                     |  6 +++---
 fs/cifs/dir.c                     |  8 ++++----
 fs/coda/dir.c                     |  6 +++---
 fs/ecryptfs/dentry.c              | 20 ++++----------------
 fs/fat/namei_vfat.c               | 12 ++++++------
 fs/fuse/dir.c                     |  4 ++--
 fs/gfs2/dentry.c                  |  6 +++---
 fs/hfs/sysdep.c                   |  4 ++--
 fs/jfs/namei.c                    |  6 +++---
 fs/namei.c                        |  2 +-
 fs/ncpfs/dir.c                    |  6 +++---
 fs/nfs/dir.c                      | 10 ++++------
 fs/ocfs2/dcache.c                 |  5 ++---
 fs/proc/base.c                    | 22 +++++++++++-----------
 fs/proc/internal.h                |  2 +-
 fs/proc/namespaces.c              |  2 +-
 fs/proc/proc_sysctl.c             |  4 ++--
 fs/reiserfs/xattr.c               |  2 +-
 fs/sysfs/dir.c                    |  4 ++--
 include/linux/dcache.h            |  2 +-
 25 files changed, 74 insertions(+), 84 deletions(-)

(limited to 'fs')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 33e5243948f..52a057367f6 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -9,7 +9,7 @@ be able to use diff(1).
 
 --------------------------- dentry_operations --------------------------
 prototypes:
-	int (*d_revalidate)(struct dentry *, struct nameidata *);
+	int (*d_revalidate)(struct dentry *, unsigned int);
 	int (*d_hash)(const struct dentry *, const struct inode *,
 			struct qstr *);
 	int (*d_compare)(const struct dentry *, const struct inode *,
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index ed9fbc23ece..56750b714d1 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -431,3 +431,8 @@ release it yourself.
 	d_alloc_root() is gone, along with a lot of bugs caused by code
 misusing it.  Replacement: d_make_root(inode).  The difference is,
 d_make_root() drops the reference to inode if dentry allocation fails.  
+
+--
+[mandatory]
+	The witch is dead!  Well, 1/3 of it, anyway.  ->d_revalidate() does *not*
+take struct nameidata anymore; just the flags.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 279de219036..b9a406b2ed0 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -902,7 +902,7 @@ the VFS uses a default. As of kernel 2.6.22, the following members are
 defined:
 
 struct dentry_operations {
-	int (*d_revalidate)(struct dentry *, struct nameidata *);
+	int (*d_revalidate)(struct dentry *, unsigned int);
 	int (*d_hash)(const struct dentry *, const struct inode *,
 			struct qstr *);
 	int (*d_compare)(const struct dentry *, const struct inode *,
@@ -921,11 +921,11 @@ struct dentry_operations {
 	dcache. Most filesystems leave this as NULL, because all their
 	dentries in the dcache are valid
 
-	d_revalidate may be called in rcu-walk mode (nd->flags & LOOKUP_RCU).
+	d_revalidate may be called in rcu-walk mode (flags & LOOKUP_RCU).
 	If in rcu-walk mode, the filesystem must revalidate the dentry without
 	blocking or storing to the dentry, d_parent and d_inode should not be
-	used without care (because they can go NULL), instead nd->inode should
-	be used.
+	used without care (because they can change and, in d_inode case, even
+	become NULL under us).
 
 	If a situation is encountered that rcu-walk cannot handle, return
 	-ECHILD and it will be called again in ref-walk mode.
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index d529437ff44..64600b5d052 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -100,13 +100,13 @@ static void v9fs_dentry_release(struct dentry *dentry)
 	}
 }
 
-static int v9fs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
 {
 	struct p9_fid *fid;
 	struct inode *inode;
 	struct v9fs_inode *v9inode;
 
-	if (nd->flags & LOOKUP_RCU)
+	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 
 	inode = dentry->d_inode;
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index e22dc4b4a50..65c54ab0473 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -23,7 +23,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 				 struct nameidata *nd);
 static int afs_dir_open(struct inode *inode, struct file *file);
 static int afs_readdir(struct file *file, void *dirent, filldir_t filldir);
-static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd);
+static int afs_d_revalidate(struct dentry *dentry, unsigned int flags);
 static int afs_d_delete(const struct dentry *dentry);
 static void afs_d_release(struct dentry *dentry);
 static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
@@ -598,7 +598,7 @@ success:
  * - NOTE! the hit can be a negative hit too, so we can't assume we have an
  *   inode
  */
-static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 {
 	struct afs_vnode *vnode, *dir;
 	struct afs_fid uninitialized_var(fid);
@@ -607,7 +607,7 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 	void *dir_version;
 	int ret;
 
-	if (nd->flags & LOOKUP_RCU)
+	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 
 	vnode = AFS_FS_I(dentry->d_inode);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index d42eee1c5de..8898eef8bca 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1042,12 +1042,12 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
 /*
  * Check if cached dentry can be trusted.
  */
-static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
 {
 	int valid = 0;
 	struct inode *dir;
 
-	if (nd && nd->flags & LOOKUP_RCU)
+	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 
 	dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry,
@@ -1094,7 +1094,7 @@ static void ceph_d_release(struct dentry *dentry)
 }
 
 static int ceph_snapdir_d_revalidate(struct dentry *dentry,
-					  struct nameidata *nd)
+					  unsigned int flags)
 {
 	/*
 	 * Eventually, we'll want to revalidate snapped metadata
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index e8c53c80dbd..b97ff48b7df 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -700,9 +700,9 @@ lookup_out:
 }
 
 static int
-cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
+cifs_d_revalidate(struct dentry *direntry, unsigned int flags)
 {
-	if (nd && (nd->flags & LOOKUP_RCU))
+	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 
 	if (direntry->d_inode) {
@@ -731,7 +731,7 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
 	 * This may be nfsd (or something), anyway, we can't see the
 	 * intent of this. So, since this can be for creation, drop it.
 	 */
-	if (!nd)
+	if (!flags)
 		return 0;
 
 	/*
@@ -739,7 +739,7 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
 	 * case sensitive name which is specified by user if this is
 	 * for creation.
 	 */
-	if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
+	if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
 		return 0;
 
 	if (time_after(jiffies, direntry->d_time + HZ) || !lookupCacheEnabled)
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 17751582906..7f8f1a7c6d8 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -46,7 +46,7 @@ static int coda_rename(struct inode *old_inode, struct dentry *old_dentry,
 static int coda_readdir(struct file *file, void *buf, filldir_t filldir);
 
 /* dentry ops */
-static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd);
+static int coda_dentry_revalidate(struct dentry *de, unsigned int flags);
 static int coda_dentry_delete(const struct dentry *);
 
 /* support routines */
@@ -536,12 +536,12 @@ out:
 }
 
 /* called when a cache lookup succeeds */
-static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd)
+static int coda_dentry_revalidate(struct dentry *de, unsigned int flags)
 {
 	struct inode *inode;
 	struct coda_inode_info *cii;
 
-	if (nd->flags & LOOKUP_RCU)
+	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 
 	inode = de->d_inode;
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 534c1d46e69..1b5d9af937d 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -32,7 +32,7 @@
 /**
  * ecryptfs_d_revalidate - revalidate an ecryptfs dentry
  * @dentry: The ecryptfs dentry
- * @nd: The associated nameidata
+ * @flags: lookup flags
  *
  * Called when the VFS needs to revalidate a dentry. This
  * is called whenever a name lookup finds a dentry in the
@@ -42,32 +42,20 @@
  * Returns 1 if valid, 0 otherwise.
  *
  */
-static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags)
 {
 	struct dentry *lower_dentry;
 	struct vfsmount *lower_mnt;
-	struct dentry *dentry_save = NULL;
-	struct vfsmount *vfsmount_save = NULL;
 	int rc = 1;
 
-	if (nd && nd->flags & LOOKUP_RCU)
+	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 
 	lower_dentry = ecryptfs_dentry_to_lower(dentry);
 	lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
 	if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
 		goto out;
-	if (nd) {
-		dentry_save = nd->path.dentry;
-		vfsmount_save = nd->path.mnt;
-		nd->path.dentry = lower_dentry;
-		nd->path.mnt = lower_mnt;
-	}
-	rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd);
-	if (nd) {
-		nd->path.dentry = dentry_save;
-		nd->path.mnt = vfsmount_save;
-	}
+	rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags);
 	if (dentry->d_inode) {
 		struct inode *lower_inode =
 			ecryptfs_inode_to_lower(dentry->d_inode);
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 98ae804f527..0bbdf399006 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -41,9 +41,9 @@ static int vfat_revalidate_shortname(struct dentry *dentry)
 	return ret;
 }
 
-static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int vfat_revalidate(struct dentry *dentry, unsigned int flags)
 {
-	if (nd && nd->flags & LOOKUP_RCU)
+	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 
 	/* This is not negative dentry. Always valid. */
@@ -52,9 +52,9 @@ static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
 	return vfat_revalidate_shortname(dentry);
 }
 
-static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
+static int vfat_revalidate_ci(struct dentry *dentry, unsigned int flags)
 {
-	if (nd && nd->flags & LOOKUP_RCU)
+	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 
 	/*
@@ -74,7 +74,7 @@ static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
 	 * This may be nfsd (or something), anyway, we can't see the
 	 * intent of this. So, since this can be for creation, drop it.
 	 */
-	if (!nd)
+	if (!flags)
 		return 0;
 
 	/*
@@ -82,7 +82,7 @@ static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
 	 * case sensitive name which is specified by user if this is
 	 * for creation.
 	 */
-	if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
+	if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
 		return 0;
 
 	return vfat_revalidate_shortname(dentry);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index ccdab3ac422..eba30bd9ba2 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -154,7 +154,7 @@ u64 fuse_get_attr_version(struct fuse_conn *fc)
  * the lookup once more.  If the lookup results in the same inode,
  * then refresh the attributes, timeouts and mark the dentry valid.
  */
-static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
+static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
 {
 	struct inode *inode;
 
@@ -174,7 +174,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
 		if (!inode)
 			return 0;
 
-		if (nd && (nd->flags & LOOKUP_RCU))
+		if (flags & LOOKUP_RCU)
 			return -ECHILD;
 
 		fc = get_fuse_conn(inode);
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 0da8da2c991..4fddb3c22d2 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -25,7 +25,7 @@
 /**
  * gfs2_drevalidate - Check directory lookup consistency
  * @dentry: the mapping to check
- * @nd:
+ * @flags: lookup flags
  *
  * Check to make sure the lookup necessary to arrive at this inode from its
  * parent is still good.
@@ -33,7 +33,7 @@
  * Returns: 1 if the dentry is ok, 0 if it isn't
  */
 
-static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
+static int gfs2_drevalidate(struct dentry *dentry, unsigned int flags)
 {
 	struct dentry *parent;
 	struct gfs2_sbd *sdp;
@@ -44,7 +44,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
 	int error;
 	int had_lock = 0;
 
-	if (nd && nd->flags & LOOKUP_RCU)
+	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 
 	parent = dget_parent(dentry);
diff --git a/fs/hfs/sysdep.c b/fs/hfs/sysdep.c
index 19cf291eb91..91b91fd3a90 100644
--- a/fs/hfs/sysdep.c
+++ b/fs/hfs/sysdep.c
@@ -13,12 +13,12 @@
 
 /* dentry case-handling: just lowercase everything */
 
-static int hfs_revalidate_dentry(struct dentry *dentry, struct nameidata *nd)
+static int hfs_revalidate_dentry(struct dentry *dentry, unsigned int flags)
 {
 	struct inode *inode;
 	int diff;
 
-	if (nd->flags & LOOKUP_RCU)
+	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 
 	inode = dentry->d_inode;
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 07c91ca6017..f37977fb087 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1570,7 +1570,7 @@ out:
 	return result;
 }
 
-static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int jfs_ci_revalidate(struct dentry *dentry, unsigned int flags)
 {
 	/*
 	 * This is not negative dentry. Always valid.
@@ -1589,7 +1589,7 @@ static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
 	 * This may be nfsd (or something), anyway, we can't see the
 	 * intent of this. So, since this can be for creation, drop it.
 	 */
-	if (!nd)
+	if (!flags)
 		return 0;
 
 	/*
@@ -1597,7 +1597,7 @@ static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
 	 * case sensitive name which is specified by user if this is
 	 * for creation.
 	 */
-	if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
+	if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
 		return 0;
 	return 1;
 }
diff --git a/fs/namei.c b/fs/namei.c
index 16256d915cb..1a5707aaed3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -465,7 +465,7 @@ err_root:
 
 static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
-	return dentry->d_op->d_revalidate(dentry, nd);
+	return dentry->d_op->d_revalidate(dentry, nd ? nd->flags : 0);
 }
 
 /**
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index aeed93a6bde..32607f74958 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -72,7 +72,7 @@ const struct inode_operations ncp_dir_inode_operations =
 /*
  * Dentry operations routines
  */
-static int ncp_lookup_validate(struct dentry *, struct nameidata *);
+static int ncp_lookup_validate(struct dentry *, unsigned int);
 static int ncp_hash_dentry(const struct dentry *, const struct inode *,
 		struct qstr *);
 static int ncp_compare_dentry(const struct dentry *, const struct inode *,
@@ -290,7 +290,7 @@ leave_me:;
 
 
 static int
-ncp_lookup_validate(struct dentry *dentry, struct nameidata *nd)
+ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
 {
 	struct ncp_server *server;
 	struct dentry *parent;
@@ -302,7 +302,7 @@ ncp_lookup_validate(struct dentry *dentry, struct nameidata *nd)
 	if (dentry == dentry->d_sb->s_root)
 		return 1;
 
-	if (nd->flags & LOOKUP_RCU)
+	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 
 	parent = dget_parent(dentry);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 71a199435ca..656f52e9aa2 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1098,9 +1098,8 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
  * If the parent directory is seen to have changed, we throw out the
  * cached dentry and do a new lookup.
  */
-static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
 {
-	unsigned int flags = nd->flags;
 	struct inode *dir;
 	struct inode *inode;
 	struct dentry *parent;
@@ -1339,7 +1338,7 @@ out:
 }
 
 #ifdef CONFIG_NFS_V4
-static int nfs4_lookup_revalidate(struct dentry *, struct nameidata *);
+static int nfs4_lookup_revalidate(struct dentry *, unsigned int);
 
 const struct dentry_operations nfs4_dentry_operations = {
 	.d_revalidate	= nfs4_lookup_revalidate,
@@ -1491,9 +1490,8 @@ no_open:
 	return finish_no_open(file, res);
 }
 
-static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
 {
-	unsigned int flags = nd->flags;
 	struct dentry *parent = NULL;
 	struct inode *inode;
 	struct inode *dir;
@@ -1537,7 +1535,7 @@ out:
 no_open_dput:
 	dput(parent);
 no_open:
-	return nfs_lookup_revalidate(dentry, nd);
+	return nfs_lookup_revalidate(dentry, flags);
 }
 
 #endif /* CONFIG_NFSV4 */
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index af4488268e4..8db4b58b2e4 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -49,14 +49,13 @@ void ocfs2_dentry_attach_gen(struct dentry *dentry)
 }
 
 
-static int ocfs2_dentry_revalidate(struct dentry *dentry,
-				   struct nameidata *nd)
+static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags)
 {
 	struct inode *inode;
 	int ret = 0;    /* if all else fails, just return false */
 	struct ocfs2_super *osb;
 
-	if (nd && nd->flags & LOOKUP_RCU)
+	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 
 	inode = dentry->d_inode;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 437195f204e..bf749cca4cc 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1601,13 +1601,13 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
  * made this apply to all per process world readable and executable
  * directories.
  */
-int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
+int pid_revalidate(struct dentry *dentry, unsigned int flags)
 {
 	struct inode *inode;
 	struct task_struct *task;
 	const struct cred *cred;
 
-	if (nd && nd->flags & LOOKUP_RCU)
+	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 
 	inode = dentry->d_inode;
@@ -1781,7 +1781,7 @@ static int proc_fd_link(struct dentry *dentry, struct path *path)
 	return proc_fd_info(dentry->d_inode, path, NULL);
 }
 
-static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
 {
 	struct inode *inode;
 	struct task_struct *task;
@@ -1789,7 +1789,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
 	struct files_struct *files;
 	const struct cred *cred;
 
-	if (nd && nd->flags & LOOKUP_RCU)
+	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 
 	inode = dentry->d_inode;
@@ -1868,7 +1868,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
 	d_set_d_op(dentry, &tid_fd_dentry_operations);
 	d_add(dentry, inode);
 	/* Close the race of the process dying before we return the dentry */
-	if (tid_fd_revalidate(dentry, NULL))
+	if (tid_fd_revalidate(dentry, 0))
 		error = NULL;
 
  out:
@@ -2003,7 +2003,7 @@ static int dname_to_vma_addr(struct dentry *dentry,
 	return 0;
 }
 
-static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
 {
 	unsigned long vm_start, vm_end;
 	bool exact_vma_exists = false;
@@ -2013,7 +2013,7 @@ static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 	struct inode *inode;
 	int status = 0;
 
-	if (nd && nd->flags & LOOKUP_RCU)
+	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 
 	if (!capable(CAP_SYS_ADMIN)) {
@@ -2371,7 +2371,7 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
 	d_set_d_op(dentry, &tid_fd_dentry_operations);
 	d_add(dentry, inode);
 	/* Close the race of the process dying before we return the dentry */
-	if (tid_fd_revalidate(dentry, NULL))
+	if (tid_fd_revalidate(dentry, 0))
 		error = NULL;
 
  out:
@@ -2430,7 +2430,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir,
 	d_set_d_op(dentry, &pid_dentry_operations);
 	d_add(dentry, inode);
 	/* Close the race of the process dying before we return the dentry */
-	if (pid_revalidate(dentry, NULL))
+	if (pid_revalidate(dentry, 0))
 		error = NULL;
 out:
 	return error;
@@ -3237,7 +3237,7 @@ static struct dentry *proc_pid_instantiate(struct inode *dir,
 
 	d_add(dentry, inode);
 	/* Close the race of the process dying before we return the dentry */
-	if (pid_revalidate(dentry, NULL))
+	if (pid_revalidate(dentry, 0))
 		error = NULL;
 out:
 	return error;
@@ -3508,7 +3508,7 @@ static struct dentry *proc_task_instantiate(struct inode *dir,
 
 	d_add(dentry, inode);
 	/* Close the race of the process dying before we return the dentry */
-	if (pid_revalidate(dentry, NULL))
+	if (pid_revalidate(dentry, 0))
 		error = NULL;
 out:
 	return error;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index eca4aca5b6e..e0c2a48dab7 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -142,7 +142,7 @@ typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
 int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
 	const char *name, int len,
 	instantiate_t instantiate, struct task_struct *task, const void *ptr);
-int pid_revalidate(struct dentry *dentry, struct nameidata *nd);
+int pid_revalidate(struct dentry *dentry, unsigned int flags);
 struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task);
 extern const struct dentry_operations pid_dentry_operations;
 int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 0d9e23a39e4..40ceb40f985 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -56,7 +56,7 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
 	d_set_d_op(dentry, &pid_dentry_operations);
 	d_add(dentry, inode);
 	/* Close the race of the process dying before we return the dentry */
-	if (pid_revalidate(dentry, NULL))
+	if (pid_revalidate(dentry, 0))
 		error = NULL;
 out:
 	return error;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 3476bca8f7a..fda69fa3909 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -794,9 +794,9 @@ static const struct inode_operations proc_sys_dir_operations = {
 	.getattr	= proc_sys_getattr,
 };
 
-static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int proc_sys_revalidate(struct dentry *dentry, unsigned int flags)
 {
-	if (nd->flags & LOOKUP_RCU)
+	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 	return !PROC_I(dentry->d_inode)->sysctl->unregistering;
 }
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 46fc1c20a6b..e6ad8d7dea6 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -942,7 +942,7 @@ int reiserfs_permission(struct inode *inode, int mask)
 	return generic_permission(inode, mask);
 }
 
-static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int xattr_hide_revalidate(struct dentry *dentry, unsigned int flags)
 {
 	return -EPERM;
 }
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index e6bb9b2a4cb..038e74b3af8 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -303,12 +303,12 @@ static int sysfs_dentry_delete(const struct dentry *dentry)
 	return !!(sd->s_flags & SYSFS_FLAG_REMOVED);
 }
 
-static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int sysfs_dentry_revalidate(struct dentry *dentry, unsigned int flags)
 {
 	struct sysfs_dirent *sd;
 	int is_dir;
 
-	if (nd->flags & LOOKUP_RCU)
+	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 
 	sd = dentry->d_fsdata;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 8ca25551820..caa34e50537 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -144,7 +144,7 @@ enum dentry_d_lock_class
 };
 
 struct dentry_operations {
-	int (*d_revalidate)(struct dentry *, struct nameidata *);
+	int (*d_revalidate)(struct dentry *, unsigned int);
 	int (*d_hash)(const struct dentry *, const struct inode *,
 			struct qstr *);
 	int (*d_compare)(const struct dentry *, const struct inode *,
-- 
cgit v1.2.3-70-g09d2


From 4ce16ef3fed92c627b4b0136c02c85c81ee105e0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 10 Jun 2012 16:10:59 -0400
Subject: fs/namei.c: don't pass nameidata to d_revalidate()

since the method wrapped by it doesn't need that anymore...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 1a5707aaed3..91c637b6898 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -463,9 +463,9 @@ err_root:
 	return -ECHILD;
 }
 
-static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd)
+static inline int d_revalidate(struct dentry *dentry, unsigned int flags)
 {
-	return dentry->d_op->d_revalidate(dentry, nd ? nd->flags : 0);
+	return dentry->d_op->d_revalidate(dentry, flags);
 }
 
 /**
@@ -511,7 +511,7 @@ static int complete_walk(struct nameidata *nd)
 		return 0;
 
 	/* Note: we do not d_invalidate() */
-	status = d_revalidate(dentry, nd);
+	status = d_revalidate(dentry, nd->flags);
 	if (status > 0)
 		return 0;
 
@@ -1050,7 +1050,7 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
 		if (d_need_lookup(dentry)) {
 			*need_lookup = true;
 		} else if (dentry->d_flags & DCACHE_OP_REVALIDATE) {
-			error = d_revalidate(dentry, nd);
+			error = d_revalidate(dentry, nd ? nd->flags : 0);
 			if (unlikely(error <= 0)) {
 				if (error < 0) {
 					dput(dentry);
@@ -1158,7 +1158,7 @@ static int lookup_fast(struct nameidata *nd, struct qstr *name,
 		if (unlikely(d_need_lookup(dentry)))
 			goto unlazy;
 		if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
-			status = d_revalidate(dentry, nd);
+			status = d_revalidate(dentry, nd->flags);
 			if (unlikely(status <= 0)) {
 				if (status != -ECHILD)
 					need_reval = 0;
@@ -1188,7 +1188,7 @@ unlazy:
 	}
 
 	if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
-		status = d_revalidate(dentry, nd);
+		status = d_revalidate(dentry, nd->flags);
 	if (unlikely(status <= 0)) {
 		if (status < 0) {
 			dput(dentry);
-- 
cgit v1.2.3-70-g09d2


From 201f956e43d4542723514e024d948011dd766d43 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 22 Jun 2012 12:42:10 +0400
Subject: fs/namei.c: don't pass namedata to lookup_dcache()

just the flags...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 91c637b6898..2e943ab04f3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1039,7 +1039,7 @@ static void follow_dotdot(struct nameidata *nd)
  * dir->d_inode->i_mutex must be held
  */
 static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
-				    struct nameidata *nd, bool *need_lookup)
+				    unsigned int flags, bool *need_lookup)
 {
 	struct dentry *dentry;
 	int error;
@@ -1050,7 +1050,7 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
 		if (d_need_lookup(dentry)) {
 			*need_lookup = true;
 		} else if (dentry->d_flags & DCACHE_OP_REVALIDATE) {
-			error = d_revalidate(dentry, nd ? nd->flags : 0);
+			error = d_revalidate(dentry, flags);
 			if (unlikely(error <= 0)) {
 				if (error < 0) {
 					dput(dentry);
@@ -1104,7 +1104,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
 	bool need_lookup;
 	struct dentry *dentry;
 
-	dentry = lookup_dcache(name, base, nd, &need_lookup);
+	dentry = lookup_dcache(name, base, nd ? nd->flags : 0, &need_lookup);
 	if (!need_lookup)
 		return dentry;
 
@@ -2356,7 +2356,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
 	bool need_lookup;
 
 	*opened &= ~FILE_CREATED;
-	dentry = lookup_dcache(&nd->last, dir, nd, &need_lookup);
+	dentry = lookup_dcache(&nd->last, dir, nd->flags, &need_lookup);
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
 
-- 
cgit v1.2.3-70-g09d2


From 00cd8dd3bf95f2cc8435b4cac01d9995635c6d0b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 10 Jun 2012 17:13:09 -0400
Subject: stop passing nameidata to ->lookup()

Just the flags; only NFS cares even about that, but there are
legitimate uses for such argument.  And getting rid of that
completely would require splitting ->lookup() into a couple
of methods (at least), so let's leave that alone for now...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/Locking |  3 +--
 Documentation/filesystems/porting |  4 ++--
 Documentation/filesystems/vfs.txt |  2 +-
 fs/9p/v9fs.h                      |  2 +-
 fs/9p/vfs_inode.c                 |  8 ++++----
 fs/9p/vfs_inode_dotl.c            |  2 +-
 fs/adfs/dir.c                     |  2 +-
 fs/affs/affs.h                    |  2 +-
 fs/affs/namei.c                   |  2 +-
 fs/afs/dir.c                      |  4 ++--
 fs/afs/mntpt.c                    |  4 ++--
 fs/autofs4/root.c                 |  4 ++--
 fs/bad_inode.c                    |  2 +-
 fs/befs/linuxvfs.c                |  4 ++--
 fs/bfs/dir.c                      |  2 +-
 fs/btrfs/inode.c                  |  2 +-
 fs/ceph/dir.c                     |  6 +++---
 fs/cifs/cifsfs.h                  |  2 +-
 fs/cifs/dir.c                     |  4 ++--
 fs/coda/dir.c                     |  4 ++--
 fs/configfs/dir.c                 |  2 +-
 fs/cramfs/inode.c                 |  2 +-
 fs/ecryptfs/inode.c               |  2 +-
 fs/efs/efs.h                      |  2 +-
 fs/efs/namei.c                    |  3 ++-
 fs/exofs/namei.c                  |  2 +-
 fs/ext2/namei.c                   |  2 +-
 fs/ext3/namei.c                   |  2 +-
 fs/ext4/namei.c                   |  2 +-
 fs/fat/namei_msdos.c              |  2 +-
 fs/fat/namei_vfat.c               |  2 +-
 fs/freevxfs/vxfs_lookup.c         |  4 ++--
 fs/fuse/dir.c                     |  4 ++--
 fs/gfs2/inode.c                   |  2 +-
 fs/hfs/dir.c                      |  2 +-
 fs/hfs/inode.c                    |  2 +-
 fs/hfsplus/dir.c                  |  2 +-
 fs/hfsplus/inode.c                |  2 +-
 fs/hostfs/hostfs_kern.c           |  2 +-
 fs/hpfs/dir.c                     |  2 +-
 fs/hpfs/hpfs_fn.h                 |  2 +-
 fs/hppfs/hppfs.c                  |  2 +-
 fs/isofs/isofs.h                  |  2 +-
 fs/isofs/namei.c                  |  2 +-
 fs/jffs2/dir.c                    |  4 ++--
 fs/jfs/namei.c                    |  2 +-
 fs/libfs.c                        |  2 +-
 fs/logfs/dir.c                    |  2 +-
 fs/minix/namei.c                  |  2 +-
 fs/namei.c                        |  2 +-
 fs/ncpfs/dir.c                    |  4 ++--
 fs/nfs/dir.c                      |  8 ++++----
 fs/nilfs2/namei.c                 |  2 +-
 fs/ntfs/namei.c                   |  2 +-
 fs/ocfs2/namei.c                  |  2 +-
 fs/omfs/dir.c                     |  2 +-
 fs/openpromfs/inode.c             |  4 ++--
 fs/proc/base.c                    | 18 ++++++++++--------
 fs/proc/generic.c                 |  2 +-
 fs/proc/internal.h                |  4 ++--
 fs/proc/namespaces.c              |  2 +-
 fs/proc/proc_net.c                |  2 +-
 fs/proc/proc_sysctl.c             |  2 +-
 fs/proc/root.c                    |  7 +++----
 fs/qnx4/namei.c                   |  2 +-
 fs/qnx4/qnx4.h                    |  2 +-
 fs/qnx6/namei.c                   |  2 +-
 fs/qnx6/qnx6.h                    |  2 +-
 fs/reiserfs/namei.c               |  2 +-
 fs/romfs/super.c                  |  2 +-
 fs/squashfs/namei.c               |  2 +-
 fs/sysfs/dir.c                    |  2 +-
 fs/sysv/namei.c                   |  2 +-
 fs/ubifs/dir.c                    |  2 +-
 fs/udf/namei.c                    |  2 +-
 fs/ufs/namei.c                    |  2 +-
 fs/xfs/xfs_iops.c                 |  4 ++--
 include/linux/fs.h                |  4 ++--
 kernel/cgroup.c                   |  4 ++--
 79 files changed, 115 insertions(+), 114 deletions(-)

(limited to 'fs')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 52a057367f6..33f2c8f1db8 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -38,8 +38,7 @@ d_manage:	no		no		yes (ref-walk)	maybe
 --------------------------- inode_operations --------------------------- 
 prototypes:
 	int (*create) (struct inode *,struct dentry *,umode_t, struct nameidata *);
-	struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameid
-ata *);
+	struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
 	int (*link) (struct dentry *,struct inode *,struct dentry *);
 	int (*unlink) (struct inode *,struct dentry *);
 	int (*symlink) (struct inode *,struct dentry *,const char *);
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 56750b714d1..690f573928b 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -434,5 +434,5 @@ d_make_root() drops the reference to inode if dentry allocation fails.
 
 --
 [mandatory]
-	The witch is dead!  Well, 1/3 of it, anyway.  ->d_revalidate() does *not*
-take struct nameidata anymore; just the flags.
+	The witch is dead!  Well, 2/3 of it, anyway.  ->d_revalidate() and
+->lookup() do *not* take struct nameidata anymore; just the flags.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index b9a406b2ed0..ee786354946 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -342,7 +342,7 @@ filesystem. As of kernel 2.6.22, the following members are defined:
 
 struct inode_operations {
 	int (*create) (struct inode *,struct dentry *, umode_t, struct nameidata *);
-	struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *);
+	struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
 	int (*link) (struct dentry *,struct inode *,struct dentry *);
 	int (*unlink) (struct inode *,struct dentry *);
 	int (*symlink) (struct inode *,struct dentry *,const char *);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index e78956cbd70..34c59f14a1c 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -144,7 +144,7 @@ extern void v9fs_session_close(struct v9fs_session_info *v9ses);
 extern void v9fs_session_cancel(struct v9fs_session_info *v9ses);
 extern void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses);
 extern struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
-			struct nameidata *nameidata);
+			unsigned int flags);
 extern int v9fs_vfs_unlink(struct inode *i, struct dentry *d);
 extern int v9fs_vfs_rmdir(struct inode *i, struct dentry *d);
 extern int v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index eae476fb401..bb0d7627f95 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -785,7 +785,7 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
  */
 
 struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
-				      struct nameidata *nameidata)
+				      unsigned int flags)
 {
 	struct dentry *res;
 	struct super_block *sb;
@@ -795,8 +795,8 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 	char *name;
 	int result = 0;
 
-	p9_debug(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n",
-		 dir, dentry->d_name.name, dentry, nameidata);
+	p9_debug(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p flags: %x\n",
+		 dir, dentry->d_name.name, dentry, flags);
 
 	if (dentry->d_name.len > NAME_MAX)
 		return ERR_PTR(-ENAMETOOLONG);
@@ -869,7 +869,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
 	struct dentry *res = NULL;
 
 	if (d_unhashed(dentry)) {
-		res = v9fs_vfs_lookup(dir, dentry, NULL);
+		res = v9fs_vfs_lookup(dir, dentry, 0);
 		if (IS_ERR(res))
 			return PTR_ERR(res);
 
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 1ee10c89df9..b97619fed19 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -259,7 +259,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
 	struct dentry *res = NULL;
 
 	if (d_unhashed(dentry)) {
-		res = v9fs_vfs_lookup(dir, dentry, NULL);
+		res = v9fs_vfs_lookup(dir, dentry, 0);
 		if (IS_ERR(res))
 			return PTR_ERR(res);
 
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index 3d83075aaa2..b3be2e7c564 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -266,7 +266,7 @@ const struct dentry_operations adfs_dentry_operations = {
 };
 
 static struct dentry *
-adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+adfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
 	struct inode *inode = NULL;
 	struct object_info obj;
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 3a130e27eb1..49e4e3457bf 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -153,7 +153,7 @@ extern void	affs_free_bitmap(struct super_block *sb);
 /* namei.c */
 
 extern int	affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len);
-extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *);
+extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int);
 extern int	affs_unlink(struct inode *dir, struct dentry *dentry);
 extern int	affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *);
 extern int	affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 47806940aac..7f9721be709 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -211,7 +211,7 @@ affs_find_entry(struct inode *dir, struct dentry *dentry)
 }
 
 struct dentry *
-affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
 	struct super_block *sb = dir->i_sb;
 	struct buffer_head *bh;
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 65c54ab0473..ffb33e36ea7 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -20,7 +20,7 @@
 #include "internal.h"
 
 static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
-				 struct nameidata *nd);
+				 unsigned int flags);
 static int afs_dir_open(struct inode *inode, struct file *file);
 static int afs_readdir(struct file *file, void *dirent, filldir_t filldir);
 static int afs_d_revalidate(struct dentry *dentry, unsigned int flags);
@@ -516,7 +516,7 @@ out:
  * look up an entry in a directory
  */
 static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
-				 struct nameidata *nd)
+				 unsigned int flags)
 {
 	struct afs_vnode *vnode;
 	struct afs_fid fid;
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 298cf8919ec..9682c33d5da 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -22,7 +22,7 @@
 
 static struct dentry *afs_mntpt_lookup(struct inode *dir,
 				       struct dentry *dentry,
-				       struct nameidata *nd);
+				       unsigned int flags);
 static int afs_mntpt_open(struct inode *inode, struct file *file);
 static void afs_mntpt_expiry_timed_out(struct work_struct *work);
 
@@ -104,7 +104,7 @@ out:
  */
 static struct dentry *afs_mntpt_lookup(struct inode *dir,
 				       struct dentry *dentry,
-				       struct nameidata *nd)
+				       unsigned int flags)
 {
 	_enter("%p,%p{%p{%s},%s}",
 	       dir,
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 75e5f1c8e02..e7396cfdb10 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -32,7 +32,7 @@ static long autofs4_root_ioctl(struct file *,unsigned int,unsigned long);
 static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long);
 #endif
 static int autofs4_dir_open(struct inode *inode, struct file *file);
-static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
+static struct dentry *autofs4_lookup(struct inode *,struct dentry *, unsigned int);
 static struct vfsmount *autofs4_d_automount(struct path *);
 static int autofs4_d_manage(struct dentry *, bool);
 static void autofs4_dentry_release(struct dentry *);
@@ -458,7 +458,7 @@ int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
 }
 
 /* Lookups in the root directory */
-static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
 	struct autofs_sb_info *sbi;
 	struct autofs_info *ino;
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 1b35d6bd06b..d27e73c69ba 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -179,7 +179,7 @@ static int bad_inode_create (struct inode *dir, struct dentry *dentry,
 }
 
 static struct dentry *bad_inode_lookup(struct inode *dir,
-			struct dentry *dentry, struct nameidata *nd)
+			struct dentry *dentry, unsigned int flags)
 {
 	return ERR_PTR(-EIO);
 }
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index e18da23d42b..cf7f3c67c8b 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -34,7 +34,7 @@ static int befs_readdir(struct file *, void *, filldir_t);
 static int befs_get_block(struct inode *, sector_t, struct buffer_head *, int);
 static int befs_readpage(struct file *file, struct page *page);
 static sector_t befs_bmap(struct address_space *mapping, sector_t block);
-static struct dentry *befs_lookup(struct inode *, struct dentry *, struct nameidata *);
+static struct dentry *befs_lookup(struct inode *, struct dentry *, unsigned int);
 static struct inode *befs_iget(struct super_block *, unsigned long);
 static struct inode *befs_alloc_inode(struct super_block *sb);
 static void befs_destroy_inode(struct inode *inode);
@@ -159,7 +159,7 @@ befs_get_block(struct inode *inode, sector_t block,
 }
 
 static struct dentry *
-befs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
 	struct inode *inode = NULL;
 	struct super_block *sb = dir->i_sb;
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index d12c7966db2..3f1cd3b7168 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -133,7 +133,7 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 }
 
 static struct dentry *bfs_lookup(struct inode *dir, struct dentry *dentry,
-						struct nameidata *nd)
+						unsigned int flags)
 {
 	struct inode *inode = NULL;
 	struct buffer_head *bh;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a101572f1ce..e5f1f81b2d6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4247,7 +4247,7 @@ static void btrfs_dentry_release(struct dentry *dentry)
 }
 
 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
-				   struct nameidata *nd)
+				   unsigned int flags)
 {
 	struct dentry *ret;
 
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 8898eef8bca..74b2f3c54fe 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -576,7 +576,7 @@ static int is_root_ceph_dentry(struct inode *inode, struct dentry *dentry)
  * the MDS so that it gets our 'caps wanted' value in a single op.
  */
 static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
-				  struct nameidata *nd)
+				  unsigned int flags)
 {
 	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
 	struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -653,7 +653,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 	}
 
 	if (d_unhashed(dentry)) {
-		res = ceph_lookup(dir, dentry, NULL);
+		res = ceph_lookup(dir, dentry, 0);
 		if (IS_ERR(res))
 			return PTR_ERR(res);
 
@@ -678,7 +678,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
  */
 int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry)
 {
-	struct dentry *result = ceph_lookup(dir, dentry, NULL);
+	struct dentry *result = ceph_lookup(dir, dentry, 0);
 
 	if (result && !IS_ERR(result)) {
 		/*
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 48bb474ce29..1abd31fd5bf 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -50,7 +50,7 @@ extern int cifs_atomic_open(struct inode *, struct dentry *,
 			    struct file *, unsigned, umode_t,
 			    int *);
 extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
-				  struct nameidata *);
+				  unsigned int);
 extern int cifs_unlink(struct inode *dir, struct dentry *dentry);
 extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *);
 extern int cifs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index b97ff48b7df..2d732b9276e 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -401,7 +401,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
 	 * in network traffic in the other paths.
 	 */
 	if (!(oflags & O_CREAT)) {
-		struct dentry *res = cifs_lookup(inode, direntry, NULL);
+		struct dentry *res = cifs_lookup(inode, direntry, 0);
 		if (IS_ERR(res))
 			return PTR_ERR(res);
 
@@ -621,7 +621,7 @@ mknod_out:
 
 struct dentry *
 cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
-	    struct nameidata *nd)
+	    unsigned int flags)
 {
 	int xid;
 	int rc = 0; /* to get around spurious gcc warning, set to zero here */
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 7f8f1a7c6d8..da35e965861 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -31,7 +31,7 @@
 
 /* dir inode-ops */
 static int coda_create(struct inode *dir, struct dentry *new, umode_t mode, struct nameidata *nd);
-static struct dentry *coda_lookup(struct inode *dir, struct dentry *target, struct nameidata *nd);
+static struct dentry *coda_lookup(struct inode *dir, struct dentry *target, unsigned int flags);
 static int coda_link(struct dentry *old_dentry, struct inode *dir_inode, 
 		     struct dentry *entry);
 static int coda_unlink(struct inode *dir_inode, struct dentry *entry);
@@ -94,7 +94,7 @@ const struct file_operations coda_dir_operations = {
 
 /* inode operations for directories */
 /* access routines: lookup, readlink, permission */
-static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struct nameidata *nd)
+static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, unsigned int flags)
 {
 	struct super_block *sb = dir->i_sb;
 	const char *name = entry->d_name.name;
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 7e6c52d8a20..7414ae24a79 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -442,7 +442,7 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den
 
 static struct dentry * configfs_lookup(struct inode *dir,
 				       struct dentry *dentry,
-				       struct nameidata *nd)
+				       unsigned int flags)
 {
 	struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
 	struct configfs_dirent * sd;
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index d013c46402e..28cca01ca9c 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -417,7 +417,7 @@ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 /*
  * Lookup and fill in the inode data..
  */
-static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
 	unsigned int offset = 0;
 	struct inode *inode = NULL;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index a07441a0a87..4ab50c3f5ab 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -374,7 +374,7 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry,
  */
 static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
 				      struct dentry *ecryptfs_dentry,
-				      struct nameidata *ecryptfs_nd)
+				      unsigned int flags)
 {
 	char *encrypted_and_encoded_name = NULL;
 	size_t encrypted_and_encoded_name_size;
diff --git a/fs/efs/efs.h b/fs/efs/efs.h
index d8305b582ab..5528926ac7f 100644
--- a/fs/efs/efs.h
+++ b/fs/efs/efs.h
@@ -129,7 +129,7 @@ extern struct inode *efs_iget(struct super_block *, unsigned long);
 extern efs_block_t efs_map_block(struct inode *, efs_block_t);
 extern int efs_get_block(struct inode *, sector_t, struct buffer_head *, int);
 
-extern struct dentry *efs_lookup(struct inode *, struct dentry *, struct nameidata *);
+extern struct dentry *efs_lookup(struct inode *, struct dentry *, unsigned int);
 extern struct dentry *efs_fh_to_dentry(struct super_block *sb, struct fid *fid,
 		int fh_len, int fh_type);
 extern struct dentry *efs_fh_to_parent(struct super_block *sb, struct fid *fid,
diff --git a/fs/efs/namei.c b/fs/efs/namei.c
index 832b10ded82..96f66d213a1 100644
--- a/fs/efs/namei.c
+++ b/fs/efs/namei.c
@@ -58,7 +58,8 @@ static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len)
 	return(0);
 }
 
-struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) {
+struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
+{
 	efs_ino_t inodenum;
 	struct inode *inode = NULL;
 
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index fc7161d6bf6..909ed6ea4cf 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -46,7 +46,7 @@ static inline int exofs_add_nondir(struct dentry *dentry, struct inode *inode)
 }
 
 static struct dentry *exofs_lookup(struct inode *dir, struct dentry *dentry,
-				   struct nameidata *nd)
+				   unsigned int flags)
 {
 	struct inode *inode;
 	ino_t ino;
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index f663a67d7bf..b3e6778cd1e 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -55,7 +55,7 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
  * Methods themselves.
  */
 
-static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
+static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, unsigned int flags)
 {
 	struct inode * inode;
 	ino_t ino;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index eeb63dfc5d2..86d25f3f604 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1011,7 +1011,7 @@ errout:
 	return NULL;
 }
 
-static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
+static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, unsigned int flags)
 {
 	struct inode * inode;
 	struct ext3_dir_entry_2 * de;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5845cd97bf8..4fba3cd42e2 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1312,7 +1312,7 @@ errout:
 	return NULL;
 }
 
-static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
 	struct inode *inode;
 	struct ext4_dir_entry_2 *de;
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index c5938c9084b..47c608b0529 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -201,7 +201,7 @@ static const struct dentry_operations msdos_dentry_operations = {
 
 /***** Get inode using directory and name */
 static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry,
-				   struct nameidata *nd)
+				   unsigned int flags)
 {
 	struct super_block *sb = dir->i_sb;
 	struct fat_slot_info sinfo;
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 0bbdf399006..44152571524 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -714,7 +714,7 @@ static int vfat_d_anon_disconn(struct dentry *dentry)
 }
 
 static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
-				  struct nameidata *nd)
+				  unsigned int flags)
 {
 	struct super_block *sb = dir->i_sb;
 	struct fat_slot_info sinfo;
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c
index 3360f1e678a..bd447e88f20 100644
--- a/fs/freevxfs/vxfs_lookup.c
+++ b/fs/freevxfs/vxfs_lookup.c
@@ -48,7 +48,7 @@
 #define VXFS_BLOCK_PER_PAGE(sbp)  ((PAGE_CACHE_SIZE / (sbp)->s_blocksize))
 
 
-static struct dentry *	vxfs_lookup(struct inode *, struct dentry *, struct nameidata *);
+static struct dentry *	vxfs_lookup(struct inode *, struct dentry *, unsigned int);
 static int		vxfs_readdir(struct file *, void *, filldir_t);
 
 const struct inode_operations vxfs_dir_inode_ops = {
@@ -203,7 +203,7 @@ vxfs_inode_by_name(struct inode *dip, struct dentry *dp)
  *   in the return pointer.
  */
 static struct dentry *
-vxfs_lookup(struct inode *dip, struct dentry *dp, struct nameidata *nd)
+vxfs_lookup(struct inode *dip, struct dentry *dp, unsigned int flags)
 {
 	struct inode		*ip = NULL;
 	ino_t			ino;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index eba30bd9ba2..385235ac137 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -316,7 +316,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
 }
 
 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
-				  struct nameidata *nd)
+				  unsigned int flags)
 {
 	int err;
 	struct fuse_entry_out outarg;
@@ -478,7 +478,7 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
 	struct dentry *res = NULL;
 
 	if (d_unhashed(entry)) {
-		res = fuse_lookup(dir, entry, NULL);
+		res = fuse_lookup(dir, entry, 0);
 		if (IS_ERR(res))
 			return PTR_ERR(res);
 
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index a9ba2444e07..19e443b7335 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -775,7 +775,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry,
  */
 
 static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
-				  struct nameidata *nd)
+				  unsigned int flags)
 {
 	struct inode *inode = gfs2_lookupi(dir, &dentry->d_name, 0);
 	if (inode && !IS_ERR(inode)) {
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 62fc14ea4b7..617b1ed71f5 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -18,7 +18,7 @@
  * hfs_lookup()
  */
 static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry,
-				 struct nameidata *nd)
+				 unsigned int flags)
 {
 	hfs_cat_rec rec;
 	struct hfs_find_data fd;
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 761ec06354b..451c97281b8 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -489,7 +489,7 @@ out:
 }
 
 static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry,
-				      struct nameidata *nd)
+				      unsigned int flags)
 {
 	struct inode *inode = NULL;
 	hfs_cat_rec rec;
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 26b53fb09f6..90c2f78b2c7 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -25,7 +25,7 @@ static inline void hfsplus_instantiate(struct dentry *dentry,
 
 /* Find the entry inside dir named dentry->d_name */
 static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry,
-				     struct nameidata *nd)
+				     unsigned int flags)
 {
 	struct inode *inode = NULL;
 	struct hfs_find_data fd;
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 82b69ee4dac..7009265b746 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -168,7 +168,7 @@ const struct dentry_operations hfsplus_dentry_operations = {
 };
 
 static struct dentry *hfsplus_file_lookup(struct inode *dir,
-		struct dentry *dentry, struct nameidata *nd)
+		struct dentry *dentry, unsigned int flags)
 {
 	struct hfs_find_data fd;
 	struct super_block *sb = dir->i_sb;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 2afa5bbccf9..0ea005228e1 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -595,7 +595,7 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 }
 
 struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
-			     struct nameidata *nd)
+			     unsigned int flags)
 {
 	struct inode *inode;
 	char *name;
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index b8472f803f4..78e12b2e0ea 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -189,7 +189,7 @@ out:
  *	      to tell read_inode to read fnode or not.
  */
 
-struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
 	const unsigned char *name = dentry->d_name.name;
 	unsigned len = dentry->d_name.len;
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index c07ef1f1ced..ac1ead194db 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -220,7 +220,7 @@ extern const struct dentry_operations hpfs_dentry_operations;
 
 /* dir.c */
 
-struct dentry *hpfs_lookup(struct inode *, struct dentry *, struct nameidata *);
+struct dentry *hpfs_lookup(struct inode *, struct dentry *, unsigned int);
 extern const struct file_operations hpfs_dir_ops;
 
 /* dnode.c */
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index d4f93b52cec..e5c06531dcc 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -138,7 +138,7 @@ static int file_removed(struct dentry *dentry, const char *file)
 }
 
 static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry,
-				   struct nameidata *nd)
+				   unsigned int flags)
 {
 	struct dentry *proc_dentry, *parent;
 	struct qstr *name = &dentry->d_name;
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 0e73f63d927..3620ad1ea9b 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -114,7 +114,7 @@ extern int isofs_name_translate(struct iso_directory_record *, char *, struct in
 int get_joliet_filename(struct iso_directory_record *, unsigned char *, struct inode *);
 int get_acorn_filename(struct iso_directory_record *, char *, struct inode *);
 
-extern struct dentry *isofs_lookup(struct inode *, struct dentry *, struct nameidata *);
+extern struct dentry *isofs_lookup(struct inode *, struct dentry *, unsigned int flags);
 extern struct buffer_head *isofs_bread(struct inode *, sector_t);
 extern int isofs_get_blocks(struct inode *, sector_t, struct buffer_head **, unsigned long);
 
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 1e2946f2a69..c167028844e 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -163,7 +163,7 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
 	return 0;
 }
 
-struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
 	int found;
 	unsigned long uninitialized_var(block);
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index b56018896d5..6a601673f89 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -27,7 +27,7 @@ static int jffs2_readdir (struct file *, void *, filldir_t);
 static int jffs2_create (struct inode *,struct dentry *,umode_t,
 			 struct nameidata *);
 static struct dentry *jffs2_lookup (struct inode *,struct dentry *,
-				    struct nameidata *);
+				    unsigned int);
 static int jffs2_link (struct dentry *,struct inode *,struct dentry *);
 static int jffs2_unlink (struct inode *,struct dentry *);
 static int jffs2_symlink (struct inode *,struct dentry *,const char *);
@@ -74,7 +74,7 @@ const struct inode_operations jffs2_dir_inode_operations =
    nice and simple
 */
 static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
-				   struct nameidata *nd)
+				   unsigned int flags)
 {
 	struct jffs2_inode_info *dir_f;
 	struct jffs2_full_dirent *fd = NULL, *fd_list;
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index f37977fb087..34fe85555ca 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1436,7 +1436,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
 	return rc;
 }
 
-static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struct nameidata *nd)
+static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, unsigned int flags)
 {
 	struct btstack btstack;
 	ino_t inum;
diff --git a/fs/libfs.c b/fs/libfs.c
index f86ec27a423..ebd03f6910d 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -53,7 +53,7 @@ static int simple_delete_dentry(const struct dentry *dentry)
  * Lookup the data. This is trivial - if the dentry didn't already
  * exist, we know it is negative.  Set d_op to delete negative dentries.
  */
-struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
 	static const struct dentry_operations simple_dentry_operations = {
 		.d_delete = simple_delete_dentry,
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index bea5d1b9954..8a3dcc615b3 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -349,7 +349,7 @@ static void logfs_set_name(struct logfs_disk_dentry *dd, struct qstr *name)
 }
 
 static struct dentry *logfs_lookup(struct inode *dir, struct dentry *dentry,
-		struct nameidata *nd)
+		unsigned int flags)
 {
 	struct page *page;
 	struct logfs_disk_dentry *dd;
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 2d0ee178630..1f245240ea0 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -18,7 +18,7 @@ static int add_nondir(struct dentry *dentry, struct inode *inode)
 	return err;
 }
 
-static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
+static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, unsigned int flags)
 {
 	struct inode * inode = NULL;
 	ino_t ino;
diff --git a/fs/namei.c b/fs/namei.c
index 2e943ab04f3..175e81b8f26 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1090,7 +1090,7 @@ static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry,
 		return ERR_PTR(-ENOENT);
 	}
 
-	old = dir->i_op->lookup(dir, dentry, nd);
+	old = dir->i_op->lookup(dir, dentry, nd ? nd->flags : 0);
 	if (unlikely(old)) {
 		dput(dentry);
 		dentry = old;
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 32607f74958..a0cff22bfc9 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -31,7 +31,7 @@ static void ncp_do_readdir(struct file *, void *, filldir_t,
 static int ncp_readdir(struct file *, void *, filldir_t);
 
 static int ncp_create(struct inode *, struct dentry *, umode_t, struct nameidata *);
-static struct dentry *ncp_lookup(struct inode *, struct dentry *, struct nameidata *);
+static struct dentry *ncp_lookup(struct inode *, struct dentry *, unsigned int);
 static int ncp_unlink(struct inode *, struct dentry *);
 static int ncp_mkdir(struct inode *, struct dentry *, umode_t);
 static int ncp_rmdir(struct inode *, struct dentry *);
@@ -836,7 +836,7 @@ out:
 	return result;
 }
 
-static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
 	struct ncp_server *server = NCP_SERVER(dir);
 	struct inode *inode = NULL;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 656f52e9aa2..8f21205c589 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -46,7 +46,7 @@
 static int nfs_opendir(struct inode *, struct file *);
 static int nfs_closedir(struct inode *, struct file *);
 static int nfs_readdir(struct file *, void *, filldir_t);
-static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *);
+static struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int);
 static int nfs_create(struct inode *, struct dentry *, umode_t, struct nameidata *);
 static int nfs_mkdir(struct inode *, struct dentry *, umode_t);
 static int nfs_rmdir(struct inode *, struct dentry *);
@@ -1270,7 +1270,7 @@ const struct dentry_operations nfs_dentry_operations = {
 	.d_release	= nfs_d_release,
 };
 
-static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
+static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
 {
 	struct dentry *res;
 	struct dentry *parent;
@@ -1291,7 +1291,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 	 * If we're doing an exclusive create, optimize away the lookup
 	 * but don't hash the dentry.
 	 */
-	if (nd && nfs_is_exclusive_create(dir, nd->flags)) {
+	if (nfs_is_exclusive_create(dir, flags)) {
 		d_instantiate(dentry, NULL);
 		res = NULL;
 		goto out;
@@ -1482,7 +1482,7 @@ out:
 	return err;
 
 no_open:
-	res = nfs_lookup(dir, dentry, NULL);
+	res = nfs_lookup(dir, dentry, 0);
 	err = PTR_ERR(res);
 	if (IS_ERR(res))
 		goto out;
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index b72847988b7..5e5f779db76 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -63,7 +63,7 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode)
  */
 
 static struct dentry *
-nilfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
 	struct inode *inode;
 	ino_t ino;
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index 358273e59ad..436f36037e0 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -101,7 +101,7 @@
  * Locking: Caller must hold i_mutex on the directory.
  */
 static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
-		struct nameidata *nd)
+		unsigned int flags)
 {
 	ntfs_volume *vol = NTFS_SB(dir_ino->i_sb);
 	struct inode *dent_inode;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 9f39c640cdd..fd71f6e5841 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -98,7 +98,7 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
 #define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64)))
 
 static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
-				   struct nameidata *nd)
+				   unsigned int flags)
 {
 	int status;
 	u64 blkno;
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index f00576ec320..3d254872e64 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -291,7 +291,7 @@ static int omfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 }
 
 static struct dentry *omfs_lookup(struct inode *dir, struct dentry *dentry,
-				  struct nameidata *nd)
+				  unsigned int flags)
 {
 	struct buffer_head *bh;
 	struct inode *inode = NULL;
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index bc49c975d50..4a3477949bc 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -170,13 +170,13 @@ static const struct file_operations openprom_operations = {
 	.llseek		= generic_file_llseek,
 };
 
-static struct dentry *openpromfs_lookup(struct inode *, struct dentry *, struct nameidata *);
+static struct dentry *openpromfs_lookup(struct inode *, struct dentry *, unsigned int);
 
 static const struct inode_operations openprom_inode_operations = {
 	.lookup		= openpromfs_lookup,
 };
 
-static struct dentry *openpromfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+static struct dentry *openpromfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
 	struct op_inode_info *ent_oi, *oi = OP_I(dir);
 	struct device_node *dp, *child;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index bf749cca4cc..8eaa5ea1c61 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1956,7 +1956,7 @@ out_no_task:
 }
 
 static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
-				    struct nameidata *nd)
+				    unsigned int flags)
 {
 	return proc_lookupfd_common(dir, dentry, proc_fd_instantiate);
 }
@@ -2145,7 +2145,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
 }
 
 static struct dentry *proc_map_files_lookup(struct inode *dir,
-		struct dentry *dentry, struct nameidata *nd)
+		struct dentry *dentry, unsigned int flags)
 {
 	unsigned long vm_start, vm_end;
 	struct vm_area_struct *vma;
@@ -2380,7 +2380,7 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
 
 static struct dentry *proc_lookupfdinfo(struct inode *dir,
 					struct dentry *dentry,
-					struct nameidata *nd)
+					unsigned int flags)
 {
 	return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate);
 }
@@ -2630,7 +2630,7 @@ static const struct file_operations proc_attr_dir_operations = {
 };
 
 static struct dentry *proc_attr_dir_lookup(struct inode *dir,
-				struct dentry *dentry, struct nameidata *nd)
+				struct dentry *dentry, unsigned int flags)
 {
 	return proc_pident_lookup(dir, dentry,
 				  attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
@@ -3114,7 +3114,8 @@ static const struct file_operations proc_tgid_base_operations = {
 	.llseek		= default_llseek,
 };
 
-static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
+static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
+{
 	return proc_pident_lookup(dir, dentry,
 				  tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
 }
@@ -3243,7 +3244,7 @@ out:
 	return error;
 }
 
-struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
+struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
 {
 	struct dentry *result;
 	struct task_struct *task;
@@ -3470,7 +3471,8 @@ static int proc_tid_base_readdir(struct file * filp,
 				   tid_base_stuff,ARRAY_SIZE(tid_base_stuff));
 }
 
-static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
+static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
+{
 	return proc_pident_lookup(dir, dentry,
 				  tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
 }
@@ -3514,7 +3516,7 @@ out:
 	return error;
 }
 
-static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
+static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
 {
 	struct dentry *result = ERR_PTR(-ENOENT);
 	struct task_struct *task;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 2edf34f2eb6..b3647fe6a60 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -446,7 +446,7 @@ out_unlock:
 }
 
 struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry,
-		struct nameidata *nd)
+		unsigned int flags)
 {
 	return proc_lookup_de(PDE(dir), dir, dentry);
 }
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index e0c2a48dab7..e1167a1c912 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -106,7 +106,7 @@ void pde_users_dec(struct proc_dir_entry *pde);
 
 extern spinlock_t proc_subdir_lock;
 
-struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *);
+struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int);
 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
 unsigned long task_vsize(struct mm_struct *);
 unsigned long task_statm(struct mm_struct *,
@@ -132,7 +132,7 @@ int proc_remount(struct super_block *sb, int *flags, char *data);
  * of the /proc/<pid> subdirectories.
  */
 int proc_readdir(struct file *, void *, filldir_t);
-struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
+struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int);
 
 
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 40ceb40f985..b178ed733c3 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -140,7 +140,7 @@ const struct file_operations proc_ns_dir_operations = {
 };
 
 static struct dentry *proc_ns_dir_lookup(struct inode *dir,
-				struct dentry *dentry, struct nameidata *nd)
+				struct dentry *dentry, unsigned int flags)
 {
 	struct dentry *error;
 	struct task_struct *task = get_proc_task(dir);
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 06e1cc17caf..fe72cd073de 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -119,7 +119,7 @@ static struct net *get_proc_task_net(struct inode *dir)
 }
 
 static struct dentry *proc_tgid_net_lookup(struct inode *dir,
-		struct dentry *dentry, struct nameidata *nd)
+		struct dentry *dentry, unsigned int flags)
 {
 	struct dentry *de;
 	struct net *net;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index fda69fa3909..dfafeb2b05a 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -433,7 +433,7 @@ static struct ctl_table_header *grab_header(struct inode *inode)
 }
 
 static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
-					struct nameidata *nd)
+					unsigned int flags)
 {
 	struct ctl_table_header *head = grab_header(dir);
 	struct ctl_table_header *h = NULL;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 7c30fce037c..568b20290c7 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -200,13 +200,12 @@ static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct
 	return 0;
 }
 
-static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
+static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, unsigned int flags)
 {
-	if (!proc_lookup(dir, dentry, nd)) {
+	if (!proc_lookup(dir, dentry, flags))
 		return NULL;
-	}
 	
-	return proc_pid_lookup(dir, dentry, nd);
+	return proc_pid_lookup(dir, dentry, flags);
 }
 
 static int proc_root_readdir(struct file * filp,
diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c
index a512c0b30e8..d024505ba00 100644
--- a/fs/qnx4/namei.c
+++ b/fs/qnx4/namei.c
@@ -95,7 +95,7 @@ static struct buffer_head *qnx4_find_entry(int len, struct inode *dir,
 	return NULL;
 }
 
-struct dentry * qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+struct dentry * qnx4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
 	int ino;
 	struct qnx4_inode_entry *de;
diff --git a/fs/qnx4/qnx4.h b/fs/qnx4/qnx4.h
index 244d4620189..34e2d329c97 100644
--- a/fs/qnx4/qnx4.h
+++ b/fs/qnx4/qnx4.h
@@ -23,7 +23,7 @@ struct qnx4_inode_info {
 };
 
 extern struct inode *qnx4_iget(struct super_block *, unsigned long);
-extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd);
+extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags);
 extern unsigned long qnx4_count_free_blocks(struct super_block *sb);
 extern unsigned long qnx4_block_map(struct inode *inode, long iblock);
 
diff --git a/fs/qnx6/namei.c b/fs/qnx6/namei.c
index 8a97289e04a..0561326a94f 100644
--- a/fs/qnx6/namei.c
+++ b/fs/qnx6/namei.c
@@ -13,7 +13,7 @@
 #include "qnx6.h"
 
 struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry,
-				struct nameidata *nd)
+				unsigned int flags)
 {
 	unsigned ino;
 	struct page *page;
diff --git a/fs/qnx6/qnx6.h b/fs/qnx6/qnx6.h
index 6c5e02a0b6a..b00fcc960d3 100644
--- a/fs/qnx6/qnx6.h
+++ b/fs/qnx6/qnx6.h
@@ -45,7 +45,7 @@ struct qnx6_inode_info {
 
 extern struct inode *qnx6_iget(struct super_block *sb, unsigned ino);
 extern struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry,
-					struct nameidata *nd);
+					unsigned int flags);
 
 #ifdef CONFIG_QNX6FS_DEBUG
 extern void qnx6_superblock_debug(struct qnx6_super_block *,
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 84e8a69cee9..1d9cf248c47 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -322,7 +322,7 @@ static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen,
 }
 
 static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
-				      struct nameidata *nd)
+				      unsigned int flags)
 {
 	int retval;
 	int lock_depth;
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index e64f6b5f7ae..77c5f217398 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -210,7 +210,7 @@ out:
  * look up an entry in a directory
  */
 static struct dentry *romfs_lookup(struct inode *dir, struct dentry *dentry,
-				   struct nameidata *nd)
+				   unsigned int flags)
 {
 	unsigned long offset, maxoff;
 	struct inode *inode;
diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c
index abcc58f3c15..7834a517f7f 100644
--- a/fs/squashfs/namei.c
+++ b/fs/squashfs/namei.c
@@ -134,7 +134,7 @@ out:
 
 
 static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry,
-				 struct nameidata *nd)
+				 unsigned int flags)
 {
 	const unsigned char *name = dentry->d_name.name;
 	int len = dentry->d_name.len;
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 038e74b3af8..efd373e3e0a 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -764,7 +764,7 @@ int sysfs_create_dir(struct kobject * kobj)
 }
 
 static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
-				struct nameidata *nd)
+				unsigned int flags)
 {
 	struct dentry *ret = NULL;
 	struct dentry *parent = dentry->d_parent;
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index d7466e29361..a8c4359cd0e 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -43,7 +43,7 @@ const struct dentry_operations sysv_dentry_operations = {
 	.d_hash		= sysv_hash,
 };
 
-static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
+static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, unsigned int flags)
 {
 	struct inode * inode = NULL;
 	ino_t ino;
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index a6d42efc76d..845b2df0831 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -184,7 +184,7 @@ static int dbg_check_name(const struct ubifs_info *c,
 }
 
 static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
-				   struct nameidata *nd)
+				   unsigned int flags)
 {
 	int err;
 	union ubifs_key key;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 18024178ac4..929cc205985 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -251,7 +251,7 @@ out_ok:
 }
 
 static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
-				 struct nameidata *nd)
+				 unsigned int flags)
 {
 	struct inode *inode = NULL;
 	struct fileIdentDesc cfi;
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index a2281cadefa..bc77fa170b9 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -46,7 +46,7 @@ static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode)
 	return err;
 }
 
-static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
+static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, unsigned int flags)
 {
 	struct inode * inode = NULL;
 	ino_t ino;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 1a25fd80279..b41cfba14fa 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -197,7 +197,7 @@ STATIC struct dentry *
 xfs_vn_lookup(
 	struct inode	*dir,
 	struct dentry	*dentry,
-	struct nameidata *nd)
+	unsigned int flags)
 {
 	struct xfs_inode *cip;
 	struct xfs_name	name;
@@ -222,7 +222,7 @@ STATIC struct dentry *
 xfs_vn_ci_lookup(
 	struct inode	*dir,
 	struct dentry	*dentry,
-	struct nameidata *nd)
+	unsigned int flags)
 {
 	struct xfs_inode *ip;
 	struct xfs_name	xname;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 17ee20dba86..7a71709b7fa 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1666,7 +1666,7 @@ struct file_operations {
 };
 
 struct inode_operations {
-	struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *);
+	struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
 	void * (*follow_link) (struct dentry *, struct nameidata *);
 	int (*permission) (struct inode *, int);
 	struct posix_acl * (*get_acl)(struct inode *, int);
@@ -2571,7 +2571,7 @@ extern int simple_write_end(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned copied,
 			struct page *page, void *fsdata);
 
-extern struct dentry *simple_lookup(struct inode *, struct dentry *, struct nameidata *);
+extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags);
 extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
 extern const struct file_operations simple_dir_operations;
 extern const struct inode_operations simple_dir_inode_operations;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b303dfc7dce..0cd1314acda 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -822,7 +822,7 @@ EXPORT_SYMBOL_GPL(cgroup_unlock);
  */
 
 static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
-static struct dentry *cgroup_lookup(struct inode *, struct dentry *, struct nameidata *);
+static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int);
 static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
 static int cgroup_populate_dir(struct cgroup *cgrp);
 static const struct inode_operations cgroup_dir_inode_operations;
@@ -2570,7 +2570,7 @@ static const struct inode_operations cgroup_dir_inode_operations = {
 	.rename = cgroup_rename,
 };
 
-static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
 	if (dentry->d_name.len > NAME_MAX)
 		return ERR_PTR(-ENAMETOOLONG);
-- 
cgit v1.2.3-70-g09d2


From 72bd866a01fc62ccbc466f3eb7599b14c937e96b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 10 Jun 2012 17:17:17 -0400
Subject: fs/namei.c: don't pass nameidata to __lookup_hash() and lookup_real()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 175e81b8f26..fc01090a96c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1080,7 +1080,7 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
  * dir->d_inode->i_mutex must be held
  */
 static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry,
-				  struct nameidata *nd)
+				  unsigned int flags)
 {
 	struct dentry *old;
 
@@ -1090,7 +1090,7 @@ static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry,
 		return ERR_PTR(-ENOENT);
 	}
 
-	old = dir->i_op->lookup(dir, dentry, nd ? nd->flags : 0);
+	old = dir->i_op->lookup(dir, dentry, flags);
 	if (unlikely(old)) {
 		dput(dentry);
 		dentry = old;
@@ -1099,16 +1099,16 @@ static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry,
 }
 
 static struct dentry *__lookup_hash(struct qstr *name,
-		struct dentry *base, struct nameidata *nd)
+		struct dentry *base, unsigned int flags)
 {
 	bool need_lookup;
 	struct dentry *dentry;
 
-	dentry = lookup_dcache(name, base, nd ? nd->flags : 0, &need_lookup);
+	dentry = lookup_dcache(name, base, flags, &need_lookup);
 	if (!need_lookup)
 		return dentry;
 
-	return lookup_real(base->d_inode, dentry, nd);
+	return lookup_real(base->d_inode, dentry, flags);
 }
 
 /*
@@ -1227,7 +1227,7 @@ static int lookup_slow(struct nameidata *nd, struct qstr *name,
 	BUG_ON(nd->inode != parent->d_inode);
 
 	mutex_lock(&parent->d_inode->i_mutex);
-	dentry = __lookup_hash(name, parent, nd);
+	dentry = __lookup_hash(name, parent, nd->flags);
 	mutex_unlock(&parent->d_inode->i_mutex);
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
@@ -1859,7 +1859,7 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
  */
 static struct dentry *lookup_hash(struct nameidata *nd)
 {
-	return __lookup_hash(&nd->last, nd->path.dentry, nd);
+	return __lookup_hash(&nd->last, nd->path.dentry, nd->flags);
 }
 
 /**
@@ -1906,7 +1906,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
 	if (err)
 		return ERR_PTR(err);
 
-	return __lookup_hash(&this, base, NULL);
+	return __lookup_hash(&this, base, 0);
 }
 
 int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
@@ -2310,7 +2310,7 @@ out:
 
 no_open:
 	if (need_lookup) {
-		dentry = lookup_real(dir, dentry, nd);
+		dentry = lookup_real(dir, dentry, nd->flags);
 		if (IS_ERR(dentry))
 			return PTR_ERR(dentry);
 
@@ -2372,7 +2372,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
 	if (need_lookup) {
 		BUG_ON(dentry->d_inode);
 
-		dentry = lookup_real(dir_inode, dentry, nd);
+		dentry = lookup_real(dir_inode, dentry, nd->flags);
 		if (IS_ERR(dentry))
 			return PTR_ERR(dentry);
 	}
-- 
cgit v1.2.3-70-g09d2


From ebfc3b49a7ac25920cb5be5445f602e51d2ea559 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 10 Jun 2012 18:05:36 -0400
Subject: don't pass nameidata to ->create()

boolean "does it have to be exclusive?" flag is passed instead;
Local filesystem should just ignore it - the object is guaranteed
not to be there yet.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/Locking | 2 +-
 Documentation/filesystems/porting | 6 ++++++
 Documentation/filesystems/vfs.txt | 2 +-
 fs/9p/vfs_inode.c                 | 2 +-
 fs/9p/vfs_inode_dotl.c            | 2 +-
 fs/affs/affs.h                    | 2 +-
 fs/affs/namei.c                   | 2 +-
 fs/afs/dir.c                      | 4 ++--
 fs/bad_inode.c                    | 2 +-
 fs/bfs/dir.c                      | 2 +-
 fs/btrfs/inode.c                  | 2 +-
 fs/ceph/dir.c                     | 2 +-
 fs/cifs/cifsfs.h                  | 2 +-
 fs/cifs/dir.c                     | 2 +-
 fs/coda/dir.c                     | 4 ++--
 fs/ecryptfs/inode.c               | 3 +--
 fs/exofs/namei.c                  | 2 +-
 fs/ext2/namei.c                   | 2 +-
 fs/ext3/namei.c                   | 2 +-
 fs/ext4/namei.c                   | 2 +-
 fs/fat/namei_msdos.c              | 2 +-
 fs/fat/namei_vfat.c               | 2 +-
 fs/fuse/dir.c                     | 2 +-
 fs/gfs2/inode.c                   | 5 +----
 fs/hfs/dir.c                      | 2 +-
 fs/hfsplus/dir.c                  | 2 +-
 fs/hostfs/hostfs_kern.c           | 2 +-
 fs/hpfs/namei.c                   | 2 +-
 fs/hugetlbfs/inode.c              | 2 +-
 fs/jffs2/dir.c                    | 4 ++--
 fs/jfs/namei.c                    | 2 +-
 fs/logfs/dir.c                    | 2 +-
 fs/minix/namei.c                  | 2 +-
 fs/namei.c                        | 3 +--
 fs/ncpfs/dir.c                    | 4 ++--
 fs/nfs/dir.c                      | 9 +++------
 fs/nilfs2/namei.c                 | 2 +-
 fs/ocfs2/dlmfs/dlmfs.c            | 2 +-
 fs/ocfs2/namei.c                  | 2 +-
 fs/omfs/dir.c                     | 2 +-
 fs/ramfs/inode.c                  | 2 +-
 fs/reiserfs/namei.c               | 2 +-
 fs/reiserfs/xattr.c               | 2 +-
 fs/sysv/namei.c                   | 2 +-
 fs/ubifs/dir.c                    | 2 +-
 fs/udf/namei.c                    | 2 +-
 fs/ufs/namei.c                    | 2 +-
 fs/xfs/xfs_iops.c                 | 2 +-
 include/linux/fs.h                | 2 +-
 ipc/mqueue.c                      | 2 +-
 mm/shmem.c                        | 2 +-
 51 files changed, 62 insertions(+), 64 deletions(-)

(limited to 'fs')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 33f2c8f1db8..e0cce2a5f82 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -37,7 +37,7 @@ d_manage:	no		no		yes (ref-walk)	maybe
 
 --------------------------- inode_operations --------------------------- 
 prototypes:
-	int (*create) (struct inode *,struct dentry *,umode_t, struct nameidata *);
+	int (*create) (struct inode *,struct dentry *,umode_t, bool);
 	struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
 	int (*link) (struct dentry *,struct inode *,struct dentry *);
 	int (*unlink) (struct inode *,struct dentry *);
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 690f573928b..2bef2b3843d 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -436,3 +436,9 @@ d_make_root() drops the reference to inode if dentry allocation fails.
 [mandatory]
 	The witch is dead!  Well, 2/3 of it, anyway.  ->d_revalidate() and
 ->lookup() do *not* take struct nameidata anymore; just the flags.
+--
+[mandatory]
+	->create() doesn't take struct nameidata *; unlike the previous
+two, it gets "is it an O_EXCL or equivalent?" boolean argument.  Note that
+local filesystems can ignore tha argument - they are guaranteed that the
+object doesn't exist.  It's remote/distributed ones that might care...
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index ee786354946..aa754e01464 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -341,7 +341,7 @@ This describes how the VFS can manipulate an inode in your
 filesystem. As of kernel 2.6.22, the following members are defined:
 
 struct inode_operations {
-	int (*create) (struct inode *,struct dentry *, umode_t, struct nameidata *);
+	int (*create) (struct inode *,struct dentry *, umode_t, bool);
 	struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
 	int (*link) (struct dentry *,struct inode *,struct dentry *);
 	int (*unlink) (struct inode *,struct dentry *);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index bb0d7627f95..cbf9dbb1b2a 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -725,7 +725,7 @@ error:
 
 static int
 v9fs_vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-		struct nameidata *nd)
+		bool excl)
 {
 	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
 	u32 perm = unixmode2p9mode(v9ses, mode);
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index b97619fed19..40895546e10 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -235,7 +235,7 @@ int v9fs_open_to_dotl_flags(int flags)
 
 static int
 v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
-		struct nameidata *nd)
+		bool excl)
 {
 	return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0);
 }
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 49e4e3457bf..6e216419f34 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -155,7 +155,7 @@ extern void	affs_free_bitmap(struct super_block *sb);
 extern int	affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len);
 extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int);
 extern int	affs_unlink(struct inode *dir, struct dentry *dentry);
-extern int	affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *);
+extern int	affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool);
 extern int	affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
 extern int	affs_rmdir(struct inode *dir, struct dentry *dentry);
 extern int	affs_link(struct dentry *olddentry, struct inode *dir,
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 7f9721be709..ff65884a783 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -255,7 +255,7 @@ affs_unlink(struct inode *dir, struct dentry *dentry)
 }
 
 int
-affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd)
+affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
 {
 	struct super_block *sb = dir->i_sb;
 	struct inode	*inode;
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index ffb33e36ea7..db477906ba4 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -29,7 +29,7 @@ static void afs_d_release(struct dentry *dentry);
 static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
 				  loff_t fpos, u64 ino, unsigned dtype);
 static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-		      struct nameidata *nd);
+		      bool excl);
 static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
 static int afs_rmdir(struct inode *dir, struct dentry *dentry);
 static int afs_unlink(struct inode *dir, struct dentry *dentry);
@@ -949,7 +949,7 @@ error:
  * create a regular file on an AFS filesystem
  */
 static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-		      struct nameidata *nd)
+		      bool excl)
 {
 	struct afs_file_status status;
 	struct afs_callback cb;
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index d27e73c69ba..b1342ffb3cf 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -173,7 +173,7 @@ static const struct file_operations bad_file_ops =
 };
 
 static int bad_inode_create (struct inode *dir, struct dentry *dentry,
-		umode_t mode, struct nameidata *nd)
+		umode_t mode, bool excl)
 {
 	return -EIO;
 }
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 3f1cd3b7168..2785ef91191 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -85,7 +85,7 @@ const struct file_operations bfs_dir_operations = {
 extern void dump_imap(const char *, struct super_block *);
 
 static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-						struct nameidata *nd)
+						bool excl)
 {
 	int err;
 	struct inode *inode;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e5f1f81b2d6..fb8d671d00e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4893,7 +4893,7 @@ out_unlock:
 }
 
 static int btrfs_create(struct inode *dir, struct dentry *dentry,
-			umode_t mode, struct nameidata *nd)
+			umode_t mode, bool excl)
 {
 	struct btrfs_trans_handle *trans;
 	struct btrfs_root *root = BTRFS_I(dir)->root;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 74b2f3c54fe..00894ff9246 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -730,7 +730,7 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
 }
 
 static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-		       struct nameidata *nd)
+		       bool excl)
 {
 	return ceph_mknod(dir, dentry, mode, 0);
 }
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 1abd31fd5bf..1c49c5a9b27 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -45,7 +45,7 @@ extern const struct address_space_operations cifs_addr_ops_smallbuf;
 extern const struct inode_operations cifs_dir_inode_ops;
 extern struct inode *cifs_root_iget(struct super_block *);
 extern int cifs_create(struct inode *, struct dentry *, umode_t,
-		       struct nameidata *);
+		       bool excl);
 extern int cifs_atomic_open(struct inode *, struct dentry *,
 			    struct file *, unsigned, umode_t,
 			    int *);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 2d732b9276e..a180265a10b 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -451,7 +451,7 @@ free_xid:
 }
 
 int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
-		struct nameidata *nd)
+		bool excl)
 {
 	int rc;
 	int xid = GetXid();
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index da35e965861..49fe52d2560 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -30,7 +30,7 @@
 #include "coda_int.h"
 
 /* dir inode-ops */
-static int coda_create(struct inode *dir, struct dentry *new, umode_t mode, struct nameidata *nd);
+static int coda_create(struct inode *dir, struct dentry *new, umode_t mode, bool excl);
 static struct dentry *coda_lookup(struct inode *dir, struct dentry *target, unsigned int flags);
 static int coda_link(struct dentry *old_dentry, struct inode *dir_inode, 
 		     struct dentry *entry);
@@ -188,7 +188,7 @@ static inline void coda_dir_drop_nlink(struct inode *dir)
 }
 
 /* creation routines: create, mknod, mkdir, link, symlink */
-static int coda_create(struct inode *dir, struct dentry *de, umode_t mode, struct nameidata *nd)
+static int coda_create(struct inode *dir, struct dentry *de, umode_t mode, bool excl)
 {
 	int error;
 	const char *name=de->d_name.name;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 4ab50c3f5ab..f079dafea75 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -240,7 +240,6 @@ out:
  * @dir: The inode of the directory in which to create the file.
  * @dentry: The eCryptfs dentry
  * @mode: The mode of the new file.
- * @nd: nameidata
  *
  * Creates a new file.
  *
@@ -248,7 +247,7 @@ out:
  */
 static int
 ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
-		umode_t mode, struct nameidata *nd)
+		umode_t mode, bool excl)
 {
 	struct inode *ecryptfs_inode;
 	int rc;
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 909ed6ea4cf..4731fd991ef 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -60,7 +60,7 @@ static struct dentry *exofs_lookup(struct inode *dir, struct dentry *dentry,
 }
 
 static int exofs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-			 struct nameidata *nd)
+			 bool excl)
 {
 	struct inode *inode = exofs_new_inode(dir, mode);
 	int err = PTR_ERR(inode);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index b3e6778cd1e..9ba7de0e590 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -94,7 +94,7 @@ struct dentry *ext2_get_parent(struct dentry *child)
  * If the create succeeds, we fill in the inode information
  * with d_instantiate(). 
  */
-static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode, struct nameidata *nd)
+static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode, bool excl)
 {
 	struct inode *inode;
 
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 86d25f3f604..85286dbe275 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1690,7 +1690,7 @@ static int ext3_add_nondir(handle_t *handle,
  * with d_instantiate().
  */
 static int ext3_create (struct inode * dir, struct dentry * dentry, umode_t mode,
-		struct nameidata *nd)
+		bool excl)
 {
 	handle_t *handle;
 	struct inode * inode;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 4fba3cd42e2..eca3e48a62f 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2091,7 +2091,7 @@ static int ext4_add_nondir(handle_t *handle,
  * with d_instantiate().
  */
 static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-		       struct nameidata *nd)
+		       bool excl)
 {
 	handle_t *handle;
 	struct inode *inode;
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 47c608b0529..70d993a9380 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -265,7 +265,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
 
 /***** Create a file */
 static int msdos_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-			struct nameidata *nd)
+			bool excl)
 {
 	struct super_block *sb = dir->i_sb;
 	struct inode *inode = NULL;
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 44152571524..6cc48065243 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -772,7 +772,7 @@ error:
 }
 
 static int vfat_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-		       struct nameidata *nd)
+		       bool excl)
 {
 	struct super_block *sb = dir->i_sb;
 	struct inode *inode;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 385235ac137..8964cf3999b 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -611,7 +611,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
 }
 
 static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
-		       struct nameidata *nd)
+		       bool excl)
 {
 	return fuse_mknod(dir, entry, mode, 0);
 }
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 19e443b7335..867674785fc 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -755,11 +755,8 @@ fail:
  */
 
 static int gfs2_create(struct inode *dir, struct dentry *dentry,
-		       umode_t mode, struct nameidata *nd)
+		       umode_t mode, bool excl)
 {
-	int excl = 0;
-	if (nd && (nd->flags & LOOKUP_EXCL))
-		excl = 1;
 	return gfs2_create_inode(dir, dentry, S_IFREG | mode, 0, NULL, 0, excl);
 }
 
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 617b1ed71f5..422dde2ec0a 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -187,7 +187,7 @@ static int hfs_dir_release(struct inode *inode, struct file *file)
  * the directory and the name (and its length) of the new file.
  */
 static int hfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-		      struct nameidata *nd)
+		      bool excl)
 {
 	struct inode *inode;
 	int res;
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 90c2f78b2c7..378ea0c43f1 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -465,7 +465,7 @@ out:
 }
 
 static int hfsplus_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-			  struct nameidata *nd)
+			  bool excl)
 {
 	return hfsplus_mknod(dir, dentry, mode, 0);
 }
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 0ea005228e1..124146543aa 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -553,7 +553,7 @@ static int read_name(struct inode *ino, char *name)
 }
 
 int hostfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-		  struct nameidata *nd)
+		  bool excl)
 {
 	struct inode *inode;
 	char *name;
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 9083ef8af58..bc9082482f6 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -115,7 +115,7 @@ bail:
 	return err;
 }
 
-static int hpfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd)
+static int hpfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
 {
 	const unsigned char *name = dentry->d_name.name;
 	unsigned len = dentry->d_name.len;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index cc9281b6c62..e13e9bdb0bf 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -565,7 +565,7 @@ static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mod
 	return retval;
 }
 
-static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd)
+static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
 {
 	return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0);
 }
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 6a601673f89..23245191c5b 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -25,7 +25,7 @@
 static int jffs2_readdir (struct file *, void *, filldir_t);
 
 static int jffs2_create (struct inode *,struct dentry *,umode_t,
-			 struct nameidata *);
+			 bool);
 static struct dentry *jffs2_lookup (struct inode *,struct dentry *,
 				    unsigned int);
 static int jffs2_link (struct dentry *,struct inode *,struct dentry *);
@@ -175,7 +175,7 @@ static int jffs2_readdir(struct file *filp, void *dirent, filldir_t filldir)
 
 
 static int jffs2_create(struct inode *dir_i, struct dentry *dentry,
-			umode_t mode, struct nameidata *nd)
+			umode_t mode, bool excl)
 {
 	struct jffs2_raw_inode *ri;
 	struct jffs2_inode_info *f, *dir_f;
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 34fe85555ca..c426293e16c 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -73,7 +73,7 @@ static inline void free_ea_wmap(struct inode *inode)
  *
  */
 static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode,
-		struct nameidata *nd)
+		bool excl)
 {
 	int rc = 0;
 	tid_t tid;		/* transaction id */
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 8a3dcc615b3..26e4a941532 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -502,7 +502,7 @@ static int logfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 }
 
 static int logfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-		struct nameidata *nd)
+		bool excl)
 {
 	struct inode *inode;
 
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 1f245240ea0..0db73d9dd66 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -55,7 +55,7 @@ static int minix_mknod(struct inode * dir, struct dentry *dentry, umode_t mode,
 }
 
 static int minix_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-		struct nameidata *nd)
+		bool excl)
 {
 	return minix_mknod(dir, dentry, mode, 0);
 }
diff --git a/fs/namei.c b/fs/namei.c
index fc01090a96c..fd71156bfd7 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2082,7 +2082,6 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 		struct nameidata *nd)
 {
 	int error = may_create(dir, dentry);
-
 	if (error)
 		return error;
 
@@ -2093,7 +2092,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 	error = security_inode_create(dir, dentry, mode);
 	if (error)
 		return error;
-	error = dir->i_op->create(dir, dentry, mode, nd);
+	error = dir->i_op->create(dir, dentry, mode, !nd || (nd->flags & LOOKUP_EXCL));
 	if (!error)
 		fsnotify_create(dir, dentry);
 	return error;
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index a0cff22bfc9..4117e7b377b 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -30,7 +30,7 @@ static void ncp_do_readdir(struct file *, void *, filldir_t,
 
 static int ncp_readdir(struct file *, void *, filldir_t);
 
-static int ncp_create(struct inode *, struct dentry *, umode_t, struct nameidata *);
+static int ncp_create(struct inode *, struct dentry *, umode_t, bool);
 static struct dentry *ncp_lookup(struct inode *, struct dentry *, unsigned int);
 static int ncp_unlink(struct inode *, struct dentry *);
 static int ncp_mkdir(struct inode *, struct dentry *, umode_t);
@@ -980,7 +980,7 @@ out:
 }
 
 static int ncp_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-		struct nameidata *nd)
+		bool excl)
 {
 	return ncp_create_new(dir, dentry, mode, 0, 0);
 }
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 8f21205c589..a6b1c7fb823 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -47,7 +47,7 @@ static int nfs_opendir(struct inode *, struct file *);
 static int nfs_closedir(struct inode *, struct file *);
 static int nfs_readdir(struct file *, void *, filldir_t);
 static struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int);
-static int nfs_create(struct inode *, struct dentry *, umode_t, struct nameidata *);
+static int nfs_create(struct inode *, struct dentry *, umode_t, bool);
 static int nfs_mkdir(struct inode *, struct dentry *, umode_t);
 static int nfs_rmdir(struct inode *, struct dentry *);
 static int nfs_unlink(struct inode *, struct dentry *);
@@ -1589,11 +1589,11 @@ out_error:
  * reply path made it appear to have failed.
  */
 static int nfs_create(struct inode *dir, struct dentry *dentry,
-		umode_t mode, struct nameidata *nd)
+		umode_t mode, bool excl)
 {
 	struct iattr attr;
+	int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT;
 	int error;
-	int open_flags = O_CREAT|O_EXCL;
 
 	dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
 			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
@@ -1601,9 +1601,6 @@ static int nfs_create(struct inode *dir, struct dentry *dentry,
 	attr.ia_mode = mode;
 	attr.ia_valid = ATTR_MODE;
 
-	if (nd && !(nd->flags & LOOKUP_EXCL))
-		open_flags = O_CREAT;
-
 	error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
 	if (error != 0)
 		goto out_err;
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 5e5f779db76..1d0c0b84c5a 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -85,7 +85,7 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
  * with d_instantiate().
  */
 static int nilfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-			struct nameidata *nd)
+			bool excl)
 {
 	struct inode *inode;
 	struct nilfs_transaction_info ti;
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index e31d6ae013a..83b6f98e066 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -526,7 +526,7 @@ bail:
 static int dlmfs_create(struct inode *dir,
 			struct dentry *dentry,
 			umode_t mode,
-			struct nameidata *nd)
+			bool excl)
 {
 	int status = 0;
 	struct inode *inode;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index fd71f6e5841..f1fd0741162 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -618,7 +618,7 @@ static int ocfs2_mkdir(struct inode *dir,
 static int ocfs2_create(struct inode *dir,
 			struct dentry *dentry,
 			umode_t mode,
-			struct nameidata *nd)
+			bool excl)
 {
 	int ret;
 
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index 3d254872e64..fb5b3ff79dc 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -285,7 +285,7 @@ static int omfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 }
 
 static int omfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-		struct nameidata *nd)
+		bool excl)
 {
 	return omfs_add_node(dir, dentry, mode | S_IFREG);
 }
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index a1fdabe21de..eab8c09d380 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -114,7 +114,7 @@ static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
 	return retval;
 }
 
-static int ramfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd)
+static int ramfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
 {
 	return ramfs_mknod(dir, dentry, mode | S_IFREG, 0);
 }
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 1d9cf248c47..3916be1a330 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -573,7 +573,7 @@ static int new_inode_init(struct inode *inode, struct inode *dir, umode_t mode)
 }
 
 static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-			   struct nameidata *nd)
+			   bool excl)
 {
 	int retval;
 	struct inode *inode;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index e6ad8d7dea6..d319963aeb1 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -62,7 +62,7 @@
 static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
 {
 	BUG_ON(!mutex_is_locked(&dir->i_mutex));
-	return dir->i_op->create(dir, dentry, mode, NULL);
+	return dir->i_op->create(dir, dentry, mode, true);
 }
 #endif
 
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index a8c4359cd0e..1c0d5f26476 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -80,7 +80,7 @@ static int sysv_mknod(struct inode * dir, struct dentry * dentry, umode_t mode,
 	return err;
 }
 
-static int sysv_create(struct inode * dir, struct dentry * dentry, umode_t mode, struct nameidata *nd)
+static int sysv_create(struct inode * dir, struct dentry * dentry, umode_t mode, bool excl)
 {
 	return sysv_mknod(dir, dentry, mode, 0);
 }
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 845b2df0831..b1cca89aeb6 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -246,7 +246,7 @@ out:
 }
 
 static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-			struct nameidata *nd)
+			bool excl)
 {
 	struct inode *inode;
 	struct ubifs_info *c = dir->i_sb->s_fs_info;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 929cc205985..544b2799a91 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -551,7 +551,7 @@ static int udf_delete_entry(struct inode *inode, struct fileIdentDesc *fi,
 }
 
 static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-		      struct nameidata *nd)
+		      bool excl)
 {
 	struct udf_fileident_bh fibh;
 	struct inode *inode;
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index bc77fa170b9..90d74b8f8eb 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -71,7 +71,7 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, unsi
  * with d_instantiate(). 
  */
 static int ufs_create (struct inode * dir, struct dentry * dentry, umode_t mode,
-		struct nameidata *nd)
+		bool excl)
 {
 	struct inode *inode;
 	int err;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index b41cfba14fa..9c4340f5c3e 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -179,7 +179,7 @@ xfs_vn_create(
 	struct inode	*dir,
 	struct dentry	*dentry,
 	umode_t		mode,
-	struct nameidata *nd)
+	bool		flags)
 {
 	return xfs_vn_mknod(dir, dentry, mode, 0);
 }
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7a71709b7fa..df869d248e7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1674,7 +1674,7 @@ struct inode_operations {
 	int (*readlink) (struct dentry *, char __user *,int);
 	void (*put_link) (struct dentry *, struct nameidata *, void *);
 
-	int (*create) (struct inode *,struct dentry *,umode_t,struct nameidata *);
+	int (*create) (struct inode *,struct dentry *, umode_t, bool);
 	int (*link) (struct dentry *,struct inode *,struct dentry *);
 	int (*unlink) (struct inode *,struct dentry *);
 	int (*symlink) (struct inode *,struct dentry *,const char *);
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 8ce57691e7b..da2c188688b 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -413,7 +413,7 @@ static void mqueue_evict_inode(struct inode *inode)
 }
 
 static int mqueue_create(struct inode *dir, struct dentry *dentry,
-				umode_t mode, struct nameidata *nd)
+				umode_t mode, bool excl)
 {
 	struct inode *inode;
 	struct mq_attr *attr = dentry->d_fsdata;
diff --git a/mm/shmem.c b/mm/shmem.c
index bd106361be4..c15b998e5a8 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1877,7 +1877,7 @@ static int shmem_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 }
 
 static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-		struct nameidata *nd)
+		bool excl)
 {
 	return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
 }
-- 
cgit v1.2.3-70-g09d2


From 312b63fba9e88a0dcf800834b8ede8716bcc1e17 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 10 Jun 2012 18:09:36 -0400
Subject: don't pass nameidata * to vfs_create()

all we want is a boolean flag, same as the method gets now

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/cachefiles/namei.c | 2 +-
 fs/ecryptfs/inode.c   | 2 +-
 fs/namei.c            | 9 +++++----
 fs/nfsd/vfs.c         | 4 ++--
 include/linux/fs.h    | 2 +-
 ipc/mqueue.c          | 2 +-
 6 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 7f0771d3894..b0b5f7cdfff 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -567,7 +567,7 @@ lookup_again:
 			if (ret < 0)
 				goto create_error;
 			start = jiffies;
-			ret = vfs_create(dir->d_inode, next, S_IFREG, NULL);
+			ret = vfs_create(dir->d_inode, next, S_IFREG, true);
 			cachefiles_hist(cachefiles_create_histogram, start);
 			if (ret < 0)
 				goto create_error;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index f079dafea75..da52cdbe838 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -173,7 +173,7 @@ ecryptfs_do_create(struct inode *directory_inode,
 		inode = ERR_CAST(lower_dir_dentry);
 		goto out;
 	}
-	rc = vfs_create(lower_dir_dentry->d_inode, lower_dentry, mode, NULL);
+	rc = vfs_create(lower_dir_dentry->d_inode, lower_dentry, mode, true);
 	if (rc) {
 		printk(KERN_ERR "%s: Failure to create dentry in lower fs; "
 		       "rc = [%d]\n", __func__, rc);
diff --git a/fs/namei.c b/fs/namei.c
index fd71156bfd7..ffcd4e114b6 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2079,7 +2079,7 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
 }
 
 int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-		struct nameidata *nd)
+		bool want_excl)
 {
 	int error = may_create(dir, dentry);
 	if (error)
@@ -2092,7 +2092,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 	error = security_inode_create(dir, dentry, mode);
 	if (error)
 		return error;
-	error = dir->i_op->create(dir, dentry, mode, !nd || (nd->flags & LOOKUP_EXCL));
+	error = dir->i_op->create(dir, dentry, mode, want_excl);
 	if (!error)
 		fsnotify_create(dir, dentry);
 	return error;
@@ -2396,7 +2396,8 @@ static int lookup_open(struct nameidata *nd, struct path *path,
 		error = security_path_mknod(&nd->path, dentry, mode, 0);
 		if (error)
 			goto out_dput;
-		error = vfs_create(dir->d_inode, dentry, mode, nd);
+		error = vfs_create(dir->d_inode, dentry, mode,
+				   nd->flags & LOOKUP_EXCL);
 		if (error)
 			goto out_dput;
 	}
@@ -2883,7 +2884,7 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
 		goto out_drop_write;
 	switch (mode & S_IFMT) {
 		case 0: case S_IFREG:
-			error = vfs_create(path.dentry->d_inode,dentry,mode,NULL);
+			error = vfs_create(path.dentry->d_inode,dentry,mode,true);
 			break;
 		case S_IFCHR: case S_IFBLK:
 			error = vfs_mknod(path.dentry->d_inode,dentry,mode,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index c8bd9c3be7f..05d9eee6be3 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1329,7 +1329,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	err = 0;
 	switch (type) {
 	case S_IFREG:
-		host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
+		host_err = vfs_create(dirp, dchild, iap->ia_mode, true);
 		if (!host_err)
 			nfsd_check_ignore_resizing(iap);
 		break;
@@ -1492,7 +1492,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		goto out;
 	}
 
-	host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
+	host_err = vfs_create(dirp, dchild, iap->ia_mode, true);
 	if (host_err < 0) {
 		fh_drop_write(fhp);
 		goto out_nfserr;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index df869d248e7..2f857e9eeb3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1571,7 +1571,7 @@ extern void unlock_super(struct super_block *);
 /*
  * VFS helper functions..
  */
-extern int vfs_create(struct inode *, struct dentry *, umode_t, struct nameidata *);
+extern int vfs_create(struct inode *, struct dentry *, umode_t, bool);
 extern int vfs_mkdir(struct inode *, struct dentry *, umode_t);
 extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
 extern int vfs_symlink(struct inode *, struct dentry *, const char *);
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index da2c188688b..2dee38d53c7 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -751,7 +751,7 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct dentry *dir,
 	ret = mnt_want_write(ipc_ns->mq_mnt);
 	if (ret)
 		goto out;
-	ret = vfs_create(dir->d_inode, dentry, mode, NULL);
+	ret = vfs_create(dir->d_inode, dentry, mode, true);
 	dentry->d_fsdata = NULL;
 	if (ret)
 		goto out_drop_write;
-- 
cgit v1.2.3-70-g09d2


From 1acf0af9b981027f3e73e93f0d3f85abdc794f71 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 14 Jun 2012 16:13:46 +0100
Subject: VFS: Fix the banner comment on lookup_open()

Since commit 197e37d9, the banner comment on lookup_open() no longer matches
what the function returns.  It used to return a struct file pointer or NULL and
now it returns an integer and is passed the struct file pointer it is to use
amongst its arguments.  Update the comment to reflect this.

Also add a banner comment to atomic_open().

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index ffcd4e114b6..5abab917690 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2192,6 +2192,19 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode)
 	return security_inode_create(dir->dentry->d_inode, dentry, mode);
 }
 
+/*
+ * Attempt to atomically look up, create and open a file from a negative
+ * dentry.
+ *
+ * Returns 0 if successful.  The file will have been created and attached to
+ * @file by the filesystem calling finish_open().
+ *
+ * Returns 1 if the file was looked up only or didn't need creating.  The
+ * caller will need to perform the open themselves.  @path will have been
+ * updated to point to the new dentry.  This may be negative.
+ *
+ * Returns an error code otherwise.
+ */
 static int atomic_open(struct nameidata *nd, struct dentry *dentry,
 			struct path *path, struct file *file,
 			const struct open_flags *op,
@@ -2336,12 +2349,22 @@ looked_up:
 }
 
 /*
- * Lookup, maybe create and open the last component
+ * Look up and maybe create and open the last component.
  *
  * Must be called with i_mutex held on parent.
  *
- * Returns open file or NULL on success, error otherwise.  NULL means no open
- * was performed, only lookup.
+ * Returns 0 if the file was successfully atomically created (if necessary) and
+ * opened.  In this case the file will be returned attached to @file.
+ *
+ * Returns 1 if the file was not completely opened at this time, though lookups
+ * and creations will have been performed and the dentry returned in @path will
+ * be positive upon return if O_CREAT was specified.  If O_CREAT wasn't
+ * specified then a negative dentry may be returned.
+ *
+ * An error code is returned otherwise.
+ *
+ * FILE_CREATE will be set in @*opened if the dentry was created and will be
+ * cleared otherwise prior to returning.
  */
 static int lookup_open(struct nameidata *nd, struct path *path,
 			struct file *file,
-- 
cgit v1.2.3-70-g09d2


From 79714f72d3b964611997de512cb29198c9f2dbbb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 15 Jun 2012 03:01:42 +0400
Subject: get rid of kern_path_parent()

all callers want the same thing, actually - a kinda-sorta analog of
kern_path_create().  I.e. they want parent vfsmount/dentry (with
->i_mutex held, to make sure the child dentry is still their child)
+ the child dentry.

Signed-off-by Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/base/devtmpfs.c | 98 +++++++++++++++++++++----------------------------
 fs/namei.c              | 22 ++++++++++-
 include/linux/namei.h   |  2 +-
 kernel/audit_watch.c    | 25 ++-----------
 4 files changed, 65 insertions(+), 82 deletions(-)

(limited to 'fs')

diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index 765c3a28077..d91a3a0b232 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -227,33 +227,24 @@ static int handle_create(const char *nodename, umode_t mode, struct device *dev)
 
 static int dev_rmdir(const char *name)
 {
-	struct nameidata nd;
+	struct path parent;
 	struct dentry *dentry;
 	int err;
 
-	err = kern_path_parent(name, &nd);
-	if (err)
-		return err;
-
-	mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
-	dentry = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len);
-	if (!IS_ERR(dentry)) {
-		if (dentry->d_inode) {
-			if (dentry->d_inode->i_private == &thread)
-				err = vfs_rmdir(nd.path.dentry->d_inode,
-						dentry);
-			else
-				err = -EPERM;
-		} else {
-			err = -ENOENT;
-		}
-		dput(dentry);
+	dentry = kern_path_locked(name, &parent);
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+	if (dentry->d_inode) {
+		if (dentry->d_inode->i_private == &thread)
+			err = vfs_rmdir(parent.dentry->d_inode, dentry);
+		else
+			err = -EPERM;
 	} else {
-		err = PTR_ERR(dentry);
+		err = -ENOENT;
 	}
-
-	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
-	path_put(&nd.path);
+	dput(dentry);
+	mutex_unlock(&parent.dentry->d_inode->i_mutex);
+	path_put(&parent);
 	return err;
 }
 
@@ -305,50 +296,43 @@ static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *sta
 
 static int handle_remove(const char *nodename, struct device *dev)
 {
-	struct nameidata nd;
+	struct path parent;
 	struct dentry *dentry;
-	struct kstat stat;
 	int deleted = 1;
 	int err;
 
-	err = kern_path_parent(nodename, &nd);
-	if (err)
-		return err;
+	dentry = kern_path_locked(nodename, &parent);
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
 
-	mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
-	dentry = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len);
-	if (!IS_ERR(dentry)) {
-		if (dentry->d_inode) {
-			err = vfs_getattr(nd.path.mnt, dentry, &stat);
-			if (!err && dev_mynode(dev, dentry->d_inode, &stat)) {
-				struct iattr newattrs;
-				/*
-				 * before unlinking this node, reset permissions
-				 * of possible references like hardlinks
-				 */
-				newattrs.ia_uid = 0;
-				newattrs.ia_gid = 0;
-				newattrs.ia_mode = stat.mode & ~0777;
-				newattrs.ia_valid =
-					ATTR_UID|ATTR_GID|ATTR_MODE;
-				mutex_lock(&dentry->d_inode->i_mutex);
-				notify_change(dentry, &newattrs);
-				mutex_unlock(&dentry->d_inode->i_mutex);
-				err = vfs_unlink(nd.path.dentry->d_inode,
-						 dentry);
-				if (!err || err == -ENOENT)
-					deleted = 1;
-			}
-		} else {
-			err = -ENOENT;
+	if (dentry->d_inode) {
+		struct kstat stat;
+		err = vfs_getattr(parent.mnt, dentry, &stat);
+		if (!err && dev_mynode(dev, dentry->d_inode, &stat)) {
+			struct iattr newattrs;
+			/*
+			 * before unlinking this node, reset permissions
+			 * of possible references like hardlinks
+			 */
+			newattrs.ia_uid = 0;
+			newattrs.ia_gid = 0;
+			newattrs.ia_mode = stat.mode & ~0777;
+			newattrs.ia_valid =
+				ATTR_UID|ATTR_GID|ATTR_MODE;
+			mutex_lock(&dentry->d_inode->i_mutex);
+			notify_change(dentry, &newattrs);
+			mutex_unlock(&dentry->d_inode->i_mutex);
+			err = vfs_unlink(parent.dentry->d_inode, dentry);
+			if (!err || err == -ENOENT)
+				deleted = 1;
 		}
-		dput(dentry);
 	} else {
-		err = PTR_ERR(dentry);
+		err = -ENOENT;
 	}
-	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+	dput(dentry);
+	mutex_unlock(&parent.dentry->d_inode->i_mutex);
 
-	path_put(&nd.path);
+	path_put(&parent);
 	if (deleted && strchr(nodename, '/'))
 		delete_path(nodename);
 	return err;
diff --git a/fs/namei.c b/fs/namei.c
index 5abab917690..6b29a51bef5 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1814,9 +1814,27 @@ static int do_path_lookup(int dfd, const char *name,
 	return retval;
 }
 
-int kern_path_parent(const char *name, struct nameidata *nd)
+/* does lookup, returns the object with parent locked */
+struct dentry *kern_path_locked(const char *name, struct path *path)
 {
-	return do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, nd);
+	struct nameidata nd;
+	struct dentry *d;
+	int err = do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, &nd);
+	if (err)
+		return ERR_PTR(err);
+	if (nd.last_type != LAST_NORM) {
+		path_put(&nd.path);
+		return ERR_PTR(-EINVAL);
+	}
+	mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+	d = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len);
+	if (IS_ERR(d)) {
+		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+		path_put(&nd.path);
+		return d;
+	}
+	*path = nd.path;
+	return d;
 }
 
 int kern_path(const char *name, unsigned int flags, struct path *path)
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 23d85987921..f5931489e15 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -67,7 +67,7 @@ extern int kern_path(const char *, unsigned, struct path *);
 
 extern struct dentry *kern_path_create(int, const char *, struct path *, int);
 extern struct dentry *user_path_create(int, const char __user *, struct path *, int);
-extern int kern_path_parent(const char *, struct nameidata *);
+extern struct dentry *kern_path_locked(const char *, struct path *);
 extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
 			   const char *, unsigned int, struct path *);
 
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index e683869365d..3823281401b 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -355,34 +355,15 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
 /* Get path information necessary for adding watches. */
 static int audit_get_nd(struct audit_watch *watch, struct path *parent)
 {
-	struct nameidata nd;
-	struct dentry *d;
-	int err;
-
-	err = kern_path_parent(watch->path, &nd);
-	if (err)
-		return err;
-
-	if (nd.last_type != LAST_NORM) {
-		path_put(&nd.path);
-		return -EINVAL;
-	}
-
-	mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
-	d = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len);
-	if (IS_ERR(d)) {
-		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
-		path_put(&nd.path);
+	struct dentry *d = kern_path_locked(watch->path, parent);
+	if (IS_ERR(d))
 		return PTR_ERR(d);
-	}
+	mutex_unlock(&parent->dentry->d_inode->i_mutex);
 	if (d->d_inode) {
 		/* update watch filter fields */
 		watch->dev = d->d_inode->i_sb->s_dev;
 		watch->ino = d->d_inode->i_ino;
 	}
-	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
-
-	*parent = nd.path;
 	dput(d);
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From 469796d10590341c53cff0a2959254eaf5d465de Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 7 Jun 2012 20:51:39 -0400
Subject: sysfs: switch to ->s_d_op and ->d_release()

a) ->d_iput() is wrong here - what we do to inode is completely usual, it's
dentry->d_fsdata that we want to drop.  Just use ->d_release().

b) switch to ->s_d_op - no need to play with d_set_d_op()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/sysfs/dir.c   | 16 ++++++----------
 fs/sysfs/mount.c |  1 +
 fs/sysfs/sysfs.h |  1 +
 3 files changed, 8 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index efd373e3e0a..77c44ce493f 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -300,7 +300,7 @@ void release_sysfs_dirent(struct sysfs_dirent * sd)
 static int sysfs_dentry_delete(const struct dentry *dentry)
 {
 	struct sysfs_dirent *sd = dentry->d_fsdata;
-	return !!(sd->s_flags & SYSFS_FLAG_REMOVED);
+	return !(sd && !(sd->s_flags & SYSFS_FLAG_REMOVED));
 }
 
 static int sysfs_dentry_revalidate(struct dentry *dentry, unsigned int flags)
@@ -355,18 +355,15 @@ out_bad:
 	return 0;
 }
 
-static void sysfs_dentry_iput(struct dentry *dentry, struct inode *inode)
+static void sysfs_dentry_release(struct dentry *dentry)
 {
-	struct sysfs_dirent * sd = dentry->d_fsdata;
-
-	sysfs_put(sd);
-	iput(inode);
+	sysfs_put(dentry->d_fsdata);
 }
 
-static const struct dentry_operations sysfs_dentry_ops = {
+const struct dentry_operations sysfs_dentry_ops = {
 	.d_revalidate	= sysfs_dentry_revalidate,
 	.d_delete	= sysfs_dentry_delete,
-	.d_iput		= sysfs_dentry_iput,
+	.d_release	= sysfs_dentry_release,
 };
 
 struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
@@ -786,6 +783,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
 		ret = ERR_PTR(-ENOENT);
 		goto out_unlock;
 	}
+	dentry->d_fsdata = sysfs_get(sd);
 
 	/* attach dentry and inode */
 	inode = sysfs_get_inode(dir->i_sb, sd);
@@ -797,8 +795,6 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
 	/* instantiate and hash dentry */
 	ret = d_find_alias(inode);
 	if (!ret) {
-		d_set_d_op(dentry, &sysfs_dentry_ops);
-		dentry->d_fsdata = sysfs_get(sd);
 		d_add(dentry, inode);
 	} else {
 		d_move(ret, dentry);
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 52c3bdb66a8..c15a7a3572e 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -68,6 +68,7 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
 	}
 	root->d_fsdata = &sysfs_root;
 	sb->s_root = root;
+	sb->s_d_op = &sysfs_dentry_ops;
 	return 0;
 }
 
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 661a9639570..d73c0932bbd 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -157,6 +157,7 @@ extern struct kmem_cache *sysfs_dir_cachep;
  */
 extern struct mutex sysfs_mutex;
 extern spinlock_t sysfs_assoc_lock;
+extern const struct dentry_operations sysfs_dentry_ops;
 
 extern const struct file_operations sysfs_dir_operations;
 extern const struct inode_operations sysfs_dir_inode_operations;
-- 
cgit v1.2.3-70-g09d2


From e77fb7cef87856d9d35f2f4d617d0b97148ee7c2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 7 Jun 2012 20:56:54 -0400
Subject: sysfs: just use d_materialise_unique()

same as for nfs et.al.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/sysfs/dir.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 77c44ce493f..a5cf784f9cc 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -793,14 +793,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
 	}
 
 	/* instantiate and hash dentry */
-	ret = d_find_alias(inode);
-	if (!ret) {
-		d_add(dentry, inode);
-	} else {
-		d_move(ret, dentry);
-		iput(inode);
-	}
-
+	ret = d_materialise_unique(dentry, inode);
  out_unlock:
 	mutex_unlock(&sysfs_mutex);
 	return ret;
-- 
cgit v1.2.3-70-g09d2


From ee3efa91e240f513898050ef305a49a653c8ed90 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 Jun 2012 15:59:33 -0400
Subject: __d_unalias() should refuse to move mountpoints

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index 015586f1ffc..8086636bf79 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2387,14 +2387,13 @@ static struct dentry *__d_unalias(struct inode *inode,
 		struct dentry *dentry, struct dentry *alias)
 {
 	struct mutex *m1 = NULL, *m2 = NULL;
-	struct dentry *ret;
+	struct dentry *ret = ERR_PTR(-EBUSY);
 
 	/* If alias and dentry share a parent, then no extra locks required */
 	if (alias->d_parent == dentry->d_parent)
 		goto out_unalias;
 
 	/* See lock_rename() */
-	ret = ERR_PTR(-EBUSY);
 	if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
 		goto out_err;
 	m1 = &dentry->d_sb->s_vfs_rename_mutex;
@@ -2402,8 +2401,10 @@ static struct dentry *__d_unalias(struct inode *inode,
 		goto out_err;
 	m2 = &alias->d_parent->d_inode->i_mutex;
 out_unalias:
-	__d_move(alias, dentry);
-	ret = alias;
+	if (likely(!d_mountpoint(alias))) {
+		__d_move(alias, dentry);
+		ret = alias;
+	}
 out_err:
 	spin_unlock(&inode->i_lock);
 	if (m2)
-- 
cgit v1.2.3-70-g09d2


From c3b1a350846a11dd1054cb7832e098aa37025deb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 9 Jun 2012 20:28:22 -0400
Subject: debugfs: make sure that debugfs_create_file() gets used only for
 regulars

It, debugfs_create_dir() and debugfs_create_link() use the common helper
now.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/debugfs/inode.c | 56 +++++++++++++++++++++++++++++++++---------------------
 1 file changed, 34 insertions(+), 22 deletions(-)

(limited to 'fs')

diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index b80bc846a15..d423b966bc7 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -335,6 +335,30 @@ static int debugfs_create_by_name(const char *name, umode_t mode,
 	return error;
 }
 
+struct dentry *__create_file(const char *name, umode_t mode,
+				   struct dentry *parent, void *data,
+				   const struct file_operations *fops)
+{
+	struct dentry *dentry = NULL;
+	int error;
+
+	pr_debug("debugfs: creating file '%s'\n",name);
+
+	error = simple_pin_fs(&debug_fs_type, &debugfs_mount,
+			      &debugfs_mount_count);
+	if (error)
+		goto exit;
+
+	error = debugfs_create_by_name(name, mode, parent, &dentry,
+				       data, fops);
+	if (error) {
+		dentry = NULL;
+		simple_release_fs(&debugfs_mount, &debugfs_mount_count);
+	}
+exit:
+	return dentry;
+}
+
 /**
  * debugfs_create_file - create a file in the debugfs filesystem
  * @name: a pointer to a string containing the name of the file to create.
@@ -365,25 +389,15 @@ struct dentry *debugfs_create_file(const char *name, umode_t mode,
 				   struct dentry *parent, void *data,
 				   const struct file_operations *fops)
 {
-	struct dentry *dentry = NULL;
-	int error;
-
-	pr_debug("debugfs: creating file '%s'\n",name);
-
-	error = simple_pin_fs(&debug_fs_type, &debugfs_mount,
-			      &debugfs_mount_count);
-	if (error)
-		goto exit;
-
-	error = debugfs_create_by_name(name, mode, parent, &dentry,
-				       data, fops);
-	if (error) {
-		dentry = NULL;
-		simple_release_fs(&debugfs_mount, &debugfs_mount_count);
-		goto exit;
+	switch (mode & S_IFMT) {
+	case S_IFREG:
+	case 0:
+		break;
+	default:
+		BUG();
 	}
-exit:
-	return dentry;
+
+	return __create_file(name, mode, parent, data, fops);
 }
 EXPORT_SYMBOL_GPL(debugfs_create_file);
 
@@ -407,8 +421,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_file);
  */
 struct dentry *debugfs_create_dir(const char *name, struct dentry *parent)
 {
-	return debugfs_create_file(name, 
-				   S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
+	return __create_file(name, S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
 				   parent, NULL, NULL);
 }
 EXPORT_SYMBOL_GPL(debugfs_create_dir);
@@ -446,8 +459,7 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
 	if (!link)
 		return NULL;
 
-	result = debugfs_create_file(name, S_IFLNK | S_IRWXUGO, parent, link,
-				     NULL);
+	result = __create_file(name, S_IFLNK | S_IRWXUGO, parent, link, NULL);
 	if (!result)
 		kfree(link);
 	return result;
-- 
cgit v1.2.3-70-g09d2


From cfa57c11b0d5a80f7bffa1ab35bc46892127817f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 9 Jun 2012 20:33:28 -0400
Subject: debugfs: fold debugfs_create_by_name() into the only caller

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/debugfs/inode.c | 53 ++++++++++++++++++++---------------------------------
 1 file changed, 20 insertions(+), 33 deletions(-)

(limited to 'fs')

diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index d423b966bc7..79f53f3ce7c 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -293,13 +293,19 @@ static struct file_system_type debug_fs_type = {
 	.kill_sb =	kill_litter_super,
 };
 
-static int debugfs_create_by_name(const char *name, umode_t mode,
-				  struct dentry *parent,
-				  struct dentry **dentry,
-				  void *data,
-				  const struct file_operations *fops)
+struct dentry *__create_file(const char *name, umode_t mode,
+				   struct dentry *parent, void *data,
+				   const struct file_operations *fops)
 {
-	int error = 0;
+	struct dentry *dentry = NULL;
+	int error;
+
+	pr_debug("debugfs: creating file '%s'\n",name);
+
+	error = simple_pin_fs(&debug_fs_type, &debugfs_mount,
+			      &debugfs_mount_count);
+	if (error)
+		goto exit;
 
 	/* If the parent is not specified, we create it in the root.
 	 * We need the root dentry to do this, which is in the super 
@@ -309,48 +315,29 @@ static int debugfs_create_by_name(const char *name, umode_t mode,
 	if (!parent)
 		parent = debugfs_mount->mnt_root;
 
-	*dentry = NULL;
+	dentry = NULL;
 	mutex_lock(&parent->d_inode->i_mutex);
-	*dentry = lookup_one_len(name, parent, strlen(name));
-	if (!IS_ERR(*dentry)) {
+	dentry = lookup_one_len(name, parent, strlen(name));
+	if (!IS_ERR(dentry)) {
 		switch (mode & S_IFMT) {
 		case S_IFDIR:
-			error = debugfs_mkdir(parent->d_inode, *dentry, mode,
+			error = debugfs_mkdir(parent->d_inode, dentry, mode,
 					      data, fops);
 			break;
 		case S_IFLNK:
-			error = debugfs_link(parent->d_inode, *dentry, mode,
+			error = debugfs_link(parent->d_inode, dentry, mode,
 					     data, fops);
 			break;
 		default:
-			error = debugfs_create(parent->d_inode, *dentry, mode,
+			error = debugfs_create(parent->d_inode, dentry, mode,
 					       data, fops);
 			break;
 		}
-		dput(*dentry);
+		dput(dentry);
 	} else
-		error = PTR_ERR(*dentry);
+		error = PTR_ERR(dentry);
 	mutex_unlock(&parent->d_inode->i_mutex);
 
-	return error;
-}
-
-struct dentry *__create_file(const char *name, umode_t mode,
-				   struct dentry *parent, void *data,
-				   const struct file_operations *fops)
-{
-	struct dentry *dentry = NULL;
-	int error;
-
-	pr_debug("debugfs: creating file '%s'\n",name);
-
-	error = simple_pin_fs(&debug_fs_type, &debugfs_mount,
-			      &debugfs_mount_count);
-	if (error)
-		goto exit;
-
-	error = debugfs_create_by_name(name, mode, parent, &dentry,
-				       data, fops);
 	if (error) {
 		dentry = NULL;
 		simple_release_fs(&debugfs_mount, &debugfs_mount_count);
-- 
cgit v1.2.3-70-g09d2


From ac481d6ca4081bdd348cbd84963d1ece843a3407 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 9 Jun 2012 20:40:20 -0400
Subject: debugfs: get rid of useless arguments to debugfs_{mkdir,symlink}

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/debugfs/inode.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 79f53f3ce7c..d17c20fd74e 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -54,13 +54,12 @@ static struct inode *debugfs_get_inode(struct super_block *sb, umode_t mode, dev
 			break;
 		case S_IFLNK:
 			inode->i_op = &debugfs_link_operations;
-			inode->i_fop = fops;
 			inode->i_private = data;
 			break;
 		case S_IFDIR:
 			inode->i_op = &simple_dir_inode_operations;
-			inode->i_fop = fops ? fops : &simple_dir_operations;
-			inode->i_private = data;
+			inode->i_fop = &simple_dir_operations;
+			inode->i_private = NULL;
 
 			/* directory inodes start off with i_nlink == 2
 			 * (for "." entry) */
@@ -91,13 +90,12 @@ static int debugfs_mknod(struct inode *dir, struct dentry *dentry,
 	return error;
 }
 
-static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode,
-			 void *data, const struct file_operations *fops)
+static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	int res;
 
 	mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR;
-	res = debugfs_mknod(dir, dentry, mode, 0, data, fops);
+	res = debugfs_mknod(dir, dentry, mode, 0, NULL, NULL);
 	if (!res) {
 		inc_nlink(dir);
 		fsnotify_mkdir(dir, dentry);
@@ -106,10 +104,10 @@ static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode,
 }
 
 static int debugfs_link(struct inode *dir, struct dentry *dentry, umode_t mode,
-			void *data, const struct file_operations *fops)
+			void *data)
 {
 	mode = (mode & S_IALLUGO) | S_IFLNK;
-	return debugfs_mknod(dir, dentry, mode, 0, data, fops);
+	return debugfs_mknod(dir, dentry, mode, 0, data, NULL);
 }
 
 static int debugfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
@@ -321,12 +319,12 @@ struct dentry *__create_file(const char *name, umode_t mode,
 	if (!IS_ERR(dentry)) {
 		switch (mode & S_IFMT) {
 		case S_IFDIR:
-			error = debugfs_mkdir(parent->d_inode, dentry, mode,
-					      data, fops);
+			error = debugfs_mkdir(parent->d_inode, dentry, mode);
+					      
 			break;
 		case S_IFLNK:
 			error = debugfs_link(parent->d_inode, dentry, mode,
-					     data, fops);
+					     data);
 			break;
 		default:
 			error = debugfs_create(parent->d_inode, dentry, mode,
-- 
cgit v1.2.3-70-g09d2


From 408ef013cc9e2f94a14f7ccbbe52ddfb18437a99 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Mon, 18 Jun 2012 10:47:03 -0400
Subject: fs: move path_put on failure out of ->follow_link

Currently the non-nd_set_link based versions of ->follow_link are expected
to do a path_put(&nd->path) on failure.  This calling convention is unexpected,
undocumented and doesn't match what the nd_set_link-based instances do.

Move the path_put out of the only non-nd_set_link based ->follow_link
instance into the caller.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c     |  3 +--
 fs/proc/base.c | 12 ++++++++----
 2 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 6b29a51bef5..a9b94c62c30 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -624,7 +624,7 @@ follow_link(struct path *link, struct nameidata *nd, void **p)
 	*p = dentry->d_inode->i_op->follow_link(dentry, nd);
 	error = PTR_ERR(*p);
 	if (IS_ERR(*p))
-		goto out_put_link;
+		goto out_put_nd_path;
 
 	error = 0;
 	s = nd_get_link(nd);
@@ -646,7 +646,6 @@ follow_link(struct path *link, struct nameidata *nd, void **p)
 
 out_put_nd_path:
 	path_put(&nd->path);
-out_put_link:
 	path_put(link);
 	return error;
 }
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 8eaa5ea1c61..3bd5ac1ff01 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1427,16 +1427,20 @@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
 static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	struct inode *inode = dentry->d_inode;
+	struct path path;
 	int error = -EACCES;
 
-	/* We don't need a base pointer in the /proc filesystem */
-	path_put(&nd->path);
-
 	/* Are we allowed to snoop on the tasks file descriptors? */
 	if (!proc_fd_access_allowed(inode))
 		goto out;
 
-	error = PROC_I(inode)->op.proc_get_link(dentry, &nd->path);
+	error = PROC_I(inode)->op.proc_get_link(dentry, &path);
+	if (error)
+		goto out;
+
+	path_put(&nd->path);
+	nd->path = path;
+	return NULL;
 out:
 	return ERR_PTR(error);
 }
-- 
cgit v1.2.3-70-g09d2


From b5fb63c18315c5510c1d0636179c057e0c761c77 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Mon, 18 Jun 2012 10:47:04 -0400
Subject: fs: add nd_jump_link

Add a helper that abstracts out the jump to an already parsed struct path
from ->follow_link operation from procfs.  Not only does this clean up
the code by moving the two sides of this game into a single helper, but
it also prepares for making struct nameidata private to namei.c

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c            | 27 +++++++++++++++++----------
 fs/proc/base.c        |  3 +--
 include/linux/namei.h |  2 ++
 3 files changed, 20 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index a9b94c62c30..0e1b9c3eb36 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -586,6 +586,21 @@ static inline void path_to_nameidata(const struct path *path,
 	nd->path.dentry = path->dentry;
 }
 
+/*
+ * Helper to directly jump to a known parsed path from ->follow_link,
+ * caller must have taken a reference to path beforehand.
+ */
+void nd_jump_link(struct nameidata *nd, struct path *path)
+{
+	path_put(&nd->path);
+
+	nd->path = *path;
+	nd->inode = nd->path.dentry->d_inode;
+	nd->flags |= LOOKUP_JUMPED;
+
+	BUG_ON(nd->inode->i_op->follow_link);
+}
+
 static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
 {
 	struct inode *inode = link->dentry->d_inode;
@@ -630,17 +645,9 @@ follow_link(struct path *link, struct nameidata *nd, void **p)
 	s = nd_get_link(nd);
 	if (s) {
 		error = __vfs_follow_link(nd, s);
-	} else if (nd->last_type == LAST_BIND) {
-		nd->flags |= LOOKUP_JUMPED;
-		nd->inode = nd->path.dentry->d_inode;
-		if (nd->inode->i_op->follow_link) {
-			/* stepped on a _really_ weird one */
-			path_put(&nd->path);
-			error = -ELOOP;
-		}
+		if (unlikely(error))
+			put_link(nd, link, *p);
 	}
-	if (unlikely(error))
-		put_link(nd, link, *p);
 
 	return error;
 
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3bd5ac1ff01..2772208338f 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1438,8 +1438,7 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
 	if (error)
 		goto out;
 
-	path_put(&nd->path);
-	nd->path = path;
+	nd_jump_link(nd, &path);
 	return NULL;
 out:
 	return ERR_PTR(error);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index f5931489e15..d2ef8b34b96 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -80,6 +80,8 @@ extern int follow_up(struct path *);
 extern struct dentry *lock_rename(struct dentry *, struct dentry *);
 extern void unlock_rename(struct dentry *, struct dentry *);
 
+extern void nd_jump_link(struct nameidata *nd, struct path *path);
+
 static inline void nd_set_link(struct nameidata *nd, char *path)
 {
 	nd->saved_names[nd->depth] = path;
-- 
cgit v1.2.3-70-g09d2


From c4107b3097465e25f7d6a9b0ac0518b07b24e774 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 20 Jun 2012 09:55:58 +1000
Subject: notify_change(): check that i_mutex is held

Cc: Djalal Harouni <tixxdz@opendz.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/attr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/attr.c b/fs/attr.c
index 0da90951d27..29e38a1f7f7 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -171,6 +171,8 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
 	struct timespec now;
 	unsigned int ia_valid = attr->ia_valid;
 
+	WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
+
 	if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) {
 		if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
 			return -EPERM;
@@ -250,5 +252,4 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
 
 	return error;
 }
-
 EXPORT_SYMBOL(notify_change);
-- 
cgit v1.2.3-70-g09d2


From 85d7d618c17a09cfd824c1ad4483c19e6f9637ff Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 23 Jun 2012 22:41:54 +0400
Subject: mark_files_ro(): don't bother with mntget/mntput

mnt_drop_write_file() is safe under any lock

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/file_table.c b/fs/file_table.c
index a305d9e2d1b..9ace2781931 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -483,10 +483,8 @@ void mark_files_ro(struct super_block *sb)
 {
 	struct file *f;
 
-retry:
 	lg_global_lock(&files_lglock);
 	do_file_list_for_each_entry(sb, f) {
-		struct vfsmount *mnt;
 		if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
 		       continue;
 		if (!file_count(f))
@@ -499,12 +497,7 @@ retry:
 		if (file_check_writeable(f) != 0)
 			continue;
 		file_release_write(f);
-		mnt = mntget(f->f_path.mnt);
-		/* This can sleep, so we can't hold the spinlock. */
-		lg_global_unlock(&files_lglock);
-		mnt_drop_write(mnt);
-		mntput(mnt);
-		goto retry;
+		mnt_drop_write_file(f);
 	} while_file_list_for_each_entry;
 	lg_global_unlock(&files_lglock);
 }
-- 
cgit v1.2.3-70-g09d2


From c3c4f69424db0760239762d36d0b1b6ae524008b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 23 Jun 2012 22:49:45 +0400
Subject: do_dentry_open(): close the race with mark_files_ro() in failure exit

we want to take it out of mark_files_ro() reach *before* we start
checking if we ought to drop write access.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/open.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/open.c b/fs/open.c
index 124ccb1d38a..764cc9c201a 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -727,6 +727,7 @@ static int do_dentry_open(struct file *f,
 
 cleanup_all:
 	fops_put(f->f_op);
+	file_sb_list_del(f);
 	if (f->f_mode & FMODE_WRITE) {
 		put_write_access(inode);
 		if (!special_file(inode->i_mode)) {
@@ -740,7 +741,6 @@ cleanup_all:
 			mnt_drop_write(f->f_path.mnt);
 		}
 	}
-	file_sb_list_del(f);
 cleanup_file:
 	path_put(&f->f_path);
 	f->f_path.mnt = NULL;
-- 
cgit v1.2.3-70-g09d2


From 55e4def0a6e79e7eb53017c4935adfed76510cd7 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 25 Jun 2012 12:55:09 +0100
Subject: VFS: Make chown() and lchown() call fchownat()

Make the chown() and lchown() syscalls jump to the fchownat() syscall with the
appropriate extra arguments.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/open.c | 41 +++++++----------------------------------
 1 file changed, 7 insertions(+), 34 deletions(-)

(limited to 'fs')

diff --git a/fs/open.c b/fs/open.c
index 764cc9c201a..75bea868ef8 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -537,25 +537,6 @@ static int chown_common(struct path *path, uid_t user, gid_t group)
 	return error;
 }
 
-SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group)
-{
-	struct path path;
-	int error;
-
-	error = user_path(filename, &path);
-	if (error)
-		goto out;
-	error = mnt_want_write(path.mnt);
-	if (error)
-		goto out_release;
-	error = chown_common(&path, user, group);
-	mnt_drop_write(path.mnt);
-out_release:
-	path_put(&path);
-out:
-	return error;
-}
-
 SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
 		gid_t, group, int, flag)
 {
@@ -583,23 +564,15 @@ out:
 	return error;
 }
 
-SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group)
+SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group)
 {
-	struct path path;
-	int error;
+	return sys_fchownat(AT_FDCWD, filename, user, group, 0);
+}
 
-	error = user_lpath(filename, &path);
-	if (error)
-		goto out;
-	error = mnt_want_write(path.mnt);
-	if (error)
-		goto out_release;
-	error = chown_common(&path, user, group);
-	mnt_drop_write(path.mnt);
-out_release:
-	path_put(&path);
-out:
-	return error;
+SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group)
+{
+	return sys_fchownat(AT_FDCWD, filename, user, group,
+			    AT_SYMLINK_NOFOLLOW);
 }
 
 SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
-- 
cgit v1.2.3-70-g09d2


From be34d1a3bc4b6f357a49acb55ae870c81337e4f0 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 25 Jun 2012 12:55:18 +0100
Subject: VFS: Make clone_mnt()/copy_tree()/collect_mounts() return errors

copy_tree() can theoretically fail in a case other than ENOMEM, but always
returns NULL which is interpreted by callers as -ENOMEM.  Change it to return
an explicit error.

Also change clone_mnt() for consistency and because union mounts will add new
error cases.

Thanks to Andreas Gruenbacher <agruen@suse.de> for a bug fix.
[AV: folded braino fix by Dan Carpenter]

Original-author: Valerie Aurora <vaurora@redhat.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Cc: Valerie Aurora <valerie.aurora@gmail.com>
Cc: Andreas Gruenbacher <agruen@suse.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c      | 120 ++++++++++++++++++++++++++++------------------------
 fs/pnode.c          |   5 ++-
 kernel/audit_tree.c |  10 ++---
 3 files changed, 73 insertions(+), 62 deletions(-)

(limited to 'fs')

diff --git a/fs/namespace.c b/fs/namespace.c
index 8f412abcb67..be1b07a774f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -708,56 +708,60 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
 					int flag)
 {
 	struct super_block *sb = old->mnt.mnt_sb;
-	struct mount *mnt = alloc_vfsmnt(old->mnt_devname);
+	struct mount *mnt;
+	int err;
 
-	if (mnt) {
-		if (flag & (CL_SLAVE | CL_PRIVATE))
-			mnt->mnt_group_id = 0; /* not a peer of original */
-		else
-			mnt->mnt_group_id = old->mnt_group_id;
-
-		if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
-			int err = mnt_alloc_group_id(mnt);
-			if (err)
-				goto out_free;
-		}
+	mnt = alloc_vfsmnt(old->mnt_devname);
+	if (!mnt)
+		return ERR_PTR(-ENOMEM);
 
-		mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD;
-		atomic_inc(&sb->s_active);
-		mnt->mnt.mnt_sb = sb;
-		mnt->mnt.mnt_root = dget(root);
-		mnt->mnt_mountpoint = mnt->mnt.mnt_root;
-		mnt->mnt_parent = mnt;
-		br_write_lock(&vfsmount_lock);
-		list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
-		br_write_unlock(&vfsmount_lock);
+	if (flag & (CL_SLAVE | CL_PRIVATE))
+		mnt->mnt_group_id = 0; /* not a peer of original */
+	else
+		mnt->mnt_group_id = old->mnt_group_id;
 
-		if (flag & CL_SLAVE) {
-			list_add(&mnt->mnt_slave, &old->mnt_slave_list);
-			mnt->mnt_master = old;
-			CLEAR_MNT_SHARED(mnt);
-		} else if (!(flag & CL_PRIVATE)) {
-			if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
-				list_add(&mnt->mnt_share, &old->mnt_share);
-			if (IS_MNT_SLAVE(old))
-				list_add(&mnt->mnt_slave, &old->mnt_slave);
-			mnt->mnt_master = old->mnt_master;
-		}
-		if (flag & CL_MAKE_SHARED)
-			set_mnt_shared(mnt);
-
-		/* stick the duplicate mount on the same expiry list
-		 * as the original if that was on one */
-		if (flag & CL_EXPIRE) {
-			if (!list_empty(&old->mnt_expire))
-				list_add(&mnt->mnt_expire, &old->mnt_expire);
-		}
+	if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
+		err = mnt_alloc_group_id(mnt);
+		if (err)
+			goto out_free;
+	}
+
+	mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD;
+	atomic_inc(&sb->s_active);
+	mnt->mnt.mnt_sb = sb;
+	mnt->mnt.mnt_root = dget(root);
+	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
+	mnt->mnt_parent = mnt;
+	br_write_lock(&vfsmount_lock);
+	list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
+	br_write_unlock(&vfsmount_lock);
+
+	if (flag & CL_SLAVE) {
+		list_add(&mnt->mnt_slave, &old->mnt_slave_list);
+		mnt->mnt_master = old;
+		CLEAR_MNT_SHARED(mnt);
+	} else if (!(flag & CL_PRIVATE)) {
+		if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
+			list_add(&mnt->mnt_share, &old->mnt_share);
+		if (IS_MNT_SLAVE(old))
+			list_add(&mnt->mnt_slave, &old->mnt_slave);
+		mnt->mnt_master = old->mnt_master;
+	}
+	if (flag & CL_MAKE_SHARED)
+		set_mnt_shared(mnt);
+
+	/* stick the duplicate mount on the same expiry list
+	 * as the original if that was on one */
+	if (flag & CL_EXPIRE) {
+		if (!list_empty(&old->mnt_expire))
+			list_add(&mnt->mnt_expire, &old->mnt_expire);
 	}
+
 	return mnt;
 
  out_free:
 	free_vfsmnt(mnt);
-	return NULL;
+	return ERR_PTR(err);
 }
 
 static inline void mntfree(struct mount *mnt)
@@ -1242,11 +1246,12 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
 	struct path path;
 
 	if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
-		return NULL;
+		return ERR_PTR(-EINVAL);
 
 	res = q = clone_mnt(mnt, dentry, flag);
-	if (!q)
-		goto Enomem;
+	if (IS_ERR(q))
+		return q;
+
 	q->mnt_mountpoint = mnt->mnt_mountpoint;
 
 	p = mnt;
@@ -1268,8 +1273,8 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
 			path.mnt = &q->mnt;
 			path.dentry = p->mnt_mountpoint;
 			q = clone_mnt(p, p->mnt.mnt_root, flag);
-			if (!q)
-				goto Enomem;
+			if (IS_ERR(q))
+				goto out;
 			br_write_lock(&vfsmount_lock);
 			list_add_tail(&q->mnt_list, &res->mnt_list);
 			attach_mnt(q, &path);
@@ -1277,7 +1282,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
 		}
 	}
 	return res;
-Enomem:
+out:
 	if (res) {
 		LIST_HEAD(umount_list);
 		br_write_lock(&vfsmount_lock);
@@ -1285,9 +1290,11 @@ Enomem:
 		br_write_unlock(&vfsmount_lock);
 		release_mounts(&umount_list);
 	}
-	return NULL;
+	return q;
 }
 
+/* Caller should check returned pointer for errors */
+
 struct vfsmount *collect_mounts(struct path *path)
 {
 	struct mount *tree;
@@ -1295,7 +1302,9 @@ struct vfsmount *collect_mounts(struct path *path)
 	tree = copy_tree(real_mount(path->mnt), path->dentry,
 			 CL_COPY_ALL | CL_PRIVATE);
 	up_write(&namespace_sem);
-	return tree ? &tree->mnt : NULL;
+	if (IS_ERR(tree))
+		return NULL;
+	return &tree->mnt;
 }
 
 void drop_collected_mounts(struct vfsmount *mnt)
@@ -1590,14 +1599,15 @@ static int do_loopback(struct path *path, char *old_name,
 	if (!check_mnt(real_mount(path->mnt)) || !check_mnt(old))
 		goto out2;
 
-	err = -ENOMEM;
 	if (recurse)
 		mnt = copy_tree(old, old_path.dentry, 0);
 	else
 		mnt = clone_mnt(old, old_path.dentry, 0);
 
-	if (!mnt)
-		goto out2;
+	if (IS_ERR(mnt)) {
+		err = PTR_ERR(mnt);
+		goto out;
+	}
 
 	err = graft_tree(mnt, path);
 	if (err) {
@@ -2211,10 +2221,10 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 	down_write(&namespace_sem);
 	/* First pass: copy the tree topology */
 	new = copy_tree(old, old->mnt.mnt_root, CL_COPY_ALL | CL_EXPIRE);
-	if (!new) {
+	if (IS_ERR(new)) {
 		up_write(&namespace_sem);
 		kfree(new_ns);
-		return ERR_PTR(-ENOMEM);
+		return ERR_CAST(new);
 	}
 	new_ns->root = new;
 	br_write_lock(&vfsmount_lock);
diff --git a/fs/pnode.c b/fs/pnode.c
index bed378db075..3e000a51ac0 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -237,8 +237,9 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
 
 		source =  get_source(m, prev_dest_mnt, prev_src_mnt, &type);
 
-		if (!(child = copy_tree(source, source->mnt.mnt_root, type))) {
-			ret = -ENOMEM;
+		child = copy_tree(source, source->mnt.mnt_root, type);
+		if (IS_ERR(child)) {
+			ret = PTR_ERR(child);
 			list_splice(tree_list, tmp_list.prev);
 			goto out;
 		}
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 5bf0790497e..3a5ca582ba1 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -595,7 +595,7 @@ void audit_trim_trees(void)
 
 		root_mnt = collect_mounts(&path);
 		path_put(&path);
-		if (!root_mnt)
+		if (IS_ERR(root_mnt))
 			goto skip_it;
 
 		spin_lock(&hash_lock);
@@ -669,8 +669,8 @@ int audit_add_tree_rule(struct audit_krule *rule)
 		goto Err;
 	mnt = collect_mounts(&path);
 	path_put(&path);
-	if (!mnt) {
-		err = -ENOMEM;
+	if (IS_ERR(mnt)) {
+		err = PTR_ERR(mnt);
 		goto Err;
 	}
 
@@ -719,8 +719,8 @@ int audit_tag_tree(char *old, char *new)
 		return err;
 	tagged = collect_mounts(&path2);
 	path_put(&path2);
-	if (!tagged)
-		return -ENOMEM;
+	if (IS_ERR(tagged))
+		return PTR_ERR(tagged);
 
 	err = kern_path(old, 0, &path1);
 	if (err) {
-- 
cgit v1.2.3-70-g09d2


From f015f1267b23d3530d3f874243fb83cb5f443005 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 25 Jun 2012 12:55:28 +0100
Subject: VFS: Comment mount following code

Add comments describing what the directions "up" and "down" mean and ref count
handling to the VFS mount following family of functions.

Signed-off-by: Valerie Aurora <vaurora@redhat.com> (Original author)
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c     | 10 ++++++++++
 fs/namespace.c | 16 ++++++++++++++--
 2 files changed, 24 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 0e1b9c3eb36..c6dcb4c8f86 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -672,6 +672,16 @@ static int follow_up_rcu(struct path *path)
 	return 1;
 }
 
+/*
+ * follow_up - Find the mountpoint of path's vfsmount
+ *
+ * Given a path, find the mountpoint of its source file system.
+ * Replace @path with the path of the mountpoint in the parent mount.
+ * Up is towards /.
+ *
+ * Return 1 if we went up a level and 0 if we were already at the
+ * root.
+ */
 int follow_up(struct path *path)
 {
 	struct mount *mnt = real_mount(path->mnt);
diff --git a/fs/namespace.c b/fs/namespace.c
index be1b07a774f..c53d3381b0d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -515,8 +515,20 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
 }
 
 /*
- * lookup_mnt increments the ref count before returning
- * the vfsmount struct.
+ * lookup_mnt - Return the first child mount mounted at path
+ *
+ * "First" means first mounted chronologically.  If you create the
+ * following mounts:
+ *
+ * mount /dev/sda1 /mnt
+ * mount /dev/sda2 /mnt
+ * mount /dev/sda3 /mnt
+ *
+ * Then lookup_mnt() on the base /mnt dentry in the root mount will
+ * return successively the root dentry and vfsmount of /dev/sda1, then
+ * /dev/sda2, then /dev/sda3, then NULL.
+ *
+ * lookup_mnt takes a reference to the found vfsmount.
  */
 struct vfsmount *lookup_mnt(struct path *path)
 {
-- 
cgit v1.2.3-70-g09d2


From 9249e17fe094d853d1ef7475dd559a2cc7e23d42 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 25 Jun 2012 12:55:37 +0100
Subject: VFS: Pass mount flags to sget()

Pass mount flags to sget() so that it can use them in initialising a new
superblock before the set function is called.  They could also be passed to the
compare function.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/mtd/mtdsuper.c |  4 +---
 fs/9p/vfs_super.c      |  4 ++--
 fs/afs/super.c         |  3 +--
 fs/btrfs/super.c       |  4 ++--
 fs/ceph/super.c        |  2 +-
 fs/cifs/cifsfs.c       |  9 ++++-----
 fs/devpts/inode.c      |  6 +++---
 fs/ecryptfs/main.c     |  3 +--
 fs/gfs2/ops_fstype.c   |  5 ++---
 fs/libfs.c             |  4 ++--
 fs/logfs/super.c       |  3 +--
 fs/nfs/super.c         |  2 +-
 fs/nilfs2/super.c      |  4 ++--
 fs/proc/root.c         |  3 +--
 fs/reiserfs/procfs.c   |  2 +-
 fs/super.c             | 22 +++++++++++-----------
 fs/sysfs/mount.c       |  3 +--
 fs/ubifs/super.c       |  3 +--
 include/linux/fs.h     |  2 +-
 kernel/cgroup.c        |  2 +-
 20 files changed, 40 insertions(+), 50 deletions(-)

(limited to 'fs')

diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c
index a90bfe79916..334da5f583c 100644
--- a/drivers/mtd/mtdsuper.c
+++ b/drivers/mtd/mtdsuper.c
@@ -63,7 +63,7 @@ static struct dentry *mount_mtd_aux(struct file_system_type *fs_type, int flags,
 	struct super_block *sb;
 	int ret;
 
-	sb = sget(fs_type, get_sb_mtd_compare, get_sb_mtd_set, mtd);
+	sb = sget(fs_type, get_sb_mtd_compare, get_sb_mtd_set, flags, mtd);
 	if (IS_ERR(sb))
 		goto out_error;
 
@@ -74,8 +74,6 @@ static struct dentry *mount_mtd_aux(struct file_system_type *fs_type, int flags,
 	pr_debug("MTDSB: New superblock for device %d (\"%s\")\n",
 	      mtd->index, mtd->name);
 
-	sb->s_flags = flags;
-
 	ret = fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
 	if (ret < 0) {
 		deactivate_locked_super(sb);
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 8c92a9ba833..137d5039689 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -89,7 +89,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
 	if (v9ses->cache)
 		sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_CACHE_SIZE;
 
-	sb->s_flags = flags | MS_ACTIVE | MS_DIRSYNC | MS_NOATIME;
+	sb->s_flags |= MS_ACTIVE | MS_DIRSYNC | MS_NOATIME;
 	if (!v9ses->cache)
 		sb->s_flags |= MS_SYNCHRONOUS;
 
@@ -137,7 +137,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
 		goto close_session;
 	}
 
-	sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
+	sb = sget(fs_type, NULL, v9fs_set_super, flags, v9ses);
 	if (IS_ERR(sb)) {
 		retval = PTR_ERR(sb);
 		goto clunk_fid;
diff --git a/fs/afs/super.c b/fs/afs/super.c
index f02b31e7e64..df8c6047c2a 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -395,7 +395,7 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
 	as->volume = vol;
 
 	/* allocate a deviceless superblock */
-	sb = sget(fs_type, afs_test_super, afs_set_super, as);
+	sb = sget(fs_type, afs_test_super, afs_set_super, flags, as);
 	if (IS_ERR(sb)) {
 		ret = PTR_ERR(sb);
 		afs_put_volume(vol);
@@ -406,7 +406,6 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
 	if (!sb->s_root) {
 		/* initial superblock/root creation */
 		_debug("create");
-		sb->s_flags = flags;
 		ret = afs_fill_super(sb, &params);
 		if (ret < 0) {
 			deactivate_locked_super(sb);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index e23991574fd..b19d7556772 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1068,7 +1068,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 	}
 
 	bdev = fs_devices->latest_bdev;
-	s = sget(fs_type, btrfs_test_super, btrfs_set_super, fs_info);
+	s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | MS_NOSEC,
+		 fs_info);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
 		goto error_close_devices;
@@ -1082,7 +1083,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 	} else {
 		char b[BDEVNAME_SIZE];
 
-		s->s_flags = flags | MS_NOSEC;
 		strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
 		btrfs_sb(s)->bdev_holder = fs_type;
 		error = btrfs_fill_super(s, fs_devices, data,
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 1e67dd7305a..7076109f014 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -871,7 +871,7 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type,
 
 	if (ceph_test_opt(fsc->client, NOSHARE))
 		compare_super = NULL;
-	sb = sget(fs_type, compare_super, ceph_set_super, fsc);
+	sb = sget(fs_type, compare_super, ceph_set_super, flags, fsc);
 	if (IS_ERR(sb)) {
 		res = ERR_CAST(sb);
 		goto out;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index c0c2751a757..a7610cfedf0 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -637,7 +637,10 @@ cifs_do_mount(struct file_system_type *fs_type,
 	mnt_data.cifs_sb = cifs_sb;
 	mnt_data.flags = flags;
 
-	sb = sget(fs_type, cifs_match_super, cifs_set_super, &mnt_data);
+	/* BB should we make this contingent on mount parm? */
+	flags |= MS_NODIRATIME | MS_NOATIME;
+
+	sb = sget(fs_type, cifs_match_super, cifs_set_super, flags, &mnt_data);
 	if (IS_ERR(sb)) {
 		root = ERR_CAST(sb);
 		cifs_umount(cifs_sb);
@@ -648,10 +651,6 @@ cifs_do_mount(struct file_system_type *fs_type,
 		cFYI(1, "Use existing superblock");
 		cifs_umount(cifs_sb);
 	} else {
-		sb->s_flags = flags;
-		/* BB should we make this contingent on mount parm? */
-		sb->s_flags |= MS_NODIRATIME | MS_NOATIME;
-
 		rc = cifs_read_super(sb);
 		if (rc) {
 			root = ERR_PTR(rc);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 979c1e309c7..14afbabe654 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -439,15 +439,15 @@ static struct dentry *devpts_mount(struct file_system_type *fs_type,
 		return ERR_PTR(error);
 
 	if (opts.newinstance)
-		s = sget(fs_type, NULL, set_anon_super, NULL);
+		s = sget(fs_type, NULL, set_anon_super, flags, NULL);
 	else
-		s = sget(fs_type, compare_init_pts_sb, set_anon_super, NULL);
+		s = sget(fs_type, compare_init_pts_sb, set_anon_super, flags,
+			 NULL);
 
 	if (IS_ERR(s))
 		return ERR_CAST(s);
 
 	if (!s->s_root) {
-		s->s_flags = flags;
 		error = devpts_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
 		if (error)
 			goto out_undo_sget;
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 68954937a07..7edeb3d893c 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -499,13 +499,12 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
 		goto out;
 	}
 
-	s = sget(fs_type, NULL, set_anon_super, NULL);
+	s = sget(fs_type, NULL, set_anon_super, flags, NULL);
 	if (IS_ERR(s)) {
 		rc = PTR_ERR(s);
 		goto out;
 	}
 
-	s->s_flags = flags;
 	rc = bdi_setup_and_register(&sbi->bdi, "ecryptfs", BDI_CAP_MAP_COPY);
 	if (rc)
 		goto out1;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index b8c250fc492..6c906078f65 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1286,7 +1286,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
 		error = -EBUSY;
 		goto error_bdev;
 	}
-	s = sget(fs_type, test_gfs2_super, set_gfs2_super, bdev);
+	s = sget(fs_type, test_gfs2_super, set_gfs2_super, flags, bdev);
 	mutex_unlock(&bdev->bd_fsfreeze_mutex);
 	error = PTR_ERR(s);
 	if (IS_ERR(s))
@@ -1316,7 +1316,6 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
 	} else {
 		char b[BDEVNAME_SIZE];
 
-		s->s_flags = flags;
 		s->s_mode = mode;
 		strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
 		sb_set_blocksize(s, block_size(bdev));
@@ -1360,7 +1359,7 @@ static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type,
 		       dev_name, error);
 		return ERR_PTR(error);
 	}
-	s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super,
+	s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, flags,
 		 path.dentry->d_inode->i_sb->s_bdev);
 	path_put(&path);
 	if (IS_ERR(s)) {
diff --git a/fs/libfs.c b/fs/libfs.c
index ebd03f6910d..a74cb1725ac 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -222,15 +222,15 @@ struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name,
 	const struct super_operations *ops,
 	const struct dentry_operations *dops, unsigned long magic)
 {
-	struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
+	struct super_block *s;
 	struct dentry *dentry;
 	struct inode *root;
 	struct qstr d_name = QSTR_INIT(name, strlen(name));
 
+	s = sget(fs_type, NULL, set_anon_super, MS_NOUSER, NULL);
 	if (IS_ERR(s))
 		return ERR_CAST(s);
 
-	s->s_flags = MS_NOUSER;
 	s->s_maxbytes = MAX_LFS_FILESIZE;
 	s->s_blocksize = PAGE_SIZE;
 	s->s_blocksize_bits = PAGE_SHIFT;
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index 97bca623d89..345c24b8a6f 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -519,7 +519,7 @@ static struct dentry *logfs_get_sb_device(struct logfs_super *super,
 	log_super("LogFS: Start mount %x\n", mount_count++);
 
 	err = -EINVAL;
-	sb = sget(type, logfs_sb_test, logfs_sb_set, super);
+	sb = sget(type, logfs_sb_test, logfs_sb_set, flags | MS_NOATIME, super);
 	if (IS_ERR(sb)) {
 		super->s_devops->put_device(super);
 		kfree(super);
@@ -542,7 +542,6 @@ static struct dentry *logfs_get_sb_device(struct logfs_super *super,
 	sb->s_maxbytes	= (1ull << 43) - 1;
 	sb->s_max_links = LOGFS_LINK_MAX;
 	sb->s_op	= &logfs_super_operations;
-	sb->s_flags	= flags | MS_NOATIME;
 
 	err = logfs_read_sb(sb, sb->s_flags & MS_RDONLY);
 	if (err)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 06228192f64..8b2a2977b72 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2419,7 +2419,7 @@ static struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type,
 		sb_mntdata.mntflags |= MS_SYNCHRONOUS;
 
 	/* Get a superblock - note that we may end up sharing one that already exists */
-	s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata);
+	s = sget(fs_type, compare_super, nfs_set_super, flags, &sb_mntdata);
 	if (IS_ERR(s)) {
 		mntroot = ERR_CAST(s);
 		goto out_err_nosb;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 1099a76cee5..d57c42f974e 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1288,7 +1288,8 @@ nilfs_mount(struct file_system_type *fs_type, int flags,
 		err = -EBUSY;
 		goto failed;
 	}
-	s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, sd.bdev);
+	s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, flags,
+		 sd.bdev);
 	mutex_unlock(&sd.bdev->bd_fsfreeze_mutex);
 	if (IS_ERR(s)) {
 		err = PTR_ERR(s);
@@ -1301,7 +1302,6 @@ nilfs_mount(struct file_system_type *fs_type, int flags,
 		s_new = true;
 
 		/* New superblock instance created */
-		s->s_flags = flags;
 		s->s_mode = mode;
 		strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id));
 		sb_set_blocksize(s, block_size(sd.bdev));
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 568b20290c7..9a2d9fd7cad 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -111,7 +111,7 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
 		options = data;
 	}
 
-	sb = sget(fs_type, proc_test_super, proc_set_super, ns);
+	sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns);
 	if (IS_ERR(sb))
 		return ERR_CAST(sb);
 
@@ -121,7 +121,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
 	}
 
 	if (!sb->s_root) {
-		sb->s_flags = flags;
 		err = proc_fill_super(sb);
 		if (err) {
 			deactivate_locked_super(sb);
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 2c1ade692cc..e60e87035bb 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -403,7 +403,7 @@ static void *r_start(struct seq_file *m, loff_t * pos)
 	if (l)
 		return NULL;
 
-	if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, s)))
+	if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, 0, s)))
 		return NULL;
 
 	up_write(&s->s_umount);
diff --git a/fs/super.c b/fs/super.c
index cf001775617..c743fb3be4b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -105,11 +105,12 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc)
 /**
  *	alloc_super	-	create new superblock
  *	@type:	filesystem type superblock should belong to
+ *	@flags: the mount flags
  *
  *	Allocates and initializes a new &struct super_block.  alloc_super()
  *	returns a pointer new superblock or %NULL if allocation had failed.
  */
-static struct super_block *alloc_super(struct file_system_type *type)
+static struct super_block *alloc_super(struct file_system_type *type, int flags)
 {
 	struct super_block *s = kzalloc(sizeof(struct super_block),  GFP_USER);
 	static const struct super_operations default_op;
@@ -136,6 +137,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
 #else
 		INIT_LIST_HEAD(&s->s_files);
 #endif
+		s->s_flags = flags;
 		s->s_bdi = &default_backing_dev_info;
 		INIT_HLIST_NODE(&s->s_instances);
 		INIT_HLIST_BL_HEAD(&s->s_anon);
@@ -415,11 +417,13 @@ EXPORT_SYMBOL(generic_shutdown_super);
  *	@type:	filesystem type superblock should belong to
  *	@test:	comparison callback
  *	@set:	setup callback
+ *	@flags:	mount flags
  *	@data:	argument to each of them
  */
 struct super_block *sget(struct file_system_type *type,
 			int (*test)(struct super_block *,void *),
 			int (*set)(struct super_block *,void *),
+			int flags,
 			void *data)
 {
 	struct super_block *s = NULL;
@@ -450,7 +454,7 @@ retry:
 	}
 	if (!s) {
 		spin_unlock(&sb_lock);
-		s = alloc_super(type);
+		s = alloc_super(type, flags);
 		if (!s)
 			return ERR_PTR(-ENOMEM);
 		goto retry;
@@ -925,13 +929,12 @@ struct dentry *mount_ns(struct file_system_type *fs_type, int flags,
 {
 	struct super_block *sb;
 
-	sb = sget(fs_type, ns_test_super, ns_set_super, data);
+	sb = sget(fs_type, ns_test_super, ns_set_super, flags, data);
 	if (IS_ERR(sb))
 		return ERR_CAST(sb);
 
 	if (!sb->s_root) {
 		int err;
-		sb->s_flags = flags;
 		err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
 		if (err) {
 			deactivate_locked_super(sb);
@@ -992,7 +995,8 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
 		error = -EBUSY;
 		goto error_bdev;
 	}
-	s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
+	s = sget(fs_type, test_bdev_super, set_bdev_super, flags | MS_NOSEC,
+		 bdev);
 	mutex_unlock(&bdev->bd_fsfreeze_mutex);
 	if (IS_ERR(s))
 		goto error_s;
@@ -1017,7 +1021,6 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
 	} else {
 		char b[BDEVNAME_SIZE];
 
-		s->s_flags = flags | MS_NOSEC;
 		s->s_mode = mode;
 		strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
 		sb_set_blocksize(s, block_size(bdev));
@@ -1062,13 +1065,11 @@ struct dentry *mount_nodev(struct file_system_type *fs_type,
 	int (*fill_super)(struct super_block *, void *, int))
 {
 	int error;
-	struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
+	struct super_block *s = sget(fs_type, NULL, set_anon_super, flags, NULL);
 
 	if (IS_ERR(s))
 		return ERR_CAST(s);
 
-	s->s_flags = flags;
-
 	error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
 	if (error) {
 		deactivate_locked_super(s);
@@ -1091,11 +1092,10 @@ struct dentry *mount_single(struct file_system_type *fs_type,
 	struct super_block *s;
 	int error;
 
-	s = sget(fs_type, compare_single, set_anon_super, NULL);
+	s = sget(fs_type, compare_single, set_anon_super, flags, NULL);
 	if (IS_ERR(s))
 		return ERR_CAST(s);
 	if (!s->s_root) {
-		s->s_flags = flags;
 		error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
 		if (error) {
 			deactivate_locked_super(s);
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index c15a7a3572e..71eb7e25392 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -118,13 +118,12 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
 	for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
 		info->ns[type] = kobj_ns_grab_current(type);
 
-	sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info);
+	sb = sget(fs_type, sysfs_test_super, sysfs_set_super, flags, info);
 	if (IS_ERR(sb) || sb->s_fs_info != info)
 		free_sysfs_super_info(info);
 	if (IS_ERR(sb))
 		return ERR_CAST(sb);
 	if (!sb->s_root) {
-		sb->s_flags = flags;
 		error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
 		if (error) {
 			deactivate_locked_super(sb);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 5862dd9d278..1c766c39c03 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2136,7 +2136,7 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
 
 	dbg_gen("opened ubi%d_%d", c->vi.ubi_num, c->vi.vol_id);
 
-	sb = sget(fs_type, sb_test, sb_set, c);
+	sb = sget(fs_type, sb_test, sb_set, flags, c);
 	if (IS_ERR(sb)) {
 		err = PTR_ERR(sb);
 		kfree(c);
@@ -2153,7 +2153,6 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
 			goto out_deact;
 		}
 	} else {
-		sb->s_flags = flags;
 		err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
 		if (err)
 			goto out_deact;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2f857e9eeb3..48548bdd772 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1914,7 +1914,7 @@ void free_anon_bdev(dev_t);
 struct super_block *sget(struct file_system_type *type,
 			int (*test)(struct super_block *,void *),
 			int (*set)(struct super_block *,void *),
-			void *data);
+			int flags, void *data);
 extern struct dentry *mount_pseudo(struct file_system_type *, char *,
 	const struct super_operations *ops,
 	const struct dentry_operations *dops,
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0cd1314acda..af2b5641fc8 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1587,7 +1587,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 	opts.new_root = new_root;
 
 	/* Locate an existing or new sb for this hierarchy */
-	sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts);
+	sb = sget(fs_type, cgroup_test_super, cgroup_set_super, 0, &opts);
 	if (IS_ERR(sb)) {
 		ret = PTR_ERR(sb);
 		cgroup_drop_root(opts.new_root);
-- 
cgit v1.2.3-70-g09d2


From 0bdaea9017b9d2b9996e153a71ee03555969b80e Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 25 Jun 2012 12:55:46 +0100
Subject: VFS: Split inode_permission()

Split inode_permission() into inode- and superblock-dependent parts.

This is aimed at unionmounts where the superblock from the upper layer has to
be checked rather than the superblock from the lower layer as the upper layer
may be writable, thus allowing an unwritable file from the lower layer to be
copied up and modified.

Original-author: Valerie Aurora <vaurora@redhat.com>
Signed-off-by: David Howells <dhowells@redhat.com> (Further development)
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/internal.h |  5 +++++
 fs/namei.c    | 66 ++++++++++++++++++++++++++++++++++++++++++++---------------
 2 files changed, 54 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/internal.h b/fs/internal.h
index 8a9f5fa840f..a6fd56c68b1 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -41,6 +41,11 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait)
  */
 extern void __init chrdev_init(void);
 
+/*
+ * namei.c
+ */
+extern int __inode_permission(struct inode *, int);
+
 /*
  * namespace.c
  */
diff --git a/fs/namei.c b/fs/namei.c
index c6dcb4c8f86..1b647468769 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -315,31 +315,22 @@ static inline int do_inode_permission(struct inode *inode, int mask)
 }
 
 /**
- * inode_permission  -  check for access rights to a given inode
- * @inode:	inode to check permission on
- * @mask:	right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC, ...)
+ * __inode_permission - Check for access rights to a given inode
+ * @inode: Inode to check permission on
+ * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
  *
- * Used to check for read/write/execute permissions on an inode.
- * We use "fsuid" for this, letting us set arbitrary permissions
- * for filesystem access without changing the "normal" uids which
- * are used for other things.
+ * Check for read/write/execute permissions on an inode.
  *
  * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask.
+ *
+ * This does not check for a read-only file system.  You probably want
+ * inode_permission().
  */
-int inode_permission(struct inode *inode, int mask)
+int __inode_permission(struct inode *inode, int mask)
 {
 	int retval;
 
 	if (unlikely(mask & MAY_WRITE)) {
-		umode_t mode = inode->i_mode;
-
-		/*
-		 * Nobody gets write access to a read-only fs.
-		 */
-		if (IS_RDONLY(inode) &&
-		    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
-			return -EROFS;
-
 		/*
 		 * Nobody gets write access to an immutable file.
 		 */
@@ -358,6 +349,47 @@ int inode_permission(struct inode *inode, int mask)
 	return security_inode_permission(inode, mask);
 }
 
+/**
+ * sb_permission - Check superblock-level permissions
+ * @sb: Superblock of inode to check permission on
+ * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
+ *
+ * Separate out file-system wide checks from inode-specific permission checks.
+ */
+static int sb_permission(struct super_block *sb, struct inode *inode, int mask)
+{
+	if (unlikely(mask & MAY_WRITE)) {
+		umode_t mode = inode->i_mode;
+
+		/* Nobody gets write access to a read-only fs. */
+		if ((sb->s_flags & MS_RDONLY) &&
+		    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
+			return -EROFS;
+	}
+	return 0;
+}
+
+/**
+ * inode_permission - Check for access rights to a given inode
+ * @inode: Inode to check permission on
+ * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
+ *
+ * Check for read/write/execute permissions on an inode.  We use fs[ug]id for
+ * this, letting us set arbitrary permissions for filesystem access without
+ * changing the "normal" UIDs which are used for other things.
+ *
+ * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask.
+ */
+int inode_permission(struct inode *inode, int mask)
+{
+	int retval;
+
+	retval = sb_permission(inode->i_sb, inode, mask);
+	if (retval)
+		return retval;
+	return __inode_permission(inode, mask);
+}
+
 /**
  * path_get - get a reference to a path
  * @path: path to get the reference to
-- 
cgit v1.2.3-70-g09d2


From 05d290d66be6ef77a0b962ebecf01911bd984a78 Mon Sep 17 00:00:00 2001
From: Anders Kaseorg <andersk@MIT.EDU>
Date: Sun, 15 Jul 2012 17:14:25 -0400
Subject: fifo: Do not restart open() if it already found a partner
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If a parent and child process open the two ends of a fifo, and the
child immediately exits, the parent may receive a SIGCHLD before its
open() returns.  In that case, we need to make sure that open() will
return successfully after the SIGCHLD handler returns, instead of
throwing EINTR or being restarted.  Otherwise, the restarted open()
would incorrectly wait for a second partner on the other end.

The following test demonstrates the EINTR that was wrongly thrown from
the parent’s open().  Change .sa_flags = 0 to .sa_flags = SA_RESTART
to see a deadlock instead, in which the restarted open() waits for a
second reader that will never come.  (On my systems, this happens
pretty reliably within about 5 to 500 iterations.  Others report that
it manages to loop ~forever sometimes; YMMV.)

  #include <sys/stat.h>
  #include <sys/types.h>
  #include <sys/wait.h>
  #include <fcntl.h>
  #include <signal.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <unistd.h>

  #define CHECK(x) do if ((x) == -1) {perror(#x); abort();} while(0)

  void handler(int signum) {}

  int main()
  {
      struct sigaction act = {.sa_handler = handler, .sa_flags = 0};
      CHECK(sigaction(SIGCHLD, &act, NULL));
      CHECK(mknod("fifo", S_IFIFO | S_IRWXU, 0));
      for (;;) {
          int fd;
          pid_t pid;
          putc('.', stderr);
          CHECK(pid = fork());
          if (pid == 0) {
              CHECK(fd = open("fifo", O_RDONLY));
              _exit(0);
          }
          CHECK(fd = open("fifo", O_WRONLY));
          CHECK(close(fd));
          CHECK(waitpid(pid, NULL, 0));
      }
  }

This is what I suspect was causing the Git test suite to fail in
t9010-svn-fe.sh:

	http://bugs.debian.org/678852

Signed-off-by: Anders Kaseorg <andersk@mit.edu>
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fifo.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/fifo.c b/fs/fifo.c
index b1a524d798e..cf6f4345ceb 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -14,7 +14,7 @@
 #include <linux/sched.h>
 #include <linux/pipe_fs_i.h>
 
-static void wait_for_partner(struct inode* inode, unsigned int *cnt)
+static int wait_for_partner(struct inode* inode, unsigned int *cnt)
 {
 	int cur = *cnt;	
 
@@ -23,6 +23,7 @@ static void wait_for_partner(struct inode* inode, unsigned int *cnt)
 		if (signal_pending(current))
 			break;
 	}
+	return cur == *cnt ? -ERESTARTSYS : 0;
 }
 
 static void wake_up_partner(struct inode* inode)
@@ -67,8 +68,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
 				 * seen a writer */
 				filp->f_version = pipe->w_counter;
 			} else {
-				wait_for_partner(inode, &pipe->w_counter);
-				if(signal_pending(current))
+				if (wait_for_partner(inode, &pipe->w_counter))
 					goto err_rd;
 			}
 		}
@@ -90,8 +90,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
 			wake_up_partner(inode);
 
 		if (!pipe->readers) {
-			wait_for_partner(inode, &pipe->r_counter);
-			if (signal_pending(current))
+			if (wait_for_partner(inode, &pipe->r_counter))
 				goto err_wr;
 		}
 		break;
-- 
cgit v1.2.3-70-g09d2


From ffc61ccbb96809df8d97ed609ac86b509eaf9056 Mon Sep 17 00:00:00 2001
From: Sachin Prabhu <sprabhu@redhat.com>
Date: Wed, 11 Jul 2012 12:28:05 +0100
Subject: Initialise mid_q_entry before putting it on the pending queue

A user reported a crash in cifs_demultiplex_thread() caused by an
incorrectly set mid_q_entry->callback() function. It appears that the
callback assignment made in cifs_call_async() was not flushed back to
memory suggesting that a memory barrier was required here. Changing the
code to make sure that the mid_q_entry structure was completely
initialised before it was added to the pending queue fixes the problem.

Signed-off-by: Sachin Prabhu <sprabhu@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: Shirish Pargaonkar <shirishpargaonkar@gmail.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/transport.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 3097ee58fd7..f25d4ea14be 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -365,16 +365,14 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov,
 	if (mid == NULL)
 		return -ENOMEM;
 
-	/* put it on the pending_mid_q */
-	spin_lock(&GlobalMid_Lock);
-	list_add_tail(&mid->qhead, &server->pending_mid_q);
-	spin_unlock(&GlobalMid_Lock);
-
 	rc = cifs_sign_smb2(iov, nvec, server, &mid->sequence_number);
-	if (rc)
-		delete_mid(mid);
+	if (rc) {
+		DeleteMidQEntry(mid);
+		return rc;
+	}
+
 	*ret_mid = mid;
-	return rc;
+	return 0;
 }
 
 /*
@@ -407,17 +405,21 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
 	mid->callback_data = cbdata;
 	mid->mid_state = MID_REQUEST_SUBMITTED;
 
+	/* put it on the pending_mid_q */
+	spin_lock(&GlobalMid_Lock);
+	list_add_tail(&mid->qhead, &server->pending_mid_q);
+	spin_unlock(&GlobalMid_Lock);
+
+
 	cifs_in_send_inc(server);
 	rc = smb_sendv(server, iov, nvec);
 	cifs_in_send_dec(server);
 	cifs_save_when_sent(mid);
 	mutex_unlock(&server->srv_mutex);
 
-	if (rc)
-		goto out_err;
+	if (rc == 0)
+		return 0;
 
-	return rc;
-out_err:
 	delete_mid(mid);
 	add_credits(server, 1);
 	wake_up(&server->request_q);
-- 
cgit v1.2.3-70-g09d2


From 3ae629d98bd5ed77585a878566f04f310adbc591 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 11 Jul 2012 09:09:35 -0400
Subject: cifs: on CONFIG_HIGHMEM machines, limit the rsize/wsize to the kmap
 space

We currently rely on being able to kmap all of the pages in an async
read or write request. If you're on a machine that has CONFIG_HIGHMEM
set then that kmap space is limited, sometimes to as low as 512 slots.

With 512 slots, we can only support up to a 2M r/wsize, and that's
assuming that we can get our greedy little hands on all of them. There
are other users however, so it's possible we'll end up stuck with a
size that large.

Since we can't handle a rsize or wsize larger than that currently, cap
those options at the number of kmap slots we have. We could consider
capping it even lower, but we currently default to a max of 1M. Might as
well allow those luddites on 32 bit arches enough rope to hang
themselves.

A more robust fix would be to teach the send and receive routines how
to contend with an array of pages so we don't need to marshal up a kvec
array at all. That's a fairly significant overhaul though, so we'll need
this limit in place until that's ready.

Cc: <stable@vger.kernel.org>
Reported-by: Jian Li <jiali@redhat.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/connect.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 0ae86ddf221..94b7788c318 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3445,6 +3445,18 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
 #define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024)
 #define CIFS_DEFAULT_NON_POSIX_WSIZE (65536)
 
+/*
+ * On hosts with high memory, we can't currently support wsize/rsize that are
+ * larger than we can kmap at once. Cap the rsize/wsize at
+ * LAST_PKMAP * PAGE_SIZE. We'll never be able to fill a read or write request
+ * larger than that anyway.
+ */
+#ifdef CONFIG_HIGHMEM
+#define CIFS_KMAP_SIZE_LIMIT	(LAST_PKMAP * PAGE_CACHE_SIZE)
+#else /* CONFIG_HIGHMEM */
+#define CIFS_KMAP_SIZE_LIMIT	(1<<24)
+#endif /* CONFIG_HIGHMEM */
+
 static unsigned int
 cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
 {
@@ -3475,6 +3487,9 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
 		wsize = min_t(unsigned int, wsize,
 				server->maxBuf - sizeof(WRITE_REQ) + 4);
 
+	/* limit to the amount that we can kmap at once */
+	wsize = min_t(unsigned int, wsize, CIFS_KMAP_SIZE_LIMIT);
+
 	/* hard limit of CIFS_MAX_WSIZE */
 	wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE);
 
@@ -3516,6 +3531,9 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
 	if (!(server->capabilities & CAP_LARGE_READ_X))
 		rsize = min_t(unsigned int, CIFSMaxBufSize, rsize);
 
+	/* limit to the amount that we can kmap at once */
+	rsize = min_t(unsigned int, rsize, CIFS_KMAP_SIZE_LIMIT);
+
 	/* hard limit of CIFS_MAX_RSIZE */
 	rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE);
 
-- 
cgit v1.2.3-70-g09d2


From 3cf003c08be785af4bee9ac05891a15bcbff856a Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 11 Jul 2012 09:09:36 -0400
Subject: cifs: when CONFIG_HIGHMEM is set, serialize the read/write kmaps

Jian found that when he ran fsx on a 32 bit arch with a large wsize the
process and one of the bdi writeback kthreads would sometimes deadlock
with a stack trace like this:

crash> bt
PID: 2789   TASK: f02edaa0  CPU: 3   COMMAND: "fsx"
 #0 [eed63cbc] schedule at c083c5b3
 #1 [eed63d80] kmap_high at c0500ec8
 #2 [eed63db0] cifs_async_writev at f7fabcd7 [cifs]
 #3 [eed63df0] cifs_writepages at f7fb7f5c [cifs]
 #4 [eed63e50] do_writepages at c04f3e32
 #5 [eed63e54] __filemap_fdatawrite_range at c04e152a
 #6 [eed63ea4] filemap_fdatawrite at c04e1b3e
 #7 [eed63eb4] cifs_file_aio_write at f7fa111a [cifs]
 #8 [eed63ecc] do_sync_write at c052d202
 #9 [eed63f74] vfs_write at c052d4ee
#10 [eed63f94] sys_write at c052df4c
#11 [eed63fb0] ia32_sysenter_target at c0409a98
    EAX: 00000004  EBX: 00000003  ECX: abd73b73  EDX: 012a65c6
    DS:  007b      ESI: 012a65c6  ES:  007b      EDI: 00000000
    SS:  007b      ESP: bf8db178  EBP: bf8db1f8  GS:  0033
    CS:  0073      EIP: 40000424  ERR: 00000004  EFLAGS: 00000246

Each task would kmap part of its address array before getting stuck, but
not enough to actually issue the write.

This patch fixes this by serializing the marshal_iov operations for
async reads and writes. The idea here is to ensure that cifs
aggressively tries to populate a request before attempting to fulfill
another one. As soon as all of the pages are kmapped for a request, then
we can unlock and allow another one to proceed.

There's no need to do this serialization on non-CONFIG_HIGHMEM arches
however, so optimize all of this out when CONFIG_HIGHMEM isn't set.

Cc: <stable@vger.kernel.org>
Reported-by: Jian Li <jiali@redhat.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifssmb.c | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 5b400730c21..4ee522b3f66 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -86,7 +86,31 @@ static struct {
 #endif /* CONFIG_CIFS_WEAK_PW_HASH */
 #endif /* CIFS_POSIX */
 
-/* Forward declarations */
+#ifdef CONFIG_HIGHMEM
+/*
+ * On arches that have high memory, kmap address space is limited. By
+ * serializing the kmap operations on those arches, we ensure that we don't
+ * end up with a bunch of threads in writeback with partially mapped page
+ * arrays, stuck waiting for kmap to come back. That situation prevents
+ * progress and can deadlock.
+ */
+static DEFINE_MUTEX(cifs_kmap_mutex);
+
+static inline void
+cifs_kmap_lock(void)
+{
+	mutex_lock(&cifs_kmap_mutex);
+}
+
+static inline void
+cifs_kmap_unlock(void)
+{
+	mutex_unlock(&cifs_kmap_mutex);
+}
+#else /* !CONFIG_HIGHMEM */
+#define cifs_kmap_lock() do { ; } while(0)
+#define cifs_kmap_unlock() do { ; } while(0)
+#endif /* CONFIG_HIGHMEM */
 
 /* Mark as invalid, all open files on tree connections since they
    were closed when session to server was lost */
@@ -1503,7 +1527,9 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 	}
 
 	/* marshal up the page array */
+	cifs_kmap_lock();
 	len = rdata->marshal_iov(rdata, data_len);
+	cifs_kmap_unlock();
 	data_len -= len;
 
 	/* issue the read if we have any iovecs left to fill */
@@ -2069,7 +2095,9 @@ cifs_async_writev(struct cifs_writedata *wdata)
 	 * and set the iov_len properly for each one. It may also set
 	 * wdata->bytes too.
 	 */
+	cifs_kmap_lock();
 	wdata->marshal_iov(iov, wdata);
+	cifs_kmap_unlock();
 
 	cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes);
 
-- 
cgit v1.2.3-70-g09d2


From cd60042cc1392e79410dc8de9e9c1abb38a29e57 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 6 Jul 2012 07:09:42 -0400
Subject: cifs: always update the inode cache with the results from a FIND_*

When we get back a FIND_FIRST/NEXT result, we have some info about the
dentry that we use to instantiate a new inode. We were ignoring and
discarding that info when we had an existing dentry in the cache.

Fix this by updating the inode in place when we find an existing dentry
and the uniqueid is the same.

Cc: <stable@vger.kernel.org> # .31.x
Reported-and-Tested-by: Andrew Bartlett <abartlet@samba.org>
Reported-by: Bill Robertson <bill_robertson@debortoli.com.au>
Reported-by: Dion Edwards <dion_edwards@debortoli.com.au>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/readdir.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 0a8224d1c4c..a4217f02fab 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -86,9 +86,12 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
 
 	dentry = d_lookup(parent, name);
 	if (dentry) {
-		/* FIXME: check for inode number changes? */
-		if (dentry->d_inode != NULL)
+		inode = dentry->d_inode;
+		/* update inode in place if i_ino didn't change */
+		if (inode && CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) {
+			cifs_fattr_to_inode(inode, fattr);
 			return dentry;
+		}
 		d_drop(dentry);
 		dput(dentry);
 	}
-- 
cgit v1.2.3-70-g09d2


From d9914cf66181b8aa0929775f5c6f675c6ebc3eb5 Mon Sep 17 00:00:00 2001
From: Michael Kerrisk <mtk.manpages@gmail.com>
Date: Tue, 17 Jul 2012 21:37:27 +0200
Subject: PM: Rename CAP_EPOLLWAKEUP to CAP_BLOCK_SUSPEND

As discussed in
http://thread.gmane.org/gmane.linux.kernel/1249726/focus=1288990,
the capability introduced in 4d7e30d98939a0340022ccd49325a3d70f7e0238
to govern EPOLLWAKEUP seems misnamed: this capability is about governing
the ability to suspend the system, not using a particular API flag
(EPOLLWAKEUP). We should make the name of the capability more general
to encourage reuse in related cases. (Whether or not this capability
should also be used to govern the use of /sys/power/wake_lock is a
question that needs to be separately resolved.)

This patch renames the capability to CAP_BLOCK_SUSPEND. In order to ensure
that the old capability name doesn't make it out into the wild, could you
please apply and push up the tree to ensure that it is incorporated
for the 3.5 release.

Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 fs/eventpoll.c             | 2 +-
 include/linux/capability.h | 6 +++---
 include/linux/eventpoll.h  | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 74598f67efe..1c8b5567080 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1710,7 +1710,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 		goto error_tgt_fput;
 
 	/* Check if EPOLLWAKEUP is allowed */
-	if ((epds.events & EPOLLWAKEUP) && !capable(CAP_EPOLLWAKEUP))
+	if ((epds.events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND))
 		epds.events &= ~EPOLLWAKEUP;
 
 	/*
diff --git a/include/linux/capability.h b/include/linux/capability.h
index 68d56effc32..d10b7ed595b 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -360,11 +360,11 @@ struct cpu_vfs_cap_data {
 
 #define CAP_WAKE_ALARM            35
 
-/* Allow preventing system suspends while epoll events are pending */
+/* Allow preventing system suspends */
 
-#define CAP_EPOLLWAKEUP      36
+#define CAP_BLOCK_SUSPEND    36
 
-#define CAP_LAST_CAP         CAP_EPOLLWAKEUP
+#define CAP_LAST_CAP         CAP_BLOCK_SUSPEND
 
 #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP)
 
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index 6f8be328770..f4bb378ccf6 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -34,7 +34,7 @@
  * re-allowed until epoll_wait is called again after consuming the wakeup
  * event(s).
  *
- * Requires CAP_EPOLLWAKEUP
+ * Requires CAP_BLOCK_SUSPEND
  */
 #define EPOLLWAKEUP (1 << 29)
 
-- 
cgit v1.2.3-70-g09d2


From 294f2ad5a545eb71d397623743ddd8201131bdad Mon Sep 17 00:00:00 2001
From: Abhijith Das <adas@redhat.com>
Date: Wed, 18 Jul 2012 11:56:59 -0400
Subject: GFS2: kernel panic with small gfs2 filesystems - 1 RG

In the unlikely setup where there's only one resource group in the gfs2
filesystem, gfs2_rgrpd_get_next() returns a NULL rgd that is not dealt with
properly, causing a kernel NULL ptr dereference. This patch fixes this issue.

Signed-off-by: Abhi Das <adas@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/rgrp.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index e53d0a1c234..fb7079263ea 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1276,6 +1276,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
 			/* fall through */
 		case GLR_TRYFAILED:
 			rgd = gfs2_rgrpd_get_next(rgd);
+			rgd = rgd ? : begin; /* if NULL, wrap */
 			if (rgd != begin) /* If we didn't wrap */
 				break;
 
-- 
cgit v1.2.3-70-g09d2


From 331ae4962b975246944ea039697a8f1cadce42bb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Wed, 18 Jul 2012 09:31:36 +0100
Subject: ext4: fix duplicated mnt_drop_write call in EXT4_IOC_MOVE_EXT

Caused, AFAICS, by mismerge in commit ff9cb1c4eead ("Merge branch
'for_linus' into for_linus_merged")

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: stable@vger.kernel.org  # 3.3+
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext4/ioctl.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index e34deac3f36..6ec6f9ee2fe 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -268,7 +268,6 @@ group_extend_out:
 		err = ext4_move_extents(filp, donor_filp, me.orig_start,
 					me.donor_start, me.len, &me.moved_len);
 		mnt_drop_write_file(filp);
-		mnt_drop_write(filp->f_path.mnt);
 
 		if (copy_to_user((struct move_extent __user *)arg,
 				 &me, sizeof(me)))
-- 
cgit v1.2.3-70-g09d2


From 8e2e00473598dd5379d8408cb974dade000acafc Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Thu, 19 Jul 2012 08:12:40 -0400
Subject: GFS2: Reduce file fragmentation

This patch reduces GFS2 file fragmentation by pre-reserving blocks. The
resulting improved on disk layout greatly speeds up operations in cases
which would have resulted in interlaced allocation of blocks previously.
A typical example of this is 10 parallel dd processes, each writing to a
file in a common dirctory.

The implementation uses an rbtree of reservations attached to each
resource group (and each inode).

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/bmap.c       |   3 +
 fs/gfs2/file.c       |  24 +--
 fs/gfs2/incore.h     |  49 +++--
 fs/gfs2/inode.c      |  37 +++-
 fs/gfs2/rgrp.c       | 578 ++++++++++++++++++++++++++++++++++++++++++++++-----
 fs/gfs2/rgrp.h       |  31 ++-
 fs/gfs2/super.c      |   7 +
 fs/gfs2/trace_gfs2.h |  59 +++++-
 fs/gfs2/xattr.c      |  12 ++
 9 files changed, 708 insertions(+), 92 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 6d957a86482..49cd7dd4a9f 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -785,6 +785,9 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 	if (error)
 		goto out_rlist;
 
+	if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */
+		gfs2_rs_deltree(ip->i_res);
+
 	error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
 				 RES_INDIRECT + RES_STATFS + RES_QUOTA,
 				 revokes);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 6fbf3cbd974..9f94832cefe 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -383,6 +383,9 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	if (ret)
 		return ret;
 
+	atomic_set(&ip->i_res->rs_sizehint,
+		   PAGE_CACHE_SIZE / sdp->sd_sb.sb_bsize);
+
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
 	ret = gfs2_glock_nq(&gh);
 	if (ret)
@@ -571,22 +574,15 @@ fail:
 
 static int gfs2_release(struct inode *inode, struct file *file)
 {
-	struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
-	struct gfs2_file *fp;
 	struct gfs2_inode *ip = GFS2_I(inode);
 
-	fp = file->private_data;
+	kfree(file->private_data);
 	file->private_data = NULL;
 
-	if ((file->f_mode & FMODE_WRITE) && ip->i_res &&
+	if ((file->f_mode & FMODE_WRITE) &&
 	    (atomic_read(&inode->i_writecount) == 1))
 		gfs2_rs_delete(ip);
 
-	if (gfs2_assert_warn(sdp, fp))
-		return -EIO;
-
-	kfree(fp);
-
 	return 0;
 }
 
@@ -662,14 +658,18 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 				   unsigned long nr_segs, loff_t pos)
 {
 	struct file *file = iocb->ki_filp;
+	size_t writesize = iov_length(iov, nr_segs);
 	struct dentry *dentry = file->f_dentry;
 	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
+	struct gfs2_sbd *sdp;
 	int ret;
 
+	sdp = GFS2_SB(file->f_mapping->host);
 	ret = gfs2_rs_alloc(ip);
 	if (ret)
 		return ret;
 
+	atomic_set(&ip->i_res->rs_sizehint, writesize / sdp->sd_sb.sb_bsize);
 	if (file->f_flags & O_APPEND) {
 		struct gfs2_holder gh;
 
@@ -795,6 +795,8 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
 	if (unlikely(error))
 		goto out_uninit;
 
+	atomic_set(&ip->i_res->rs_sizehint, len / sdp->sd_sb.sb_bsize);
+
 	while (len > 0) {
 		if (len < bytes)
 			bytes = len;
@@ -803,10 +805,6 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
 			offset += bytes;
 			continue;
 		}
-		error = gfs2_rindex_update(sdp);
-		if (error)
-			goto out_unlock;
-
 		error = gfs2_quota_lock_check(ip);
 		if (error)
 			goto out_unlock;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index dc730700b3b..aaecc8085fc 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -84,6 +84,7 @@ struct gfs2_rgrpd {
 	u32 rd_data;			/* num of data blocks in rgrp */
 	u32 rd_bitbytes;		/* number of bytes in data bitmaps */
 	u32 rd_free;
+	u32 rd_reserved;                /* number of blocks reserved */
 	u32 rd_free_clone;
 	u32 rd_dinodes;
 	u64 rd_igeneration;
@@ -96,6 +97,9 @@ struct gfs2_rgrpd {
 #define GFS2_RDF_UPTODATE	0x20000000 /* rg is up to date */
 #define GFS2_RDF_ERROR		0x40000000 /* error in rg */
 #define GFS2_RDF_MASK		0xf0000000 /* mask for internal flags */
+	spinlock_t rd_rsspin;           /* protects reservation related vars */
+	struct rb_root rd_rstree;       /* multi-block reservation tree */
+	u32 rd_rs_cnt;                  /* count of current reservations */
 };
 
 enum gfs2_state_bits {
@@ -233,6 +237,38 @@ struct gfs2_holder {
 	unsigned long gh_ip;
 };
 
+/* Resource group multi-block reservation, in order of appearance:
+
+   Step 1. Function prepares to write, allocates a mb, sets the size hint.
+   Step 2. User calls inplace_reserve to target an rgrp, sets the rgrp info
+   Step 3. Function get_local_rgrp locks the rgrp, determines which bits to use
+   Step 4. Bits are assigned from the rgrp based on either the reservation
+           or wherever it can.
+*/
+
+struct gfs2_blkreserv {
+	/* components used during write (step 1): */
+	atomic_t rs_sizehint;         /* hint of the write size */
+
+	/* components used during inplace_reserve (step 2): */
+	u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */
+
+	/* components used during get_local_rgrp (step 3): */
+	struct gfs2_rgrpd *rs_rgd;    /* pointer to the gfs2_rgrpd */
+	struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */
+	struct rb_node rs_node;       /* link to other block reservations */
+
+	/* components used during block searches and assignments (step 4): */
+	struct gfs2_bitmap *rs_bi;    /* bitmap for the current allocation */
+	u32 rs_biblk;                 /* start block relative to the bi */
+	u32 rs_free;                  /* how many blocks are still free */
+
+	/* ancillary quota stuff */
+	struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS];
+	struct gfs2_holder rs_qa_qd_ghs[2 * MAXQUOTAS];
+	unsigned int rs_qa_qd_num;
+};
+
 enum {
 	GLF_LOCK			= 1,
 	GLF_DEMOTE			= 3,
@@ -290,16 +326,6 @@ struct gfs2_glock {
 
 #define GFS2_MIN_LVB_SIZE 32	/* Min size of LVB that gfs2 supports */
 
-struct gfs2_blkreserv {
-	u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */
-	struct gfs2_holder rs_rgd_gh; /* Filled in by gfs2_inplace_reserve() */
-
-	/* ancillary quota stuff */
-	struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS];
-	struct gfs2_holder rs_qa_qd_ghs[2 * MAXQUOTAS];
-	unsigned int rs_qa_qd_num;
-};
-
 enum {
 	GIF_INVALID		= 0,
 	GIF_QD_LOCKED		= 1,
@@ -307,7 +333,6 @@ enum {
 	GIF_SW_PAGED		= 3,
 };
 
-
 struct gfs2_inode {
 	struct inode i_inode;
 	u64 i_no_addr;
@@ -318,7 +343,7 @@ struct gfs2_inode {
 	struct gfs2_glock *i_gl; /* Move into i_gh? */
 	struct gfs2_holder i_iopen_gh;
 	struct gfs2_holder i_gh; /* for prepare/commit_write only */
-	struct gfs2_blkreserv *i_res; /* resource group block reservation */
+	struct gfs2_blkreserv *i_res; /* rgrp multi-block reservation */
 	struct gfs2_rgrpd *i_rgd;
 	u64 i_goal;	/* goal block for allocations */
 	struct rw_semaphore i_rw_mutex;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 2b035e0959b..c53c67e30bd 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -521,6 +521,9 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 	int error;
 
 	munge_mode_uid_gid(dip, &mode, &uid, &gid);
+	error = gfs2_rindex_update(sdp);
+	if (error)
+		return error;
 
 	error = gfs2_quota_lock(dip, uid, gid);
 	if (error)
@@ -551,6 +554,10 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
 	struct buffer_head *dibh;
 	int error;
 
+	error = gfs2_rindex_update(sdp);
+	if (error)
+		return error;
+
 	error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
 	if (error)
 		goto fail;
@@ -596,7 +603,8 @@ fail_end_trans:
 	gfs2_trans_end(sdp);
 
 fail_ipreserv:
-	gfs2_inplace_release(dip);
+	if (alloc_required)
+		gfs2_inplace_release(dip);
 
 fail_quota_locks:
 	gfs2_quota_unlock(dip);
@@ -647,7 +655,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	const struct qstr *name = &dentry->d_name;
 	struct gfs2_holder ghs[2];
 	struct inode *inode = NULL;
-	struct gfs2_inode *dip = GFS2_I(dir);
+	struct gfs2_inode *dip = GFS2_I(dir), *ip;
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
 	struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
 	int error;
@@ -657,6 +665,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	if (!name->len || name->len > GFS2_FNAMESIZE)
 		return -ENAMETOOLONG;
 
+	/* We need a reservation to allocate the new dinode block. The
+	   directory ip temporarily points to the reservation, but this is
+	   being done to get a set of contiguous blocks for the new dinode.
+	   Since this is a create, we don't have a sizehint yet, so it will
+	   have to use the minimum reservation size. */
 	error = gfs2_rs_alloc(dip);
 	if (error)
 		return error;
@@ -694,24 +707,29 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	if (IS_ERR(inode))
 		goto fail_gunlock2;
 
-	error = gfs2_inode_refresh(GFS2_I(inode));
+	ip = GFS2_I(inode);
+	error = gfs2_inode_refresh(ip);
 	if (error)
 		goto fail_gunlock2;
 
-	/* the new inode needs a reservation so it can allocate xattrs. */
-	error = gfs2_rs_alloc(GFS2_I(inode));
-	if (error)
-		goto fail_gunlock2;
+	/* The newly created inode needs a reservation so it can allocate
+	   xattrs. At the same time, we want new blocks allocated to the new
+	   dinode to be as contiguous as possible. Since we allocated the
+	   dinode block under the directory's reservation, we transfer
+	   ownership of that reservation to the new inode. The directory
+	   doesn't need a reservation unless it needs a new allocation. */
+	ip->i_res = dip->i_res;
+	dip->i_res = NULL;
 
 	error = gfs2_acl_create(dip, inode);
 	if (error)
 		goto fail_gunlock2;
 
-	error = gfs2_security_init(dip, GFS2_I(inode), name);
+	error = gfs2_security_init(dip, ip, name);
 	if (error)
 		goto fail_gunlock2;
 
-	error = link_dinode(dip, name, GFS2_I(inode));
+	error = link_dinode(dip, name, ip);
 	if (error)
 		goto fail_gunlock2;
 
@@ -738,6 +756,7 @@ fail_gunlock:
 		iput(inode);
 	}
 fail:
+	gfs2_rs_delete(dip);
 	if (bh)
 		brelse(bh);
 	return error;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index fb7079263ea..4d34887a601 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -35,6 +35,9 @@
 #define BFITNOENT ((u32)~0)
 #define NO_BLOCK ((u64)~0)
 
+#define RSRV_CONTENTION_FACTOR 4
+#define RGRP_RSRV_MAX_CONTENDERS 2
+
 #if BITS_PER_LONG == 32
 #define LBITMASK   (0x55555555UL)
 #define LBITSKIP55 (0x55555555UL)
@@ -177,6 +180,57 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
 	return tmp;
 }
 
+/**
+ * rs_cmp - multi-block reservation range compare
+ * @blk: absolute file system block number of the new reservation
+ * @len: number of blocks in the new reservation
+ * @rs: existing reservation to compare against
+ *
+ * returns: 1 if the block range is beyond the reach of the reservation
+ *         -1 if the block range is before the start of the reservation
+ *          0 if the block range overlaps with the reservation
+ */
+static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
+{
+	u64 startblk = gfs2_rs_startblk(rs);
+
+	if (blk >= startblk + rs->rs_free)
+		return 1;
+	if (blk + len - 1 < startblk)
+		return -1;
+	return 0;
+}
+
+/**
+ * rs_find - Find a rgrp multi-block reservation that contains a given block
+ * @rgd: The rgrp
+ * @rgblk: The block we're looking for, relative to the rgrp
+ */
+static struct gfs2_blkreserv *rs_find(struct gfs2_rgrpd *rgd, u32 rgblk)
+{
+	struct rb_node **newn;
+	int rc;
+	u64 fsblk = rgblk + rgd->rd_data0;
+
+	spin_lock(&rgd->rd_rsspin);
+	newn = &rgd->rd_rstree.rb_node;
+	while (*newn) {
+		struct gfs2_blkreserv *cur =
+			rb_entry(*newn, struct gfs2_blkreserv, rs_node);
+		rc = rs_cmp(fsblk, 1, cur);
+		if (rc < 0)
+			newn = &((*newn)->rb_left);
+		else if (rc > 0)
+			newn = &((*newn)->rb_right);
+		else {
+			spin_unlock(&rgd->rd_rsspin);
+			return cur;
+		}
+	}
+	spin_unlock(&rgd->rd_rsspin);
+	return NULL;
+}
+
 /**
  * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
  *       a block in a given allocation state.
@@ -424,19 +478,93 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd)
 int gfs2_rs_alloc(struct gfs2_inode *ip)
 {
 	int error = 0;
+	struct gfs2_blkreserv *res;
+
+	if (ip->i_res)
+		return 0;
+
+	res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
+	if (!res)
+		error = -ENOMEM;
 
 	down_write(&ip->i_rw_mutex);
-	if (!ip->i_res) {
-		ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
-		if (!ip->i_res)
-			error = -ENOMEM;
-	}
+	if (ip->i_res)
+		kmem_cache_free(gfs2_rsrv_cachep, res);
+	else
+		ip->i_res = res;
 	up_write(&ip->i_rw_mutex);
 	return error;
 }
 
+static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs)
+{
+	gfs2_print_dbg(seq, "  r: %llu s:%llu b:%u f:%u\n",
+		       rs->rs_rgd->rd_addr, gfs2_rs_startblk(rs), rs->rs_biblk,
+		       rs->rs_free);
+}
+
 /**
- * gfs2_rs_delete - delete a reservation
+ * __rs_deltree - remove a multi-block reservation from the rgd tree
+ * @rs: The reservation to remove
+ *
+ */
+static void __rs_deltree(struct gfs2_blkreserv *rs)
+{
+	struct gfs2_rgrpd *rgd;
+
+	if (!gfs2_rs_active(rs))
+		return;
+
+	rgd = rs->rs_rgd;
+	/* We can't do this: The reason is that when the rgrp is invalidated,
+	   it's in the "middle" of acquiring the glock, but the HOLDER bit
+	   isn't set yet:
+	   BUG_ON(!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl));*/
+	trace_gfs2_rs(NULL, rs, TRACE_RS_TREEDEL);
+
+	if (!RB_EMPTY_ROOT(&rgd->rd_rstree))
+		rb_erase(&rs->rs_node, &rgd->rd_rstree);
+	BUG_ON(!rgd->rd_rs_cnt);
+	rgd->rd_rs_cnt--;
+
+	if (rs->rs_free) {
+		/* return reserved blocks to the rgrp and the ip */
+		BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free);
+		rs->rs_rgd->rd_reserved -= rs->rs_free;
+		rs->rs_free = 0;
+		clear_bit(GBF_FULL, &rs->rs_bi->bi_flags);
+		smp_mb__after_clear_bit();
+	}
+	/* We can't change any of the step 1 or step 2 components of the rs.
+	   E.g. We can't set rs_rgd to NULL because the rgd glock is held and
+	   dequeued through this pointer.
+	   Can't: atomic_set(&rs->rs_sizehint, 0);
+	   Can't: rs->rs_requested = 0;
+	   Can't: rs->rs_rgd = NULL;*/
+	rs->rs_bi = NULL;
+	rs->rs_biblk = 0;
+}
+
+/**
+ * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree
+ * @rs: The reservation to remove
+ *
+ */
+void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
+{
+	struct gfs2_rgrpd *rgd;
+
+	if (!gfs2_rs_active(rs))
+		return;
+
+	rgd = rs->rs_rgd;
+	spin_lock(&rgd->rd_rsspin);
+	__rs_deltree(rs);
+	spin_unlock(&rgd->rd_rsspin);
+}
+
+/**
+ * gfs2_rs_delete - delete a multi-block reservation
  * @ip: The inode for this reservation
  *
  */
@@ -444,12 +572,36 @@ void gfs2_rs_delete(struct gfs2_inode *ip)
 {
 	down_write(&ip->i_rw_mutex);
 	if (ip->i_res) {
+		gfs2_rs_deltree(ip->i_res);
+		trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE);
+		BUG_ON(ip->i_res->rs_free);
 		kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
 		ip->i_res = NULL;
 	}
 	up_write(&ip->i_rw_mutex);
 }
 
+/**
+ * return_all_reservations - return all reserved blocks back to the rgrp.
+ * @rgd: the rgrp that needs its space back
+ *
+ * We previously reserved a bunch of blocks for allocation. Now we need to
+ * give them back. This leave the reservation structures in tact, but removes
+ * all of their corresponding "no-fly zones".
+ */
+static void return_all_reservations(struct gfs2_rgrpd *rgd)
+{
+	struct rb_node *n;
+	struct gfs2_blkreserv *rs;
+
+	spin_lock(&rgd->rd_rsspin);
+	while ((n = rb_first(&rgd->rd_rstree))) {
+		rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
+		__rs_deltree(rs);
+	}
+	spin_unlock(&rgd->rd_rsspin);
+}
+
 void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
 {
 	struct rb_node *n;
@@ -472,6 +624,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
 
 		gfs2_free_clones(rgd);
 		kfree(rgd->rd_bits);
+		return_all_reservations(rgd);
 		kmem_cache_free(gfs2_rgrpd_cachep, rgd);
 	}
 }
@@ -649,6 +802,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
 	rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
 	rgd->rd_data = be32_to_cpu(buf.ri_data);
 	rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
+	spin_lock_init(&rgd->rd_rsspin);
 
 	error = compute_bitstructs(rgd);
 	if (error)
@@ -1114,30 +1268,213 @@ out:
 	return ret;
 }
 
+/**
+ * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
+ * @bi: the bitmap with the blocks
+ * @ip: the inode structure
+ * @biblk: the 32-bit block number relative to the start of the bitmap
+ * @amount: the number of blocks to reserve
+ *
+ * Returns: NULL - reservation was already taken, so not inserted
+ *          pointer to the inserted reservation
+ */
+static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi,
+				       struct gfs2_inode *ip, u32 biblk,
+				       int amount)
+{
+	struct rb_node **newn, *parent = NULL;
+	int rc;
+	struct gfs2_blkreserv *rs = ip->i_res;
+	struct gfs2_rgrpd *rgd = rs->rs_rgd;
+	u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0;
+
+	spin_lock(&rgd->rd_rsspin);
+	newn = &rgd->rd_rstree.rb_node;
+	BUG_ON(!ip->i_res);
+	BUG_ON(gfs2_rs_active(rs));
+	/* Figure out where to put new node */
+	/*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/
+	while (*newn) {
+		struct gfs2_blkreserv *cur =
+			rb_entry(*newn, struct gfs2_blkreserv, rs_node);
+
+		parent = *newn;
+		rc = rs_cmp(fsblock, amount, cur);
+		if (rc > 0)
+			newn = &((*newn)->rb_right);
+		else if (rc < 0)
+			newn = &((*newn)->rb_left);
+		else {
+			spin_unlock(&rgd->rd_rsspin);
+			return NULL; /* reservation already in use */
+		}
+	}
+
+	/* Do our reservation work */
+	rs = ip->i_res;
+	rs->rs_free = amount;
+	rs->rs_biblk = biblk;
+	rs->rs_bi = bi;
+	rb_link_node(&rs->rs_node, parent, newn);
+	rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
+
+	/* Do our inode accounting for the reservation */
+	/*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/
+
+	/* Do our rgrp accounting for the reservation */
+	rgd->rd_reserved += amount; /* blocks reserved */
+	rgd->rd_rs_cnt++; /* number of in-tree reservations */
+	spin_unlock(&rgd->rd_rsspin);
+	trace_gfs2_rs(ip, rs, TRACE_RS_INSERT);
+	return rs;
+}
+
+/**
+ * unclaimed_blocks - return number of blocks that aren't spoken for
+ */
+static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd)
+{
+	return rgd->rd_free_clone - rgd->rd_reserved;
+}
+
+/**
+ * rg_mblk_search - find a group of multiple free blocks
+ * @rgd: the resource group descriptor
+ * @rs: the block reservation
+ * @ip: pointer to the inode for which we're reserving blocks
+ *
+ * This is very similar to rgblk_search, except we're looking for whole
+ * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing
+ * on aligned dwords for speed's sake.
+ *
+ * Returns: 0 if successful or BFITNOENT if there isn't enough free space
+ */
+
+static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
+{
+	struct gfs2_bitmap *bi = rgd->rd_bits;
+	const u32 length = rgd->rd_length;
+	u32 blk;
+	unsigned int buf, x, search_bytes;
+	u8 *buffer = NULL;
+	u8 *ptr, *end, *nonzero;
+	u32 goal, rsv_bytes;
+	struct gfs2_blkreserv *rs;
+	u32 best_rs_bytes, unclaimed;
+	int best_rs_blocks;
+
+	/* Find bitmap block that contains bits for goal block */
+	if (rgrp_contains_block(rgd, ip->i_goal))
+		goal = ip->i_goal - rgd->rd_data0;
+	else
+		goal = rgd->rd_last_alloc;
+	for (buf = 0; buf < length; buf++) {
+		bi = rgd->rd_bits + buf;
+		/* Convert scope of "goal" from rgrp-wide to within
+		   found bit block */
+		if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) {
+			goal -= bi->bi_start * GFS2_NBBY;
+			goto do_search;
+		}
+	}
+	buf = 0;
+	goal = 0;
+
+do_search:
+	best_rs_blocks = max_t(int, atomic_read(&ip->i_res->rs_sizehint),
+			       (RGRP_RSRV_MINBLKS * rgd->rd_length));
+	best_rs_bytes = (best_rs_blocks *
+			 (1 + (RSRV_CONTENTION_FACTOR * rgd->rd_rs_cnt))) /
+		GFS2_NBBY; /* 1 + is for our not-yet-created reservation */
+	best_rs_bytes = ALIGN(best_rs_bytes, sizeof(u64));
+	unclaimed = unclaimed_blocks(rgd);
+	if (best_rs_bytes * GFS2_NBBY > unclaimed)
+		best_rs_bytes = unclaimed >> GFS2_BIT_SIZE;
+
+	for (x = 0; x <= length; x++) {
+		bi = rgd->rd_bits + buf;
+
+		if (test_bit(GBF_FULL, &bi->bi_flags))
+			goto skip;
+
+		WARN_ON(!buffer_uptodate(bi->bi_bh));
+		if (bi->bi_clone)
+			buffer = bi->bi_clone + bi->bi_offset;
+		else
+			buffer = bi->bi_bh->b_data + bi->bi_offset;
+
+		/* We have to keep the reservations aligned on u64 boundaries
+		   otherwise we could get situations where a byte can't be
+		   used because it's after a reservation, but a free bit still
+		   is within the reservation's area. */
+		ptr = buffer + ALIGN(goal >> GFS2_BIT_SIZE, sizeof(u64));
+		end = (buffer + bi->bi_len);
+		while (ptr < end) {
+			rsv_bytes = 0;
+			if ((ptr + best_rs_bytes) <= end)
+				search_bytes = best_rs_bytes;
+			else
+				search_bytes = end - ptr;
+			BUG_ON(!search_bytes);
+			nonzero = memchr_inv(ptr, 0, search_bytes);
+			/* If the lot is all zeroes, reserve the whole size. If
+			   there's enough zeroes to satisfy the request, use
+			   what we can. If there's not enough, keep looking. */
+			if (nonzero == NULL)
+				rsv_bytes = search_bytes;
+			else if ((nonzero - ptr) * GFS2_NBBY >=
+				 ip->i_res->rs_requested)
+				rsv_bytes = (nonzero - ptr);
+
+			if (rsv_bytes) {
+				blk = ((ptr - buffer) * GFS2_NBBY);
+				BUG_ON(blk >= bi->bi_len * GFS2_NBBY);
+				rs = rs_insert(bi, ip, blk,
+					       rsv_bytes * GFS2_NBBY);
+				if (IS_ERR(rs))
+					return PTR_ERR(rs);
+				if (rs)
+					return 0;
+			}
+			ptr += ALIGN(search_bytes, sizeof(u64));
+		}
+skip:
+		/* Try next bitmap block (wrap back to rgrp header
+		   if at end) */
+		buf++;
+		buf %= length;
+		goal = 0;
+	}
+
+	return BFITNOENT;
+}
+
 /**
  * try_rgrp_fit - See if a given reservation will fit in a given RG
  * @rgd: the RG data
  * @ip: the inode
  *
  * If there's room for the requested blocks to be allocated from the RG:
+ * This will try to get a multi-block reservation first, and if that doesn't
+ * fit, it will take what it can.
  *
  * Returns: 1 on success (it fits), 0 on failure (it doesn't fit)
  */
 
-static int try_rgrp_fit(const struct gfs2_rgrpd *rgd, const struct gfs2_inode *ip)
+static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
 {
-	const struct gfs2_blkreserv *rs = ip->i_res;
+	struct gfs2_blkreserv *rs = ip->i_res;
 
 	if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
 		return 0;
-	if (rgd->rd_free_clone >= rs->rs_requested)
+	/* Look for a multi-block reservation. */
+	if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS &&
+	    rg_mblk_search(rgd, ip) != BFITNOENT)
+		return 1;
+	if (unclaimed_blocks(rgd) >= rs->rs_requested)
 		return 1;
-	return 0;
-}
 
-static inline u32 gfs2_bi2rgd_blk(struct gfs2_bitmap *bi, u32 blk)
-{
-	return (bi->bi_start * GFS2_NBBY) + blk;
+	return 0;
 }
 
 /**
@@ -1217,7 +1554,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
 int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	struct gfs2_rgrpd *rgd, *begin = NULL;
+	struct gfs2_rgrpd *begin = NULL;
 	struct gfs2_blkreserv *rs = ip->i_res;
 	int error = 0, rg_locked, flags = LM_FLAG_TRY;
 	u64 last_unlinked = NO_BLOCK;
@@ -1225,32 +1562,40 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
 
 	if (sdp->sd_args.ar_rgrplvb)
 		flags |= GL_SKIP;
-	rs = ip->i_res;
 	rs->rs_requested = requested;
 	if (gfs2_assert_warn(sdp, requested)) {
 		error = -EINVAL;
 		goto out;
 	}
-
-	if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal))
-		rgd = begin = ip->i_rgd;
-	else
-		rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
-
-	if (rgd == NULL)
+	if (gfs2_rs_active(rs)) {
+		begin = rs->rs_rgd;
+		flags = 0; /* Yoda: Do or do not. There is no try */
+	} else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) {
+		rs->rs_rgd = begin = ip->i_rgd;
+	} else {
+		rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
+	}
+	if (rs->rs_rgd == NULL)
 		return -EBADSLT;
 
 	while (loops < 3) {
 		rg_locked = 0;
 
-		if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) {
+		if (gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) {
 			rg_locked = 1;
 			error = 0;
+		} else if (!loops && !gfs2_rs_active(rs) &&
+			   rs->rs_rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) {
+			/* If the rgrp already is maxed out for contenders,
+			   we can eliminate it as a "first pass" without even
+			   requesting the rgrp glock. */
+			error = GLR_TRYFAILED;
 		} else {
-			error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
-						   flags, &rs->rs_rgd_gh);
+			error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl,
+						   LM_ST_EXCLUSIVE, flags,
+						   &rs->rs_rgd_gh);
 			if (!error && sdp->sd_args.ar_rgrplvb) {
-				error = update_rgrp_lvb(rgd);
+				error = update_rgrp_lvb(rs->rs_rgd);
 				if (error) {
 					gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
 					return error;
@@ -1259,25 +1604,37 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
 		}
 		switch (error) {
 		case 0:
-			if (try_rgrp_fit(rgd, ip)) {
+			if (gfs2_rs_active(rs)) {
+				if (unclaimed_blocks(rs->rs_rgd) +
+				    rs->rs_free >= rs->rs_requested) {
+					ip->i_rgd = rs->rs_rgd;
+					return 0;
+				}
+				/* We have a multi-block reservation, but the
+				   rgrp doesn't have enough free blocks to
+				   satisfy the request. Free the reservation
+				   and look for a suitable rgrp. */
+				gfs2_rs_deltree(rs);
+			}
+			if (try_rgrp_fit(rs->rs_rgd, ip)) {
 				if (sdp->sd_args.ar_rgrplvb)
-					gfs2_rgrp_bh_get(rgd);
-				ip->i_rgd = rgd;
+					gfs2_rgrp_bh_get(rs->rs_rgd);
+				ip->i_rgd = rs->rs_rgd;
 				return 0;
 			}
-			if (rgd->rd_flags & GFS2_RDF_CHECK) {
+			if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) {
 				if (sdp->sd_args.ar_rgrplvb)
-					gfs2_rgrp_bh_get(rgd);
-				try_rgrp_unlink(rgd, &last_unlinked,
+					gfs2_rgrp_bh_get(rs->rs_rgd);
+				try_rgrp_unlink(rs->rs_rgd, &last_unlinked,
 						ip->i_no_addr);
 			}
 			if (!rg_locked)
 				gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
 			/* fall through */
 		case GLR_TRYFAILED:
-			rgd = gfs2_rgrpd_get_next(rgd);
-			rgd = rgd ? : begin; /* if NULL, wrap */
-			if (rgd != begin) /* If we didn't wrap */
+			rs->rs_rgd = gfs2_rgrpd_get_next(rs->rs_rgd);
+			rs->rs_rgd = rs->rs_rgd ? : begin; /* if NULL, wrap */
+			if (rs->rs_rgd != begin) /* If we didn't wrap */
 				break;
 
 			flags &= ~LM_FLAG_TRY;
@@ -1315,6 +1672,12 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
 {
 	struct gfs2_blkreserv *rs = ip->i_res;
 
+	if (!rs)
+		return;
+
+	if (!rs->rs_free)
+		gfs2_rs_deltree(rs);
+
 	if (rs->rs_rgd_gh.gh_gl)
 		gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
 	rs->rs_requested = 0;
@@ -1413,7 +1776,27 @@ do_search:
 		if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
 			buffer = bi->bi_clone + bi->bi_offset;
 
-		biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state);
+		while (1) {
+			struct gfs2_blkreserv *rs;
+			u32 rgblk;
+
+			biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state);
+			if (biblk == BFITNOENT)
+				break;
+			/* Check if this block is reserved() */
+			rgblk = gfs2_bi2rgd_blk(bi, biblk);
+			rs = rs_find(rgd, rgblk);
+			if (rs == NULL)
+				break;
+
+			BUG_ON(rs->rs_bi != bi);
+			biblk = BFITNOENT;
+			/* This should jump to the first block after the
+			   reservation. */
+			goal = rs->rs_biblk + rs->rs_free;
+			if (goal >= bi->bi_len * GFS2_NBBY)
+				break;
+		}
 		if (biblk != BFITNOENT)
 			break;
 
@@ -1449,8 +1832,9 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi,
 			     u32 blk, bool dinode, unsigned int *n)
 {
 	const unsigned int elen = *n;
-	u32 goal;
+	u32 goal, rgblk;
 	const u8 *buffer = NULL;
+	struct gfs2_blkreserv *rs;
 
 	*n = 0;
 	buffer = bi->bi_bh->b_data + bi->bi_offset;
@@ -1463,6 +1847,10 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi,
 		goal++;
 		if (goal >= (bi->bi_len * GFS2_NBBY))
 			break;
+		rgblk = gfs2_bi2rgd_blk(bi, goal);
+		rs = rs_find(rgd, rgblk);
+		if (rs) /* Oops, we bumped into someone's reservation */
+			break;
 		if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) !=
 		    GFS2_BLKST_FREE)
 			break;
@@ -1538,12 +1926,22 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
 
 int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
 {
-	const struct gfs2_rgrpd *rgd = gl->gl_object;
+	struct gfs2_rgrpd *rgd = gl->gl_object;
+	struct gfs2_blkreserv *trs;
+	const struct rb_node *n;
+
 	if (rgd == NULL)
 		return 0;
-	gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n",
+	gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u\n",
 		       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
-		       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes);
+		       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
+		       rgd->rd_reserved);
+	spin_lock(&rgd->rd_rsspin);
+	for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
+		trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
+		dump_rs(seq, trs);
+	}
+	spin_unlock(&rgd->rd_rsspin);
 	return 0;
 }
 
@@ -1557,11 +1955,64 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
 	rgd->rd_flags |= GFS2_RDF_ERROR;
 }
 
+/**
+ * claim_reserved_blks - Claim previously reserved blocks
+ * @ip: the inode that's claiming the reservation
+ * @dinode: 1 if this block is a dinode block, otherwise data block
+ * @nblocks: desired extent length
+ *
+ * Lay claim to previously allocated block reservation blocks.
+ * Returns: Starting block number of the blocks claimed.
+ * Sets *nblocks to the actual extent length allocated.
+ */
+static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode,
+			       unsigned int *nblocks)
+{
+	struct gfs2_blkreserv *rs = ip->i_res;
+	struct gfs2_rgrpd *rgd = rs->rs_rgd;
+	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+	struct gfs2_bitmap *bi;
+	u64 start_block = gfs2_rs_startblk(rs);
+	const unsigned int elen = *nblocks;
+
+	/*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/
+	gfs2_assert_withdraw(sdp, rgd);
+	/*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/
+	bi = rs->rs_bi;
+	gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
+
+	for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) {
+		/* Make sure the bitmap hasn't changed */
+		gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_biblk,
+			    dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
+		rs->rs_biblk++;
+		rs->rs_free--;
+
+		BUG_ON(!rgd->rd_reserved);
+		rgd->rd_reserved--;
+		dinode = false;
+		trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM);
+	}
+
+	if (!rs->rs_free) {
+		struct gfs2_rgrpd *rgd = ip->i_res->rs_rgd;
+
+		gfs2_rs_deltree(rs);
+		/* -nblocks because we haven't returned to do the math yet.
+		   I'm doing the math backwards to prevent negative numbers,
+		   but think of it as:
+		   if (unclaimed_blocks(rgd) - *nblocks >= RGRP_RSRV_MINBLKS */
+		if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS + *nblocks)
+			rg_mblk_search(rgd, ip);
+	}
+	return start_block;
+}
+
 /**
  * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
  * @ip: the inode to allocate the block for
  * @bn: Used to return the starting block number
- * @ndata: requested number of blocks/extent length (value/result)
+ * @nblocks: requested number of blocks/extent length (value/result)
  * @dinode: 1 if we're allocating a dinode block, else 0
  * @generation: the generation number of the inode
  *
@@ -1586,20 +2037,34 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 	if (ip->i_res->rs_requested == 0)
 		return -ECANCELED;
 
-	rgd = ip->i_rgd;
-
-	if (!dinode && rgrp_contains_block(rgd, ip->i_goal))
-		goal = ip->i_goal - rgd->rd_data0;
-	else
-		goal = rgd->rd_last_alloc;
-
-	blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi);
+	/* Check if we have a multi-block reservation, and if so, claim the
+	   next free block from it. */
+	if (gfs2_rs_active(ip->i_res)) {
+		BUG_ON(!ip->i_res->rs_free);
+		rgd = ip->i_res->rs_rgd;
+		block = claim_reserved_blks(ip, dinode, nblocks);
+	} else {
+		rgd = ip->i_rgd;
 
-	/* Since all blocks are reserved in advance, this shouldn't happen */
-	if (blk == BFITNOENT)
-		goto rgrp_error;
+		if (!dinode && rgrp_contains_block(rgd, ip->i_goal))
+			goal = ip->i_goal - rgd->rd_data0;
+		else
+			goal = rgd->rd_last_alloc;
+
+		blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi);
+
+		/* Since all blocks are reserved in advance, this shouldn't
+		   happen */
+		if (blk == BFITNOENT) {
+			printk(KERN_WARNING "BFITNOENT, nblocks=%u\n",
+			       *nblocks);
+			printk(KERN_WARNING "FULL=%d\n",
+			       test_bit(GBF_FULL, &rgd->rd_bits->bi_flags));
+			goto rgrp_error;
+		}
 
-	block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks);
+		block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks);
+	}
 	ndata = *nblocks;
 	if (dinode)
 		ndata--;
@@ -1616,8 +2081,10 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 			brelse(dibh);
 		}
 	}
-	if (rgd->rd_free < *nblocks)
+	if (rgd->rd_free < *nblocks) {
+		printk(KERN_WARNING "nblocks=%u\n", *nblocks);
 		goto rgrp_error;
+	}
 
 	rgd->rd_free -= *nblocks;
 	if (dinode) {
@@ -1877,6 +2344,7 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
 		for (x = 0; x < rlist->rl_rgrps; x++)
 			gfs2_holder_uninit(&rlist->rl_ghs[x]);
 		kfree(rlist->rl_ghs);
+		rlist->rl_ghs = NULL;
 	}
 }
 
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 5d8314dbc89..ca6e26729b8 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -13,6 +13,14 @@
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 
+/* Since each block in the file system is represented by two bits in the
+ * bitmap, one 64-bit word in the bitmap will represent 32 blocks.
+ * By reserving 32 blocks at a time, we can optimize / shortcut how we search
+ * through the bitmaps by looking a word at a time.
+ */
+#define RGRP_RSRV_MINBYTES 8
+#define RGRP_RSRV_MINBLKS ((u32)(RGRP_RSRV_MINBYTES * GFS2_NBBY))
+
 struct gfs2_rgrpd;
 struct gfs2_sbd;
 struct gfs2_holder;
@@ -29,6 +37,8 @@ extern void gfs2_free_clones(struct gfs2_rgrpd *rgd);
 extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh);
 extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
 
+extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
+
 extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested);
 extern void gfs2_inplace_release(struct gfs2_inode *ip);
 
@@ -36,6 +46,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
 			     bool dinode, u64 *generation);
 
 extern int gfs2_rs_alloc(struct gfs2_inode *ip);
+extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs);
 extern void gfs2_rs_delete(struct gfs2_inode *ip);
 extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
 extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
@@ -62,7 +73,7 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
 				   const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed);
 extern int gfs2_fitrim(struct file *filp, void __user *argp);
 
-/* This is how to tell if a reservation is "inplace" reserved: */
+/* This is how to tell if a multi-block reservation is "inplace" reserved: */
 static inline int gfs2_mb_reserved(struct gfs2_inode *ip)
 {
 	if (ip->i_res && ip->i_res->rs_requested)
@@ -70,4 +81,22 @@ static inline int gfs2_mb_reserved(struct gfs2_inode *ip)
 	return 0;
 }
 
+/* This is how to tell if a multi-block reservation is in the rgrp tree: */
+static inline int gfs2_rs_active(struct gfs2_blkreserv *rs)
+{
+	if (rs && rs->rs_bi)
+		return 1;
+	return 0;
+}
+
+static inline u32 gfs2_bi2rgd_blk(const struct gfs2_bitmap *bi, u32 blk)
+{
+	return (bi->bi_start * GFS2_NBBY) + blk;
+}
+
+static inline u64 gfs2_rs_startblk(const struct gfs2_blkreserv *rs)
+{
+	return gfs2_bi2rgd_blk(rs->rs_bi, rs->rs_biblk) + rs->rs_rgd->rd_data0;
+}
+
 #endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 788068758f3..b1502c4bc60 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1420,6 +1420,10 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
 		return -EIO;
 	}
 
+	error = gfs2_rindex_update(sdp);
+	if (error)
+		return error;
+
 	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
 	if (error)
 		return error;
@@ -1550,6 +1554,9 @@ out_truncate:
 
 out_unlock:
 	/* Error path for case 1 */
+	if (gfs2_rs_active(ip->i_res))
+		gfs2_rs_deltree(ip->i_res);
+
 	if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags))
 		gfs2_glock_dq(&ip->i_iopen_gh);
 	gfs2_holder_uninit(&ip->i_iopen_gh);
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index 1b8b8158819..a25c252fe41 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -14,6 +14,7 @@
 #include <linux/ktime.h>
 #include "incore.h"
 #include "glock.h"
+#include "rgrp.h"
 
 #define dlm_state_name(nn) { DLM_LOCK_##nn, #nn }
 #define glock_trace_name(x) __print_symbolic(x,		\
@@ -31,6 +32,17 @@
 			    { GFS2_BLKST_DINODE, "dinode" },	\
 			    { GFS2_BLKST_UNLINKED, "unlinked" })
 
+#define TRACE_RS_DELETE  0
+#define TRACE_RS_TREEDEL 1
+#define TRACE_RS_INSERT  2
+#define TRACE_RS_CLAIM   3
+
+#define rs_func_name(x) __print_symbolic(x,	\
+					 { 0, "del " },	\
+					 { 1, "tdel" },	\
+					 { 2, "ins " },	\
+					 { 3, "clm " })
+
 #define show_glock_flags(flags) __print_flags(flags, "",	\
 	{(1UL << GLF_LOCK),			"l" },		\
 	{(1UL << GLF_DEMOTE),			"D" },		\
@@ -470,6 +482,7 @@ TRACE_EVENT(gfs2_block_alloc,
 		__field(	u8,	block_state		)
 		__field(        u64,	rd_addr			)
 		__field(        u32,	rd_free_clone		)
+		__field(	u32,	rd_reserved		)
 	),
 
 	TP_fast_assign(
@@ -480,16 +493,58 @@ TRACE_EVENT(gfs2_block_alloc,
 		__entry->block_state	= block_state;
 		__entry->rd_addr	= rgd->rd_addr;
 		__entry->rd_free_clone	= rgd->rd_free_clone;
+		__entry->rd_reserved	= rgd->rd_reserved;
 	),
 
-	TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u",
+	TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rr:%lu",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long long)__entry->inum,
 		  (unsigned long long)__entry->start,
 		  (unsigned long)__entry->len,
 		  block_state_name(__entry->block_state),
 		  (unsigned long long)__entry->rd_addr,
-		  __entry->rd_free_clone)
+		  __entry->rd_free_clone, (unsigned long)__entry->rd_reserved)
+);
+
+/* Keep track of multi-block reservations as they are allocated/freed */
+TRACE_EVENT(gfs2_rs,
+
+	TP_PROTO(const struct gfs2_inode *ip, const struct gfs2_blkreserv *rs,
+		 u8 func),
+
+	TP_ARGS(ip, rs, func),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	u64,	rd_addr			)
+		__field(	u32,	rd_free_clone		)
+		__field(	u32,	rd_reserved		)
+		__field(	u64,	inum			)
+		__field(	u64,	start			)
+		__field(	u32,	free			)
+		__field(	u8,	func			)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= rs->rs_rgd ? rs->rs_rgd->rd_sbd->sd_vfs->s_dev : 0;
+		__entry->rd_addr	= rs->rs_rgd ? rs->rs_rgd->rd_addr : 0;
+		__entry->rd_free_clone	= rs->rs_rgd ? rs->rs_rgd->rd_free_clone : 0;
+		__entry->rd_reserved	= rs->rs_rgd ? rs->rs_rgd->rd_reserved : 0;
+		__entry->inum		= ip ? ip->i_no_addr : 0;
+		__entry->start		= gfs2_rs_startblk(rs);
+		__entry->free		= rs->rs_free;
+		__entry->func		= func;
+	),
+
+	TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s "
+		  "f:%lu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long long)__entry->inum,
+		  (unsigned long long)__entry->start,
+		  (unsigned long long)__entry->rd_addr,
+		  (unsigned long)__entry->rd_free_clone,
+		  (unsigned long)__entry->rd_reserved,
+		  rs_func_name(__entry->func), (unsigned long)__entry->free)
 );
 
 #endif /* _TRACE_GFS2_H */
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 523c0de0d80..27a0b4a901f 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -327,6 +327,10 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
 {
 	int error;
 
+	error = gfs2_rindex_update(GFS2_SB(&ip->i_inode));
+	if (error)
+		return error;
+
 	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
 	if (error)
 		goto out_alloc;
@@ -710,6 +714,10 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 	struct buffer_head *dibh;
 	int error;
 
+	error = gfs2_rindex_update(GFS2_SB(&ip->i_inode));
+	if (error)
+		return error;
+
 	error = gfs2_quota_lock_check(ip);
 	if (error)
 		return error;
@@ -1483,6 +1491,10 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip)
 {
 	int error;
 
+	error = gfs2_rindex_update(GFS2_SB(&ip->i_inode));
+	if (error)
+		return error;
+
 	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
 	if (error)
 		return error;
-- 
cgit v1.2.3-70-g09d2


From c6727932cfdb13501108b16c38463c09d5ec7a74 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@linux.intel.com>
Date: Sat, 14 Jul 2012 14:33:09 +0300
Subject: UBIFS: fix a bug in empty space fix-up

UBIFS has a feature called "empty space fix-up" which is a quirk to work-around
limitations of dumb flasher programs. Namely, of those flashers that are unable
to skip NAND pages full of 0xFFs while flashing, resulting in empty space at
the end of half-filled eraseblocks to be unusable for UBIFS. This feature is
relatively new (introduced in v3.0).

The fix-up routine (fixup_free_space()) is executed only once at the very first
mount if the superblock has the 'space_fixup' flag set (can be done with -F
option of mkfs.ubifs). It basically reads all the UBIFS data and metadata and
writes it back to the same LEB. The routine assumes the image is pristine and
does not have anything in the journal.

There was a bug in 'fixup_free_space()' where it fixed up the log incorrectly.
All but one LEB of the log of a pristine file-system are empty. And one
contains just a commit start node. And 'fixup_free_space()' just unmapped this
LEB, which resulted in wiping the commit start node. As a result, some users
were unable to mount the file-system next time with the following symptom:

UBIFS error (pid 1): replay_log_leb: first log node at LEB 3:0 is not CS node
UBIFS error (pid 1): replay_log_leb: log error detected while replaying the log at LEB 3:0

The root-cause of this bug was that 'fixup_free_space()' wrongly assumed
that the beginning of empty space in the log head (c->lhead_offs) was known
on mount. However, it is not the case - it was always 0. UBIFS does not store
in it the master node and finds out by scanning the log on every mount.

The fix is simple - just pass commit start node size instead of 0 to
'fixup_leb()'.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@linux.intel.com>
Cc: stable@vger.kernel.org [v3.0+]
Reported-by: Iwo Mergler <Iwo.Mergler@netcommwireless.com>
Tested-by: Iwo Mergler <Iwo.Mergler@netcommwireless.com>
Reported-by: James Nute <newten82@gmail.com>
---
 fs/ubifs/sb.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index ef3d1ba6d99..15e2fc5aa60 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -718,8 +718,12 @@ static int fixup_free_space(struct ubifs_info *c)
 		lnum = ubifs_next_log_lnum(c, lnum);
 	}
 
-	/* Fixup the current log head */
-	err = fixup_leb(c, c->lhead_lnum, c->lhead_offs);
+	/*
+	 * Fixup the log head which contains the only a CS node at the
+	 * beginning.
+	 */
+	err = fixup_leb(c, c->lhead_lnum,
+			ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size));
 	if (err)
 		goto out;
 
-- 
cgit v1.2.3-70-g09d2


From 782759b9f5f5223e0962af60c3457c912fab755f Mon Sep 17 00:00:00 2001
From: Alexandre Pereira da Silva <aletes.xgr@gmail.com>
Date: Mon, 25 Jun 2012 17:47:49 -0300
Subject: UBIFS: fix compilation warning

Fix the following compilation warning:

fs/ubifs/dir.c: In function 'ubifs_rename':
fs/ubifs/dir.c:972:15: warning: 'saved_nlink' may be used uninitialized
in this function

Use the 'uninitialized_var()' macro to get rid of this false-positive.

Artem: massaged the patch a bit.

Signed-off-by: Alexandre Pereira da Silva <aletes.xgr@gmail.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 fs/ubifs/dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index a6d42efc76d..390198d91e6 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -969,7 +969,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	struct ubifs_budget_req ino_req = { .dirtied_ino = 1,
 			.dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) };
 	struct timespec time;
-	unsigned int saved_nlink;
+	unsigned int uninitialized_var(saved_nlink);
 
 	/*
 	 * Budget request settings: deletion direntry, new direntry, removing
-- 
cgit v1.2.3-70-g09d2


From 06bef9451a4c5ad882cd15fd7a0df9890c0249f3 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@linux.intel.com>
Date: Sat, 14 Jul 2012 14:19:46 +0300
Subject: UBIFS: add debugfs knob to switch to R/O mode

This patch adds another debugfs knob which switches UBIFS to R/O mode.
I needed it while trying to reproduce the 'first log node is not CS node'
bug. Without this debugfs knob you have to perform a power cut to repruduce
the bug. The knob is named 'ro_error' and all it does is it sets the
'ro_error' UBIFS flag which makes UBIFS disallow any further writes - even
write-back will fail with -EROFS. Useful for debugging.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@linux.intel.com>
---
 fs/ubifs/debug.c | 11 +++++++++++
 fs/ubifs/debug.h |  5 +++++
 2 files changed, 16 insertions(+)

(limited to 'fs')

diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 92df3b08153..bb3167257aa 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -2802,6 +2802,8 @@ static ssize_t dfs_file_read(struct file *file, char __user *u, size_t count,
 		val = d->chk_fs;
 	else if (dent == d->dfs_tst_rcvry)
 		val = d->tst_rcvry;
+	else if (dent == d->dfs_ro_error)
+		val = c->ro_error;
 	else
 		return -EINVAL;
 
@@ -2885,6 +2887,8 @@ static ssize_t dfs_file_write(struct file *file, const char __user *u,
 		d->chk_fs = val;
 	else if (dent == d->dfs_tst_rcvry)
 		d->tst_rcvry = val;
+	else if (dent == d->dfs_ro_error)
+		c->ro_error = !!val;
 	else
 		return -EINVAL;
 
@@ -2996,6 +3000,13 @@ int dbg_debugfs_init_fs(struct ubifs_info *c)
 		goto out_remove;
 	d->dfs_tst_rcvry = dent;
 
+	fname = "ro_error";
+	dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
+				   &dfs_fops);
+	if (IS_ERR_OR_NULL(dent))
+		goto out_remove;
+	d->dfs_ro_error = dent;
+
 	return 0;
 
 out_remove:
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 486a8e024fb..8b8cc4e945f 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -79,6 +79,10 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c,
  * @dfs_chk_lprops: debugfs knob to enable UBIFS LEP properties extra checks
  * @dfs_chk_fs: debugfs knob to enable UBIFS contents extra checks
  * @dfs_tst_rcvry: debugfs knob to enable UBIFS recovery testing
+ * @dfs_ro_error: debugfs knob to switch UBIFS to R/O mode (different to
+ *                re-mounting to R/O mode because it does not flush any buffers
+ *                and UBIFS just starts returning -EROFS on all write
+ *               operations)
  */
 struct ubifs_debug_info {
 	struct ubifs_zbranch old_zroot;
@@ -122,6 +126,7 @@ struct ubifs_debug_info {
 	struct dentry *dfs_chk_lprops;
 	struct dentry *dfs_chk_fs;
 	struct dentry *dfs_tst_rcvry;
+	struct dentry *dfs_ro_error;
 };
 
 /**
-- 
cgit v1.2.3-70-g09d2


From d51f17ea0a3afe11fb4c4ad6635877e24df2758f Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@linux.intel.com>
Date: Sat, 14 Jul 2012 20:52:58 +0300
Subject: UBIFS: simplify reply code a bit

In the log reply code we assume that 'c->lhead_offs' is known and may be
non-zero, which is not the case because we do not store it in the master
node and have to find out by scanning on every mount. Knowing this fact
allows us to simplify the log scanning loop a bit and remove a couple
of unneeded local variables.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@linux.intel.com>
---
 fs/ubifs/replay.c | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 3a2da7e476e..eba46d4a761 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -1007,7 +1007,7 @@ out:
  */
 int ubifs_replay_journal(struct ubifs_info *c)
 {
-	int err, i, lnum, offs, free;
+	int err, lnum, free;
 
 	BUILD_BUG_ON(UBIFS_TRUN_KEY > 5);
 
@@ -1025,25 +1025,17 @@ int ubifs_replay_journal(struct ubifs_info *c)
 	dbg_mnt("start replaying the journal");
 	c->replaying = 1;
 	lnum = c->ltail_lnum = c->lhead_lnum;
-	offs = c->lhead_offs;
 
-	for (i = 0; i < c->log_lebs; i++, lnum++) {
-		if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) {
-			/*
-			 * The log is logically circular, we reached the last
-			 * LEB, switch to the first one.
-			 */
-			lnum = UBIFS_LOG_LNUM;
-			offs = 0;
-		}
-		err = replay_log_leb(c, lnum, offs, c->sbuf);
+	lnum = UBIFS_LOG_LNUM;
+	do {
+		err = replay_log_leb(c, lnum, 0, c->sbuf);
 		if (err == 1)
 			/* We hit the end of the log */
 			break;
 		if (err)
 			goto out;
-		offs = 0;
-	}
+		lnum = ubifs_next_log_lnum(c, lnum);
+	} while (lnum != UBIFS_LOG_LNUM);
 
 	err = replay_buds(c);
 	if (err)
-- 
cgit v1.2.3-70-g09d2


From 7074e5eb233343e4bad8c0a3f9e73167cf85a159 Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Mon, 9 Jul 2012 09:27:14 +0200
Subject: UBIFS: remove invalid reference to list iterator variable

If list_for_each_entry, etc complete a traversal of the list, the iterator
variable ends up pointing to an address at an offset from the list head,
and not a meaningful structure.  Thus this value should not be used after
the end of the iterator.  Replace a field access from orphan by NULL in two
places.

A simplified version of the semantic match that finds this problem is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
@@
identifier c;
expression E;
iterator name list_for_each_entry;
statement S;
@@

list_for_each_entry(c,...) { ... when != break;
                                 when forall
                                 when strict
}
...
(
c = E
|
*c
)
// </smpl>

Artem: fortunately, this did not cause any issues because we iterate the orphan
list using the elements count, so we never dereferenced the corrupted pointer.
This is why I do not send this patch to -stable. But otherwise - well spotted!

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@linux.intel.com>
---
 fs/ubifs/orphan.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index b02734db187..cebf17ea045 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -176,7 +176,7 @@ int ubifs_orphan_start_commit(struct ubifs_info *c)
 		*last = orphan;
 		last = &orphan->cnext;
 	}
-	*last = orphan->cnext;
+	*last = NULL;
 	c->cmt_orphans = c->new_orphans;
 	c->new_orphans = 0;
 	dbg_cmt("%d orphans to commit", c->cmt_orphans);
@@ -382,7 +382,7 @@ static int consolidate(struct ubifs_info *c)
 			last = &orphan->cnext;
 			cnt += 1;
 		}
-		*last = orphan->cnext;
+		*last = NULL;
 		ubifs_assert(cnt == c->tot_orphans - c->new_orphans);
 		c->cmt_orphans = cnt;
 		c->ohead_lnum = c->orph_first;
-- 
cgit v1.2.3-70-g09d2


From 9ff19309a9623f2963ac5a136782ea4d8b5d67fb Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Fri, 8 Jun 2012 01:19:07 +0300
Subject: ore: Fix NFS crash by supporting any unaligned RAID IO

In RAID_5/6 We used to not permit an IO that it's end
byte is not stripe_size aligned and spans more than one stripe.
.i.e the caller must check if after submission the actual
transferred bytes is shorter, and would need to resubmit
a new IO with the remainder.

Exofs supports this, and NFS was supposed to support this
as well with it's short write mechanism. But late testing has
exposed a CRASH when this is used with none-RPC layout-drivers.

The change at NFS is deep and risky, in it's place the fix
at ORE to lift the limitation is actually clean and simple.
So here it is below.

The principal here is that in the case of unaligned IO on
both ends, beginning and end, we will send two read requests
one like old code, before the calculation of the first stripe,
and also a new site, before the calculation of the last stripe.
If any "boundary" is aligned or the complete IO is within a single
stripe. we do a single read like before.

The code is clean and simple by splitting the old _read_4_write
into 3 even parts:
1._read_4_write_first_stripe
2. _read_4_write_last_stripe
3. _read_4_write_execute

And calling 1+3 at the same place as before. 2+3 before last
stripe, and in the case of all in a single stripe then 1+2+3
is preformed additively.

Why did I not think of it before. Well I had a strike of
genius because I have stared at this code for 2 years, and did
not find this simple solution, til today. Not that I did not try.

This solution is much better for NFS than the previous supposedly
solution because the short write was dealt  with out-of-band after
IO_done, which would cause for a seeky IO pattern where as in here
we execute in order. At both solutions we do 2 separate reads, only
here we do it within a single IO request. (And actually combine two
writes into a single submission)

NFS/exofs code need not change since the ORE API communicates the new
shorter length on return, what will happen is that this case would not
occur anymore.

hurray!!

[Stable this is an NFS bug since 3.2 Kernel should apply cleanly]
CC: Stable Tree <stable@kernel.org>
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
 fs/exofs/ore_raid.c | 67 ++++++++++++++++++++++++++++-------------------------
 1 file changed, 36 insertions(+), 31 deletions(-)

(limited to 'fs')

diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index d222c77cfa1..fff2070c675 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -461,16 +461,12 @@ static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)
  * ios->sp2d[p][*], xor is calculated the same way. These pages are
  * allocated/freed and don't go through cache
  */
-static int _read_4_write(struct ore_io_state *ios)
+static int _read_4_write_first_stripe(struct ore_io_state *ios)
 {
-	struct ore_io_state *ios_read;
 	struct ore_striping_info read_si;
 	struct __stripe_pages_2d *sp2d = ios->sp2d;
 	u64 offset = ios->si.first_stripe_start;
-	u64 last_stripe_end;
-	unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
-	unsigned i, c, p, min_p = sp2d->pages_in_unit, max_p = -1;
-	int ret;
+	unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
 
 	if (offset == ios->offset) /* Go to start collect $200 */
 		goto read_last_stripe;
@@ -478,6 +474,9 @@ static int _read_4_write(struct ore_io_state *ios)
 	min_p = _sp2d_min_pg(sp2d);
 	max_p = _sp2d_max_pg(sp2d);
 
+	ORE_DBGMSG("stripe_start=0x%llx ios->offset=0x%llx min_p=%d max_p=%d\n",
+		   offset, ios->offset, min_p, max_p);
+
 	for (c = 0; ; c++) {
 		ore_calc_stripe_info(ios->layout, offset, 0, &read_si);
 		read_si.obj_offset += min_p * PAGE_SIZE;
@@ -512,6 +511,18 @@ static int _read_4_write(struct ore_io_state *ios)
 	}
 
 read_last_stripe:
+	return 0;
+}
+
+static int _read_4_write_last_stripe(struct ore_io_state *ios)
+{
+	struct ore_striping_info read_si;
+	struct __stripe_pages_2d *sp2d = ios->sp2d;
+	u64 offset;
+	u64 last_stripe_end;
+	unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
+	unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
+
 	offset = ios->offset + ios->length;
 	if (offset % PAGE_SIZE)
 		_add_to_r4w_last_page(ios, &offset);
@@ -527,15 +538,15 @@ read_last_stripe:
 	c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1,
 		       ios->layout->mirrors_p1, read_si.par_dev, read_si.dev);
 
-	BUG_ON(ios->si.first_stripe_start + bytes_in_stripe != last_stripe_end);
-	/* unaligned IO must be within a single stripe */
-
 	if (min_p == sp2d->pages_in_unit) {
 		/* Didn't do it yet */
 		min_p = _sp2d_min_pg(sp2d);
 		max_p = _sp2d_max_pg(sp2d);
 	}
 
+	ORE_DBGMSG("offset=0x%llx stripe_end=0x%llx min_p=%d max_p=%d\n",
+		   offset, last_stripe_end, min_p, max_p);
+
 	while (offset < last_stripe_end) {
 		struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
 
@@ -568,6 +579,15 @@ read_last_stripe:
 	}
 
 read_it:
+	return 0;
+}
+
+static int _read_4_write_execute(struct ore_io_state *ios)
+{
+	struct ore_io_state *ios_read;
+	unsigned i;
+	int ret;
+
 	ios_read = ios->ios_read_4_write;
 	if (!ios_read)
 		return 0;
@@ -591,6 +611,8 @@ read_it:
 	}
 
 	_mark_read4write_pages_uptodate(ios_read, ret);
+	ore_put_io_state(ios_read);
+	ios->ios_read_4_write = NULL; /* Might need a reuse at last stripe */
 	return 0;
 }
 
@@ -626,8 +648,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
 			/* If first stripe, Read in all read4write pages
 			 * (if needed) before we calculate the first parity.
 			 */
-			_read_4_write(ios);
+			_read_4_write_first_stripe(ios);
 		}
+		if (!cur_len) /* If last stripe r4w pages of last stripe */
+			_read_4_write_last_stripe(ios);
+		_read_4_write_execute(ios);
 
 		for (i = 0; i < num_pages; i++) {
 			pages[i] = _raid_page_alloc();
@@ -654,34 +679,14 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
 
 int _ore_post_alloc_raid_stuff(struct ore_io_state *ios)
 {
-	struct ore_layout *layout = ios->layout;
-
 	if (ios->parity_pages) {
+		struct ore_layout *layout = ios->layout;
 		unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE;
-		unsigned stripe_size = ios->si.bytes_in_stripe;
-		u64 last_stripe, first_stripe;
 
 		if (_sp2d_alloc(pages_in_unit, layout->group_width,
 				layout->parity, &ios->sp2d)) {
 			return -ENOMEM;
 		}
-
-		/* Round io down to last full strip */
-		first_stripe = div_u64(ios->offset, stripe_size);
-		last_stripe = div_u64(ios->offset + ios->length, stripe_size);
-
-		/* If an IO spans more then a single stripe it must end at
-		 * a stripe boundary. The reminder at the end is pushed into the
-		 * next IO.
-		 */
-		if (last_stripe != first_stripe) {
-			ios->length = last_stripe * stripe_size - ios->offset;
-
-			BUG_ON(!ios->length);
-			ios->nr_pages = (ios->length + PAGE_SIZE - 1) /
-					PAGE_SIZE;
-			ios->si.length = ios->length; /*make it consistent */
-		}
 	}
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From 62b62ad873f2accad9222a4d7ffbe1e93f6714c1 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Fri, 8 Jun 2012 04:30:40 +0300
Subject: ore: Remove support of partial IO request (NFS crash)

Do to OOM situations the ore might fail to allocate all resources
needed for IO of the full request. If some progress was possible
it would proceed with a partial/short request, for the sake of
forward progress.

Since this crashes NFS-core and exofs is just fine without it just
remove this contraption, and fail.

TODO:
	Support real forward progress with some reserved allocations
	of resources, such as mem pools and/or bio_sets

[Bug since 3.2 Kernel]
CC: Stable Tree <stable@kernel.org>
CC: Benny Halevy <bhalevy@tonian.com>
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
 fs/exofs/ore.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index 49cf230554a..24a49d47e93 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -735,13 +735,7 @@ static int _prepare_for_striping(struct ore_io_state *ios)
 out:
 	ios->numdevs = devs_in_group;
 	ios->pages_consumed = cur_pg;
-	if (unlikely(ret)) {
-		if (length == ios->length)
-			return ret;
-		else
-			ios->length -= length;
-	}
-	return 0;
+	return ret;
 }
 
 int ore_create(struct ore_io_state *ios)
-- 
cgit v1.2.3-70-g09d2


From 537632e0a54a5355cdd0330911d18c3b773f9cf7 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Wed, 11 Jul 2012 15:27:13 +0300
Subject: ore: Unlock r4w pages in exact reverse order of locking

The read-4-write pages are locked in address ascending order.
But where unlocked in a way easiest for coding. Fix that,
locks should be released in opposite order of locking, .i.e
descending address order.

I have not hit this dead-lock. It was found by inspecting the
dbug print-outs. I suspect there is an higher lock at caller that
protects us, but fix it regardless.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
 fs/exofs/ore_raid.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index fff2070c675..5f376d14fdc 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -144,26 +144,26 @@ static void _sp2d_reset(struct __stripe_pages_2d *sp2d,
 {
 	unsigned data_devs = sp2d->data_devs;
 	unsigned group_width = data_devs + sp2d->parity;
-	unsigned p;
+	int p, c;
 
 	if (!sp2d->needed)
 		return;
 
-	for (p = 0; p < sp2d->pages_in_unit; p++) {
-		struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
-
-		if (_1ps->write_count < group_width) {
-			unsigned c;
+	for (c = data_devs - 1; c >= 0; --c)
+		for (p = sp2d->pages_in_unit - 1; p >= 0; --p) {
+			struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
 
-			for (c = 0; c < data_devs; c++)
-				if (_1ps->page_is_read[c]) {
-					struct page *page = _1ps->pages[c];
+			if (_1ps->page_is_read[c]) {
+				struct page *page = _1ps->pages[c];
 
-					r4w->put_page(priv, page);
-					_1ps->page_is_read[c] = false;
-				}
+				r4w->put_page(priv, page);
+				_1ps->page_is_read[c] = false;
+			}
 		}
 
+	for (p = 0; p < sp2d->pages_in_unit; p++) {
+		struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
+
 		memset(_1ps->pages, 0, group_width * sizeof(*_1ps->pages));
 		_1ps->write_count = 0;
 		_1ps->tx = NULL;
-- 
cgit v1.2.3-70-g09d2


From 9909d45a8557455ca5f8ee7af0f253debc851f1a Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Fri, 8 Jun 2012 05:29:40 +0300
Subject: pnfs-obj: don't leak objio_state if ore_write/read fails

[Bug since 3.2 Kernel]
CC: Stable Tree <stable@kernel.org>
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
 fs/nfs/objlayout/objio_osd.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index b47277baeba..86d7595aca8 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -454,7 +454,10 @@ int objio_read_pagelist(struct nfs_read_data *rdata)
 	objios->ios->done = _read_done;
 	dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
 		rdata->args.offset, rdata->args.count);
-	return ore_read(objios->ios);
+	ret = ore_read(objios->ios);
+	if (unlikely(ret))
+		objio_free_result(&objios->oir);
+	return ret;
 }
 
 /*
@@ -539,8 +542,10 @@ int objio_write_pagelist(struct nfs_write_data *wdata, int how)
 	dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
 		wdata->args.offset, wdata->args.count);
 	ret = ore_write(objios->ios);
-	if (unlikely(ret))
+	if (unlikely(ret)) {
+		objio_free_result(&objios->oir);
 		return ret;
+	}
 
 	if (objios->sync)
 		_write_done(objios->ios, objios);
-- 
cgit v1.2.3-70-g09d2


From c999ff68029ebd0f56ccae75444f640f6d5a27d2 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Fri, 8 Jun 2012 02:02:30 +0300
Subject: pnfs-obj: Fix __r4w_get_page when offset is beyond i_size

It is very common for the end of the file to be unaligned on
stripe size. But since we know it's beyond file's end then
the XOR should be preformed with all zeros.

Old code used to just read zeros out of the OSD devices, which is a great
waist. But what scares me more about this situation is that, we now have
pages attached to the file's mapping that are beyond i_size. I don't
like the kind of bugs this calls for.

Fix both birds, by returning a global zero_page, if offset is beyond
i_size.

TODO:
	Change the API to ->__r4w_get_page() so a NULL can be
	returned without being considered as error, since XOR API
	treats NULL entries as zero_pages.

[Bug since 3.2. Should apply the same way to all Kernels since]
CC: Stable Tree <stable@kernel.org>
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
 fs/nfs/objlayout/objio_osd.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 86d7595aca8..f50d3e8d6f2 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -489,8 +489,16 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
 	struct nfs_write_data *wdata = objios->oir.rpcdata;
 	struct address_space *mapping = wdata->header->inode->i_mapping;
 	pgoff_t index = offset / PAGE_SIZE;
-	struct page *page = find_get_page(mapping, index);
+	struct page *page;
+	loff_t i_size = i_size_read(wdata->header->inode);
 
+	if (offset >= i_size) {
+		*uptodate = true;
+		dprintk("%s: g_zero_page index=0x%lx\n", __func__, index);
+		return ZERO_PAGE(0);
+	}
+
+	page = find_get_page(mapping, index);
 	if (!page) {
 		page = find_or_create_page(mapping, index, GFP_NOFS);
 		if (unlikely(!page)) {
@@ -510,8 +518,10 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
 
 static void __r4w_put_page(void *priv, struct page *page)
 {
-	dprintk("%s: index=0x%lx\n", __func__, page->index);
-	page_cache_release(page);
+	dprintk("%s: index=0x%lx\n", __func__,
+		(page == ZERO_PAGE(0)) ? -1UL : page->index);
+	if (ZERO_PAGE(0) != page)
+		page_cache_release(page);
 	return;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 15e1c960227dc22d976c270fc854dfe363c04bbd Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Fri, 20 Jul 2012 14:05:05 -0400
Subject: GFS2: Eliminate 64-bit divides

This patch removes the 64-bit divides introduced in the previous patch
in favor of shifting, so that it will compile properly on 32-bit machines.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/file.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 9f94832cefe..9aa6af13823 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -384,7 +384,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 		return ret;
 
 	atomic_set(&ip->i_res->rs_sizehint,
-		   PAGE_CACHE_SIZE / sdp->sd_sb.sb_bsize);
+		   PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift);
 
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
 	ret = gfs2_glock_nq(&gh);
@@ -669,7 +669,7 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	if (ret)
 		return ret;
 
-	atomic_set(&ip->i_res->rs_sizehint, writesize / sdp->sd_sb.sb_bsize);
+	atomic_set(&ip->i_res->rs_sizehint, writesize >> sdp->sd_sb.sb_bsize_shift);
 	if (file->f_flags & O_APPEND) {
 		struct gfs2_holder gh;
 
@@ -795,7 +795,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
 	if (unlikely(error))
 		goto out_uninit;
 
-	atomic_set(&ip->i_res->rs_sizehint, len / sdp->sd_sb.sb_bsize);
+	atomic_set(&ip->i_res->rs_sizehint, len >> sdp->sd_sb.sb_bsize_shift);
 
 	while (len > 0) {
 		if (len < bytes)
-- 
cgit v1.2.3-70-g09d2


From 1e0ea0014479f066ba26f937e8740b8902229616 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 22 Jul 2012 23:46:21 +0400
Subject: use __lookup_hash() in kern_path_parent()

No need to bother with lookup_one_len() here - it's an overkill

Signed-off-by Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 1b647468769..c14dfac83c2 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1875,7 +1875,7 @@ struct dentry *kern_path_locked(const char *name, struct path *path)
 		return ERR_PTR(-EINVAL);
 	}
 	mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
-	d = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len);
+	d = __lookup_hash(&nd.last, nd.path.dentry, 0);
 	if (IS_ERR(d)) {
 		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 		path_put(&nd.path);
-- 
cgit v1.2.3-70-g09d2


From 4a9d4b024a3102fc083c925c242d98ac27b1c5f6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 24 Jun 2012 09:56:45 +0400
Subject: switch fput to task_work_add

... and schedule_work() for interrupt/kernel_thread callers
(and yes, now it *is* OK to call from interrupt).

We are guaranteed that __fput() will be done before we return
to userland (or exit).  Note that for fput() from a kernel
thread we get an async behaviour; it's almost always OK, but
sometimes you might need to have __fput() completed before
you do anything else.  There are two mechanisms for that -
a general barrier (flush_delayed_fput()) and explicit
__fput_sync().  Both should be used with care (as was the
case for fput() from kernel threads all along).  See comments
in fs/file_table.c for details.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c      | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/file.h |  3 +++
 init/main.c          |  3 ++-
 3 files changed, 75 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/file_table.c b/fs/file_table.c
index 9ace2781931..b3fc4d67a26 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -23,6 +23,8 @@
 #include <linux/lglock.h>
 #include <linux/percpu_counter.h>
 #include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <linux/task_work.h>
 #include <linux/ima.h>
 
 #include <linux/atomic.h>
@@ -251,7 +253,6 @@ static void __fput(struct file *file)
 	}
 	fops_put(file->f_op);
 	put_pid(file->f_owner.pid);
-	file_sb_list_del(file);
 	if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
 		i_readcount_dec(inode);
 	if (file->f_mode & FMODE_WRITE)
@@ -263,10 +264,77 @@ static void __fput(struct file *file)
 	mntput(mnt);
 }
 
+static DEFINE_SPINLOCK(delayed_fput_lock);
+static LIST_HEAD(delayed_fput_list);
+static void delayed_fput(struct work_struct *unused)
+{
+	LIST_HEAD(head);
+	spin_lock_irq(&delayed_fput_lock);
+	list_splice_init(&delayed_fput_list, &head);
+	spin_unlock_irq(&delayed_fput_lock);
+	while (!list_empty(&head)) {
+		struct file *f = list_first_entry(&head, struct file, f_u.fu_list);
+		list_del_init(&f->f_u.fu_list);
+		__fput(f);
+	}
+}
+
+static void ____fput(struct callback_head *work)
+{
+	__fput(container_of(work, struct file, f_u.fu_rcuhead));
+}
+
+/*
+ * If kernel thread really needs to have the final fput() it has done
+ * to complete, call this.  The only user right now is the boot - we
+ * *do* need to make sure our writes to binaries on initramfs has
+ * not left us with opened struct file waiting for __fput() - execve()
+ * won't work without that.  Please, don't add more callers without
+ * very good reasons; in particular, never call that with locks
+ * held and never call that from a thread that might need to do
+ * some work on any kind of umount.
+ */
+void flush_delayed_fput(void)
+{
+	delayed_fput(NULL);
+}
+
+static DECLARE_WORK(delayed_fput_work, delayed_fput);
+
 void fput(struct file *file)
 {
-	if (atomic_long_dec_and_test(&file->f_count))
+	if (atomic_long_dec_and_test(&file->f_count)) {
+		struct task_struct *task = current;
+		file_sb_list_del(file);
+		if (unlikely(in_interrupt() || task->flags & PF_KTHREAD)) {
+			unsigned long flags;
+			spin_lock_irqsave(&delayed_fput_lock, flags);
+			list_add(&file->f_u.fu_list, &delayed_fput_list);
+			schedule_work(&delayed_fput_work);
+			spin_unlock_irqrestore(&delayed_fput_lock, flags);
+			return;
+		}
+		init_task_work(&file->f_u.fu_rcuhead, ____fput);
+		task_work_add(task, &file->f_u.fu_rcuhead, true);
+	}
+}
+
+/*
+ * synchronous analog of fput(); for kernel threads that might be needed
+ * in some umount() (and thus can't use flush_delayed_fput() without
+ * risking deadlocks), need to wait for completion of __fput() and know
+ * for this specific struct file it won't involve anything that would
+ * need them.  Use only if you really need it - at the very least,
+ * don't blindly convert fput() by kernel thread to that.
+ */
+void __fput_sync(struct file *file)
+{
+	if (atomic_long_dec_and_test(&file->f_count)) {
+		struct task_struct *task = current;
+		file_sb_list_del(file);
+		BUG_ON(!(task->flags & PF_KTHREAD));
 		__fput(file);
+	}
 }
 
 EXPORT_SYMBOL(fput);
diff --git a/include/linux/file.h b/include/linux/file.h
index 58bf158c53d..a22408bac0d 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -39,4 +39,7 @@ extern void put_unused_fd(unsigned int fd);
 
 extern void fd_install(unsigned int fd, struct file *file);
 
+extern void flush_delayed_fput(void);
+extern void __fput_sync(struct file *);
+
 #endif /* __LINUX_FILE_H */
diff --git a/init/main.c b/init/main.c
index b5cc0a7c470..3f151f6c6da 100644
--- a/init/main.c
+++ b/init/main.c
@@ -68,6 +68,7 @@
 #include <linux/shmem_fs.h>
 #include <linux/slab.h>
 #include <linux/perf_event.h>
+#include <linux/file.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -804,8 +805,8 @@ static noinline int init_post(void)
 	system_state = SYSTEM_RUNNING;
 	numa_default_policy();
 
-
 	current->signal->flags |= SIGNAL_UNKILLABLE;
+	flush_delayed_fput();
 
 	if (ramdisk_execute_command) {
 		run_init_process(ramdisk_execute_command);
-- 
cgit v1.2.3-70-g09d2


From 3ffa3c0e3f6e62f67fc2346ca60161dfb030083d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 24 Jun 2012 10:00:10 +0400
Subject: aio: now fput() is OK from interrupt context; get rid of manual
 delayed __fput()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/aio.c | 73 +++-------------------------------------------------------------
 1 file changed, 3 insertions(+), 70 deletions(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index 55c4c765605..71f613cf4a8 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -56,13 +56,6 @@ static struct kmem_cache	*kioctx_cachep;
 
 static struct workqueue_struct *aio_wq;
 
-/* Used for rare fput completion. */
-static void aio_fput_routine(struct work_struct *);
-static DECLARE_WORK(fput_work, aio_fput_routine);
-
-static DEFINE_SPINLOCK(fput_lock);
-static LIST_HEAD(fput_head);
-
 static void aio_kick_handler(struct work_struct *);
 static void aio_queue_work(struct kioctx *);
 
@@ -479,7 +472,6 @@ static int kiocb_batch_refill(struct kioctx *ctx, struct kiocb_batch *batch)
 {
 	unsigned short allocated, to_alloc;
 	long avail;
-	bool called_fput = false;
 	struct kiocb *req, *n;
 	struct aio_ring *ring;
 
@@ -495,28 +487,11 @@ static int kiocb_batch_refill(struct kioctx *ctx, struct kiocb_batch *batch)
 	if (allocated == 0)
 		goto out;
 
-retry:
 	spin_lock_irq(&ctx->ctx_lock);
 	ring = kmap_atomic(ctx->ring_info.ring_pages[0]);
 
 	avail = aio_ring_avail(&ctx->ring_info, ring) - ctx->reqs_active;
 	BUG_ON(avail < 0);
-	if (avail == 0 && !called_fput) {
-		/*
-		 * Handle a potential starvation case.  It is possible that
-		 * we hold the last reference on a struct file, causing us
-		 * to delay the final fput to non-irq context.  In this case,
-		 * ctx->reqs_active is artificially high.  Calling the fput
-		 * routine here may free up a slot in the event completion
-		 * ring, allowing this allocation to succeed.
-		 */
-		kunmap_atomic(ring);
-		spin_unlock_irq(&ctx->ctx_lock);
-		aio_fput_routine(NULL);
-		called_fput = true;
-		goto retry;
-	}
-
 	if (avail < allocated) {
 		/* Trim back the number of requests. */
 		list_for_each_entry_safe(req, n, &batch->head, ki_batch) {
@@ -570,36 +545,6 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
 		wake_up_all(&ctx->wait);
 }
 
-static void aio_fput_routine(struct work_struct *data)
-{
-	spin_lock_irq(&fput_lock);
-	while (likely(!list_empty(&fput_head))) {
-		struct kiocb *req = list_kiocb(fput_head.next);
-		struct kioctx *ctx = req->ki_ctx;
-
-		list_del(&req->ki_list);
-		spin_unlock_irq(&fput_lock);
-
-		/* Complete the fput(s) */
-		if (req->ki_filp != NULL)
-			fput(req->ki_filp);
-
-		/* Link the iocb into the context's free list */
-		rcu_read_lock();
-		spin_lock_irq(&ctx->ctx_lock);
-		really_put_req(ctx, req);
-		/*
-		 * at that point ctx might've been killed, but actual
-		 * freeing is RCU'd
-		 */
-		spin_unlock_irq(&ctx->ctx_lock);
-		rcu_read_unlock();
-
-		spin_lock_irq(&fput_lock);
-	}
-	spin_unlock_irq(&fput_lock);
-}
-
 /* __aio_put_req
  *	Returns true if this put was the last user of the request.
  */
@@ -618,21 +563,9 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
 	req->ki_cancel = NULL;
 	req->ki_retry = NULL;
 
-	/*
-	 * Try to optimize the aio and eventfd file* puts, by avoiding to
-	 * schedule work in case it is not final fput() time. In normal cases,
-	 * we would not be holding the last reference to the file*, so
-	 * this function will be executed w/out any aio kthread wakeup.
-	 */
-	if (unlikely(!fput_atomic(req->ki_filp))) {
-		spin_lock(&fput_lock);
-		list_add(&req->ki_list, &fput_head);
-		spin_unlock(&fput_lock);
-		schedule_work(&fput_work);
-	} else {
-		req->ki_filp = NULL;
-		really_put_req(ctx, req);
-	}
+	fput(req->ki_filp);
+	req->ki_filp = NULL;
+	really_put_req(ctx, req);
 	return 1;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 0a81861978deedfe9267d9fe905c756d3af3af38 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Thu, 12 Jul 2012 17:26:28 +0300
Subject: hfsplus: make hfsplus_sync_fs static

... because it is used only in fs/hfsplus/super.c.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hfsplus/hfsplus_fs.h | 1 -
 fs/hfsplus/super.c      | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 4e75ac646fe..66a9365041b 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -428,7 +428,6 @@ int hfsplus_show_options(struct seq_file *, struct dentry *);
 
 /* super.c */
 struct inode *hfsplus_iget(struct super_block *, unsigned long);
-int hfsplus_sync_fs(struct super_block *sb, int wait);
 
 /* tables.c */
 extern u16 hfsplus_case_fold_table[];
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index a9bca4b8768..5df771e4ddd 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -161,7 +161,7 @@ static void hfsplus_evict_inode(struct inode *inode)
 	}
 }
 
-int hfsplus_sync_fs(struct super_block *sb, int wait)
+static int hfsplus_sync_fs(struct super_block *sb, int wait)
 {
 	struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
 	struct hfsplus_vh *vhdr = sbi->s_vhdr;
-- 
cgit v1.2.3-70-g09d2


From b7a90e8043e7ab1922126e1c1c5c004b470f9e2a Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Thu, 12 Jul 2012 17:26:29 +0300
Subject: hfsplus: amend debugging print

Print correct function name in the debugging print of the
'hfsplus_sync_fs()' function.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hfsplus/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 5df771e4ddd..9e9c278830d 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -171,7 +171,7 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait)
 	if (!wait)
 		return 0;
 
-	dprint(DBG_SUPER, "hfsplus_write_super\n");
+	dprint(DBG_SUPER, "hfsplus_sync_fs\n");
 
 	sb->s_dirt = 0;
 
-- 
cgit v1.2.3-70-g09d2


From 58770d7e83eede5fafbcdf54a604277d70010705 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Thu, 12 Jul 2012 17:26:30 +0300
Subject: hfsplus: remove useless check

This check is useless because we always have 'sb->s_fs_info' to be non-NULL.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hfsplus/super.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'fs')

diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 9e9c278830d..f4f3d546306 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -240,9 +240,6 @@ static void hfsplus_put_super(struct super_block *sb)
 
 	dprint(DBG_SUPER, "hfsplus_put_super\n");
 
-	if (!sb->s_fs_info)
-		return;
-
 	if (!(sb->s_flags & MS_RDONLY) && sbi->s_vhdr) {
 		struct hfsplus_vh *vhdr = sbi->s_vhdr;
 
-- 
cgit v1.2.3-70-g09d2


From 9e6c5829b07c9ba6668807631914efc557fab059 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Thu, 12 Jul 2012 17:26:31 +0300
Subject: hfsplus: get rid of write_super

This patch makes hfsplus stop using the VFS '->write_super()' method along with
the 's_dirt' superblock flag, because they are on their way out.

The whole "superblock write-out" VFS infrastructure is served by the
'sync_supers()' kernel thread, which wakes up every 5 (by default) seconds and
writes out all dirty superblocks using the '->write_super()' call-back.  But the
problem with this thread is that it wastes power by waking up the system every
5 seconds, even if there are no diry superblocks, or there are no client
file-systems which would need this (e.g., btrfs does not use
'->write_super()'). So we want to kill it completely and thus, we need to make
file-systems to stop using the '->write_super()' VFS service, and then remove
it together with the kernel thread.

Tested using fsstress from the LTP project.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hfsplus/bitmap.c     |  4 ++--
 fs/hfsplus/dir.c        |  2 +-
 fs/hfsplus/hfsplus_fs.h |  6 +++++-
 fs/hfsplus/inode.c      |  6 +++---
 fs/hfsplus/super.c      | 41 ++++++++++++++++++++++++++++++++---------
 5 files changed, 43 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c
index 1cad80c789c..4cfbe2edd29 100644
--- a/fs/hfsplus/bitmap.c
+++ b/fs/hfsplus/bitmap.c
@@ -153,7 +153,7 @@ done:
 	kunmap(page);
 	*max = offset + (curr - pptr) * 32 + i - start;
 	sbi->free_blocks -= *max;
-	sb->s_dirt = 1;
+	hfsplus_mark_mdb_dirty(sb);
 	dprint(DBG_BITMAP, "-> %u,%u\n", start, *max);
 out:
 	mutex_unlock(&sbi->alloc_mutex);
@@ -228,7 +228,7 @@ out:
 	set_page_dirty(page);
 	kunmap(page);
 	sbi->free_blocks += len;
-	sb->s_dirt = 1;
+	hfsplus_mark_mdb_dirty(sb);
 	mutex_unlock(&sbi->alloc_mutex);
 
 	return 0;
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 378ea0c43f1..6b9f921ef2f 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -316,7 +316,7 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir,
 	inode->i_ctime = CURRENT_TIME_SEC;
 	mark_inode_dirty(inode);
 	sbi->file_count++;
-	dst_dir->i_sb->s_dirt = 1;
+	hfsplus_mark_mdb_dirty(dst_dir->i_sb);
 out:
 	mutex_unlock(&sbi->vh_mutex);
 	return res;
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 66a9365041b..558dbb463a4 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -153,8 +153,11 @@ struct hfsplus_sb_info {
 	gid_t gid;
 
 	int part, session;
-
 	unsigned long flags;
+
+	int work_queued;               /* non-zero delayed work is queued */
+	struct delayed_work sync_work; /* FS sync delayed work */
+	spinlock_t work_lock;          /* protects sync_work and work_queued */
 };
 
 #define HFSPLUS_SB_WRITEBACKUP	0
@@ -428,6 +431,7 @@ int hfsplus_show_options(struct seq_file *, struct dentry *);
 
 /* super.c */
 struct inode *hfsplus_iget(struct super_block *, unsigned long);
+void hfsplus_mark_mdb_dirty(struct super_block *sb);
 
 /* tables.c */
 extern u16 hfsplus_case_fold_table[];
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 7009265b746..3d8b4a675ba 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -431,7 +431,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode)
 		sbi->file_count++;
 	insert_inode_hash(inode);
 	mark_inode_dirty(inode);
-	sb->s_dirt = 1;
+	hfsplus_mark_mdb_dirty(sb);
 
 	return inode;
 }
@@ -442,7 +442,7 @@ void hfsplus_delete_inode(struct inode *inode)
 
 	if (S_ISDIR(inode->i_mode)) {
 		HFSPLUS_SB(sb)->folder_count--;
-		sb->s_dirt = 1;
+		hfsplus_mark_mdb_dirty(sb);
 		return;
 	}
 	HFSPLUS_SB(sb)->file_count--;
@@ -455,7 +455,7 @@ void hfsplus_delete_inode(struct inode *inode)
 		inode->i_size = 0;
 		hfsplus_file_truncate(inode);
 	}
-	sb->s_dirt = 1;
+	hfsplus_mark_mdb_dirty(sb);
 }
 
 void hfsplus_inode_read_fork(struct inode *inode, struct hfsplus_fork_raw *fork)
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index f4f3d546306..47333209801 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -124,7 +124,7 @@ static int hfsplus_system_write_inode(struct inode *inode)
 
 	if (fork->total_size != cpu_to_be64(inode->i_size)) {
 		set_bit(HFSPLUS_SB_WRITEBACKUP, &sbi->flags);
-		inode->i_sb->s_dirt = 1;
+		hfsplus_mark_mdb_dirty(inode->i_sb);
 	}
 	hfsplus_inode_write_fork(inode, fork);
 	if (tree)
@@ -173,8 +173,6 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait)
 
 	dprint(DBG_SUPER, "hfsplus_sync_fs\n");
 
-	sb->s_dirt = 0;
-
 	/*
 	 * Explicitly write out the special metadata inodes.
 	 *
@@ -226,12 +224,34 @@ out:
 	return error;
 }
 
-static void hfsplus_write_super(struct super_block *sb)
+static void delayed_sync_fs(struct work_struct *work)
 {
-	if (!(sb->s_flags & MS_RDONLY))
-		hfsplus_sync_fs(sb, 1);
-	else
-		sb->s_dirt = 0;
+	struct hfsplus_sb_info *sbi;
+
+	sbi = container_of(work, struct hfsplus_sb_info, sync_work.work);
+
+	spin_lock(&sbi->work_lock);
+	sbi->work_queued = 0;
+	spin_unlock(&sbi->work_lock);
+
+	hfsplus_sync_fs(sbi->alloc_file->i_sb, 1);
+}
+
+void hfsplus_mark_mdb_dirty(struct super_block *sb)
+{
+	struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
+	unsigned long delay;
+
+	if (sb->s_flags & MS_RDONLY)
+		return;
+
+	spin_lock(&sbi->work_lock);
+	if (!sbi->work_queued) {
+		delay = msecs_to_jiffies(dirty_writeback_interval * 10);
+		queue_delayed_work(system_long_wq, &sbi->sync_work, delay);
+		sbi->work_queued = 1;
+	}
+	spin_unlock(&sbi->work_lock);
 }
 
 static void hfsplus_put_super(struct super_block *sb)
@@ -240,6 +260,8 @@ static void hfsplus_put_super(struct super_block *sb)
 
 	dprint(DBG_SUPER, "hfsplus_put_super\n");
 
+	cancel_delayed_work_sync(&sbi->sync_work);
+
 	if (!(sb->s_flags & MS_RDONLY) && sbi->s_vhdr) {
 		struct hfsplus_vh *vhdr = sbi->s_vhdr;
 
@@ -325,7 +347,6 @@ static const struct super_operations hfsplus_sops = {
 	.write_inode	= hfsplus_write_inode,
 	.evict_inode	= hfsplus_evict_inode,
 	.put_super	= hfsplus_put_super,
-	.write_super	= hfsplus_write_super,
 	.sync_fs	= hfsplus_sync_fs,
 	.statfs		= hfsplus_statfs,
 	.remount_fs	= hfsplus_remount,
@@ -352,6 +373,8 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_fs_info = sbi;
 	mutex_init(&sbi->alloc_mutex);
 	mutex_init(&sbi->vh_mutex);
+	spin_lock_init(&sbi->work_lock);
+	INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs);
 	hfsplus_fill_defaults(sbi);
 
 	err = -EINVAL;
-- 
cgit v1.2.3-70-g09d2


From 715189d836ab276b3d0fc114681f12b423686ffa Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Thu, 12 Jul 2012 17:28:44 +0300
Subject: hfs: push lock_super down

HFS uses 'lock_super()'/'unlock_super()' around 'hfs_mdb_commit()' in order
to serialize MDB (Master Directory Block) changes. Push it down to
'hfs_mdb_commit()' in order to simplify the code a bit.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hfs/inode.c | 2 --
 fs/hfs/mdb.c   | 2 ++
 fs/hfs/super.c | 4 ----
 3 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 451c97281b8..f2deefdb406 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -645,11 +645,9 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end,
 	/* sync the superblock to buffers */
 	sb = inode->i_sb;
 	if (sb->s_dirt) {
-		lock_super(sb);
 		sb->s_dirt = 0;
 		if (!(sb->s_flags & MS_RDONLY))
 			hfs_mdb_commit(sb);
-		unlock_super(sb);
 	}
 	/* .. finally sync the buffers to disk */
 	err = sync_blockdev(sb->s_bdev);
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 1563d5ce576..3f558d58fba 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -260,6 +260,7 @@ void hfs_mdb_commit(struct super_block *sb)
 {
 	struct hfs_mdb *mdb = HFS_SB(sb)->mdb;
 
+	lock_super(sb);
 	if (test_and_clear_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags)) {
 		/* These parameters may have been modified, so write them back */
 		mdb->drLsMod = hfs_mtime();
@@ -317,6 +318,7 @@ void hfs_mdb_commit(struct super_block *sb)
 			size -= len;
 		}
 	}
+	unlock_super(sb);
 }
 
 void hfs_mdb_close(struct super_block *sb)
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 7b4c537d6e1..f7c06bbf33b 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -50,21 +50,17 @@ MODULE_LICENSE("GPL");
  */
 static void hfs_write_super(struct super_block *sb)
 {
-	lock_super(sb);
 	sb->s_dirt = 0;
 
 	/* sync everything to the buffers */
 	if (!(sb->s_flags & MS_RDONLY))
 		hfs_mdb_commit(sb);
-	unlock_super(sb);
 }
 
 static int hfs_sync_fs(struct super_block *sb, int wait)
 {
-	lock_super(sb);
 	hfs_mdb_commit(sb);
 	sb->s_dirt = 0;
-	unlock_super(sb);
 
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From b59352359d6555aa8537d74ac9b15c1c6bcf3c68 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Thu, 12 Jul 2012 17:28:45 +0300
Subject: hfs: get rid of lock_super

Stop using lock_super for serializing the MDB changes - use the buffer-head own
lock instead. Tested with fsstress.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hfs/mdb.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 3f558d58fba..7a3224049f3 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -260,7 +260,7 @@ void hfs_mdb_commit(struct super_block *sb)
 {
 	struct hfs_mdb *mdb = HFS_SB(sb)->mdb;
 
-	lock_super(sb);
+	lock_buffer(HFS_SB(sb)->mdb_bh);
 	if (test_and_clear_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags)) {
 		/* These parameters may have been modified, so write them back */
 		mdb->drLsMod = hfs_mtime();
@@ -284,9 +284,13 @@ void hfs_mdb_commit(struct super_block *sb)
 				     &mdb->drXTFlSize, NULL);
 		hfs_inode_write_fork(HFS_SB(sb)->cat_tree->inode, mdb->drCTExtRec,
 				     &mdb->drCTFlSize, NULL);
+
+		lock_buffer(HFS_SB(sb)->alt_mdb_bh);
 		memcpy(HFS_SB(sb)->alt_mdb, HFS_SB(sb)->mdb, HFS_SECTOR_SIZE);
 		HFS_SB(sb)->alt_mdb->drAtrb |= cpu_to_be16(HFS_SB_ATTRIB_UNMNT);
 		HFS_SB(sb)->alt_mdb->drAtrb &= cpu_to_be16(~HFS_SB_ATTRIB_INCNSTNT);
+		unlock_buffer(HFS_SB(sb)->alt_mdb_bh);
+
 		mark_buffer_dirty(HFS_SB(sb)->alt_mdb_bh);
 		sync_dirty_buffer(HFS_SB(sb)->alt_mdb_bh);
 	}
@@ -309,7 +313,11 @@ void hfs_mdb_commit(struct super_block *sb)
 				break;
 			}
 			len = min((int)sb->s_blocksize - off, size);
+
+			lock_buffer(bh);
 			memcpy(bh->b_data + off, ptr, len);
+			unlock_buffer(bh);
+
 			mark_buffer_dirty(bh);
 			brelse(bh);
 			block++;
@@ -318,7 +326,7 @@ void hfs_mdb_commit(struct super_block *sb)
 			size -= len;
 		}
 	}
-	unlock_super(sb);
+	unlock_buffer(HFS_SB(sb)->mdb_bh);
 }
 
 void hfs_mdb_close(struct super_block *sb)
-- 
cgit v1.2.3-70-g09d2


From a3742d4828caeffab4cb075b562a4ed92d3e74d6 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Thu, 12 Jul 2012 17:28:46 +0300
Subject: hfs: remove extra mdb write on unmount

HFS calls 'hfs_write_super()' from 'hfs_put_super()' in order to write the MDB
to the media. However, it is not needed because VFS calls '->sync_fs()' before
calling '->put_super()' - so by the time we are in 'hfs_write_super()', the MDB
is already synchronized.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hfs/super.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index f7c06bbf33b..47e4119a065 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -74,8 +74,6 @@ static int hfs_sync_fs(struct super_block *sb, int wait)
  */
 static void hfs_put_super(struct super_block *sb)
 {
-	if (sb->s_dirt)
-		hfs_write_super(sb);
 	hfs_mdb_close(sb);
 	/* release the MDB's resources */
 	hfs_mdb_put(sb);
-- 
cgit v1.2.3-70-g09d2


From 4527440d5db8ff27ae8801de3f819843a1e6c6f6 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Thu, 12 Jul 2012 17:28:47 +0300
Subject: hfs: simplify a bit checking for R/O

We have the following pattern in 2 places in HFS

if (!RDONLY)
	hfs_mdb_commit();

This patch pushes the RDONLY check down to 'hfs_mdb_commit()'. This will
make the following patches a bit simpler.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hfs/inode.c | 3 +--
 fs/hfs/mdb.c   | 3 +++
 fs/hfs/super.c | 3 +--
 3 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index f2deefdb406..90c1ccbff8e 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -646,8 +646,7 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end,
 	sb = inode->i_sb;
 	if (sb->s_dirt) {
 		sb->s_dirt = 0;
-		if (!(sb->s_flags & MS_RDONLY))
-			hfs_mdb_commit(sb);
+		hfs_mdb_commit(sb);
 	}
 	/* .. finally sync the buffers to disk */
 	err = sync_blockdev(sb->s_bdev);
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 7a3224049f3..5fd51a5833f 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -260,6 +260,9 @@ void hfs_mdb_commit(struct super_block *sb)
 {
 	struct hfs_mdb *mdb = HFS_SB(sb)->mdb;
 
+	if (sb->s_flags & MS_RDONLY)
+		return;
+
 	lock_buffer(HFS_SB(sb)->mdb_bh);
 	if (test_and_clear_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags)) {
 		/* These parameters may have been modified, so write them back */
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 47e4119a065..0730135b771 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -53,8 +53,7 @@ static void hfs_write_super(struct super_block *sb)
 	sb->s_dirt = 0;
 
 	/* sync everything to the buffers */
-	if (!(sb->s_flags & MS_RDONLY))
-		hfs_mdb_commit(sb);
+	hfs_mdb_commit(sb);
 }
 
 static int hfs_sync_fs(struct super_block *sb, int wait)
-- 
cgit v1.2.3-70-g09d2


From b16ca626358cbf056b752eab63ba8f20087afeaf Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Thu, 12 Jul 2012 17:28:48 +0300
Subject: hfs: introduce VFS superblock object back-reference

Add an 'sb' VFS superblock back-reference to the 'struct hfs_sb_info' data
structure - we will need to find the VFS superblock from a
'struct hfs_sb_info' object in the next patch, so this change is jut a
preparation.

Remove few useless newlines while on it.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hfs/hfs_fs.h | 6 +-----
 fs/hfs/super.c  | 1 +
 2 files changed, 2 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 1bf967c6bfd..351561223ec 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -137,16 +137,12 @@ struct hfs_sb_info {
 	gid_t s_gid;				/* The gid of all files */
 
 	int session, part;
-
 	struct nls_table *nls_io, *nls_disk;
-
 	struct mutex bitmap_lock;
-
 	unsigned long flags;
-
 	u16 blockoffset;
-
 	int fs_div;
+	struct super_block *sb;
 };
 
 #define HFS_FLG_BITMAP_DIRTY	0
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 0730135b771..99c6239bc3a 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -380,6 +380,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
 	if (!sbi)
 		return -ENOMEM;
 
+	sbi->sb = sb;
 	sb->s_fs_info = sbi;
 
 	res = -EINVAL;
-- 
cgit v1.2.3-70-g09d2


From 5687b5780e90278a62d4cd916a3632087066f59d Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Thu, 12 Jul 2012 17:28:49 +0300
Subject: hfs: get rid of hfs_sync_super

This patch makes hfs stop using the VFS '->write_super()' method along with
the 's_dirt' superblock flag, because they are on their way out.

The whole "superblock write-out" VFS infrastructure is served by the
'sync_supers()' kernel thread, which wakes up every 5 (by default) seconds and
writes out all dirty superblocks using the '->write_super()' call-back.  But the
problem with this thread is that it wastes power by waking up the system every
5 seconds, even if there are no diry superblocks, or there are no client
file-systems which would need this (e.g., btrfs does not use
'->write_super()'). So we want to kill it completely and thus, we need to make
file-systems to stop using the '->write_super()' VFS service, and then remove
it together with the kernel thread.

Tested using fsstress from the LTP project.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hfs/extent.c |  2 +-
 fs/hfs/hfs_fs.h |  9 +++++++-
 fs/hfs/inode.c  | 11 ++++------
 fs/hfs/super.c  | 65 +++++++++++++++++++++++++++++++--------------------------
 4 files changed, 48 insertions(+), 39 deletions(-)

(limited to 'fs')

diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c
index 2c16316d291..a67955a0c36 100644
--- a/fs/hfs/extent.c
+++ b/fs/hfs/extent.c
@@ -432,7 +432,7 @@ out:
 		if (inode->i_ino < HFS_FIRSTUSER_CNID)
 			set_bit(HFS_FLG_ALT_MDB_DIRTY, &HFS_SB(sb)->flags);
 		set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags);
-		sb->s_dirt = 1;
+		hfs_mark_mdb_dirty(sb);
 	}
 	return res;
 
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 351561223ec..8275175acf6 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -14,6 +14,7 @@
 #include <linux/mutex.h>
 #include <linux/buffer_head.h>
 #include <linux/fs.h>
+#include <linux/workqueue.h>
 
 #include <asm/byteorder.h>
 #include <asm/uaccess.h>
@@ -143,6 +144,9 @@ struct hfs_sb_info {
 	u16 blockoffset;
 	int fs_div;
 	struct super_block *sb;
+	int work_queued;		/* non-zero delayed work is queued */
+	struct delayed_work mdb_work;	/* MDB flush delayed work */
+	spinlock_t work_lock;		/* protects mdb_work and work_queued */
 };
 
 #define HFS_FLG_BITMAP_DIRTY	0
@@ -222,6 +226,9 @@ extern int hfs_compare_dentry(const struct dentry *parent,
 extern void hfs_asc2mac(struct super_block *, struct hfs_name *, struct qstr *);
 extern int hfs_mac2asc(struct super_block *, char *, const struct hfs_name *);
 
+/* super.c */
+extern void hfs_mark_mdb_dirty(struct super_block *sb);
+
 extern struct timezone sys_tz;
 
 /*
@@ -249,7 +256,7 @@ static inline const char *hfs_mdb_name(struct super_block *sb)
 static inline void hfs_bitmap_dirty(struct super_block *sb)
 {
 	set_bit(HFS_FLG_BITMAP_DIRTY, &HFS_SB(sb)->flags);
-	sb->s_dirt = 1;
+	hfs_mark_mdb_dirty(sb);
 }
 
 #define sb_bread512(sb, sec, data) ({			\
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 90c1ccbff8e..ee1bc55677f 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -220,7 +220,7 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, umode_t mode)
 	insert_inode_hash(inode);
 	mark_inode_dirty(inode);
 	set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags);
-	sb->s_dirt = 1;
+	hfs_mark_mdb_dirty(sb);
 
 	return inode;
 }
@@ -235,7 +235,7 @@ void hfs_delete_inode(struct inode *inode)
 		if (HFS_I(inode)->cat_key.ParID == cpu_to_be32(HFS_ROOT_CNID))
 			HFS_SB(sb)->root_dirs--;
 		set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags);
-		sb->s_dirt = 1;
+		hfs_mark_mdb_dirty(sb);
 		return;
 	}
 	HFS_SB(sb)->file_count--;
@@ -248,7 +248,7 @@ void hfs_delete_inode(struct inode *inode)
 		}
 	}
 	set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags);
-	sb->s_dirt = 1;
+	hfs_mark_mdb_dirty(sb);
 }
 
 void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext,
@@ -644,10 +644,7 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end,
 
 	/* sync the superblock to buffers */
 	sb = inode->i_sb;
-	if (sb->s_dirt) {
-		sb->s_dirt = 0;
-		hfs_mdb_commit(sb);
-	}
+	flush_delayed_work_sync(&HFS_SB(sb)->mdb_work);
 	/* .. finally sync the buffers to disk */
 	err = sync_blockdev(sb->s_bdev);
 	if (!ret)
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 99c6239bc3a..4eb873e0c07 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -29,38 +29,9 @@ static struct kmem_cache *hfs_inode_cachep;
 
 MODULE_LICENSE("GPL");
 
-/*
- * hfs_write_super()
- *
- * Description:
- *   This function is called by the VFS only. When the filesystem
- *   is mounted r/w it updates the MDB on disk.
- * Input Variable(s):
- *   struct super_block *sb: Pointer to the hfs superblock
- * Output Variable(s):
- *   NONE
- * Returns:
- *   void
- * Preconditions:
- *   'sb' points to a "valid" (struct super_block).
- * Postconditions:
- *   The MDB is marked 'unsuccessfully unmounted' by clearing bit 8 of drAtrb
- *   (hfs_put_super() must set this flag!). Some MDB fields are updated
- *   and the MDB buffer is written to disk by calling hfs_mdb_commit().
- */
-static void hfs_write_super(struct super_block *sb)
-{
-	sb->s_dirt = 0;
-
-	/* sync everything to the buffers */
-	hfs_mdb_commit(sb);
-}
-
 static int hfs_sync_fs(struct super_block *sb, int wait)
 {
 	hfs_mdb_commit(sb);
-	sb->s_dirt = 0;
-
 	return 0;
 }
 
@@ -73,11 +44,44 @@ static int hfs_sync_fs(struct super_block *sb, int wait)
  */
 static void hfs_put_super(struct super_block *sb)
 {
+	cancel_delayed_work_sync(&HFS_SB(sb)->mdb_work);
 	hfs_mdb_close(sb);
 	/* release the MDB's resources */
 	hfs_mdb_put(sb);
 }
 
+static void flush_mdb(struct work_struct *work)
+{
+	struct hfs_sb_info *sbi;
+	struct super_block *sb;
+
+	sbi = container_of(work, struct hfs_sb_info, mdb_work.work);
+	sb = sbi->sb;
+
+	spin_lock(&sbi->work_lock);
+	sbi->work_queued = 0;
+	spin_unlock(&sbi->work_lock);
+
+	hfs_mdb_commit(sb);
+}
+
+void hfs_mark_mdb_dirty(struct super_block *sb)
+{
+	struct hfs_sb_info *sbi = HFS_SB(sb);
+	unsigned long delay;
+
+	if (sb->s_flags & MS_RDONLY)
+		return;
+
+	spin_lock(&sbi->work_lock);
+	if (!sbi->work_queued) {
+		delay = msecs_to_jiffies(dirty_writeback_interval * 10);
+		queue_delayed_work(system_long_wq, &sbi->mdb_work, delay);
+		sbi->work_queued = 1;
+	}
+	spin_unlock(&sbi->work_lock);
+}
+
 /*
  * hfs_statfs()
  *
@@ -177,7 +181,6 @@ static const struct super_operations hfs_super_operations = {
 	.write_inode	= hfs_write_inode,
 	.evict_inode	= hfs_evict_inode,
 	.put_super	= hfs_put_super,
-	.write_super	= hfs_write_super,
 	.sync_fs	= hfs_sync_fs,
 	.statfs		= hfs_statfs,
 	.remount_fs     = hfs_remount,
@@ -382,6 +385,8 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
 
 	sbi->sb = sb;
 	sb->s_fs_info = sbi;
+	spin_lock_init(&sbi->work_lock);
+	INIT_DELAYED_WORK(&sbi->mdb_work, flush_mdb);
 
 	res = -EINVAL;
 	if (!parse_options((char *)data, sbi)) {
-- 
cgit v1.2.3-70-g09d2


From a4d05d315a4fdf5ccb0dbf0ce38bac12d522d33e Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Tue, 3 Jul 2012 16:43:26 +0300
Subject: fs/sysv: remove useless write_super call

We do not need to call 'sysv_write_super()' from 'sysv_put_super()',
because VFS has called 'sysv_sync_fs()' before calling '->put_super()'.
So remove it.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/sysv/inode.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'fs')

diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 08d0b2568cd..af13d1342f2 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -81,9 +81,6 @@ static void sysv_put_super(struct super_block *sb)
 {
 	struct sysv_sb_info *sbi = SYSV_SB(sb);
 
-	if (sb->s_dirt)
-		sysv_write_super(sb);
-
 	if (!(sb->s_flags & MS_RDONLY)) {
 		/* XXX ext2 also updates the state here */
 		mark_buffer_dirty(sbi->s_bh1);
-- 
cgit v1.2.3-70-g09d2


From eee458936b52bd3a9ff0ff577313b637905fff08 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Tue, 3 Jul 2012 16:43:27 +0300
Subject: fs/sysv: remove another useless write_super call

We do not need to call 'sysv_write_super()' from 'sysv_remount()',
because VFS has called 'sysv_sync_fs()' before calling '->remount()'.
So remove it. Remove also '(un)lock_super()' which obvioulsy is becoming
useless in this function.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/sysv/inode.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index af13d1342f2..f20ffe32b39 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -68,12 +68,9 @@ static void sysv_write_super(struct super_block *sb)
 static int sysv_remount(struct super_block *sb, int *flags, char *data)
 {
 	struct sysv_sb_info *sbi = SYSV_SB(sb);
-	lock_super(sb);
+
 	if (sbi->s_forced_ro)
 		*flags |= MS_RDONLY;
-	if (*flags & MS_RDONLY)
-		sysv_write_super(sb);
-	unlock_super(sb);
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 9d46be294d12871adf4206f89168b14d27adb8b5 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Tue, 3 Jul 2012 16:43:28 +0300
Subject: fs/sysv: stop using write_super and s_dirt

It does not look like sysv FS needs 'write_super()' at all, because all it
does is a timestamp update. I cannot test this patch, because this
file-system is so old and probably has not been used by anyone for years,
so there are no tools to create it in Linux. But from the code I see that
marking the superblock as dirty is basically marking the superblock buffers as
drity and then setting the s_dirt flag. And when 'write_super()' is executed to
handle the s_dirt flag, we just update the timestamp and again mark the
superblock buffer as dirty. Seems pointless.

It looks like we can update the timestamp more opprtunistically - on unmount
or remount of sync, and nothing should change.

Thus, this patch removes 'sysv_write_super()' and 's_dirt'.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/sysv/inode.c | 10 ----------
 fs/sysv/sysv.h  |  1 -
 2 files changed, 11 deletions(-)

(limited to 'fs')

diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index f20ffe32b39..80e1e2b18df 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -43,7 +43,6 @@ static int sysv_sync_fs(struct super_block *sb, int wait)
 	 * then attach current time stamp.
 	 * But if the filesystem was marked clean, keep it clean.
 	 */
-	sb->s_dirt = 0;
 	old_time = fs32_to_cpu(sbi, *sbi->s_sb_time);
 	if (sbi->s_type == FSTYPE_SYSV4) {
 		if (*sbi->s_sb_state == cpu_to_fs32(sbi, 0x7c269d38 - old_time))
@@ -57,14 +56,6 @@ static int sysv_sync_fs(struct super_block *sb, int wait)
 	return 0;
 }
 
-static void sysv_write_super(struct super_block *sb)
-{
-	if (!(sb->s_flags & MS_RDONLY))
-		sysv_sync_fs(sb, 1);
-	else
-		sb->s_dirt = 0;
-}
-
 static int sysv_remount(struct super_block *sb, int *flags, char *data)
 {
 	struct sysv_sb_info *sbi = SYSV_SB(sb);
@@ -351,7 +342,6 @@ const struct super_operations sysv_sops = {
 	.write_inode	= sysv_write_inode,
 	.evict_inode	= sysv_evict_inode,
 	.put_super	= sysv_put_super,
-	.write_super	= sysv_write_super,
 	.sync_fs	= sysv_sync_fs,
 	.remount_fs	= sysv_remount,
 	.statfs		= sysv_statfs,
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 11b07672f6c..0bc35fdc58e 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -117,7 +117,6 @@ static inline void dirty_sb(struct super_block *sb)
 	mark_buffer_dirty(sbi->s_bh1);
 	if (sbi->s_bh1 != sbi->s_bh2)
 		mark_buffer_dirty(sbi->s_bh2);
-	sb->s_dirt = 1;
 }
 
 
-- 
cgit v1.2.3-70-g09d2


From 65e5e83f7d01a3790deb1ba2e0d887e715c43307 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Thu, 12 Jul 2012 16:28:06 +0300
Subject: fs/ufs: remove extra superblock write on unmount

UFS calls 'ufs_write_super()' from 'ufs_put_super()' in order to write the
superblocks to the media. However, it is not needed because VFS calls
'->sync_fs()' before calling '->put_super()' - so by the time we are in
'ufs_write_super()', the superblocks are already synchronized.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ufs/super.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 302f340d007..ae91e0af266 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1238,9 +1238,6 @@ static void ufs_put_super(struct super_block *sb)
 		
 	UFSD("ENTER\n");
 
-	if (sb->s_dirt)
-		ufs_write_super(sb);
-
 	if (!(sb->s_flags & MS_RDONLY))
 		ufs_put_super_internal(sb);
 	
-- 
cgit v1.2.3-70-g09d2


From 7bd54ef722e9938768f524677be0ac4985d8473a Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Thu, 12 Jul 2012 16:28:07 +0300
Subject: fs/ufs: re-arrange the code a bit

This patch does not do any functional changes. It only moves 3 functions
in fs/ufs/super.c a little bit up in order to prepare for further changes
where I'll need this new arrangement to avoid forward declarations.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ufs/super.c | 117 ++++++++++++++++++++++++++++-----------------------------
 1 file changed, 58 insertions(+), 59 deletions(-)

(limited to 'fs')

diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index ae91e0af266..ad56c6dffc6 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -691,6 +691,64 @@ static void ufs_put_super_internal(struct super_block *sb)
 	UFSD("EXIT\n");
 }
 
+static int ufs_sync_fs(struct super_block *sb, int wait)
+{
+	struct ufs_sb_private_info * uspi;
+	struct ufs_super_block_first * usb1;
+	struct ufs_super_block_third * usb3;
+	unsigned flags;
+
+	lock_ufs(sb);
+	lock_super(sb);
+
+	UFSD("ENTER\n");
+
+	flags = UFS_SB(sb)->s_flags;
+	uspi = UFS_SB(sb)->s_uspi;
+	usb1 = ubh_get_usb_first(uspi);
+	usb3 = ubh_get_usb_third(uspi);
+
+	usb1->fs_time = cpu_to_fs32(sb, get_seconds());
+	if ((flags & UFS_ST_MASK) == UFS_ST_SUN  ||
+	    (flags & UFS_ST_MASK) == UFS_ST_SUNOS ||
+	    (flags & UFS_ST_MASK) == UFS_ST_SUNx86)
+		ufs_set_fs_state(sb, usb1, usb3,
+				UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time));
+	ufs_put_cstotal(sb);
+	sb->s_dirt = 0;
+
+	UFSD("EXIT\n");
+	unlock_super(sb);
+	unlock_ufs(sb);
+
+	return 0;
+}
+
+static void ufs_write_super(struct super_block *sb)
+{
+	if (!(sb->s_flags & MS_RDONLY))
+		ufs_sync_fs(sb, 1);
+	else
+		sb->s_dirt = 0;
+}
+
+static void ufs_put_super(struct super_block *sb)
+{
+	struct ufs_sb_info * sbi = UFS_SB(sb);
+
+	UFSD("ENTER\n");
+
+	if (!(sb->s_flags & MS_RDONLY))
+		ufs_put_super_internal(sb);
+
+	ubh_brelse_uspi (sbi->s_uspi);
+	kfree (sbi->s_uspi);
+	kfree (sbi);
+	sb->s_fs_info = NULL;
+	UFSD("EXIT\n");
+	return;
+}
+
 static int ufs_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct ufs_sb_info * sbi;
@@ -1191,65 +1249,6 @@ failed_nomem:
 	return -ENOMEM;
 }
 
-static int ufs_sync_fs(struct super_block *sb, int wait)
-{
-	struct ufs_sb_private_info * uspi;
-	struct ufs_super_block_first * usb1;
-	struct ufs_super_block_third * usb3;
-	unsigned flags;
-
-	lock_ufs(sb);
-	lock_super(sb);
-
-	UFSD("ENTER\n");
-
-	flags = UFS_SB(sb)->s_flags;
-	uspi = UFS_SB(sb)->s_uspi;
-	usb1 = ubh_get_usb_first(uspi);
-	usb3 = ubh_get_usb_third(uspi);
-
-	usb1->fs_time = cpu_to_fs32(sb, get_seconds());
-	if ((flags & UFS_ST_MASK) == UFS_ST_SUN  ||
-	    (flags & UFS_ST_MASK) == UFS_ST_SUNOS ||
-	    (flags & UFS_ST_MASK) == UFS_ST_SUNx86)
-		ufs_set_fs_state(sb, usb1, usb3,
-				UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time));
-	ufs_put_cstotal(sb);
-	sb->s_dirt = 0;
-
-	UFSD("EXIT\n");
-	unlock_super(sb);
-	unlock_ufs(sb);
-
-	return 0;
-}
-
-static void ufs_write_super(struct super_block *sb)
-{
-	if (!(sb->s_flags & MS_RDONLY))
-		ufs_sync_fs(sb, 1);
-	else
-		sb->s_dirt = 0;
-}
-
-static void ufs_put_super(struct super_block *sb)
-{
-	struct ufs_sb_info * sbi = UFS_SB(sb);
-		
-	UFSD("ENTER\n");
-
-	if (!(sb->s_flags & MS_RDONLY))
-		ufs_put_super_internal(sb);
-	
-	ubh_brelse_uspi (sbi->s_uspi);
-	kfree (sbi->s_uspi);
-	kfree (sbi);
-	sb->s_fs_info = NULL;
-	UFSD("EXIT\n");
-	return;
-}
-
-
 static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
 {
 	struct ufs_sb_private_info * uspi;
-- 
cgit v1.2.3-70-g09d2


From 9e9ad5f408889db6038a59b38ede29ff1ba9ef2f Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Thu, 12 Jul 2012 16:28:08 +0300
Subject: fs/ufs: get rid of write_super

This patch makes UFS stop using the VFS '->write_super()' method along with
the 's_dirt' superblock flag, because they are on their way out.

The way we implement this is that we schedule a delay job instead relying on
's_dirt' and '->write_super()'.

The whole "superblock write-out" VFS infrastructure is served by the
'sync_supers()' kernel thread, which wakes up every 5 (by default) seconds and
writes out all dirty superblocks using the '->write_super()' call-back.  But the
problem with this thread is that it wastes power by waking up the system every
5 seconds, even if there are no diry superblocks, or there are no client
file-systems which would need this (e.g., btrfs does not use
'->write_super()'). So we want to kill it completely and thus, we need to make
file-systems to stop using the '->write_super()' VFS service, and then remove
it together with the kernel thread.

Tested using fsstress from the LTP project.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ufs/balloc.c |  8 ++++----
 fs/ufs/ialloc.c |  4 ++--
 fs/ufs/super.c  | 40 ++++++++++++++++++++++++++++++----------
 fs/ufs/ufs.h    |  5 +++++
 fs/ufs/ufs_fs.h |  1 +
 5 files changed, 42 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 42694e11c23..1b3e410bf33 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -116,7 +116,7 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
 	ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
 	if (sb->s_flags & MS_SYNCHRONOUS)
 		ubh_sync_block(UCPI_UBH(ucpi));
-	sb->s_dirt = 1;
+	ufs_mark_sb_dirty(sb);
 	
 	unlock_super (sb);
 	UFSD("EXIT\n");
@@ -214,7 +214,7 @@ do_more:
 		goto do_more;
 	}
 
-	sb->s_dirt = 1;
+	ufs_mark_sb_dirty(sb);
 	unlock_super (sb);
 	UFSD("EXIT\n");
 	return;
@@ -557,7 +557,7 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
 	ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
 	if (sb->s_flags & MS_SYNCHRONOUS)
 		ubh_sync_block(UCPI_UBH(ucpi));
-	sb->s_dirt = 1;
+	ufs_mark_sb_dirty(sb);
 
 	UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment);
 	
@@ -677,7 +677,7 @@ succed:
 	ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
 	if (sb->s_flags & MS_SYNCHRONOUS)
 		ubh_sync_block(UCPI_UBH(ucpi));
-	sb->s_dirt = 1;
+	ufs_mark_sb_dirty(sb);
 
 	result += cgno * uspi->s_fpg;
 	UFSD("EXIT3, result %llu\n", (unsigned long long)result);
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 4ec5c1085a8..e84cbe21b98 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -116,7 +116,7 @@ void ufs_free_inode (struct inode * inode)
 	if (sb->s_flags & MS_SYNCHRONOUS)
 		ubh_sync_block(UCPI_UBH(ucpi));
 	
-	sb->s_dirt = 1;
+	ufs_mark_sb_dirty(sb);
 	unlock_super (sb);
 	UFSD("EXIT\n");
 }
@@ -288,7 +288,7 @@ cg_found:
 	ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
 	if (sb->s_flags & MS_SYNCHRONOUS)
 		ubh_sync_block(UCPI_UBH(ucpi));
-	sb->s_dirt = 1;
+	ufs_mark_sb_dirty(sb);
 
 	inode->i_ino = cg * uspi->s_ipg + bit;
 	inode_init_owner(inode, dir, mode);
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index ad56c6dffc6..444927e5706 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -302,7 +302,7 @@ void ufs_error (struct super_block * sb, const char * function,
 	if (!(sb->s_flags & MS_RDONLY)) {
 		usb1->fs_clean = UFS_FSBAD;
 		ubh_mark_buffer_dirty(USPI_UBH(uspi));
-		sb->s_dirt = 1;
+		ufs_mark_sb_dirty(sb);
 		sb->s_flags |= MS_RDONLY;
 	}
 	va_start (args, fmt);
@@ -334,7 +334,7 @@ void ufs_panic (struct super_block * sb, const char * function,
 	if (!(sb->s_flags & MS_RDONLY)) {
 		usb1->fs_clean = UFS_FSBAD;
 		ubh_mark_buffer_dirty(USPI_UBH(uspi));
-		sb->s_dirt = 1;
+		ufs_mark_sb_dirty(sb);
 	}
 	va_start (args, fmt);
 	vsnprintf (error_buf, sizeof(error_buf), fmt, args);
@@ -715,7 +715,6 @@ static int ufs_sync_fs(struct super_block *sb, int wait)
 		ufs_set_fs_state(sb, usb1, usb3,
 				UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time));
 	ufs_put_cstotal(sb);
-	sb->s_dirt = 0;
 
 	UFSD("EXIT\n");
 	unlock_super(sb);
@@ -724,12 +723,31 @@ static int ufs_sync_fs(struct super_block *sb, int wait)
 	return 0;
 }
 
-static void ufs_write_super(struct super_block *sb)
+static void delayed_sync_fs(struct work_struct *work)
 {
-	if (!(sb->s_flags & MS_RDONLY))
-		ufs_sync_fs(sb, 1);
-	else
-		sb->s_dirt = 0;
+	struct ufs_sb_info *sbi;
+
+	sbi = container_of(work, struct ufs_sb_info, sync_work.work);
+
+	spin_lock(&sbi->work_lock);
+	sbi->work_queued = 0;
+	spin_unlock(&sbi->work_lock);
+
+	ufs_sync_fs(sbi->sb, 1);
+}
+
+void ufs_mark_sb_dirty(struct super_block *sb)
+{
+	struct ufs_sb_info *sbi = UFS_SB(sb);
+	unsigned long delay;
+
+	spin_lock(&sbi->work_lock);
+	if (!sbi->work_queued) {
+		delay = msecs_to_jiffies(dirty_writeback_interval * 10);
+		queue_delayed_work(system_long_wq, &sbi->sync_work, delay);
+		sbi->work_queued = 1;
+	}
+	spin_unlock(&sbi->work_lock);
 }
 
 static void ufs_put_super(struct super_block *sb)
@@ -740,6 +758,7 @@ static void ufs_put_super(struct super_block *sb)
 
 	if (!(sb->s_flags & MS_RDONLY))
 		ufs_put_super_internal(sb);
+	cancel_delayed_work_sync(&sbi->sync_work);
 
 	ubh_brelse_uspi (sbi->s_uspi);
 	kfree (sbi->s_uspi);
@@ -774,6 +793,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
 	if (!sbi)
 		goto failed_nomem;
 	sb->s_fs_info = sbi;
+	sbi->sb = sb;
 
 	UFSD("flag %u\n", (int)(sb->s_flags & MS_RDONLY));
 	
@@ -785,6 +805,8 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
 	}
 #endif
 	mutex_init(&sbi->mutex);
+	spin_lock_init(&sbi->work_lock);
+	INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs);
 	/*
 	 * Set default mount options
 	 * Parse mount options
@@ -1304,7 +1326,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
 			ufs_set_fs_state(sb, usb1, usb3,
 				UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time));
 		ubh_mark_buffer_dirty (USPI_UBH(uspi));
-		sb->s_dirt = 0;
 		sb->s_flags |= MS_RDONLY;
 	} else {
 	/*
@@ -1454,7 +1475,6 @@ static const struct super_operations ufs_super_ops = {
 	.write_inode	= ufs_write_inode,
 	.evict_inode	= ufs_evict_inode,
 	.put_super	= ufs_put_super,
-	.write_super	= ufs_write_super,
 	.sync_fs	= ufs_sync_fs,
 	.statfs		= ufs_statfs,
 	.remount_fs	= ufs_remount,
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 528750b7e70..343e6fc571e 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -20,6 +20,10 @@ struct ufs_sb_info {
 	unsigned s_mount_opt;
 	struct mutex mutex;
 	struct task_struct *mutex_owner;
+	struct super_block *sb;
+	int work_queued; /* non-zero if the delayed work is queued */
+	struct delayed_work sync_work; /* FS sync delayed work */
+	spinlock_t work_lock; /* protects sync_work and work_queued */
 };
 
 struct ufs_inode_info {
@@ -123,6 +127,7 @@ extern __printf(3, 4)
 void ufs_error(struct super_block *, const char *, const char *, ...);
 extern __printf(3, 4)
 void ufs_panic(struct super_block *, const char *, const char *, ...);
+void ufs_mark_sb_dirty(struct super_block *sb);
 
 /* symlink.c */
 extern const struct inode_operations ufs_fast_symlink_inode_operations;
diff --git a/fs/ufs/ufs_fs.h b/fs/ufs/ufs_fs.h
index 8aba544f9fa..0cbd5d340b6 100644
--- a/fs/ufs/ufs_fs.h
+++ b/fs/ufs/ufs_fs.h
@@ -34,6 +34,7 @@
 #include <linux/kernel.h>
 #include <linux/stat.h>
 #include <linux/fs.h>
+#include <linux/workqueue.h>
 
 #include <asm/div64.h>
 typedef __u64 __bitwise __fs64;
-- 
cgit v1.2.3-70-g09d2


From 6eedc70150d55b5885800eb6664ea226dc2cb66f Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 3 Jul 2012 16:45:27 +0200
Subject: vfs: Move noop_backing_dev_info check from sync into writeback

In principle, a filesystem may want to have ->sync_fs() called during sync(1)
although it does not have a bdi (i.e. s_bdi is set to noop_backing_dev_info).
Only writeback code really needs bdi set to something reasonable. So move the
checks where they are more logical.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fs-writeback.c | 5 +++++
 fs/sync.c         | 7 -------
 2 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 41a3ccff18d..8f660dd6137 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1315,6 +1315,8 @@ void writeback_inodes_sb_nr(struct super_block *sb,
 		.reason			= reason,
 	};
 
+	if (sb->s_bdi == &noop_backing_dev_info)
+		return;
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 	bdi_queue_work(sb->s_bdi, &work);
 	wait_for_completion(&done);
@@ -1398,6 +1400,9 @@ void sync_inodes_sb(struct super_block *sb)
 		.reason		= WB_REASON_SYNC,
 	};
 
+	/* Nothing to do? */
+	if (sb->s_bdi == &noop_backing_dev_info)
+		return;
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
 	bdi_queue_work(sb->s_bdi, &work);
diff --git a/fs/sync.c b/fs/sync.c
index 11e3d1c4490..b3d2a001293 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -29,13 +29,6 @@
  */
 static int __sync_filesystem(struct super_block *sb, int wait)
 {
-	/*
-	 * This should be safe, as we require bdi backing to actually
-	 * write out data in the first place
-	 */
-	if (sb->s_bdi == &noop_backing_dev_info)
-		return 0;
-
 	if (sb->s_qcop && sb->s_qcop->quota_sync)
 		sb->s_qcop->quota_sync(sb, -1, wait);
 
-- 
cgit v1.2.3-70-g09d2


From ceed17236a7491b44ee2be21f56a41ab997cbe7d Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 3 Jul 2012 16:45:28 +0200
Subject: quota: Split dquot_quota_sync() to writeback and cache flushing part

Split off part of dquot_quota_sync() which writes dquots into a quota file
to a separate function. In the next patch we will use the function from
filesystems and we do not want to abuse ->quota_sync quotactl callback more
than necessary.

Acked-by: Steven Whitehouse <swhiteho@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/gfs2/quota.c          |  4 ++--
 fs/gfs2/quota.h          |  2 +-
 fs/gfs2/super.c          |  2 +-
 fs/gfs2/sys.c            |  2 +-
 fs/quota/dquot.c         | 24 +++++++++++++++++++++---
 fs/quota/quota.c         |  4 ++--
 fs/sync.c                |  2 +-
 include/linux/quota.h    |  2 +-
 include/linux/quotaops.h |  8 +++++++-
 9 files changed, 37 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index b97178e7d39..27b5cc7d688 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1108,7 +1108,7 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
 	}
 }
 
-int gfs2_quota_sync(struct super_block *sb, int type, int wait)
+int gfs2_quota_sync(struct super_block *sb, int type)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
 	struct gfs2_quota_data **qda;
@@ -1154,7 +1154,7 @@ int gfs2_quota_sync(struct super_block *sb, int type, int wait)
 
 static int gfs2_quota_sync_timeo(struct super_block *sb, int type)
 {
-	return gfs2_quota_sync(sb, type, 0);
+	return gfs2_quota_sync(sb, type);
 }
 
 int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id)
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 90bf1c302a9..f25d98b8790 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -26,7 +26,7 @@ extern int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid);
 extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
 			      u32 uid, u32 gid);
 
-extern int gfs2_quota_sync(struct super_block *sb, int type, int wait);
+extern int gfs2_quota_sync(struct super_block *sb, int type);
 extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id);
 
 extern int gfs2_quota_init(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 713e621c240..313c329490e 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -838,7 +838,7 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
 	int error;
 
 	flush_workqueue(gfs2_delete_workqueue);
-	gfs2_quota_sync(sdp->sd_vfs, 0, 1);
+	gfs2_quota_sync(sdp->sd_vfs, 0);
 	gfs2_statfs_sync(sdp->sd_vfs, 0);
 
 	error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE,
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 9c2592b1d5f..73ecc34c434 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -168,7 +168,7 @@ static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf,
 	if (simple_strtol(buf, NULL, 0) != 1)
 		return -EINVAL;
 
-	gfs2_quota_sync(sdp->sd_vfs, 0, 1);
+	gfs2_quota_sync(sdp->sd_vfs, 0);
 	return len;
 }
 
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 10cbe841cb7..d679fc48ef2 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -595,12 +595,14 @@ out:
 }
 EXPORT_SYMBOL(dquot_scan_active);
 
-int dquot_quota_sync(struct super_block *sb, int type, int wait)
+/* Write all dquot structures to quota files */
+int dquot_writeback_dquots(struct super_block *sb, int type)
 {
 	struct list_head *dirty;
 	struct dquot *dquot;
 	struct quota_info *dqopt = sb_dqopt(sb);
 	int cnt;
+	int err, ret = 0;
 
 	mutex_lock(&dqopt->dqonoff_mutex);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -624,7 +626,9 @@ int dquot_quota_sync(struct super_block *sb, int type, int wait)
 			atomic_inc(&dquot->dq_count);
 			spin_unlock(&dq_list_lock);
 			dqstats_inc(DQST_LOOKUPS);
-			sb->dq_op->write_dquot(dquot);
+			err = sb->dq_op->write_dquot(dquot);
+			if (!ret && err)
+				err = ret;
 			dqput(dquot);
 			spin_lock(&dq_list_lock);
 		}
@@ -638,7 +642,21 @@ int dquot_quota_sync(struct super_block *sb, int type, int wait)
 	dqstats_inc(DQST_SYNCS);
 	mutex_unlock(&dqopt->dqonoff_mutex);
 
-	if (!wait || (dqopt->flags & DQUOT_QUOTA_SYS_FILE))
+	return ret;
+}
+EXPORT_SYMBOL(dquot_writeback_dquots);
+
+/* Write all dquot structures to disk and make them visible from userspace */
+int dquot_quota_sync(struct super_block *sb, int type)
+{
+	struct quota_info *dqopt = sb_dqopt(sb);
+	int cnt;
+	int ret;
+
+	ret = dquot_writeback_dquots(sb, type);
+	if (ret)
+		return ret;
+	if (dqopt->flags & DQUOT_QUOTA_SYS_FILE)
 		return 0;
 
 	/* This is not very clever (and fast) but currently I don't know about
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 9a391204ca2..c659f92298d 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -47,7 +47,7 @@ static int check_quotactl_permission(struct super_block *sb, int type, int cmd,
 static void quota_sync_one(struct super_block *sb, void *arg)
 {
 	if (sb->s_qcop && sb->s_qcop->quota_sync)
-		sb->s_qcop->quota_sync(sb, *(int *)arg, 1);
+		sb->s_qcop->quota_sync(sb, *(int *)arg);
 }
 
 static int quota_sync_all(int type)
@@ -270,7 +270,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
 	case Q_SYNC:
 		if (!sb->s_qcop->quota_sync)
 			return -ENOSYS;
-		return sb->s_qcop->quota_sync(sb, type, 1);
+		return sb->s_qcop->quota_sync(sb, type);
 	case Q_XQUOTAON:
 	case Q_XQUOTAOFF:
 	case Q_XQUOTARM:
diff --git a/fs/sync.c b/fs/sync.c
index b3d2a001293..cae145dd801 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -30,7 +30,7 @@
 static int __sync_filesystem(struct super_block *sb, int wait)
 {
 	if (sb->s_qcop && sb->s_qcop->quota_sync)
-		sb->s_qcop->quota_sync(sb, -1, wait);
+		sb->s_qcop->quota_sync(sb, -1);
 
 	if (wait)
 		sync_inodes_sb(sb);
diff --git a/include/linux/quota.h b/include/linux/quota.h
index c09fa042b5e..524ede8a160 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -333,7 +333,7 @@ struct quotactl_ops {
 	int (*quota_on)(struct super_block *, int, int, struct path *);
 	int (*quota_on_meta)(struct super_block *, int, int);
 	int (*quota_off)(struct super_block *, int);
-	int (*quota_sync)(struct super_block *, int, int);
+	int (*quota_sync)(struct super_block *, int);
 	int (*get_info)(struct super_block *, int, struct if_dqinfo *);
 	int (*set_info)(struct super_block *, int, struct if_dqinfo *);
 	int (*get_dqblk)(struct super_block *, int, qid_t, struct fs_disk_quota *);
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 17b977304a0..ec6b65feaab 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -83,7 +83,8 @@ int dquot_quota_on(struct super_block *sb, int type, int format_id,
 int dquot_quota_on_mount(struct super_block *sb, char *qf_name,
  	int format_id, int type);
 int dquot_quota_off(struct super_block *sb, int type);
-int dquot_quota_sync(struct super_block *sb, int type, int wait);
+int dquot_writeback_dquots(struct super_block *sb, int type);
+int dquot_quota_sync(struct super_block *sb, int type);
 int dquot_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
 int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
 int dquot_get_dqblk(struct super_block *sb, int type, qid_t id,
@@ -255,6 +256,11 @@ static inline int dquot_resume(struct super_block *sb, int type)
 
 #define dquot_file_open		generic_file_open
 
+static inline int dquot_writeback_dquots(struct super_block *sb, int type)
+{
+	return 0;
+}
+
 #endif /* CONFIG_QUOTA */
 
 static inline int dquot_alloc_space_nodirty(struct inode *inode, qsize_t nr)
-- 
cgit v1.2.3-70-g09d2


From a1177825719ccef3f76ef39bbfd5ebb6087d53c7 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 3 Jul 2012 16:45:29 +0200
Subject: quota: Move quota syncing to ->sync_fs method

Since the moment writes to quota files are using block device page cache and
space for quota structures is reserved at the moment they are first accessed we
have no reason to sync quota before inode writeback. In fact this order is now
only harmful since quota information can easily change during inode writeback
(either because conversion of delayed-allocated extents or simply because of
allocation of new blocks for simple filesystems not using page_mkwrite).

So move syncing of quota information after writeback of inodes into ->sync_fs
method. This way we do not have to use ->quota_sync callback which is primarily
intended for use by quotactl syscall anyway and we get rid of calling
->sync_fs() twice unnecessarily. We skip quota syncing for OCFS2 since it does
proper quota journalling in all cases (unlike ext3, ext4, and reiserfs which
also support legacy non-journalled quotas) and thus there are no dirty quota
structures.

CC: "Theodore Ts'o" <tytso@mit.edu>
CC: Joel Becker <jlbec@evilplan.org>
CC: reiserfs-devel@vger.kernel.org
Acked-by: Steven Whitehouse <swhiteho@redhat.com>
Acked-by: Dave Kleikamp <shaggy@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext2/super.c     | 6 ++++++
 fs/ext3/super.c     | 5 +++++
 fs/ext4/super.c     | 5 +++++
 fs/gfs2/super.c     | 2 ++
 fs/jfs/super.c      | 5 +++++
 fs/reiserfs/super.c | 5 +++++
 fs/sync.c           | 3 ---
 7 files changed, 28 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index b3621cb7ea3..5df3d2d8169 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1184,6 +1184,12 @@ static int ext2_sync_fs(struct super_block *sb, int wait)
 	struct ext2_sb_info *sbi = EXT2_SB(sb);
 	struct ext2_super_block *es = EXT2_SB(sb)->s_es;
 
+	/*
+	 * Write quota structures to quota file, sync_blockdev() will write
+	 * them to disk later
+	 */
+	dquot_writeback_dquots(sb, -1);
+
 	spin_lock(&sbi->s_lock);
 	if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) {
 		ext2_debug("setting valid to 0\n");
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 8c3a44b7c37..4ac304c55c5 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2526,6 +2526,11 @@ static int ext3_sync_fs(struct super_block *sb, int wait)
 	tid_t target;
 
 	trace_ext3_sync_fs(sb, wait);
+	/*
+	 * Writeback quota in non-journalled quota case - journalled quota has
+	 * no dirty dquots
+	 */
+	dquot_writeback_dquots(sb, -1);
 	if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
 		if (wait)
 			log_wait_commit(EXT3_SB(sb)->s_journal, target);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index eb7aa3e4ef0..d8759401eca 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4325,6 +4325,11 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
 
 	trace_ext4_sync_fs(sb, wait);
 	flush_workqueue(sbi->dio_unwritten_wq);
+	/*
+	 * Writeback quota in non-journalled quota case - journalled quota has
+	 * no dirty dquots
+	 */
+	dquot_writeback_dquots(sb, -1);
 	if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
 		if (wait)
 			jbd2_log_wait_commit(sbi->s_journal, target);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 313c329490e..f3d6bbfb32c 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -952,6 +952,8 @@ restart:
 static int gfs2_sync_fs(struct super_block *sb, int wait)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
+
+	gfs2_quota_sync(sb, -1);
 	if (wait && sdp)
 		gfs2_log_flush(sdp, NULL);
 	return 0;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 4a82950f412..c55c7452d28 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -601,6 +601,11 @@ static int jfs_sync_fs(struct super_block *sb, int wait)
 
 	/* log == NULL indicates read-only mount */
 	if (log) {
+		/*
+		 * Write quota structures to quota file, sync_blockdev() will
+		 * write them to disk later
+		 */
+		dquot_writeback_dquots(sb, -1);
 		jfs_flush_journal(log, wait);
 		jfs_syncpt(log, 0);
 	}
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 651ce767b55..7a37dabf5a9 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -68,6 +68,11 @@ static int reiserfs_sync_fs(struct super_block *s, int wait)
 {
 	struct reiserfs_transaction_handle th;
 
+	/*
+	 * Writeback quota in non-journalled quota case - journalled quota has
+	 * no dirty dquots
+	 */
+	dquot_writeback_dquots(s, -1);
 	reiserfs_write_lock(s);
 	if (!journal_begin(&th, s, 1))
 		if (!journal_end_sync(&th, s, 1))
diff --git a/fs/sync.c b/fs/sync.c
index cae145dd801..66acd2ba91c 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -29,9 +29,6 @@
  */
 static int __sync_filesystem(struct super_block *sb, int wait)
 {
-	if (sb->s_qcop && sb->s_qcop->quota_sync)
-		sb->s_qcop->quota_sync(sb, -1);
-
 	if (wait)
 		sync_inodes_sb(sb);
 	else
-- 
cgit v1.2.3-70-g09d2


From b3de653105180b57af90ef2f5b8441f085f4ff56 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 3 Jul 2012 16:45:30 +0200
Subject: vfs: Reorder operations during sys_sync

Change the order of operations during sync from

for_each_sb {
        writeback_inodes_sb();
        sync_fs(nowait);
        __sync_blockdev(nowait);
}
for_each_sb {
        sync_inodes_sb();
        sync_fs(wait);
        __sync_blockdev(wait);
}

to

for_each_sb
        writeback_inodes_sb();
for_each_sb
        sync_fs(nowait);
for_each_sb
        __sync_blockdev(nowait);
for_each_sb
        sync_inodes_sb();
for_each_sb
        sync_fs(wait);
for_each_sb
        __sync_blockdev(wait);

This is a preparation for the following patches in this series.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/sync.c | 46 ++++++++++++++++++++++++++++++++++------------
 1 file changed, 34 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/sync.c b/fs/sync.c
index 66acd2ba91c..490e9020113 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -67,18 +67,28 @@ int sync_filesystem(struct super_block *sb)
 }
 EXPORT_SYMBOL_GPL(sync_filesystem);
 
-static void sync_one_sb(struct super_block *sb, void *arg)
+static void sync_inodes_one_sb(struct super_block *sb, void *arg)
 {
 	if (!(sb->s_flags & MS_RDONLY))
-		__sync_filesystem(sb, *(int *)arg);
+		sync_inodes_sb(sb);
 }
-/*
- * Sync all the data for all the filesystems (called by sys_sync() and
- * emergency sync)
- */
-static void sync_filesystems(int wait)
+
+static void writeback_inodes_one_sb(struct super_block *sb, void *arg)
 {
-	iterate_supers(sync_one_sb, &wait);
+	if (!(sb->s_flags & MS_RDONLY))
+		writeback_inodes_sb(sb, WB_REASON_SYNC);
+}
+
+static void sync_fs_one_sb(struct super_block *sb, void *arg)
+{
+	if (!(sb->s_flags & MS_RDONLY) && sb->s_op->sync_fs)
+		sb->s_op->sync_fs(sb, *(int *)arg);
+}
+
+static void sync_blkdev_one_sb(struct super_block *sb, void *arg)
+{
+	if (!(sb->s_flags & MS_RDONLY))
+		__sync_blockdev(sb->s_bdev, *(int *)arg);
 }
 
 /*
@@ -87,9 +97,15 @@ static void sync_filesystems(int wait)
  */
 SYSCALL_DEFINE0(sync)
 {
+	int nowait = 0, wait = 1;
+
 	wakeup_flusher_threads(0, WB_REASON_SYNC);
-	sync_filesystems(0);
-	sync_filesystems(1);
+	iterate_supers(writeback_inodes_one_sb, NULL);
+	iterate_supers(sync_fs_one_sb, &nowait);
+	iterate_supers(sync_blkdev_one_sb, &nowait);
+	iterate_supers(sync_inodes_one_sb, NULL);
+	iterate_supers(sync_fs_one_sb, &wait);
+	iterate_supers(sync_blkdev_one_sb, &wait);
 	if (unlikely(laptop_mode))
 		laptop_sync_completion();
 	return 0;
@@ -97,12 +113,18 @@ SYSCALL_DEFINE0(sync)
 
 static void do_sync_work(struct work_struct *work)
 {
+	int nowait = 0;
+
 	/*
 	 * Sync twice to reduce the possibility we skipped some inodes / pages
 	 * because they were temporarily locked
 	 */
-	sync_filesystems(0);
-	sync_filesystems(0);
+	iterate_supers(sync_inodes_one_sb, &nowait);
+	iterate_supers(sync_fs_one_sb, &nowait);
+	iterate_supers(sync_blkdev_one_sb, &nowait);
+	iterate_supers(sync_inodes_one_sb, &nowait);
+	iterate_supers(sync_fs_one_sb, &nowait);
+	iterate_supers(sync_blkdev_one_sb, &nowait);
 	printk("Emergency Sync complete\n");
 	kfree(work);
 }
-- 
cgit v1.2.3-70-g09d2


From 5c0d6b60a0ba46d45020547eacf7199171920935 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 3 Jul 2012 16:45:31 +0200
Subject: vfs: Create function for iterating over block devices

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/block_dev.c     | 36 ++++++++++++++++++++++++++++++++++++
 include/linux/fs.h |  5 +++++
 2 files changed, 41 insertions(+)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index c2bbe1fb132..1e519195d45 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1710,3 +1710,39 @@ int __invalidate_device(struct block_device *bdev, bool kill_dirty)
 	return res;
 }
 EXPORT_SYMBOL(__invalidate_device);
+
+void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
+{
+	struct inode *inode, *old_inode = NULL;
+
+	spin_lock(&inode_sb_list_lock);
+	list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
+		struct address_space *mapping = inode->i_mapping;
+
+		spin_lock(&inode->i_lock);
+		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
+		    mapping->nrpages == 0) {
+			spin_unlock(&inode->i_lock);
+			continue;
+		}
+		__iget(inode);
+		spin_unlock(&inode->i_lock);
+		spin_unlock(&inode_sb_list_lock);
+		/*
+		 * We hold a reference to 'inode' so it couldn't have been
+		 * removed from s_inodes list while we dropped the
+		 * inode_sb_list_lock.  We cannot iput the inode now as we can
+		 * be holding the last reference and we cannot iput it under
+		 * inode_sb_list_lock. So we keep the reference and iput it
+		 * later.
+		 */
+		iput(old_inode);
+		old_inode = inode;
+
+		func(I_BDEV(inode), arg);
+
+		spin_lock(&inode_sb_list_lock);
+	}
+	spin_unlock(&inode_sb_list_lock);
+	iput(old_inode);
+}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 48548bdd772..6a6ca85bee2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2102,6 +2102,7 @@ extern sector_t blkdev_max_block(struct block_device *bdev);
 extern void bd_forget(struct inode *inode);
 extern void bdput(struct block_device *);
 extern void invalidate_bdev(struct block_device *);
+extern void iterate_bdevs(void (*)(struct block_device *, void *), void *);
 extern int sync_blockdev(struct block_device *bdev);
 extern void kill_bdev(struct block_device *);
 extern struct super_block *freeze_bdev(struct block_device *);
@@ -2123,6 +2124,10 @@ static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb)
 {
 	return 0;
 }
+
+static inline void iterate_bdevs(void (*f)(struct block_device *, void *), void *arg)
+{
+}
 #endif
 extern int sync_filesystem(struct super_block *);
 extern const struct file_operations def_blk_fops;
-- 
cgit v1.2.3-70-g09d2


From a8c7176b6ded413d5044a00f1d05477b95a6d7ad Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 3 Jul 2012 16:45:32 +0200
Subject: vfs: Make sys_sync writeout also block device inodes

In case block device does not have filesystem mounted on it, sys_sync will just
ignore it and doesn't writeout its dirty pages. This is because writeback code
avoids writing inodes from superblock without backing device and
blockdev_superblock is such a superblock.  Since it's unexpected that sync
doesn't writeout dirty data for block devices be nice to users and change the
behavior to do so. So now we iterate over all block devices on blockdev_super
instead of iterating over all superblocks when syncing block devices.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/sync.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/sync.c b/fs/sync.c
index 490e9020113..0b166f26362 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -85,10 +85,14 @@ static void sync_fs_one_sb(struct super_block *sb, void *arg)
 		sb->s_op->sync_fs(sb, *(int *)arg);
 }
 
-static void sync_blkdev_one_sb(struct super_block *sb, void *arg)
+static void flush_one_bdev(struct block_device *bdev, void *arg)
 {
-	if (!(sb->s_flags & MS_RDONLY))
-		__sync_blockdev(sb->s_bdev, *(int *)arg);
+	__sync_blockdev(bdev, 0);
+}
+
+static void sync_one_bdev(struct block_device *bdev, void *arg)
+{
+	sync_blockdev(bdev);
 }
 
 /*
@@ -102,10 +106,10 @@ SYSCALL_DEFINE0(sync)
 	wakeup_flusher_threads(0, WB_REASON_SYNC);
 	iterate_supers(writeback_inodes_one_sb, NULL);
 	iterate_supers(sync_fs_one_sb, &nowait);
-	iterate_supers(sync_blkdev_one_sb, &nowait);
+	iterate_bdevs(flush_one_bdev, NULL);
 	iterate_supers(sync_inodes_one_sb, NULL);
 	iterate_supers(sync_fs_one_sb, &wait);
-	iterate_supers(sync_blkdev_one_sb, &wait);
+	iterate_bdevs(sync_one_bdev, NULL);
 	if (unlikely(laptop_mode))
 		laptop_sync_completion();
 	return 0;
@@ -121,10 +125,10 @@ static void do_sync_work(struct work_struct *work)
 	 */
 	iterate_supers(sync_inodes_one_sb, &nowait);
 	iterate_supers(sync_fs_one_sb, &nowait);
-	iterate_supers(sync_blkdev_one_sb, &nowait);
+	iterate_bdevs(flush_one_bdev, NULL);
 	iterate_supers(sync_inodes_one_sb, &nowait);
 	iterate_supers(sync_fs_one_sb, &nowait);
-	iterate_supers(sync_blkdev_one_sb, &nowait);
+	iterate_bdevs(flush_one_bdev, NULL);
 	printk("Emergency Sync complete\n");
 	kfree(work);
 }
-- 
cgit v1.2.3-70-g09d2


From d0e91b13eb34d449922124c34f8a05e498daa089 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 3 Jul 2012 16:45:33 +0200
Subject: vfs: Remove unnecessary flushing of block devices

It is not necessary to write block devices twice. The reason why we first did
flush and then proper sync is that
  for_each_bdev() {
    write_bdev()
    wait_for_completion()
  }
is much slower than
  for_each_bdev()
    write_bdev()
  for_each_bdev()
    wait_for_completion()
when there is bigger amount of data. But as is seen in the above, there's no real
need to scan pages and submit them twice. We just need to separate the submission
and waiting part. This patch does that.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/sync.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/sync.c b/fs/sync.c
index 0b166f26362..131ddae87a1 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -85,14 +85,14 @@ static void sync_fs_one_sb(struct super_block *sb, void *arg)
 		sb->s_op->sync_fs(sb, *(int *)arg);
 }
 
-static void flush_one_bdev(struct block_device *bdev, void *arg)
+static void fdatawrite_one_bdev(struct block_device *bdev, void *arg)
 {
-	__sync_blockdev(bdev, 0);
+	filemap_fdatawrite(bdev->bd_inode->i_mapping);
 }
 
-static void sync_one_bdev(struct block_device *bdev, void *arg)
+static void fdatawait_one_bdev(struct block_device *bdev, void *arg)
 {
-	sync_blockdev(bdev);
+	filemap_fdatawait(bdev->bd_inode->i_mapping);
 }
 
 /*
@@ -106,10 +106,10 @@ SYSCALL_DEFINE0(sync)
 	wakeup_flusher_threads(0, WB_REASON_SYNC);
 	iterate_supers(writeback_inodes_one_sb, NULL);
 	iterate_supers(sync_fs_one_sb, &nowait);
-	iterate_bdevs(flush_one_bdev, NULL);
 	iterate_supers(sync_inodes_one_sb, NULL);
 	iterate_supers(sync_fs_one_sb, &wait);
-	iterate_bdevs(sync_one_bdev, NULL);
+	iterate_bdevs(fdatawrite_one_bdev, NULL);
+	iterate_bdevs(fdatawait_one_bdev, NULL);
 	if (unlikely(laptop_mode))
 		laptop_sync_completion();
 	return 0;
@@ -125,10 +125,10 @@ static void do_sync_work(struct work_struct *work)
 	 */
 	iterate_supers(sync_inodes_one_sb, &nowait);
 	iterate_supers(sync_fs_one_sb, &nowait);
-	iterate_bdevs(flush_one_bdev, NULL);
+	iterate_bdevs(fdatawrite_one_bdev, NULL);
 	iterate_supers(sync_inodes_one_sb, &nowait);
 	iterate_supers(sync_fs_one_sb, &nowait);
-	iterate_bdevs(flush_one_bdev, NULL);
+	iterate_bdevs(fdatawrite_one_bdev, NULL);
 	printk("Emergency Sync complete\n");
 	kfree(work);
 }
-- 
cgit v1.2.3-70-g09d2


From 4ea425b63a3dfeb7707fc7cc7161c11a51e871ed Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 3 Jul 2012 16:45:34 +0200
Subject: vfs: Avoid unnecessary WB_SYNC_NONE writeback during sys_sync and
 reorder sync passes

wakeup_flusher_threads(0) will queue work doing complete writeback for each
flusher thread. Thus there is not much point in submitting another work doing
full inode WB_SYNC_NONE writeback by writeback_inodes_sb().

After this change it does not make sense to call nonblocking ->sync_fs and
block device flush before calling sync_inodes_sb() because
wakeup_flusher_threads() is completely asynchronous and thus these functions
would be called in parallel with inode writeback running which will effectively
void any work they do. So we move sync_inodes_sb() call before these two
functions.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/sync.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/sync.c b/fs/sync.c
index 131ddae87a1..eb8722dc556 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -73,12 +73,6 @@ static void sync_inodes_one_sb(struct super_block *sb, void *arg)
 		sync_inodes_sb(sb);
 }
 
-static void writeback_inodes_one_sb(struct super_block *sb, void *arg)
-{
-	if (!(sb->s_flags & MS_RDONLY))
-		writeback_inodes_sb(sb, WB_REASON_SYNC);
-}
-
 static void sync_fs_one_sb(struct super_block *sb, void *arg)
 {
 	if (!(sb->s_flags & MS_RDONLY) && sb->s_op->sync_fs)
@@ -96,17 +90,22 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg)
 }
 
 /*
- * sync everything.  Start out by waking pdflush, because that writes back
- * all queues in parallel.
+ * Sync everything. We start by waking flusher threads so that most of
+ * writeback runs on all devices in parallel. Then we sync all inodes reliably
+ * which effectively also waits for all flusher threads to finish doing
+ * writeback. At this point all data is on disk so metadata should be stable
+ * and we tell filesystems to sync their metadata via ->sync_fs() calls.
+ * Finally, we writeout all block devices because some filesystems (e.g. ext2)
+ * just write metadata (such as inodes or bitmaps) to block device page cache
+ * and do not sync it on their own in ->sync_fs().
  */
 SYSCALL_DEFINE0(sync)
 {
 	int nowait = 0, wait = 1;
 
 	wakeup_flusher_threads(0, WB_REASON_SYNC);
-	iterate_supers(writeback_inodes_one_sb, NULL);
-	iterate_supers(sync_fs_one_sb, &nowait);
 	iterate_supers(sync_inodes_one_sb, NULL);
+	iterate_supers(sync_fs_one_sb, &nowait);
 	iterate_supers(sync_fs_one_sb, &wait);
 	iterate_bdevs(fdatawrite_one_bdev, NULL);
 	iterate_bdevs(fdatawait_one_bdev, NULL);
-- 
cgit v1.2.3-70-g09d2


From e8b96eb5034a0ccebf36760f88e31ea3e3cdf1e4 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Mon, 30 Apr 2012 13:11:29 -0500
Subject: vfs: allow custom EOF in generic_file_llseek code

For ext3/4 htree directories, using the vfs llseek function with
SEEK_END goes to i_size like for any other file, but in reality
we want the maximum possible hash value.  Recent changes
in ext4 have cut & pasted generic_file_llseek() back into fs/ext4/dir.c,
but replicating this core code seems like a bad idea, especially
since the copy has already diverged from the vfs.

This patch updates generic_file_llseek_size to accept
both a custom maximum offset, and a custom EOF position.  With this
in place, ext4_dir_llseek can pass in the appropriate maximum hash
position for both maxsize and eof, and get what it wants.

As far as I know, this does not fix any bugs - nfs in the kernel
doesn't use SEEK_END, and I don't know of any user who does.  But
some ext4 folks seem keen on doing the right thing here, and I can't
really argue.

(Patch also fixes up some comments slightly)

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext3/dir.c      |  3 ++-
 fs/ext4/file.c     |  3 ++-
 fs/read_write.c    | 18 ++++++++++--------
 include/linux/fs.h |  2 +-
 4 files changed, 15 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 92490e9f85c..901f67e3786 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -303,7 +303,8 @@ loff_t ext3_dir_llseek(struct file *file, loff_t offset, int origin)
 
 	if (likely(dx_dir))
 		return generic_file_llseek_size(file, offset, origin,
-					        ext3_get_htree_eof(file));
+					        ext3_get_htree_eof(file),
+						i_size_read(inode));
 	else
 		return generic_file_llseek(file, offset, origin);
 }
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 8c7642a0005..f3dadd0a0d5 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -225,7 +225,8 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
 	else
 		maxbytes = inode->i_sb->s_maxbytes;
 
-	return generic_file_llseek_size(file, offset, origin, maxbytes);
+	return generic_file_llseek_size(file, offset, origin,
+					maxbytes, i_size_read(inode));
 }
 
 const struct file_operations ext4_file_operations = {
diff --git a/fs/read_write.c b/fs/read_write.c
index c20614f86c0..1adfb691e4f 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -55,10 +55,11 @@ static loff_t lseek_execute(struct file *file, struct inode *inode,
  * @file:	file structure to seek on
  * @offset:	file offset to seek to
  * @origin:	type of seek
- * @size:	max size of file system
+ * @size:	max size of this file in file system
+ * @eof:	offset used for SEEK_END position
  *
  * This is a variant of generic_file_llseek that allows passing in a custom
- * file size.
+ * maximum file size and a custom EOF position, for e.g. hashed directories
  *
  * Synchronization:
  * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms)
@@ -67,13 +68,13 @@ static loff_t lseek_execute(struct file *file, struct inode *inode,
  */
 loff_t
 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
-		loff_t maxsize)
+		loff_t maxsize, loff_t eof)
 {
 	struct inode *inode = file->f_mapping->host;
 
 	switch (origin) {
 	case SEEK_END:
-		offset += i_size_read(inode);
+		offset += eof;
 		break;
 	case SEEK_CUR:
 		/*
@@ -99,7 +100,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int origin,
 		 * In the generic case the entire file is data, so as long as
 		 * offset isn't at the end of the file then the offset is data.
 		 */
-		if (offset >= i_size_read(inode))
+		if (offset >= eof)
 			return -ENXIO;
 		break;
 	case SEEK_HOLE:
@@ -107,9 +108,9 @@ generic_file_llseek_size(struct file *file, loff_t offset, int origin,
 		 * There is a virtual hole at the end of the file, so as long as
 		 * offset isn't i_size or larger, return i_size.
 		 */
-		if (offset >= i_size_read(inode))
+		if (offset >= eof)
 			return -ENXIO;
-		offset = i_size_read(inode);
+		offset = eof;
 		break;
 	}
 
@@ -132,7 +133,8 @@ loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
 	struct inode *inode = file->f_mapping->host;
 
 	return generic_file_llseek_size(file, offset, origin,
-					inode->i_sb->s_maxbytes);
+					inode->i_sb->s_maxbytes,
+					i_size_read(inode));
 }
 EXPORT_SYMBOL(generic_file_llseek);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6a6ca85bee2..34acf51273d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2454,7 +2454,7 @@ extern loff_t noop_llseek(struct file *file, loff_t offset, int origin);
 extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
 extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin);
 extern loff_t generic_file_llseek_size(struct file *file, loff_t offset,
-		int origin, loff_t maxsize);
+		int origin, loff_t maxsize, loff_t eof);
 extern int generic_file_open(struct inode * inode, struct file * filp);
 extern int nonseekable_open(struct inode * inode, struct file * filp);
 
-- 
cgit v1.2.3-70-g09d2


From ec7268ce21b379a248705548573393e4f346b20b Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Mon, 30 Apr 2012 13:14:03 -0500
Subject: ext4: use core vfs llseek code for dir seeks

Use the new functionality in generic_file_llseek_size() to
accept a custom EOF position, and un-cut-and-paste all the
vfs llseek code from ext4.

Also fix up comments on ext4_llseek() to reflect reality.

Signed-off-by: Eric Sandeen <sandeen@redaht.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext4/dir.c  | 75 +++++++++++-----------------------------------------------
 fs/ext4/file.c |  6 ++---
 2 files changed, 17 insertions(+), 64 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index aa39e600d15..8e07d2a5a13 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -324,74 +324,27 @@ static inline loff_t ext4_get_htree_eof(struct file *filp)
 
 
 /*
- * ext4_dir_llseek() based on generic_file_llseek() to handle both
- * non-htree and htree directories, where the "offset" is in terms
- * of the filename hash value instead of the byte offset.
+ * ext4_dir_llseek() calls generic_file_llseek_size to handle htree
+ * directories, where the "offset" is in terms of the filename hash
+ * value instead of the byte offset.
  *
- * NOTE: offsets obtained *before* ext4_set_inode_flag(dir, EXT4_INODE_INDEX)
- *       will be invalid once the directory was converted into a dx directory
+ * Because we may return a 64-bit hash that is well beyond offset limits,
+ * we need to pass the max hash as the maximum allowable offset in
+ * the htree directory case.
+ *
+ * For non-htree, ext4_llseek already chooses the proper max offset.
  */
 loff_t ext4_dir_llseek(struct file *file, loff_t offset, int origin)
 {
 	struct inode *inode = file->f_mapping->host;
-	loff_t ret = -EINVAL;
 	int dx_dir = is_dx_dir(inode);
+	loff_t htree_max = ext4_get_htree_eof(file);
 
-	mutex_lock(&inode->i_mutex);
-
-	/* NOTE: relative offsets with dx directories might not work
-	 *       as expected, as it is difficult to figure out the
-	 *       correct offset between dx hashes */
-
-	switch (origin) {
-	case SEEK_END:
-		if (unlikely(offset > 0))
-			goto out_err; /* not supported for directories */
-
-		/* so only negative offsets are left, does that have a
-		 * meaning for directories at all? */
-		if (dx_dir)
-			offset += ext4_get_htree_eof(file);
-		else
-			offset += inode->i_size;
-		break;
-	case SEEK_CUR:
-		/*
-		 * Here we special-case the lseek(fd, 0, SEEK_CUR)
-		 * position-querying operation.  Avoid rewriting the "same"
-		 * f_pos value back to the file because a concurrent read(),
-		 * write() or lseek() might have altered it
-		 */
-		if (offset == 0) {
-			offset = file->f_pos;
-			goto out_ok;
-		}
-
-		offset += file->f_pos;
-		break;
-	}
-
-	if (unlikely(offset < 0))
-		goto out_err;
-
-	if (!dx_dir) {
-		if (offset > inode->i_sb->s_maxbytes)
-			goto out_err;
-	} else if (offset > ext4_get_htree_eof(file))
-		goto out_err;
-
-	/* Special lock needed here? */
-	if (offset != file->f_pos) {
-		file->f_pos = offset;
-		file->f_version = 0;
-	}
-
-out_ok:
-	ret = offset;
-out_err:
-	mutex_unlock(&inode->i_mutex);
-
-	return ret;
+	if (likely(dx_dir))
+		return generic_file_llseek_size(file, offset, origin,
+						    htree_max, htree_max);
+	else
+		return ext4_llseek(file, offset, origin);
 }
 
 /*
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index f3dadd0a0d5..782eecb57e4 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -211,9 +211,9 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
 }
 
 /*
- * ext4_llseek() copied from generic_file_llseek() to handle both
- * block-mapped and extent-mapped maxbytes values. This should
- * otherwise be identical with generic_file_llseek().
+ * ext4_llseek() handles both block-mapped and extent-mapped maxbytes values
+ * by calling generic_file_llseek_size() with the appropriate maxbytes
+ * value for each.
  */
 loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
 {
-- 
cgit v1.2.3-70-g09d2


From de9b9422026357c878a9fa3714936be0e1388e9c Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Mon, 30 Apr 2012 13:16:04 -0500
Subject: ext3: pass custom EOF to generic_file_llseek_size()

Use the new custom EOF argument to generic_file_llseek_size so
that SEEK_END will go to the max hash value for htree dirs
in ext3 rather than to i_size_read()

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext3/dir.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 901f67e3786..c8fff930790 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -300,11 +300,11 @@ loff_t ext3_dir_llseek(struct file *file, loff_t offset, int origin)
 {
 	struct inode *inode = file->f_mapping->host;
 	int dx_dir = is_dx_dir(inode);
+	loff_t htree_max = ext3_get_htree_eof(file);
 
 	if (likely(dx_dir))
 		return generic_file_llseek_size(file, offset, origin,
-					        ext3_get_htree_eof(file),
-						i_size_read(inode));
+					        htree_max, htree_max);
 	else
 		return generic_file_llseek(file, offset, origin);
 }
-- 
cgit v1.2.3-70-g09d2


From 3c0a6163688b8ca3f44029c7bdb3d91a865c878a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 18 Jul 2012 17:32:50 +0400
Subject: unobfuscate follow_up() a bit

really convoluted test in there has grown up during struct mount
introduction; what it checks is that we'd reached the root of
mount tree.
---
 fs/namei.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index c14dfac83c2..d4d15bbc8af 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -722,7 +722,7 @@ int follow_up(struct path *path)
 
 	br_read_lock(&vfsmount_lock);
 	parent = mnt->mnt_parent;
-	if (&parent->mnt == path->mnt) {
+	if (parent == mnt) {
 		br_read_unlock(&vfsmount_lock);
 		return 0;
 	}
-- 
cgit v1.2.3-70-g09d2


From 32a7991b6a9c758e4e2b8166c5e1cc7563c3dcde Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 18 Jul 2012 20:43:19 +0400
Subject: tidy up namei.c a bit

locking/unlocking for rcu walk taken to a couple of inline helpers

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 39 +++++++++++++++++++++------------------
 1 file changed, 21 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index d4d15bbc8af..2ccc35c4dc2 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -427,6 +427,18 @@ EXPORT_SYMBOL(path_put);
  * to restart the path walk from the beginning in ref-walk mode.
  */
 
+static inline void lock_rcu_walk(void)
+{
+	br_read_lock(&vfsmount_lock);
+	rcu_read_lock();
+}
+
+static inline void unlock_rcu_walk(void)
+{
+	rcu_read_unlock();
+	br_read_unlock(&vfsmount_lock);
+}
+
 /**
  * unlazy_walk - try to switch to ref-walk mode.
  * @nd: nameidata pathwalk data
@@ -480,8 +492,7 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
 	}
 	mntget(nd->path.mnt);
 
-	rcu_read_unlock();
-	br_read_unlock(&vfsmount_lock);
+	unlock_rcu_walk();
 	nd->flags &= ~LOOKUP_RCU;
 	return 0;
 
@@ -522,15 +533,13 @@ static int complete_walk(struct nameidata *nd)
 		spin_lock(&dentry->d_lock);
 		if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) {
 			spin_unlock(&dentry->d_lock);
-			rcu_read_unlock();
-			br_read_unlock(&vfsmount_lock);
+			unlock_rcu_walk();
 			return -ECHILD;
 		}
 		BUG_ON(nd->inode != dentry->d_inode);
 		spin_unlock(&dentry->d_lock);
 		mntget(nd->path.mnt);
-		rcu_read_unlock();
-		br_read_unlock(&vfsmount_lock);
+		unlock_rcu_walk();
 	}
 
 	if (likely(!(nd->flags & LOOKUP_JUMPED)))
@@ -985,8 +994,7 @@ failed:
 	nd->flags &= ~LOOKUP_RCU;
 	if (!(nd->flags & LOOKUP_ROOT))
 		nd->root.mnt = NULL;
-	rcu_read_unlock();
-	br_read_unlock(&vfsmount_lock);
+	unlock_rcu_walk();
 	return -ECHILD;
 }
 
@@ -1323,8 +1331,7 @@ static void terminate_walk(struct nameidata *nd)
 		nd->flags &= ~LOOKUP_RCU;
 		if (!(nd->flags & LOOKUP_ROOT))
 			nd->root.mnt = NULL;
-		rcu_read_unlock();
-		br_read_unlock(&vfsmount_lock);
+		unlock_rcu_walk();
 	}
 }
 
@@ -1691,8 +1698,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
 		nd->path = nd->root;
 		nd->inode = inode;
 		if (flags & LOOKUP_RCU) {
-			br_read_lock(&vfsmount_lock);
-			rcu_read_lock();
+			lock_rcu_walk();
 			nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
 		} else {
 			path_get(&nd->path);
@@ -1704,8 +1710,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
 
 	if (*name=='/') {
 		if (flags & LOOKUP_RCU) {
-			br_read_lock(&vfsmount_lock);
-			rcu_read_lock();
+			lock_rcu_walk();
 			set_root_rcu(nd);
 		} else {
 			set_root(nd);
@@ -1717,8 +1722,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
 			struct fs_struct *fs = current->fs;
 			unsigned seq;
 
-			br_read_lock(&vfsmount_lock);
-			rcu_read_lock();
+			lock_rcu_walk();
 
 			do {
 				seq = read_seqcount_begin(&fs->seq);
@@ -1753,8 +1757,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
 			if (fput_needed)
 				*fp = file;
 			nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
-			br_read_lock(&vfsmount_lock);
-			rcu_read_lock();
+			lock_rcu_walk();
 		} else {
 			path_get(&file->f_path);
 			fput_light(file, fput_needed);
-- 
cgit v1.2.3-70-g09d2


From 8fc37ec54cd8e37193b0d42809b785ff19661c34 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 19 Jul 2012 09:18:15 +0400
Subject: don't expose I_NEW inodes via dentry->d_inode

	d_instantiate(dentry, inode);
	unlock_new_inode(inode);

is a bad idea; do it the other way round...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ecryptfs/inode.c | 2 +-
 fs/ext2/namei.c     | 4 ++--
 fs/ext3/namei.c     | 4 ++--
 fs/ext4/namei.c     | 4 ++--
 fs/jffs2/dir.c      | 8 ++++----
 fs/jfs/namei.c      | 8 ++++----
 fs/reiserfs/namei.c | 8 ++++----
 7 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index da52cdbe838..ffa2be57804 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -269,8 +269,8 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
 		iput(ecryptfs_inode);
 		goto out;
 	}
-	d_instantiate(ecryptfs_dentry, ecryptfs_inode);
 	unlock_new_inode(ecryptfs_inode);
+	d_instantiate(ecryptfs_dentry, ecryptfs_inode);
 out:
 	return rc;
 }
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 9ba7de0e590..73b0d951983 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -41,8 +41,8 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
 {
 	int err = ext2_add_link(dentry, inode);
 	if (!err) {
-		d_instantiate(dentry, inode);
 		unlock_new_inode(inode);
+		d_instantiate(dentry, inode);
 		return 0;
 	}
 	inode_dec_link_count(inode);
@@ -242,8 +242,8 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
 	if (err)
 		goto out_fail;
 
-	d_instantiate(dentry, inode);
 	unlock_new_inode(inode);
+	d_instantiate(dentry, inode);
 out:
 	return err;
 
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 85286dbe275..8f4fddac01a 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1671,8 +1671,8 @@ static int ext3_add_nondir(handle_t *handle,
 	int err = ext3_add_entry(handle, dentry, inode);
 	if (!err) {
 		ext3_mark_inode_dirty(handle, inode);
-		d_instantiate(dentry, inode);
 		unlock_new_inode(inode);
+		d_instantiate(dentry, inode);
 		return 0;
 	}
 	drop_nlink(inode);
@@ -1836,8 +1836,8 @@ out_clear_inode:
 	if (err)
 		goto out_clear_inode;
 
-	d_instantiate(dentry, inode);
 	unlock_new_inode(inode);
+	d_instantiate(dentry, inode);
 out_stop:
 	brelse(dir_block);
 	ext3_journal_stop(handle);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index eca3e48a62f..d0d3f0e87f9 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2072,8 +2072,8 @@ static int ext4_add_nondir(handle_t *handle,
 	int err = ext4_add_entry(handle, dentry, inode);
 	if (!err) {
 		ext4_mark_inode_dirty(handle, inode);
-		d_instantiate(dentry, inode);
 		unlock_new_inode(inode);
+		d_instantiate(dentry, inode);
 		return 0;
 	}
 	drop_nlink(inode);
@@ -2249,8 +2249,8 @@ out_clear_inode:
 	err = ext4_mark_inode_dirty(handle, dir);
 	if (err)
 		goto out_clear_inode;
-	d_instantiate(dentry, inode);
 	unlock_new_inode(inode);
+	d_instantiate(dentry, inode);
 out_stop:
 	brelse(dir_block);
 	ext4_journal_stop(handle);
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 23245191c5b..ad7774d3209 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -226,8 +226,8 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry,
 		  __func__, inode->i_ino, inode->i_mode, inode->i_nlink,
 		  f->inocache->pino_nlink, inode->i_mapping->nrpages);
 
-	d_instantiate(dentry, inode);
 	unlock_new_inode(inode);
+	d_instantiate(dentry, inode);
 	return 0;
 
  fail:
@@ -446,8 +446,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
 	mutex_unlock(&dir_f->sem);
 	jffs2_complete_reservation(c);
 
-	d_instantiate(dentry, inode);
 	unlock_new_inode(inode);
+	d_instantiate(dentry, inode);
 	return 0;
 
  fail:
@@ -591,8 +591,8 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, umode_t mode
 	mutex_unlock(&dir_f->sem);
 	jffs2_complete_reservation(c);
 
-	d_instantiate(dentry, inode);
 	unlock_new_inode(inode);
+	d_instantiate(dentry, inode);
 	return 0;
 
  fail:
@@ -766,8 +766,8 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode
 	mutex_unlock(&dir_f->sem);
 	jffs2_complete_reservation(c);
 
-	d_instantiate(dentry, inode);
 	unlock_new_inode(inode);
+	d_instantiate(dentry, inode);
 	return 0;
 
  fail:
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index c426293e16c..3b91a7ad608 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -176,8 +176,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode,
 		unlock_new_inode(ip);
 		iput(ip);
 	} else {
-		d_instantiate(dentry, ip);
 		unlock_new_inode(ip);
+		d_instantiate(dentry, ip);
 	}
 
       out2:
@@ -309,8 +309,8 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
 		unlock_new_inode(ip);
 		iput(ip);
 	} else {
-		d_instantiate(dentry, ip);
 		unlock_new_inode(ip);
+		d_instantiate(dentry, ip);
 	}
 
       out2:
@@ -1043,8 +1043,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
 		unlock_new_inode(ip);
 		iput(ip);
 	} else {
-		d_instantiate(dentry, ip);
 		unlock_new_inode(ip);
+		d_instantiate(dentry, ip);
 	}
 
       out2:
@@ -1424,8 +1424,8 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
 		unlock_new_inode(ip);
 		iput(ip);
 	} else {
-		d_instantiate(dentry, ip);
 		unlock_new_inode(ip);
+		d_instantiate(dentry, ip);
 	}
 
       out1:
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 3916be1a330..8567fb84760 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -634,8 +634,8 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod
 	reiserfs_update_inode_transaction(inode);
 	reiserfs_update_inode_transaction(dir);
 
-	d_instantiate(dentry, inode);
 	unlock_new_inode(inode);
+	d_instantiate(dentry, inode);
 	retval = journal_end(&th, dir->i_sb, jbegin_count);
 
       out_failed:
@@ -712,8 +712,8 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode
 		goto out_failed;
 	}
 
-	d_instantiate(dentry, inode);
 	unlock_new_inode(inode);
+	d_instantiate(dentry, inode);
 	retval = journal_end(&th, dir->i_sb, jbegin_count);
 
       out_failed:
@@ -800,8 +800,8 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
 	// the above add_entry did not update dir's stat data
 	reiserfs_update_sd(&th, dir);
 
-	d_instantiate(dentry, inode);
 	unlock_new_inode(inode);
+	d_instantiate(dentry, inode);
 	retval = journal_end(&th, dir->i_sb, jbegin_count);
 out_failed:
 	reiserfs_write_unlock_once(dir->i_sb, lock_depth);
@@ -1096,8 +1096,8 @@ static int reiserfs_symlink(struct inode *parent_dir,
 		goto out_failed;
 	}
 
-	d_instantiate(dentry, inode);
 	unlock_new_inode(inode);
+	d_instantiate(dentry, inode);
 	retval = journal_end(&th, parent_dir->i_sb, jbegin_count);
       out_failed:
 	reiserfs_write_unlock(parent_dir->i_sb);
-- 
cgit v1.2.3-70-g09d2


From 3b8b487114c95ef6db5fef708ef69bfb5209014e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 25 Jun 2012 11:38:56 +0400
Subject: ecryptfs: don't reinvent the wheels, please - use struct completion

... and keep the sodding requests on stack - they are small enough.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ecryptfs/ecryptfs_kernel.h | 14 ---------
 fs/ecryptfs/kthread.c         | 72 ++++++++++++++++---------------------------
 fs/ecryptfs/main.c            |  5 ---
 3 files changed, 26 insertions(+), 65 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 867b64c5d84..989e034f02b 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -550,20 +550,6 @@ extern struct kmem_cache *ecryptfs_key_record_cache;
 extern struct kmem_cache *ecryptfs_key_sig_cache;
 extern struct kmem_cache *ecryptfs_global_auth_tok_cache;
 extern struct kmem_cache *ecryptfs_key_tfm_cache;
-extern struct kmem_cache *ecryptfs_open_req_cache;
-
-struct ecryptfs_open_req {
-#define ECRYPTFS_REQ_PROCESSED 0x00000001
-#define ECRYPTFS_REQ_DROPPED   0x00000002
-#define ECRYPTFS_REQ_ZOMBIE    0x00000004
-	u32 flags;
-	struct file **lower_file;
-	struct dentry *lower_dentry;
-	struct vfsmount *lower_mnt;
-	wait_queue_head_t wait;
-	struct mutex mux;
-	struct list_head kthread_ctl_list;
-};
 
 struct inode *ecryptfs_get_inode(struct inode *lower_inode,
 				 struct super_block *sb);
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
index 0dbe58a8b17..c7d199dc7d2 100644
--- a/fs/ecryptfs/kthread.c
+++ b/fs/ecryptfs/kthread.c
@@ -27,7 +27,13 @@
 #include <linux/mount.h>
 #include "ecryptfs_kernel.h"
 
-struct kmem_cache *ecryptfs_open_req_cache;
+struct ecryptfs_open_req {
+	struct file **lower_file;
+	struct dentry *lower_dentry;
+	struct vfsmount *lower_mnt;
+	struct completion done;
+	struct list_head kthread_ctl_list;
+};
 
 static struct ecryptfs_kthread_ctl {
 #define ECRYPTFS_KTHREAD_ZOMBIE 0x00000001
@@ -67,18 +73,13 @@ static int ecryptfs_threadfn(void *ignored)
 			req = list_first_entry(&ecryptfs_kthread_ctl.req_list,
 					       struct ecryptfs_open_req,
 					       kthread_ctl_list);
-			mutex_lock(&req->mux);
 			list_del(&req->kthread_ctl_list);
-			if (!(req->flags & ECRYPTFS_REQ_ZOMBIE)) {
-				dget(req->lower_dentry);
-				mntget(req->lower_mnt);
-				(*req->lower_file) = dentry_open(
-					req->lower_dentry, req->lower_mnt,
-					(O_RDWR | O_LARGEFILE), current_cred());
-				req->flags |= ECRYPTFS_REQ_PROCESSED;
-			}
-			wake_up(&req->wait);
-			mutex_unlock(&req->mux);
+			dget(req->lower_dentry);
+			mntget(req->lower_mnt);
+			(*req->lower_file) = dentry_open(
+				req->lower_dentry, req->lower_mnt,
+				(O_RDWR | O_LARGEFILE), current_cred());
+			complete(&req->done);
 		}
 		mutex_unlock(&ecryptfs_kthread_ctl.mux);
 	}
@@ -111,10 +112,9 @@ void ecryptfs_destroy_kthread(void)
 	ecryptfs_kthread_ctl.flags |= ECRYPTFS_KTHREAD_ZOMBIE;
 	list_for_each_entry(req, &ecryptfs_kthread_ctl.req_list,
 			    kthread_ctl_list) {
-		mutex_lock(&req->mux);
-		req->flags |= ECRYPTFS_REQ_ZOMBIE;
-		wake_up(&req->wait);
-		mutex_unlock(&req->mux);
+		list_del(&req->kthread_ctl_list);
+		*req->lower_file = ERR_PTR(-EIO);
+		complete(&req->done);
 	}
 	mutex_unlock(&ecryptfs_kthread_ctl.mux);
 	kthread_stop(ecryptfs_kthread);
@@ -136,7 +136,7 @@ int ecryptfs_privileged_open(struct file **lower_file,
 			     struct vfsmount *lower_mnt,
 			     const struct cred *cred)
 {
-	struct ecryptfs_open_req *req;
+	struct ecryptfs_open_req req;
 	int flags = O_LARGEFILE;
 	int rc = 0;
 
@@ -153,17 +153,10 @@ int ecryptfs_privileged_open(struct file **lower_file,
 		rc = PTR_ERR((*lower_file));
 		goto out;
 	}
-	req = kmem_cache_alloc(ecryptfs_open_req_cache, GFP_KERNEL);
-	if (!req) {
-		rc = -ENOMEM;
-		goto out;
-	}
-	mutex_init(&req->mux);
-	req->lower_file = lower_file;
-	req->lower_dentry = lower_dentry;
-	req->lower_mnt = lower_mnt;
-	init_waitqueue_head(&req->wait);
-	req->flags = 0;
+	init_completion(&req.done);
+	req.lower_file = lower_file;
+	req.lower_dentry = lower_dentry;
+	req.lower_mnt = lower_mnt;
 	mutex_lock(&ecryptfs_kthread_ctl.mux);
 	if (ecryptfs_kthread_ctl.flags & ECRYPTFS_KTHREAD_ZOMBIE) {
 		rc = -EIO;
@@ -171,27 +164,14 @@ int ecryptfs_privileged_open(struct file **lower_file,
 		printk(KERN_ERR "%s: We are in the middle of shutting down; "
 		       "aborting privileged request to open lower file\n",
 			__func__);
-		goto out_free;
+		goto out;
 	}
-	list_add_tail(&req->kthread_ctl_list, &ecryptfs_kthread_ctl.req_list);
+	list_add_tail(&req.kthread_ctl_list, &ecryptfs_kthread_ctl.req_list);
 	mutex_unlock(&ecryptfs_kthread_ctl.mux);
 	wake_up(&ecryptfs_kthread_ctl.wait);
-	wait_event(req->wait, (req->flags != 0));
-	mutex_lock(&req->mux);
-	BUG_ON(req->flags == 0);
-	if (req->flags & ECRYPTFS_REQ_DROPPED
-	    || req->flags & ECRYPTFS_REQ_ZOMBIE) {
-		rc = -EIO;
-		printk(KERN_WARNING "%s: Privileged open request dropped\n",
-		       __func__);
-		goto out_unlock;
-	}
-	if (IS_ERR(*req->lower_file))
-		rc = PTR_ERR(*req->lower_file);
-out_unlock:
-	mutex_unlock(&req->mux);
-out_free:
-	kmem_cache_free(ecryptfs_open_req_cache, req);
+	wait_for_completion(&req.done);
+	if (IS_ERR(*lower_file))
+		rc = PTR_ERR(*lower_file);
 out:
 	return rc;
 }
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 7edeb3d893c..1c0b3b6b75c 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -681,11 +681,6 @@ static struct ecryptfs_cache_info {
 		.name = "ecryptfs_key_tfm_cache",
 		.size = sizeof(struct ecryptfs_key_tfm),
 	},
-	{
-		.cache = &ecryptfs_open_req_cache,
-		.name = "ecryptfs_open_req_cache",
-		.size = sizeof(struct ecryptfs_open_req),
-	},
 };
 
 static void ecryptfs_free_kmem_caches(void)
-- 
cgit v1.2.3-70-g09d2


From 765927b2d508712d320c8934db963bbe14c3fcec Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 26 Jun 2012 21:58:53 +0400
Subject: switch dentry_open() to struct path, make it grab references itself

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/powerpc/platforms/cell/spufs/inode.c |  18 ++---
 fs/autofs4/dev-ioctl.c                    |   4 +-
 fs/cachefiles/rdwr.c                      |   8 +-
 fs/ecryptfs/kthread.c                     |  21 ++----
 fs/exportfs/expfs.c                       |  13 ++--
 fs/hppfs/hppfs.c                          |  20 ++---
 fs/nfsd/vfs.c                             |  10 +--
 fs/notify/fanotify/fanotify_user.c        |   8 +-
 fs/open.c                                 |  17 ++---
 fs/xfs/xfs_ioctl.c                        |   7 +-
 include/linux/fs.h                        |   3 +-
 ipc/mqueue.c                              | 117 ++++++++++++------------------
 security/selinux/hooks.c                  |   3 +-
 security/selinux/include/security.h       |   2 +-
 security/selinux/selinuxfs.c              |   6 +-
 15 files changed, 106 insertions(+), 151 deletions(-)

(limited to 'fs')

diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 1c9cac0cf89..d544d7816df 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -317,7 +317,7 @@ out:
 	return ret;
 }
 
-static int spufs_context_open(struct dentry *dentry, struct vfsmount *mnt)
+static int spufs_context_open(struct path *path)
 {
 	int ret;
 	struct file *filp;
@@ -326,11 +326,7 @@ static int spufs_context_open(struct dentry *dentry, struct vfsmount *mnt)
 	if (ret < 0)
 		return ret;
 
-	/*
-	 * get references for dget and mntget, will be released
-	 * in error path of *_open().
-	 */
-	filp = dentry_open(dget(dentry), mntget(mnt), O_RDONLY, current_cred());
+	filp = dentry_open(path, O_RDONLY, current_cred());
 	if (IS_ERR(filp)) {
 		put_unused_fd(ret);
 		return PTR_ERR(filp);
@@ -452,6 +448,7 @@ spufs_create_context(struct inode *inode, struct dentry *dentry,
 	int affinity;
 	struct spu_gang *gang;
 	struct spu_context *neighbor;
+	struct path path = {.mnt = mnt, .dentry = dentry};
 
 	ret = -EPERM;
 	if ((flags & SPU_CREATE_NOSCHED) &&
@@ -494,7 +491,7 @@ spufs_create_context(struct inode *inode, struct dentry *dentry,
 			put_spu_context(neighbor);
 	}
 
-	ret = spufs_context_open(dentry, mnt);
+	ret = spufs_context_open(&path);
 	if (ret < 0) {
 		WARN_ON(spufs_rmdir(inode, dentry));
 		if (affinity)
@@ -551,7 +548,7 @@ out:
 	return ret;
 }
 
-static int spufs_gang_open(struct dentry *dentry, struct vfsmount *mnt)
+static int spufs_gang_open(struct path *path)
 {
 	int ret;
 	struct file *filp;
@@ -564,7 +561,7 @@ static int spufs_gang_open(struct dentry *dentry, struct vfsmount *mnt)
 	 * get references for dget and mntget, will be released
 	 * in error path of *_open().
 	 */
-	filp = dentry_open(dget(dentry), mntget(mnt), O_RDONLY, current_cred());
+	filp = dentry_open(path, O_RDONLY, current_cred());
 	if (IS_ERR(filp)) {
 		put_unused_fd(ret);
 		return PTR_ERR(filp);
@@ -579,13 +576,14 @@ static int spufs_create_gang(struct inode *inode,
 			struct dentry *dentry,
 			struct vfsmount *mnt, umode_t mode)
 {
+	struct path path = {.mnt = mnt, .dentry = dentry};
 	int ret;
 
 	ret = spufs_mkgang(inode, dentry, mode & S_IRWXUGO);
 	if (ret)
 		goto out;
 
-	ret = spufs_gang_open(dentry, mnt);
+	ret = spufs_gang_open(&path);
 	if (ret < 0) {
 		int err = simple_rmdir(inode, dentry);
 		WARN_ON(err);
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index aa9103f8f01..abf645c1703 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -257,8 +257,8 @@ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid)
 		 * corresponding to the autofs fs we want to open.
 		 */
 
-		filp = dentry_open(path.dentry, path.mnt, O_RDONLY,
-				   current_cred());
+		filp = dentry_open(&path, O_RDONLY, current_cred());
+		path_put(&path);
 		if (IS_ERR(filp)) {
 			err = PTR_ERR(filp);
 			goto out;
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 0e3c0924cc3..c0353dfac51 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -891,6 +891,7 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page)
 	struct cachefiles_cache *cache;
 	mm_segment_t old_fs;
 	struct file *file;
+	struct path path;
 	loff_t pos, eof;
 	size_t len;
 	void *data;
@@ -916,10 +917,9 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page)
 
 	/* write the page to the backing filesystem and let it store it in its
 	 * own time */
-	dget(object->backer);
-	mntget(cache->mnt);
-	file = dentry_open(object->backer, cache->mnt, O_RDWR,
-			   cache->cache_cred);
+	path.mnt = cache->mnt;
+	path.dentry = object->backer;
+	file = dentry_open(&path, O_RDWR, cache->cache_cred);
 	if (IS_ERR(file)) {
 		ret = PTR_ERR(file);
 	} else {
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
index c7d199dc7d2..809e67d05ca 100644
--- a/fs/ecryptfs/kthread.c
+++ b/fs/ecryptfs/kthread.c
@@ -29,8 +29,7 @@
 
 struct ecryptfs_open_req {
 	struct file **lower_file;
-	struct dentry *lower_dentry;
-	struct vfsmount *lower_mnt;
+	struct path path;
 	struct completion done;
 	struct list_head kthread_ctl_list;
 };
@@ -74,10 +73,7 @@ static int ecryptfs_threadfn(void *ignored)
 					       struct ecryptfs_open_req,
 					       kthread_ctl_list);
 			list_del(&req->kthread_ctl_list);
-			dget(req->lower_dentry);
-			mntget(req->lower_mnt);
-			(*req->lower_file) = dentry_open(
-				req->lower_dentry, req->lower_mnt,
+			*req->lower_file = dentry_open(&req->path,
 				(O_RDWR | O_LARGEFILE), current_cred());
 			complete(&req->done);
 		}
@@ -140,23 +136,22 @@ int ecryptfs_privileged_open(struct file **lower_file,
 	int flags = O_LARGEFILE;
 	int rc = 0;
 
+	init_completion(&req.done);
+	req.lower_file = lower_file;
+	req.path.dentry = lower_dentry;
+	req.path.mnt = lower_mnt;
+
 	/* Corresponding dput() and mntput() are done when the
 	 * lower file is fput() when all eCryptfs files for the inode are
 	 * released. */
-	dget(lower_dentry);
-	mntget(lower_mnt);
 	flags |= IS_RDONLY(lower_dentry->d_inode) ? O_RDONLY : O_RDWR;
-	(*lower_file) = dentry_open(lower_dentry, lower_mnt, flags, cred);
+	(*lower_file) = dentry_open(&req.path, flags, cred);
 	if (!IS_ERR(*lower_file))
 		goto out;
 	if ((flags & O_ACCMODE) == O_RDONLY) {
 		rc = PTR_ERR((*lower_file));
 		goto out;
 	}
-	init_completion(&req.done);
-	req.lower_file = lower_file;
-	req.lower_dentry = lower_dentry;
-	req.lower_mnt = lower_mnt;
 	mutex_lock(&ecryptfs_kthread_ctl.mux);
 	if (ecryptfs_kthread_ctl.flags & ECRYPTFS_KTHREAD_ZOMBIE) {
 		rc = -EIO;
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index b42063cf1b2..29ab099e3e0 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -19,19 +19,19 @@
 #define dprintk(fmt, args...) do{}while(0)
 
 
-static int get_name(struct vfsmount *mnt, struct dentry *dentry, char *name,
-		struct dentry *child);
+static int get_name(const struct path *path, char *name, struct dentry *child);
 
 
 static int exportfs_get_name(struct vfsmount *mnt, struct dentry *dir,
 		char *name, struct dentry *child)
 {
 	const struct export_operations *nop = dir->d_sb->s_export_op;
+	struct path path = {.mnt = mnt, .dentry = dir};
 
 	if (nop->get_name)
 		return nop->get_name(dir, name, child);
 	else
-		return get_name(mnt, dir, name, child);
+		return get_name(&path, name, child);
 }
 
 /*
@@ -249,11 +249,10 @@ static int filldir_one(void * __buf, const char * name, int len,
  * calls readdir on the parent until it finds an entry with
  * the same inode number as the child, and returns that.
  */
-static int get_name(struct vfsmount *mnt, struct dentry *dentry,
-		char *name, struct dentry *child)
+static int get_name(const struct path *path, char *name, struct dentry *child)
 {
 	const struct cred *cred = current_cred();
-	struct inode *dir = dentry->d_inode;
+	struct inode *dir = path->dentry->d_inode;
 	int error;
 	struct file *file;
 	struct getdents_callback buffer;
@@ -267,7 +266,7 @@ static int get_name(struct vfsmount *mnt, struct dentry *dentry,
 	/*
 	 * Open the directory ...
 	 */
-	file = dentry_open(dget(dentry), mntget(mnt), O_RDONLY, cred);
+	file = dentry_open(path, O_RDONLY, cred);
 	error = PTR_ERR(file);
 	if (IS_ERR(file))
 		goto out;
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index e5c06531dcc..c1dffe47fde 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -420,8 +420,7 @@ static int hppfs_open(struct inode *inode, struct file *file)
 {
 	const struct cred *cred = file->f_cred;
 	struct hppfs_private *data;
-	struct vfsmount *proc_mnt;
-	struct dentry *proc_dentry;
+	struct path path;
 	char *host_file;
 	int err, fd, type, filter;
 
@@ -434,12 +433,11 @@ static int hppfs_open(struct inode *inode, struct file *file)
 	if (host_file == NULL)
 		goto out_free2;
 
-	proc_dentry = HPPFS_I(inode)->proc_dentry;
-	proc_mnt = inode->i_sb->s_fs_info;
+	path.mnt = inode->i_sb->s_fs_info;
+	path.dentry = HPPFS_I(inode)->proc_dentry;
 
 	/* XXX This isn't closed anywhere */
-	data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt),
-				      file_mode(file->f_mode), cred);
+	data->proc_file = dentry_open(&path, file_mode(file->f_mode), cred);
 	err = PTR_ERR(data->proc_file);
 	if (IS_ERR(data->proc_file))
 		goto out_free1;
@@ -484,8 +482,7 @@ static int hppfs_dir_open(struct inode *inode, struct file *file)
 {
 	const struct cred *cred = file->f_cred;
 	struct hppfs_private *data;
-	struct vfsmount *proc_mnt;
-	struct dentry *proc_dentry;
+	struct path path;
 	int err;
 
 	err = -ENOMEM;
@@ -493,10 +490,9 @@ static int hppfs_dir_open(struct inode *inode, struct file *file)
 	if (data == NULL)
 		goto out;
 
-	proc_dentry = HPPFS_I(inode)->proc_dentry;
-	proc_mnt = inode->i_sb->s_fs_info;
-	data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt),
-				      file_mode(file->f_mode), cred);
+	path.mnt = inode->i_sb->s_fs_info;
+	path.dentry = HPPFS_I(inode)->proc_dentry;
+	data->proc_file = dentry_open(&path, file_mode(file->f_mode), cred);
 	err = PTR_ERR(data->proc_file);
 	if (IS_ERR(data->proc_file))
 		goto out_free;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 05d9eee6be3..4700a0a929d 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -745,7 +745,7 @@ __be32
 nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
 			int may_flags, struct file **filp)
 {
-	struct dentry	*dentry;
+	struct path	path;
 	struct inode	*inode;
 	int		flags = O_RDONLY|O_LARGEFILE;
 	__be32		err;
@@ -762,8 +762,9 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
 	if (err)
 		goto out;
 
-	dentry = fhp->fh_dentry;
-	inode = dentry->d_inode;
+	path.mnt = fhp->fh_export->ex_path.mnt;
+	path.dentry = fhp->fh_dentry;
+	inode = path.dentry->d_inode;
 
 	/* Disallow write access to files with the append-only bit set
 	 * or any access when mandatory locking enabled
@@ -792,8 +793,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
 		else
 			flags = O_WRONLY|O_LARGEFILE;
 	}
-	*filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt),
-			    flags, current_cred());
+	*filp = dentry_open(&path, flags, current_cred());
 	if (IS_ERR(*filp))
 		host_err = PTR_ERR(*filp);
 	else {
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 3568c8a8b13..d4380366973 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -61,8 +61,6 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
 static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
 {
 	int client_fd;
-	struct dentry *dentry;
-	struct vfsmount *mnt;
 	struct file *new_file;
 
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
@@ -81,12 +79,10 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
 	 * we need a new file handle for the userspace program so it can read even if it was
 	 * originally opened O_WRONLY.
 	 */
-	dentry = dget(event->path.dentry);
-	mnt = mntget(event->path.mnt);
 	/* it's possible this event was an overflow event.  in that case dentry and mnt
 	 * are NULL;  That's fine, just don't call dentry open */
-	if (dentry && mnt)
-		new_file = dentry_open(dentry, mnt,
+	if (event->path.dentry && event->path.mnt)
+		new_file = dentry_open(&event->path,
 				       group->fanotify_data.f_flags | FMODE_NONOTIFY,
 				       current_cred());
 	else
diff --git a/fs/open.c b/fs/open.c
index 75bea868ef8..1e914b397e1 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -766,11 +766,7 @@ int finish_no_open(struct file *file, struct dentry *dentry)
 }
 EXPORT_SYMBOL(finish_no_open);
 
-/*
- * dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an
- * error.
- */
-struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
+struct file *dentry_open(const struct path *path, int flags,
 			 const struct cred *cred)
 {
 	int error;
@@ -779,19 +775,16 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
 	validate_creds(cred);
 
 	/* We must always pass in a valid mount pointer. */
-	BUG_ON(!mnt);
+	BUG_ON(!path->mnt);
 
 	error = -ENFILE;
 	f = get_empty_filp();
-	if (f == NULL) {
-		dput(dentry);
-		mntput(mnt);
+	if (f == NULL)
 		return ERR_PTR(error);
-	}
 
 	f->f_flags = flags;
-	f->f_path.mnt = mnt;
-	f->f_path.dentry = dentry;
+	f->f_path = *path;
+	path_get(&f->f_path);
 	error = do_dentry_open(f, NULL, cred);
 	if (!error) {
 		error = open_check_o_direct(f);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 3a05a41b5d7..1f1535d25a9 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -208,6 +208,7 @@ xfs_open_by_handle(
 	struct inode		*inode;
 	struct dentry		*dentry;
 	fmode_t			fmode;
+	struct path		path;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -XFS_ERROR(EPERM);
@@ -252,8 +253,10 @@ xfs_open_by_handle(
 		goto out_dput;
 	}
 
-	filp = dentry_open(dentry, mntget(parfilp->f_path.mnt),
-			   hreq->oflags, cred);
+	path.mnt = parfilp->f_path.mnt;
+	path.dentry = dentry;
+	filp = dentry_open(&path, hreq->oflags, cred);
+	dput(dentry);
 	if (IS_ERR(filp)) {
 		put_unused_fd(fd);
 		return PTR_ERR(filp);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 34acf51273d..8fabb037a48 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2060,8 +2060,7 @@ extern long do_sys_open(int dfd, const char __user *filename, int flags,
 extern struct file *filp_open(const char *, int, umode_t);
 extern struct file *file_open_root(struct dentry *, struct vfsmount *,
 				   const char *, int);
-extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
-				 const struct cred *);
+extern struct file * dentry_open(const struct path *, int, const struct cred *);
 extern int filp_close(struct file *, fl_owner_t id);
 extern char * getname(const char __user *);
 enum {
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 2dee38d53c7..f8e54f5b908 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -721,8 +721,8 @@ static int mq_attr_ok(struct ipc_namespace *ipc_ns, struct mq_attr *attr)
 /*
  * Invoked when creating a new queue via sys_mq_open
  */
-static struct file *do_create(struct ipc_namespace *ipc_ns, struct dentry *dir,
-			struct dentry *dentry, int oflag, umode_t mode,
+static struct file *do_create(struct ipc_namespace *ipc_ns, struct inode *dir,
+			struct path *path, int oflag, umode_t mode,
 			struct mq_attr *attr)
 {
 	const struct cred *cred = current_cred();
@@ -732,9 +732,9 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct dentry *dir,
 	if (attr) {
 		ret = mq_attr_ok(ipc_ns, attr);
 		if (ret)
-			goto out;
+			return ERR_PTR(ret);
 		/* store for use during create */
-		dentry->d_fsdata = attr;
+		path->dentry->d_fsdata = attr;
 	} else {
 		struct mq_attr def_attr;
 
@@ -744,71 +744,51 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct dentry *dir,
 					  ipc_ns->mq_msgsize_default);
 		ret = mq_attr_ok(ipc_ns, &def_attr);
 		if (ret)
-			goto out;
+			return ERR_PTR(ret);
 	}
 
 	mode &= ~current_umask();
-	ret = mnt_want_write(ipc_ns->mq_mnt);
+	ret = mnt_want_write(path->mnt);
 	if (ret)
-		goto out;
-	ret = vfs_create(dir->d_inode, dentry, mode, true);
-	dentry->d_fsdata = NULL;
-	if (ret)
-		goto out_drop_write;
-
-	result = dentry_open(dentry, ipc_ns->mq_mnt, oflag, cred);
+		return ERR_PTR(ret);
+	ret = vfs_create(dir, path->dentry, mode, true);
+	path->dentry->d_fsdata = NULL;
+	if (!ret)
+		result = dentry_open(path, oflag, cred);
+	else
+		result = ERR_PTR(ret);
 	/*
 	 * dentry_open() took a persistent mnt_want_write(),
 	 * so we can now drop this one.
 	 */
-	mnt_drop_write(ipc_ns->mq_mnt);
+	mnt_drop_write(path->mnt);
 	return result;
-
-out_drop_write:
-	mnt_drop_write(ipc_ns->mq_mnt);
-out:
-	dput(dentry);
-	mntput(ipc_ns->mq_mnt);
-	return ERR_PTR(ret);
 }
 
 /* Opens existing queue */
-static struct file *do_open(struct ipc_namespace *ipc_ns,
-				struct dentry *dentry, int oflag)
+static struct file *do_open(struct path *path, int oflag)
 {
-	int ret;
-	const struct cred *cred = current_cred();
-
 	static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
 						  MAY_READ | MAY_WRITE };
-
-	if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) {
-		ret = -EINVAL;
-		goto err;
-	}
-
-	if (inode_permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE])) {
-		ret = -EACCES;
-		goto err;
-	}
-
-	return dentry_open(dentry, ipc_ns->mq_mnt, oflag, cred);
-
-err:
-	dput(dentry);
-	mntput(ipc_ns->mq_mnt);
-	return ERR_PTR(ret);
+	int acc;
+	if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY))
+		return ERR_PTR(-EINVAL);
+	acc = oflag2acc[oflag & O_ACCMODE];
+	if (inode_permission(path->dentry->d_inode, acc))
+		return ERR_PTR(-EACCES);
+	return dentry_open(path, oflag, current_cred());
 }
 
 SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode,
 		struct mq_attr __user *, u_attr)
 {
-	struct dentry *dentry;
+	struct path path;
 	struct file *filp;
 	char *name;
 	struct mq_attr attr;
 	int fd, error;
 	struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
+	struct dentry *root = ipc_ns->mq_mnt->mnt_root;
 
 	if (u_attr && copy_from_user(&attr, u_attr, sizeof(struct mq_attr)))
 		return -EFAULT;
@@ -822,52 +802,49 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode,
 	if (fd < 0)
 		goto out_putname;
 
-	mutex_lock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
-	dentry = lookup_one_len(name, ipc_ns->mq_mnt->mnt_root, strlen(name));
-	if (IS_ERR(dentry)) {
-		error = PTR_ERR(dentry);
+	error = 0;
+	mutex_lock(&root->d_inode->i_mutex);
+	path.dentry = lookup_one_len(name, root, strlen(name));
+	if (IS_ERR(path.dentry)) {
+		error = PTR_ERR(path.dentry);
 		goto out_putfd;
 	}
-	mntget(ipc_ns->mq_mnt);
+	path.mnt = mntget(ipc_ns->mq_mnt);
 
 	if (oflag & O_CREAT) {
-		if (dentry->d_inode) {	/* entry already exists */
-			audit_inode(name, dentry);
+		if (path.dentry->d_inode) {	/* entry already exists */
+			audit_inode(name, path.dentry);
 			if (oflag & O_EXCL) {
 				error = -EEXIST;
 				goto out;
 			}
-			filp = do_open(ipc_ns, dentry, oflag);
+			filp = do_open(&path, oflag);
 		} else {
-			filp = do_create(ipc_ns, ipc_ns->mq_mnt->mnt_root,
-						dentry, oflag, mode,
+			filp = do_create(ipc_ns, root->d_inode,
+						&path, oflag, mode,
 						u_attr ? &attr : NULL);
 		}
 	} else {
-		if (!dentry->d_inode) {
+		if (!path.dentry->d_inode) {
 			error = -ENOENT;
 			goto out;
 		}
-		audit_inode(name, dentry);
-		filp = do_open(ipc_ns, dentry, oflag);
+		audit_inode(name, path.dentry);
+		filp = do_open(&path, oflag);
 	}
 
-	if (IS_ERR(filp)) {
+	if (!IS_ERR(filp))
+		fd_install(fd, filp);
+	else
 		error = PTR_ERR(filp);
-		goto out_putfd;
-	}
-
-	fd_install(fd, filp);
-	goto out_upsem;
-
 out:
-	dput(dentry);
-	mntput(ipc_ns->mq_mnt);
+	path_put(&path);
 out_putfd:
-	put_unused_fd(fd);
-	fd = error;
-out_upsem:
-	mutex_unlock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
+	if (error) {
+		put_unused_fd(fd);
+		fd = error;
+	}
+	mutex_unlock(&root->d_inode->i_mutex);
 out_putname:
 	putname(name);
 	return fd;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 372ec6502aa..e423f5fe67f 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2157,8 +2157,7 @@ static inline void flush_unauthorized_files(const struct cred *cred,
 						get_file(devnull);
 					} else {
 						devnull = dentry_open(
-							dget(selinux_null),
-							mntget(selinuxfs_mount),
+							&selinux_null,
 							O_RDWR, cred);
 						if (IS_ERR(devnull)) {
 							devnull = NULL;
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index dde2005407a..6d3885165d1 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -221,7 +221,7 @@ extern void selinux_status_update_policyload(int seqno);
 extern void selinux_complete_init(void);
 extern int selinux_disable(void);
 extern void exit_sel_fs(void);
-extern struct dentry *selinux_null;
+extern struct path selinux_null;
 extern struct vfsmount *selinuxfs_mount;
 extern void selnl_notify_setenforce(int val);
 extern void selnl_notify_policyload(u32 seqno);
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 3ad29025128..298e695d682 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -1297,7 +1297,7 @@ out:
 
 #define NULL_FILE_NAME "null"
 
-struct dentry *selinux_null;
+struct path selinux_null;
 
 static ssize_t sel_read_avc_cache_threshold(struct file *filp, char __user *buf,
 					    size_t count, loff_t *ppos)
@@ -1838,7 +1838,7 @@ static int sel_fill_super(struct super_block *sb, void *data, int silent)
 
 	init_special_inode(inode, S_IFCHR | S_IRUGO | S_IWUGO, MKDEV(MEM_MAJOR, 3));
 	d_add(dentry, inode);
-	selinux_null = dentry;
+	selinux_null.dentry = dentry;
 
 	dentry = sel_make_dir(sb->s_root, "avc", &sel_last_ino);
 	if (IS_ERR(dentry)) {
@@ -1912,7 +1912,7 @@ static int __init init_sel_fs(void)
 		return err;
 	}
 
-	selinuxfs_mount = kern_mount(&sel_fs_type);
+	selinux_null.mnt = selinuxfs_mount = kern_mount(&sel_fs_type);
 	if (IS_ERR(selinuxfs_mount)) {
 		printk(KERN_ERR "selinuxfs:  could not mount!\n");
 		err = PTR_ERR(selinuxfs_mount);
-- 
cgit v1.2.3-70-g09d2


From 11e62a8fabd003352e852e74e1b64a437fd908c6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 19 Jul 2012 11:17:49 +0400
Subject: btrfs: switch btrfs_ioctl_balance() to mnt_want_write_file()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/ioctl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0e92e576300..1e9f6c019ad 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3268,7 +3268,7 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
 	if (fs_info->sb->s_flags & MS_RDONLY)
 		return -EROFS;
 
-	ret = mnt_want_write(file->f_path.mnt);
+	ret = mnt_want_write_file(file);
 	if (ret)
 		return ret;
 
@@ -3338,7 +3338,7 @@ out_bargs:
 out:
 	mutex_unlock(&fs_info->balance_mutex);
 	mutex_unlock(&fs_info->volume_mutex);
-	mnt_drop_write(file->f_path.mnt);
+	mnt_drop_write_file(file);
 	return ret;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 8cae6f7158ec1fa44c8a04a43db7d8020ec60437 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 19 Jul 2012 11:19:07 +0400
Subject: ext4: switch EXT4_IOC_RESIZE_FS to mnt_want_write_file()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext4/ioctl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index e34deac3f36..23788b34597 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -390,7 +390,7 @@ group_add_out:
 		if (err)
 			return err;
 
-		err = mnt_want_write(filp->f_path.mnt);
+		err = mnt_want_write_file(filp);
 		if (err)
 			goto resizefs_out;
 
@@ -402,7 +402,7 @@ group_add_out:
 		}
 		if (err == 0)
 			err = err2;
-		mnt_drop_write(filp->f_path.mnt);
+		mnt_drop_write_file(filp);
 resizefs_out:
 		ext4_resize_end(sb);
 		return err;
-- 
cgit v1.2.3-70-g09d2


From 0ec4f431eb56d633da3a55da67d5c4b88886ccc7 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@fieldses.org>
Date: Mon, 23 Jul 2012 15:17:17 -0400
Subject: locks: fix checking of fcntl_setlease argument

The only checks of the long argument passed to fcntl(fd,F_SETLEASE,.)
are done after converting the long to an int.  Thus some illegal values
may be let through and cause problems in later code.

[ They actually *don't* cause problems in mainline, as of Dave Jones's
  commit 8d657eb3b438 "Remove easily user-triggerable BUG from
  generic_setlease", but we should fix this anyway.  And this patch will
  be necessary to fix real bugs on earlier kernels. ]

Cc: stable@vger.kernel.org
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/locks.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/locks.c b/fs/locks.c
index fce6238d52c..82c353304f9 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -308,7 +308,7 @@ static int flock_make_lock(struct file *filp, struct file_lock **lock,
 	return 0;
 }
 
-static int assign_type(struct file_lock *fl, int type)
+static int assign_type(struct file_lock *fl, long type)
 {
 	switch (type) {
 	case F_RDLCK:
@@ -445,7 +445,7 @@ static const struct lock_manager_operations lease_manager_ops = {
 /*
  * Initialize a lease, use the default lock manager operations
  */
-static int lease_init(struct file *filp, int type, struct file_lock *fl)
+static int lease_init(struct file *filp, long type, struct file_lock *fl)
  {
 	if (assign_type(fl, type) != 0)
 		return -EINVAL;
@@ -463,7 +463,7 @@ static int lease_init(struct file *filp, int type, struct file_lock *fl)
 }
 
 /* Allocate a file_lock initialised to this type of lease */
-static struct file_lock *lease_alloc(struct file *filp, int type)
+static struct file_lock *lease_alloc(struct file *filp, long type)
 {
 	struct file_lock *fl = locks_alloc_lock();
 	int error = -ENOMEM;
-- 
cgit v1.2.3-70-g09d2