From d03856bd5e5abac717da137dc60fe4a691769bd0 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Sat, 2 Aug 2008 18:51:32 -0400
Subject: ext4: Fix data corruption when writing to prealloc area

Inserting an extent can cause a new entry in the already existing index
block. That doesn't increase the depth of the instead. Instead it adds a
new leaf block. Now with the new leaf block the path information
corresponding to the logical block should be fetched from the new block.
The old path will be pointing to the old leaf block.

We need to recalucate the path information on extent insert
even if depth doesn't change. Without this change, the extent merge
after converting an unwritten extent to initialized extent takes the wrong
extent and cause data corruption.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/extents.c | 41 +++++++++++++++++++++++------------------
 1 file changed, 23 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 42c4c0c892e..8ee1fa54a4e 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2323,7 +2323,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		unsigned int newdepth;
 		/* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
 		if (allocated <= EXT4_EXT_ZERO_LEN) {
-			/* Mark first half uninitialized.
+			/*
+			 * iblock == ee_block is handled by the zerouout
+			 * at the beginning.
+			 * Mark first half uninitialized.
 			 * Mark second half initialized and zero out the
 			 * initialized extent
 			 */
@@ -2346,7 +2349,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 				ex->ee_len   = orig_ex.ee_len;
 				ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
 				ext4_ext_dirty(handle, inode, path + depth);
-				/* zeroed the full extent */
+				/* blocks available from iblock */
 				return allocated;
 
 			} else if (err)
@@ -2374,6 +2377,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 					err = PTR_ERR(path);
 					return err;
 				}
+				/* get the second half extent details */
 				ex = path[depth].p_ext;
 				err = ext4_ext_get_access(handle, inode,
 								path + depth);
@@ -2403,6 +2407,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 			ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
 			ext4_ext_dirty(handle, inode, path + depth);
 			/* zeroed the full extent */
+			/* blocks available from iblock */
 			return allocated;
 
 		} else if (err)
@@ -2418,23 +2423,22 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		 */
 		orig_ex.ee_len = cpu_to_le16(ee_len -
 						ext4_ext_get_actual_len(ex3));
-		if (newdepth != depth) {
-			depth = newdepth;
-			ext4_ext_drop_refs(path);
-			path = ext4_ext_find_extent(inode, iblock, path);
-			if (IS_ERR(path)) {
-				err = PTR_ERR(path);
-				goto out;
-			}
-			eh = path[depth].p_hdr;
-			ex = path[depth].p_ext;
-			if (ex2 != &newex)
-				ex2 = ex;
-
-			err = ext4_ext_get_access(handle, inode, path + depth);
-			if (err)
-				goto out;
+		depth = newdepth;
+		ext4_ext_drop_refs(path);
+		path = ext4_ext_find_extent(inode, iblock, path);
+		if (IS_ERR(path)) {
+			err = PTR_ERR(path);
+			goto out;
 		}
+		eh = path[depth].p_hdr;
+		ex = path[depth].p_ext;
+		if (ex2 != &newex)
+			ex2 = ex;
+
+		err = ext4_ext_get_access(handle, inode, path + depth);
+		if (err)
+			goto out;
+
 		allocated = max_blocks;
 
 		/* If extent has less than EXT4_EXT_ZERO_LEN and we are trying
@@ -2452,6 +2456,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 			ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
 			ext4_ext_dirty(handle, inode, path + depth);
 			/* zero out the first half */
+			/* blocks available from iblock */
 			return allocated;
 		}
 	}
-- 
cgit v1.2.3-70-g09d2


From 8a266467b8c4841ca994d0fe59f39e584650e3df Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 26 Jul 2008 14:34:21 -0400
Subject: ext4: Allow read/only mounts with corrupted block group checksums

If the block group checksums are corrupted, still allow the mount to
succeed, so e2fsck can have a chance to try to fix things up.  Add
code in the remount r/w path to make sure the block group checksums
are valid before allowing the filesystem to be remounted read/write.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/super.c | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b5479b1dff1..876e1c62036 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1626,7 +1626,8 @@ static int ext4_check_descriptors(struct super_block *sb)
 			       "Checksum for group %lu failed (%u!=%u)\n",
 			       i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
 			       gdp)), le16_to_cpu(gdp->bg_checksum));
-			return 0;
+			if (!(sb->s_flags & MS_RDONLY))
+				return 0;
 		}
 		if (!flexbg_flag)
 			first_block += EXT4_BLOCKS_PER_GROUP(sb);
@@ -2961,6 +2962,7 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
 	ext4_fsblk_t n_blocks_count = 0;
 	unsigned long old_sb_flags;
 	struct ext4_mount_options old_opts;
+	ext4_group_t g;
 	int err;
 #ifdef CONFIG_QUOTA
 	int i;
@@ -3038,6 +3040,26 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
 				goto restore_opts;
 			}
 
+			/*
+			 * Make sure the group descriptor checksums
+			 * are sane.  If they aren't, refuse to
+			 * remount r/w.
+			 */
+			for (g = 0; g < sbi->s_groups_count; g++) {
+				struct ext4_group_desc *gdp =
+					ext4_get_group_desc(sb, g, NULL);
+
+				if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
+					printk(KERN_ERR
+	       "EXT4-fs: ext4_remount: "
+		"Checksum for group %lu failed (%u!=%u)\n",
+		g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
+					       le16_to_cpu(gdp->bg_checksum));
+					err = -EINVAL;
+					goto restore_opts;
+				}
+			}
+
 			/*
 			 * If we have an unprocessed orphan list hanging
 			 * around from a previously readonly bdev mount,
-- 
cgit v1.2.3-70-g09d2


From e29d1cde63be0b5f1739416b5574a83c34bf8eeb Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Sat, 2 Aug 2008 21:21:02 -0400
Subject: ext4: sync up block and inode bitmap reading functions

ext4_read_block_bitmap and read_inode_bitmap do essentially
the same thing, and yet they are structured quite differently.
I came across this difference while looking at doing bg locking
during bg initialization.

This patch:

* removes unnecessary casts in the error messages
* renames read_inode_bitmap to ext4_read_inode_bitmap
* and more substantially, restructures the inode bitmap
  reading function to be more like the block bitmap counterpart.

The change to the inode bitmap reader simplifies the locking
to be applied in the next patch.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/balloc.c |  8 ++++----
 fs/ext4/ialloc.c | 51 +++++++++++++++++++++++++++++----------------------
 2 files changed, 33 insertions(+), 26 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 495ab21b983..386cb79ac4b 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -314,8 +314,8 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
 	if (unlikely(!bh)) {
 		ext4_error(sb, __func__,
 			    "Cannot read block bitmap - "
-			    "block_group = %d, block_bitmap = %llu",
-			    (int)block_group, (unsigned long long)bitmap_blk);
+			    "block_group = %lu, block_bitmap = %llu",
+			    block_group, bitmap_blk);
 		return NULL;
 	}
 	if (bh_uptodate_or_lock(bh))
@@ -331,8 +331,8 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
 		put_bh(bh);
 		ext4_error(sb, __func__,
 			    "Cannot read block bitmap - "
-			    "block_group = %d, block_bitmap = %llu",
-			    (int)block_group, (unsigned long long)bitmap_blk);
+			    "block_group = %lu, block_bitmap = %llu",
+			    block_group, bitmap_blk);
 		return NULL;
 	}
 	ext4_valid_block_bitmap(sb, desc, block_group, bh);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index a92eb305344..09cdcd5914d 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -97,34 +97,41 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
  * Return buffer_head of bitmap on success or NULL.
  */
 static struct buffer_head *
-read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
+ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
 {
 	struct ext4_group_desc *desc;
 	struct buffer_head *bh = NULL;
+	ext4_fsblk_t bitmap_blk;
 
 	desc = ext4_get_group_desc(sb, block_group, NULL);
 	if (!desc)
-		goto error_out;
+		return NULL;
+	bitmap_blk = ext4_inode_bitmap(sb, desc);
+	bh = sb_getblk(sb, bitmap_blk);
+	if (unlikely(!bh)) {
+		ext4_error(sb, __func__,
+			    "Cannot read inode bitmap - "
+			    "block_group = %lu, inode_bitmap = %llu",
+			    block_group, bitmap_blk);
+		return NULL;
+	}
+	if (bh_uptodate_or_lock(bh))
+		return bh;
+
 	if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
-		bh = sb_getblk(sb, ext4_inode_bitmap(sb, desc));
-		if (!buffer_uptodate(bh)) {
-			lock_buffer(bh);
-			if (!buffer_uptodate(bh)) {
-				ext4_init_inode_bitmap(sb, bh, block_group,
-						       desc);
-				set_buffer_uptodate(bh);
-			}
-			unlock_buffer(bh);
-		}
-	} else {
-		bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
+		ext4_init_inode_bitmap(sb, bh, block_group, desc);
+		set_buffer_uptodate(bh);
+		unlock_buffer(bh);
+		return bh;
 	}
-	if (!bh)
-		ext4_error(sb, "read_inode_bitmap",
+	if (bh_submit_read(bh) < 0) {
+		put_bh(bh);
+		ext4_error(sb, __func__,
 			    "Cannot read inode bitmap - "
 			    "block_group = %lu, inode_bitmap = %llu",
-			    block_group, ext4_inode_bitmap(sb, desc));
-error_out:
+			    block_group, bitmap_blk);
+		return NULL;
+	}
 	return bh;
 }
 
@@ -200,7 +207,7 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
 	}
 	block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
 	bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
-	bitmap_bh = read_inode_bitmap(sb, block_group);
+	bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
 	if (!bitmap_bh)
 		goto error_return;
 
@@ -623,7 +630,7 @@ got_group:
 			goto fail;
 
 		brelse(bitmap_bh);
-		bitmap_bh = read_inode_bitmap(sb, group);
+		bitmap_bh = ext4_read_inode_bitmap(sb, group);
 		if (!bitmap_bh)
 			goto fail;
 
@@ -891,7 +898,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
 
 	block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
 	bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
-	bitmap_bh = read_inode_bitmap(sb, block_group);
+	bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
 	if (!bitmap_bh) {
 		ext4_warning(sb, __func__,
 			     "inode bitmap error for orphan %lu", ino);
@@ -969,7 +976,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
 			continue;
 		desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
 		brelse(bitmap_bh);
-		bitmap_bh = read_inode_bitmap(sb, i);
+		bitmap_bh = ext4_read_inode_bitmap(sb, i);
 		if (!bitmap_bh)
 			continue;
 
-- 
cgit v1.2.3-70-g09d2


From b5f10eed8125702929e57cca7e5956b1b9b6d015 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Sat, 2 Aug 2008 21:21:08 -0400
Subject: ext4: lock block groups when initializing

I noticed when filling a 1T filesystem with 4 threads using the
fs_mark benchmark:

fs_mark -d /mnt/test -D 256 -n 100000 -t 4 -s 20480 -F -S 0

that I occasionally got checksum mismatch errors:

EXT4-fs error (device sdb): ext4_init_inode_bitmap: Checksum bad for group 6935

etc.  I'd reliably get 4-5 of them during the run.

It appears that the problem is likely a race to init the bg's
when the uninit_bg feature is enabled.

With the patch below, which adds sb_bgl_locking around initialization,
I was able to complete several runs with no errors or warnings.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/balloc.c  | 3 +++
 fs/ext4/ialloc.c  | 5 ++++-
 fs/ext4/mballoc.c | 3 +++
 fs/ext4/super.c   | 2 ++
 4 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 386cb79ac4b..1ae5004e93f 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -321,12 +321,15 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
 	if (bh_uptodate_or_lock(bh))
 		return bh;
 
+	spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
 	if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
 		ext4_init_block_bitmap(sb, bh, block_group, desc);
 		set_buffer_uptodate(bh);
 		unlock_buffer(bh);
+		spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
 		return bh;
 	}
+	spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
 	if (bh_submit_read(bh) < 0) {
 		put_bh(bh);
 		ext4_error(sb, __func__,
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 09cdcd5914d..655e760212b 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -118,12 +118,15 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
 	if (bh_uptodate_or_lock(bh))
 		return bh;
 
+	spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
 	if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
 		ext4_init_inode_bitmap(sb, bh, block_group, desc);
 		set_buffer_uptodate(bh);
 		unlock_buffer(bh);
+		spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
 		return bh;
 	}
+	spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
 	if (bh_submit_read(bh) < 0) {
 		put_bh(bh);
 		ext4_error(sb, __func__,
@@ -735,7 +738,7 @@ got:
 
 			/* When marking the block group with
 			 * ~EXT4_BG_INODE_UNINIT we don't want to depend
-			 * on the value of bg_itable_unsed even though
+			 * on the value of bg_itable_unused even though
 			 * mke2fs could have initialized the same for us.
 			 * Instead we calculated the value below
 			 */
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 8d141a25bbe..4258d3289a6 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -787,13 +787,16 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
 		if (bh_uptodate_or_lock(bh[i]))
 			continue;
 
+		spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
 		if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
 			ext4_init_block_bitmap(sb, bh[i],
 						first_group + i, desc);
 			set_buffer_uptodate(bh[i]);
 			unlock_buffer(bh[i]);
+			spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
 			continue;
 		}
+		spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
 		get_bh(bh[i]);
 		bh[i]->b_end_io = end_buffer_read_sync;
 		submit_bh(READ, bh[i]);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 876e1c62036..511997ef6f0 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1621,6 +1621,7 @@ static int ext4_check_descriptors(struct super_block *sb)
 			       "(block %llu)!", i, inode_table);
 			return 0;
 		}
+		spin_lock(sb_bgl_lock(sbi, i));
 		if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
 			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
 			       "Checksum for group %lu failed (%u!=%u)\n",
@@ -1629,6 +1630,7 @@ static int ext4_check_descriptors(struct super_block *sb)
 			if (!(sb->s_flags & MS_RDONLY))
 				return 0;
 		}
+		spin_unlock(sb_bgl_lock(sbi, i));
 		if (!flexbg_flag)
 			first_block += EXT4_BLOCKS_PER_GROUP(sb);
 	}
-- 
cgit v1.2.3-70-g09d2


From ce89f46cb833f89c58a08240faa6b5e963086b8a Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Wed, 23 Jul 2008 14:09:29 -0400
Subject: ext4: Improve error handling in mballoc

Don't call BUG_ON on file system failures. Instead use ext4_error and
also handle the continue case properly.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/mballoc.c | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 4258d3289a6..500d3920d41 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3736,20 +3736,23 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
 
 	bitmap_bh = ext4_read_block_bitmap(sb, group);
 	if (bitmap_bh == NULL) {
-		/* error handling here */
-		ext4_mb_release_desc(&e4b);
-		BUG_ON(bitmap_bh == NULL);
+		ext4_error(sb, __func__, "Error in reading block "
+				"bitmap for %lu\n", group);
+		return 0;
 	}
 
 	err = ext4_mb_load_buddy(sb, group, &e4b);
-	BUG_ON(err != 0); /* error handling here */
+	if (err) {
+		ext4_error(sb, __func__, "Error in loading buddy "
+				"information for %lu\n", group);
+		put_bh(bitmap_bh);
+		return 0;
+	}
 
 	if (needed == 0)
 		needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;
 
-	grp = ext4_get_group_info(sb, group);
 	INIT_LIST_HEAD(&list);
-
 	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
 repeat:
 	ext4_lock_group(sb, group);
@@ -3906,13 +3909,18 @@ repeat:
 		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
 
 		err = ext4_mb_load_buddy(sb, group, &e4b);
-		BUG_ON(err != 0); /* error handling here */
+		if (err) {
+			ext4_error(sb, __func__, "Error in loading buddy "
+					"information for %lu\n", group);
+			continue;
+		}
 
 		bitmap_bh = ext4_read_block_bitmap(sb, group);
 		if (bitmap_bh == NULL) {
-			/* error handling here */
+			ext4_error(sb, __func__, "Error in reading block "
+					"bitmap for %lu\n", group);
 			ext4_mb_release_desc(&e4b);
-			BUG_ON(bitmap_bh == NULL);
+			continue;
 		}
 
 		ext4_lock_group(sb, group);
@@ -4423,11 +4431,15 @@ do_more:
 		count -= overflow;
 	}
 	bitmap_bh = ext4_read_block_bitmap(sb, block_group);
-	if (!bitmap_bh)
+	if (!bitmap_bh) {
+		err = -EIO;
 		goto error_return;
+	}
 	gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
-	if (!gdp)
+	if (!gdp) {
+		err = -EIO;
 		goto error_return;
+	}
 
 	if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
 	    in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
-- 
cgit v1.2.3-70-g09d2


From 1320cbcf771a20b44cf580712b843d213ae75cd3 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Wed, 23 Jul 2008 14:09:26 -0400
Subject: ext4: Convert the usage of NR_CPUS to nr_cpu_ids.

NR_CPUS can be really large. We should be using nr_cpu_ids instead.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/mballoc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 500d3920d41..49bec8404c5 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2540,7 +2540,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 	sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT;
 	sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
 
-	i = sizeof(struct ext4_locality_group) * NR_CPUS;
+	i = sizeof(struct ext4_locality_group) * nr_cpu_ids;
 	sbi->s_locality_groups = kmalloc(i, GFP_KERNEL);
 	if (sbi->s_locality_groups == NULL) {
 		clear_opt(sbi->s_mount_opt, MBALLOC);
@@ -2548,7 +2548,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 		kfree(sbi->s_mb_maxs);
 		return -ENOMEM;
 	}
-	for (i = 0; i < NR_CPUS; i++) {
+	for (i = 0; i < nr_cpu_ids; i++) {
 		struct ext4_locality_group *lg;
 		lg = &sbi->s_locality_groups[i];
 		mutex_init(&lg->lg_mutex);
-- 
cgit v1.2.3-70-g09d2


From 6be2ded1d7c51b39144b9f07d2c839e1bd8707f1 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Wed, 23 Jul 2008 14:14:05 -0400
Subject: ext4: Don't allow lg prealloc list to be grow large.

Currently, the locality group prealloc list is freed only when there
is a block allocation failure. This can result in large number of
entries in the preallocation list making ext4_mb_use_preallocated()
expensive.

To fix this, we convert the locality group prealloc list to a hash
list. The hash index is the order of number of blocks in the prealloc
space with a max order of 9. When adding prealloc space to the list we
make sure total entries for each order does not exceed 8. If it is
more than 8 we discard few entries and make sure the we have only <= 5
entries.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/mballoc.c | 213 +++++++++++++++++++++++++++++++++++++++++++++++-------
 fs/ext4/mballoc.h |  10 ++-
 2 files changed, 193 insertions(+), 30 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 49bec8404c5..865e9ddb44d 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2480,7 +2480,7 @@ err_freesgi:
 int ext4_mb_init(struct super_block *sb, int needs_recovery)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	unsigned i;
+	unsigned i, j;
 	unsigned offset;
 	unsigned max;
 	int ret;
@@ -2552,7 +2552,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 		struct ext4_locality_group *lg;
 		lg = &sbi->s_locality_groups[i];
 		mutex_init(&lg->lg_mutex);
-		INIT_LIST_HEAD(&lg->lg_prealloc_list);
+		for (j = 0; j < PREALLOC_TB_SIZE; j++)
+			INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
 		spin_lock_init(&lg->lg_prealloc_lock);
 	}
 
@@ -3263,6 +3264,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
 				struct ext4_prealloc_space *pa)
 {
 	unsigned int len = ac->ac_o_ex.fe_len;
+
 	ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
 					&ac->ac_b_ex.fe_group,
 					&ac->ac_b_ex.fe_start);
@@ -3285,6 +3287,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
 static noinline_for_stack int
 ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 {
+	int order, i;
 	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
 	struct ext4_locality_group *lg;
 	struct ext4_prealloc_space *pa;
@@ -3325,22 +3328,29 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 	lg = ac->ac_lg;
 	if (lg == NULL)
 		return 0;
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(pa, &lg->lg_prealloc_list, pa_inode_list) {
-		spin_lock(&pa->pa_lock);
-		if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) {
-			atomic_inc(&pa->pa_count);
-			ext4_mb_use_group_pa(ac, pa);
+	order  = fls(ac->ac_o_ex.fe_len) - 1;
+	if (order > PREALLOC_TB_SIZE - 1)
+		/* The max size of hash table is PREALLOC_TB_SIZE */
+		order = PREALLOC_TB_SIZE - 1;
+
+	for (i = order; i < PREALLOC_TB_SIZE; i++) {
+		rcu_read_lock();
+		list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
+					pa_inode_list) {
+			spin_lock(&pa->pa_lock);
+			if (pa->pa_deleted == 0 &&
+					pa->pa_free >= ac->ac_o_ex.fe_len) {
+				atomic_inc(&pa->pa_count);
+				ext4_mb_use_group_pa(ac, pa);
+				spin_unlock(&pa->pa_lock);
+				ac->ac_criteria = 20;
+				rcu_read_unlock();
+				return 1;
+			}
 			spin_unlock(&pa->pa_lock);
-			ac->ac_criteria = 20;
-			rcu_read_unlock();
-			return 1;
 		}
-		spin_unlock(&pa->pa_lock);
+		rcu_read_unlock();
 	}
-	rcu_read_unlock();
-
 	return 0;
 }
 
@@ -3563,6 +3573,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
 	pa->pa_free = pa->pa_len;
 	atomic_set(&pa->pa_count, 1);
 	spin_lock_init(&pa->pa_lock);
+	INIT_LIST_HEAD(&pa->pa_inode_list);
 	pa->pa_deleted = 0;
 	pa->pa_linear = 1;
 
@@ -3583,10 +3594,10 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
 	list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
 	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
 
-	spin_lock(pa->pa_obj_lock);
-	list_add_tail_rcu(&pa->pa_inode_list, &lg->lg_prealloc_list);
-	spin_unlock(pa->pa_obj_lock);
-
+	/*
+	 * We will later add the new pa to the right bucket
+	 * after updating the pa_free in ext4_mb_release_context
+	 */
 	return 0;
 }
 
@@ -4123,22 +4134,168 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
 
 }
 
+static noinline_for_stack void
+ext4_mb_discard_lg_preallocations(struct super_block *sb,
+					struct ext4_locality_group *lg,
+					int order, int total_entries)
+{
+	ext4_group_t group = 0;
+	struct ext4_buddy e4b;
+	struct list_head discard_list;
+	struct ext4_prealloc_space *pa, *tmp;
+	struct ext4_allocation_context *ac;
+
+	mb_debug("discard locality group preallocation\n");
+
+	INIT_LIST_HEAD(&discard_list);
+	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
+
+	spin_lock(&lg->lg_prealloc_lock);
+	list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
+						pa_inode_list) {
+		spin_lock(&pa->pa_lock);
+		if (atomic_read(&pa->pa_count)) {
+			/*
+			 * This is the pa that we just used
+			 * for block allocation. So don't
+			 * free that
+			 */
+			spin_unlock(&pa->pa_lock);
+			continue;
+		}
+		if (pa->pa_deleted) {
+			spin_unlock(&pa->pa_lock);
+			continue;
+		}
+		/* only lg prealloc space */
+		BUG_ON(!pa->pa_linear);
+
+		/* seems this one can be freed ... */
+		pa->pa_deleted = 1;
+		spin_unlock(&pa->pa_lock);
+
+		list_del_rcu(&pa->pa_inode_list);
+		list_add(&pa->u.pa_tmp_list, &discard_list);
+
+		total_entries--;
+		if (total_entries <= 5) {
+			/*
+			 * we want to keep only 5 entries
+			 * allowing it to grow to 8. This
+			 * mak sure we don't call discard
+			 * soon for this list.
+			 */
+			break;
+		}
+	}
+	spin_unlock(&lg->lg_prealloc_lock);
+
+	list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
+
+		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
+		if (ext4_mb_load_buddy(sb, group, &e4b)) {
+			ext4_error(sb, __func__, "Error in loading buddy "
+					"information for %lu\n", group);
+			continue;
+		}
+		ext4_lock_group(sb, group);
+		list_del(&pa->pa_group_list);
+		ext4_mb_release_group_pa(&e4b, pa, ac);
+		ext4_unlock_group(sb, group);
+
+		ext4_mb_release_desc(&e4b);
+		list_del(&pa->u.pa_tmp_list);
+		call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
+	}
+	if (ac)
+		kmem_cache_free(ext4_ac_cachep, ac);
+}
+
+/*
+ * We have incremented pa_count. So it cannot be freed at this
+ * point. Also we hold lg_mutex. So no parallel allocation is
+ * possible from this lg. That means pa_free cannot be updated.
+ *
+ * A parallel ext4_mb_discard_group_preallocations is possible.
+ * which can cause the lg_prealloc_list to be updated.
+ */
+
+static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
+{
+	int order, added = 0, lg_prealloc_count = 1;
+	struct super_block *sb = ac->ac_sb;
+	struct ext4_locality_group *lg = ac->ac_lg;
+	struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
+
+	order = fls(pa->pa_free) - 1;
+	if (order > PREALLOC_TB_SIZE - 1)
+		/* The max size of hash table is PREALLOC_TB_SIZE */
+		order = PREALLOC_TB_SIZE - 1;
+	/* Add the prealloc space to lg */
+	rcu_read_lock();
+	list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
+						pa_inode_list) {
+		spin_lock(&tmp_pa->pa_lock);
+		if (tmp_pa->pa_deleted) {
+			spin_unlock(&pa->pa_lock);
+			continue;
+		}
+		if (!added && pa->pa_free < tmp_pa->pa_free) {
+			/* Add to the tail of the previous entry */
+			list_add_tail_rcu(&pa->pa_inode_list,
+						&tmp_pa->pa_inode_list);
+			added = 1;
+			/*
+			 * we want to count the total
+			 * number of entries in the list
+			 */
+		}
+		spin_unlock(&tmp_pa->pa_lock);
+		lg_prealloc_count++;
+	}
+	if (!added)
+		list_add_tail_rcu(&pa->pa_inode_list,
+					&lg->lg_prealloc_list[order]);
+	rcu_read_unlock();
+
+	/* Now trim the list to be not more than 8 elements */
+	if (lg_prealloc_count > 8) {
+		ext4_mb_discard_lg_preallocations(sb, lg,
+						order, lg_prealloc_count);
+		return;
+	}
+	return ;
+}
+
 /*
  * release all resource we used in allocation
  */
 static int ext4_mb_release_context(struct ext4_allocation_context *ac)
 {
-	if (ac->ac_pa) {
-		if (ac->ac_pa->pa_linear) {
+	struct ext4_prealloc_space *pa = ac->ac_pa;
+	if (pa) {
+		if (pa->pa_linear) {
 			/* see comment in ext4_mb_use_group_pa() */
-			spin_lock(&ac->ac_pa->pa_lock);
-			ac->ac_pa->pa_pstart += ac->ac_b_ex.fe_len;
-			ac->ac_pa->pa_lstart += ac->ac_b_ex.fe_len;
-			ac->ac_pa->pa_free -= ac->ac_b_ex.fe_len;
-			ac->ac_pa->pa_len -= ac->ac_b_ex.fe_len;
-			spin_unlock(&ac->ac_pa->pa_lock);
+			spin_lock(&pa->pa_lock);
+			pa->pa_pstart += ac->ac_b_ex.fe_len;
+			pa->pa_lstart += ac->ac_b_ex.fe_len;
+			pa->pa_free -= ac->ac_b_ex.fe_len;
+			pa->pa_len -= ac->ac_b_ex.fe_len;
+			spin_unlock(&pa->pa_lock);
+			/*
+			 * We want to add the pa to the right bucket.
+			 * Remove it from the list and while adding
+			 * make sure the list to which we are adding
+			 * doesn't grow big.
+			 */
+			if (likely(pa->pa_free)) {
+				spin_lock(pa->pa_obj_lock);
+				list_del_rcu(&pa->pa_inode_list);
+				spin_unlock(pa->pa_obj_lock);
+				ext4_mb_add_n_trim(ac);
+			}
 		}
-		ext4_mb_put_pa(ac, ac->ac_sb, ac->ac_pa);
+		ext4_mb_put_pa(ac, ac->ac_sb, pa);
 	}
 	if (ac->ac_bitmap_page)
 		page_cache_release(ac->ac_bitmap_page);
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index bfe6add46bc..c7c9906c2a7 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -164,11 +164,17 @@ struct ext4_free_extent {
  * Locality group:
  *   we try to group all related changes together
  *   so that writeback can flush/allocate them together as well
+ *   Size of lg_prealloc_list hash is determined by MB_DEFAULT_GROUP_PREALLOC
+ *   (512). We store prealloc space into the hash based on the pa_free blocks
+ *   order value.ie, fls(pa_free)-1;
  */
+#define PREALLOC_TB_SIZE 10
 struct ext4_locality_group {
 	/* for allocator */
-	struct mutex		lg_mutex;	/* to serialize allocates */
-	struct list_head	lg_prealloc_list;/* list of preallocations */
+	/* to serialize allocates */
+	struct mutex		lg_mutex;
+	/* list of preallocations */
+	struct list_head	lg_prealloc_list[PREALLOC_TB_SIZE];
 	spinlock_t		lg_prealloc_lock;
 };
 
-- 
cgit v1.2.3-70-g09d2


From 9c83a923c67df311c467ec956009f0eb4019195d Mon Sep 17 00:00:00 2001
From: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Date: Sat, 26 Jul 2008 16:39:26 -0400
Subject: ext4: don't read inode block if the buffer has a write error

A transient I/O error can corrupt inode data.  Here is the scenario:

(1) update inode_A at the block_B
(2) pdflush writes out new inode_A to the filesystem, but it results
    in write I/O error, at this point, BH_Uptodate flag of the buffer
    for block_B is cleared and BH_Write_EIO is set
(3) create new inode_C which located at block_B, and
    __ext4_get_inode_loc() tries to read on-disk block_B because the
    buffer is not uptodate
(4) if it can read on-disk block_B successfully, inode_A is
    overwritten by old data

This patch makes __ext4_get_inode_loc() not read the inode block if the
buffer has BH_Write_EIO flag.  In this case, the buffer should have the
latest information, so setting the uptodate flag to the buffer (this
avoids WARN_ON_ONCE() in mark_buffer_dirty().)

According to this change, we would need to test BH_Write_EIO flag for the
error checking.  Currently nobody checks write I/O errors on metadata
buffers, but it will be done in other patches I'm working on.

Signed-off-by: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Cc: sugita <yumiko.sugita.yf@hitachi.com>
Cc: Satoshi OSHIMA <satoshi.oshima.fk@hitachi.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Jan Kara <jack@ucw.cz>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/inode.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'fs')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9843b046c23..efe8caa3811 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3590,6 +3590,16 @@ static int __ext4_get_inode_loc(struct inode *inode,
 	}
 	if (!buffer_uptodate(bh)) {
 		lock_buffer(bh);
+
+		/*
+		 * If the buffer has the write error flag, we have failed
+		 * to write out another inode in the same block.  In this
+		 * case, we don't have to read the block because we may
+		 * read the old inode data successfully.
+		 */
+		if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
+			set_buffer_uptodate(bh);
+
 		if (buffer_uptodate(bh)) {
 			/* someone brought it uptodate while we waited */
 			unlock_buffer(bh);
-- 
cgit v1.2.3-70-g09d2


From e9e34f4e8f42177c66754fec1edfd35e70c18f99 Mon Sep 17 00:00:00 2001
From: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Date: Thu, 31 Jul 2008 22:26:04 -0400
Subject: jbd2: don't abort if flushing file data failed

In ordered mode, the current jbd2 aborts the journal if a file data buffer
has an error.  But this behavior is unintended, and we found that it has
been adopted accidentally.

This patch undoes it and just calls printk() instead of aborting the
journal.  Unlike a similar patch for ext3/jbd, file data buffers are
written via generic_writepages().  But we also need to set AS_EIO
into their mappings because wait_on_page_writeback_range() clears
AS_EIO before a user process sees it.

Signed-off-by: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/commit.c | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index f8b3be87322..adf0395f318 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -262,8 +262,18 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
 		jinode->i_flags |= JI_COMMIT_RUNNING;
 		spin_unlock(&journal->j_list_lock);
 		err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
-		if (!ret)
-			ret = err;
+		if (err) {
+			/*
+			 * Because AS_EIO is cleared by
+			 * wait_on_page_writeback_range(), set it again so
+			 * that user process can get -EIO from fsync().
+			 */
+			set_bit(AS_EIO,
+				&jinode->i_vfs_inode->i_mapping->flags);
+
+			if (!ret)
+				ret = err;
+		}
 		spin_lock(&journal->j_list_lock);
 		jinode->i_flags &= ~JI_COMMIT_RUNNING;
 		wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
@@ -670,8 +680,14 @@ start_journal_io:
 	 * commit block, which happens below in such setting.
 	 */
 	err = journal_finish_inode_data_buffers(journal, commit_transaction);
-	if (err)
-		jbd2_journal_abort(journal, err);
+	if (err) {
+		char b[BDEVNAME_SIZE];
+
+		printk(KERN_WARNING
+			"JBD2: Detected IO errors while flushing file data "
+			"on %s\n", bdevname(journal->j_fs_dev, b));
+		err = 0;
+	}
 
 	/* Lo and behold: we have just managed to send a transaction to
            the log.  Before we can commit it, wait for the IO so far to
-- 
cgit v1.2.3-70-g09d2


From d5a0d4f732af3438e592efab4cb80076d1dd81b5 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Sat, 2 Aug 2008 18:51:06 -0400
Subject: ext4: fix ext4_da_write_begin error path

ext4_da_write_begin needs to call journal_stop before returning,
if the page allocation fails.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Acked-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/inode.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index efe8caa3811..37f834bc7cd 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2280,8 +2280,11 @@ retry:
 	}
 
 	page = __grab_cache_page(mapping, index);
-	if (!page)
-		return -ENOMEM;
+	if (!page) {
+		ext4_journal_stop(handle);
+		ret = -ENOMEM;
+		goto out;
+	}
 	*pagep = page;
 
 	ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-- 
cgit v1.2.3-70-g09d2


From 0123c93998511978556b03d2bb023af92aa24d55 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Fri, 1 Aug 2008 20:57:54 -0400
Subject: ext4: Fix ext4_ext_journal_restart()

The ext4_ext_journal_restart() is a convenience function which checks
to see if the requested number of credits is present, and if so it
closes the current transaction and attaches the current handle to the
new transaction.  Unfortunately, it wasn't proprely checking the
return value from ext4_journal_extend(), so it was starting a new
transaction when one was not necessary, and returning an error when
all that was necessary was to restart the handle with a new
transaction.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/extents.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 8ee1fa54a4e..f554703eb92 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -99,7 +99,7 @@ static int ext4_ext_journal_restart(handle_t *handle, int needed)
 	if (handle->h_buffer_credits > needed)
 		return 0;
 	err = ext4_journal_extend(handle, needed);
-	if (err)
+	if (err <= 0)
 		return err;
 	return ext4_journal_restart(handle, needed);
 }
-- 
cgit v1.2.3-70-g09d2


From bc965ab3f2b4b7bb898b11d61d25295c2053b8ac Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 2 Aug 2008 21:10:38 -0400
Subject: ext4: Fix lack of credits BUG() when deleting a badly fragmented
 inode

The extents codepath for ext4_truncate() requests journal transaction
credits in very small chunks, requesting only what is needed.  This
means there may not be enough credits left on the transaction handle
after ext4_truncate() returns and then when ext4_delete_inode() tries
finish up its work, it may not have enough transaction credits,
causing a BUG() oops in the jbd2 core.

Also, reserve an extra 2 blocks when starting an ext4_delete_inode()
since we need to update the inode bitmap, as well as update the
orphaned inode linked list.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/inode.c | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 37f834bc7cd..2697eaf0368 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -191,6 +191,7 @@ static int ext4_journal_test_restart(handle_t *handle, struct inode *inode)
 void ext4_delete_inode (struct inode * inode)
 {
 	handle_t *handle;
+	int err;
 
 	if (ext4_should_order_data(inode))
 		ext4_begin_ordered_truncate(inode, 0);
@@ -199,8 +200,9 @@ void ext4_delete_inode (struct inode * inode)
 	if (is_bad_inode(inode))
 		goto no_delete;
 
-	handle = start_transaction(inode);
+	handle = ext4_journal_start(inode, blocks_for_truncate(inode)+3);
 	if (IS_ERR(handle)) {
+		ext4_std_error(inode->i_sb, PTR_ERR(handle));
 		/*
 		 * If we're going to skip the normal cleanup, we still need to
 		 * make sure that the in-core orphan linked list is properly
@@ -213,8 +215,34 @@ void ext4_delete_inode (struct inode * inode)
 	if (IS_SYNC(inode))
 		handle->h_sync = 1;
 	inode->i_size = 0;
+	err = ext4_mark_inode_dirty(handle, inode);
+	if (err) {
+		ext4_warning(inode->i_sb, __func__,
+			     "couldn't mark inode dirty (err %d)", err);
+		goto stop_handle;
+	}
 	if (inode->i_blocks)
 		ext4_truncate(inode);
+
+	/*
+	 * ext4_ext_truncate() doesn't reserve any slop when it
+	 * restarts journal transactions; therefore there may not be
+	 * enough credits left in the handle to remove the inode from
+	 * the orphan list and set the dtime field.
+	 */
+	if (handle->h_buffer_credits < 3) {
+		err = ext4_journal_extend(handle, 3);
+		if (err > 0)
+			err = ext4_journal_restart(handle, 3);
+		if (err != 0) {
+			ext4_warning(inode->i_sb, __func__,
+				     "couldn't extend journal (err %d)", err);
+		stop_handle:
+			ext4_journal_stop(handle);
+			goto no_delete;
+		}
+	}
+
 	/*
 	 * Kill off the orphan record which ext4_truncate created.
 	 * AKPM: I think this can be inside the above `if'.
-- 
cgit v1.2.3-70-g09d2


From 34071da71a665d8c81e3b3467c9a2e7c56386fec Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Fri, 1 Aug 2008 21:59:19 -0400
Subject: ext4: don't assume extents can't cross block groups when truncating

With the FLEX_BG layout, there is no reason why extents can't cross
block groups, so make the truncate code reserve enough credits so we
don't BUG if we come across such an extent.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/extents.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index f554703eb92..f7529e27d79 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1910,9 +1910,13 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 			BUG_ON(b != ex_ee_block + ex_ee_len - 1);
 		}
 
-		/* at present, extent can't cross block group: */
-		/* leaf + bitmap + group desc + sb + inode */
-		credits = 5;
+		/*
+		 * 3 for leaf, sb, and inode plus 2 (bmap and group
+		 * descriptor) for each block group; assume two block
+		 * groups plus ex_ee_len/blocks_per_block_group for
+		 * the worst case
+		 */
+		credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb));
 		if (ex == EXT_FIRST_EXTENT(eh)) {
 			correct_index = 1;
 			credits += (ext_depth(inode)) + 1;
-- 
cgit v1.2.3-70-g09d2


From 12219aea6b944e36795267be31d43f9c484841be Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Thu, 17 Jul 2008 16:12:08 -0400
Subject: ext4: Cleanup the block reservation code path

The truncate patch should not use the i_allocated_meta_blocks
value. So add seperate functions to be used in the truncate
and alloc path. We also need to release the meta-data block
that we reserved for the blocks that we are truncating.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/ext4.h  |   1 -
 fs/ext4/inode.c | 108 +++++++++++++++++++++++++++++++++++---------------------
 2 files changed, 67 insertions(+), 42 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 303e41cf7b1..6c7924d9e35 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1044,7 +1044,6 @@ extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
 
 
 /* inode.c */
-void ext4_da_release_space(struct inode *inode, int used, int to_free);
 int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
 		struct buffer_head *bh, ext4_fsblk_t blocknr);
 struct buffer_head *ext4_getblk(handle_t *, struct inode *,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2697eaf0368..85a862c9c4c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -980,6 +980,67 @@ out:
 	return err;
 }
 
+/*
+ * Calculate the number of metadata blocks need to reserve
+ * to allocate @blocks for non extent file based file
+ */
+static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
+{
+	int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
+	int ind_blks, dind_blks, tind_blks;
+
+	/* number of new indirect blocks needed */
+	ind_blks = (blocks + icap - 1) / icap;
+
+	dind_blks = (ind_blks + icap - 1) / icap;
+
+	tind_blks = 1;
+
+	return ind_blks + dind_blks + tind_blks;
+}
+
+/*
+ * Calculate the number of metadata blocks need to reserve
+ * to allocate given number of blocks
+ */
+static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
+{
+	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
+		return ext4_ext_calc_metadata_amount(inode, blocks);
+
+	return ext4_indirect_calc_metadata_amount(inode, blocks);
+}
+
+static void ext4_da_update_reserve_space(struct inode *inode, int used)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	int total, mdb, mdb_free;
+
+	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+	/* recalculate the number of metablocks still need to be reserved */
+	total = EXT4_I(inode)->i_reserved_data_blocks - used;
+	mdb = ext4_calc_metadata_amount(inode, total);
+
+	/* figure out how many metablocks to release */
+	BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
+	mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
+
+	/* Account for allocated meta_blocks */
+	mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
+
+	/* update fs free blocks counter for truncate case */
+	percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free);
+
+	/* update per-inode reservations */
+	BUG_ON(used  > EXT4_I(inode)->i_reserved_data_blocks);
+	EXT4_I(inode)->i_reserved_data_blocks -= used;
+
+	BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
+	EXT4_I(inode)->i_reserved_meta_blocks = mdb;
+	EXT4_I(inode)->i_allocated_meta_blocks = 0;
+	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+}
+
 /* Maximum number of blocks we map for direct IO at once. */
 #define DIO_MAX_BLOCKS 4096
 /*
@@ -1097,7 +1158,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
 		 * which were deferred till now
 		 */
 		if ((retval > 0) && buffer_delay(bh))
-			ext4_da_release_space(inode, retval, 0);
+			ext4_da_update_reserve_space(inode, retval);
 	}
 
 	up_write((&EXT4_I(inode)->i_data_sem));
@@ -1465,36 +1526,6 @@ static int ext4_journalled_write_end(struct file *file,
 
 	return ret ? ret : copied;
 }
-/*
- * Calculate the number of metadata blocks need to reserve
- * to allocate @blocks for non extent file based file
- */
-static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
-{
-	int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
-	int ind_blks, dind_blks, tind_blks;
-
-	/* number of new indirect blocks needed */
-	ind_blks = (blocks + icap - 1) / icap;
-
-	dind_blks = (ind_blks + icap - 1) / icap;
-
-	tind_blks = 1;
-
-	return ind_blks + dind_blks + tind_blks;
-}
-
-/*
- * Calculate the number of metadata blocks need to reserve
- * to allocate given number of blocks
- */
-static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
-{
-	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
-		return ext4_ext_calc_metadata_amount(inode, blocks);
-
-	return ext4_indirect_calc_metadata_amount(inode, blocks);
-}
 
 static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
 {
@@ -1518,7 +1549,6 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
 		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 		return -ENOSPC;
 	}
-
 	/* reduce fs free blocks counter */
 	percpu_counter_sub(&sbi->s_freeblocks_counter, total);
 
@@ -1529,35 +1559,31 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
 	return 0;       /* success */
 }
 
-void ext4_da_release_space(struct inode *inode, int used, int to_free)
+static void ext4_da_release_space(struct inode *inode, int to_free)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	int total, mdb, mdb_free, release;
 
 	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 	/* recalculate the number of metablocks still need to be reserved */
-	total = EXT4_I(inode)->i_reserved_data_blocks - used - to_free;
+	total = EXT4_I(inode)->i_reserved_data_blocks - to_free;
 	mdb = ext4_calc_metadata_amount(inode, total);
 
 	/* figure out how many metablocks to release */
 	BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
 	mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
 
-	/* Account for allocated meta_blocks */
-	mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
-
 	release = to_free + mdb_free;
 
 	/* update fs free blocks counter for truncate case */
 	percpu_counter_add(&sbi->s_freeblocks_counter, release);
 
 	/* update per-inode reservations */
-	BUG_ON(used + to_free > EXT4_I(inode)->i_reserved_data_blocks);
-	EXT4_I(inode)->i_reserved_data_blocks -= (used + to_free);
+	BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
+	EXT4_I(inode)->i_reserved_data_blocks -= to_free;
 
 	BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
 	EXT4_I(inode)->i_reserved_meta_blocks = mdb;
-	EXT4_I(inode)->i_allocated_meta_blocks = 0;
 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 }
 
@@ -1579,7 +1605,7 @@ static void ext4_da_page_release_reservation(struct page *page,
 		}
 		curr_off = next_off;
 	} while ((bh = bh->b_this_page) != head);
-	ext4_da_release_space(page->mapping->host, 0, to_release);
+	ext4_da_release_space(page->mapping->host, to_release);
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From ec05e868ac80cc8fc7de6e5cf773b232198e49af Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Thu, 24 Jul 2008 12:49:59 -0400
Subject: ext4: improve ext4_fill_flex_info() a bit

- use kzalloc() instead of kmalloc() + memset()
- improve a printk info

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/super.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 511997ef6f0..e34fc2d6dbf 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1506,14 +1506,13 @@ static int ext4_fill_flex_info(struct super_block *sb)
 
 	flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) /
 		groups_per_flex;
-	sbi->s_flex_groups = kmalloc(flex_group_count *
+	sbi->s_flex_groups = kzalloc(flex_group_count *
 				     sizeof(struct flex_groups), GFP_KERNEL);
 	if (sbi->s_flex_groups == NULL) {
-		printk(KERN_ERR "EXT4-fs: not enough memory\n");
+		printk(KERN_ERR "EXT4-fs: not enough memory for "
+				"%lu flex groups\n", flex_group_count);
 		goto failed;
 	}
-	memset(sbi->s_flex_groups, 0, flex_group_count *
-	       sizeof(struct flex_groups));
 
 	gdp = ext4_get_group_desc(sb, 1, &bh);
 	block_bitmap = ext4_block_bitmap(sb, gdp) - 1;
-- 
cgit v1.2.3-70-g09d2


From 2b2d6d019724de6e51ac5bcf22b5ef969daefa8b Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 26 Jul 2008 16:15:44 -0400
Subject: ext4: Cleanup whitespace and other miscellaneous style issues

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/acl.c     | 188 ++++++++++++++++++------------------
 fs/ext4/extents.c |   2 +-
 fs/ext4/inode.c   |   5 +-
 fs/ext4/resize.c  |  79 +++++++--------
 fs/ext4/super.c   | 283 +++++++++++++++++++++++++++---------------------------
 fs/ext4/xattr.c   |   2 +-
 6 files changed, 277 insertions(+), 282 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index c7d04e16544..694ed6fadcc 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -40,34 +40,35 @@ ext4_acl_from_disk(const void *value, size_t size)
 	acl = posix_acl_alloc(count, GFP_NOFS);
 	if (!acl)
 		return ERR_PTR(-ENOMEM);
-	for (n=0; n < count; n++) {
+	for (n = 0; n < count; n++) {
 		ext4_acl_entry *entry =
 			(ext4_acl_entry *)value;
 		if ((char *)value + sizeof(ext4_acl_entry_short) > end)
 			goto fail;
 		acl->a_entries[n].e_tag  = le16_to_cpu(entry->e_tag);
 		acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
-		switch(acl->a_entries[n].e_tag) {
-			case ACL_USER_OBJ:
-			case ACL_GROUP_OBJ:
-			case ACL_MASK:
-			case ACL_OTHER:
-				value = (char *)value +
-					sizeof(ext4_acl_entry_short);
-				acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
-				break;
-
-			case ACL_USER:
-			case ACL_GROUP:
-				value = (char *)value + sizeof(ext4_acl_entry);
-				if ((char *)value > end)
-					goto fail;
-				acl->a_entries[n].e_id =
-					le32_to_cpu(entry->e_id);
-				break;
-
-			default:
+
+		switch (acl->a_entries[n].e_tag) {
+		case ACL_USER_OBJ:
+		case ACL_GROUP_OBJ:
+		case ACL_MASK:
+		case ACL_OTHER:
+			value = (char *)value +
+				sizeof(ext4_acl_entry_short);
+			acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
+			break;
+
+		case ACL_USER:
+		case ACL_GROUP:
+			value = (char *)value + sizeof(ext4_acl_entry);
+			if ((char *)value > end)
 				goto fail;
+			acl->a_entries[n].e_id =
+				le32_to_cpu(entry->e_id);
+			break;
+
+		default:
+			goto fail;
 		}
 	}
 	if (value != end)
@@ -96,27 +97,26 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size)
 		return ERR_PTR(-ENOMEM);
 	ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION);
 	e = (char *)ext_acl + sizeof(ext4_acl_header);
-	for (n=0; n < acl->a_count; n++) {
+	for (n = 0; n < acl->a_count; n++) {
 		ext4_acl_entry *entry = (ext4_acl_entry *)e;
 		entry->e_tag  = cpu_to_le16(acl->a_entries[n].e_tag);
 		entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
-		switch(acl->a_entries[n].e_tag) {
-			case ACL_USER:
-			case ACL_GROUP:
-				entry->e_id =
-					cpu_to_le32(acl->a_entries[n].e_id);
-				e += sizeof(ext4_acl_entry);
-				break;
-
-			case ACL_USER_OBJ:
-			case ACL_GROUP_OBJ:
-			case ACL_MASK:
-			case ACL_OTHER:
-				e += sizeof(ext4_acl_entry_short);
-				break;
-
-			default:
-				goto fail;
+		switch (acl->a_entries[n].e_tag) {
+		case ACL_USER:
+		case ACL_GROUP:
+			entry->e_id = cpu_to_le32(acl->a_entries[n].e_id);
+			e += sizeof(ext4_acl_entry);
+			break;
+
+		case ACL_USER_OBJ:
+		case ACL_GROUP_OBJ:
+		case ACL_MASK:
+		case ACL_OTHER:
+			e += sizeof(ext4_acl_entry_short);
+			break;
+
+		default:
+			goto fail;
 		}
 	}
 	return (char *)ext_acl;
@@ -167,23 +167,23 @@ ext4_get_acl(struct inode *inode, int type)
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return NULL;
 
-	switch(type) {
-		case ACL_TYPE_ACCESS:
-			acl = ext4_iget_acl(inode, &ei->i_acl);
-			if (acl != EXT4_ACL_NOT_CACHED)
-				return acl;
-			name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
-			break;
-
-		case ACL_TYPE_DEFAULT:
-			acl = ext4_iget_acl(inode, &ei->i_default_acl);
-			if (acl != EXT4_ACL_NOT_CACHED)
-				return acl;
-			name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
-			break;
-
-		default:
-			return ERR_PTR(-EINVAL);
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		acl = ext4_iget_acl(inode, &ei->i_acl);
+		if (acl != EXT4_ACL_NOT_CACHED)
+			return acl;
+		name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
+		break;
+
+	case ACL_TYPE_DEFAULT:
+		acl = ext4_iget_acl(inode, &ei->i_default_acl);
+		if (acl != EXT4_ACL_NOT_CACHED)
+			return acl;
+		name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
+		break;
+
+	default:
+		return ERR_PTR(-EINVAL);
 	}
 	retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
 	if (retval > 0) {
@@ -201,14 +201,14 @@ ext4_get_acl(struct inode *inode, int type)
 	kfree(value);
 
 	if (!IS_ERR(acl)) {
-		switch(type) {
-			case ACL_TYPE_ACCESS:
-				ext4_iset_acl(inode, &ei->i_acl, acl);
-				break;
-
-			case ACL_TYPE_DEFAULT:
-				ext4_iset_acl(inode, &ei->i_default_acl, acl);
-				break;
+		switch (type) {
+		case ACL_TYPE_ACCESS:
+			ext4_iset_acl(inode, &ei->i_acl, acl);
+			break;
+
+		case ACL_TYPE_DEFAULT:
+			ext4_iset_acl(inode, &ei->i_default_acl, acl);
+			break;
 		}
 	}
 	return acl;
@@ -232,31 +232,31 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
 	if (S_ISLNK(inode->i_mode))
 		return -EOPNOTSUPP;
 
-	switch(type) {
-		case ACL_TYPE_ACCESS:
-			name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
-			if (acl) {
-				mode_t mode = inode->i_mode;
-				error = posix_acl_equiv_mode(acl, &mode);
-				if (error < 0)
-					return error;
-				else {
-					inode->i_mode = mode;
-					ext4_mark_inode_dirty(handle, inode);
-					if (error == 0)
-						acl = NULL;
-				}
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
+		if (acl) {
+			mode_t mode = inode->i_mode;
+			error = posix_acl_equiv_mode(acl, &mode);
+			if (error < 0)
+				return error;
+			else {
+				inode->i_mode = mode;
+				ext4_mark_inode_dirty(handle, inode);
+				if (error == 0)
+					acl = NULL;
 			}
-			break;
+		}
+		break;
 
-		case ACL_TYPE_DEFAULT:
-			name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
-			if (!S_ISDIR(inode->i_mode))
-				return acl ? -EACCES : 0;
-			break;
+	case ACL_TYPE_DEFAULT:
+		name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
+		if (!S_ISDIR(inode->i_mode))
+			return acl ? -EACCES : 0;
+		break;
 
-		default:
-			return -EINVAL;
+	default:
+		return -EINVAL;
 	}
 	if (acl) {
 		value = ext4_acl_to_disk(acl, &size);
@@ -269,14 +269,14 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
 
 	kfree(value);
 	if (!error) {
-		switch(type) {
-			case ACL_TYPE_ACCESS:
-				ext4_iset_acl(inode, &ei->i_acl, acl);
-				break;
-
-			case ACL_TYPE_DEFAULT:
-				ext4_iset_acl(inode, &ei->i_default_acl, acl);
-				break;
+		switch (type) {
+		case ACL_TYPE_ACCESS:
+			ext4_iset_acl(inode, &ei->i_acl, acl);
+			break;
+
+		case ACL_TYPE_DEFAULT:
+			ext4_iset_acl(inode, &ei->i_default_acl, acl);
+			break;
 		}
 	}
 	return error;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index f7529e27d79..612c3d2c382 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1441,7 +1441,7 @@ unsigned int ext4_ext_check_overlap(struct inode *inode,
 
 	/*
 	 * get the next allocated block if the extent in the path
-	 * is before the requested block(s) 
+	 * is before the requested block(s)
 	 */
 	if (b2 < b1) {
 		b2 = ext4_ext_next_allocated_block(path);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 85a862c9c4c..0080999d2cd 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1054,10 +1054,9 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
 
 
 /*
+ * The ext4_get_blocks_wrap() function try to look up the requested blocks,
+ * and returns if the blocks are already mapped.
  *
- *
- * ext4_ext4 get_block() wrapper function
- * It will do a look up first, and returns if the blocks already mapped.
  * Otherwise it takes the write lock of the i_data_sem and allocate blocks
  * and store the allocated blocks in the result buffer head and mark it
  * mapped.
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index f000fbe2cd9..0a926516426 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -73,7 +73,7 @@ static int verify_group_input(struct super_block *sb,
 			     "Inode bitmap not in group (block %llu)",
 			     (unsigned long long)input->inode_bitmap);
 	else if (outside(input->inode_table, start, end) ||
-	         outside(itend - 1, start, end))
+		 outside(itend - 1, start, end))
 		ext4_warning(sb, __func__,
 			     "Inode table not in group (blocks %llu-%llu)",
 			     (unsigned long long)input->inode_table, itend - 1);
@@ -104,7 +104,7 @@ static int verify_group_input(struct super_block *sb,
 			     (unsigned long long)input->inode_bitmap,
 			     start, metaend - 1);
 	else if (inside(input->inode_table, start, metaend) ||
-	         inside(itend - 1, start, metaend))
+		 inside(itend - 1, start, metaend))
 		ext4_warning(sb, __func__,
 			     "Inode table (%llu-%llu) overlaps"
 			     "GDT table (%llu-%llu)",
@@ -158,9 +158,9 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh,
 	if (err) {
 		if ((err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
 			return err;
-	        if ((err = ext4_journal_get_write_access(handle, bh)))
+		if ((err = ext4_journal_get_write_access(handle, bh)))
 			return err;
-        }
+	}
 
 	return 0;
 }
@@ -416,11 +416,11 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 		       "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n",
 		       gdb_num);
 
-	/*
-	 * If we are not using the primary superblock/GDT copy don't resize,
-	 * because the user tools have no way of handling this.  Probably a
-	 * bad time to do it anyways.
-	 */
+        /*
+         * If we are not using the primary superblock/GDT copy don't resize,
+         * because the user tools have no way of handling this.  Probably a
+         * bad time to do it anyways.
+         */
 	if (EXT4_SB(sb)->s_sbh->b_blocknr !=
 	    le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
 		ext4_warning(sb, __func__,
@@ -507,14 +507,14 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	return 0;
 
 exit_inode:
-	//ext4_journal_release_buffer(handle, iloc.bh);
+	/* ext4_journal_release_buffer(handle, iloc.bh); */
 	brelse(iloc.bh);
 exit_dindj:
-	//ext4_journal_release_buffer(handle, dind);
+	/* ext4_journal_release_buffer(handle, dind); */
 exit_primary:
-	//ext4_journal_release_buffer(handle, *primary);
+	/* ext4_journal_release_buffer(handle, *primary); */
 exit_sbh:
-	//ext4_journal_release_buffer(handle, *primary);
+	/* ext4_journal_release_buffer(handle, *primary); */
 exit_dind:
 	brelse(dind);
 exit_bh:
@@ -818,12 +818,12 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 	if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh)))
 		goto exit_journal;
 
-	/*
-	 * We will only either add reserved group blocks to a backup group
-	 * or remove reserved blocks for the first group in a new group block.
-	 * Doing both would be mean more complex code, and sane people don't
-	 * use non-sparse filesystems anymore.  This is already checked above.
-	 */
+        /*
+         * We will only either add reserved group blocks to a backup group
+         * or remove reserved blocks for the first group in a new group block.
+         * Doing both would be mean more complex code, and sane people don't
+         * use non-sparse filesystems anymore.  This is already checked above.
+         */
 	if (gdb_off) {
 		primary = sbi->s_group_desc[gdb_num];
 		if ((err = ext4_journal_get_write_access(handle, primary)))
@@ -835,24 +835,24 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 	} else if ((err = add_new_gdb(handle, inode, input, &primary)))
 		goto exit_journal;
 
-	/*
-	 * OK, now we've set up the new group.  Time to make it active.
-	 *
-	 * Current kernels don't lock all allocations via lock_super(),
-	 * so we have to be safe wrt. concurrent accesses the group
-	 * data.  So we need to be careful to set all of the relevant
-	 * group descriptor data etc. *before* we enable the group.
-	 *
-	 * The key field here is sbi->s_groups_count: as long as
-	 * that retains its old value, nobody is going to access the new
-	 * group.
-	 *
-	 * So first we update all the descriptor metadata for the new
-	 * group; then we update the total disk blocks count; then we
-	 * update the groups count to enable the group; then finally we
-	 * update the free space counts so that the system can start
-	 * using the new disk blocks.
-	 */
+        /*
+         * OK, now we've set up the new group.  Time to make it active.
+         *
+         * Current kernels don't lock all allocations via lock_super(),
+         * so we have to be safe wrt. concurrent accesses the group
+         * data.  So we need to be careful to set all of the relevant
+         * group descriptor data etc. *before* we enable the group.
+         *
+         * The key field here is sbi->s_groups_count: as long as
+         * that retains its old value, nobody is going to access the new
+         * group.
+         *
+         * So first we update all the descriptor metadata for the new
+         * group; then we update the total disk blocks count; then we
+         * update the groups count to enable the group; then finally we
+         * update the free space counts so that the system can start
+         * using the new disk blocks.
+         */
 
 	/* Update group descriptor block for new group */
 	gdp = (struct ext4_group_desc *)((char *)primary->b_data +
@@ -946,7 +946,8 @@ exit_put:
 	return err;
 } /* ext4_group_add */
 
-/* Extend the filesystem to the new number of blocks specified.  This entry
+/*
+ * Extend the filesystem to the new number of blocks specified.  This entry
  * point is only used to extend the current filesystem to the end of the last
  * existing group.  It can be accessed via ioctl, or by "remount,resize=<size>"
  * for emergencies (because it has no dependencies on reserved blocks).
@@ -1024,7 +1025,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
 			     o_blocks_count + add, add);
 
 	/* See if the device is actually as big as what was requested */
-	bh = sb_bread(sb, o_blocks_count + add -1);
+	bh = sb_bread(sb, o_blocks_count + add - 1);
 	if (!bh) {
 		ext4_warning(sb, __func__,
 			     "can't read last block, resize aborted");
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e34fc2d6dbf..09e3c56782a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -49,20 +49,19 @@ static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
 			     unsigned long journal_devnum);
 static int ext4_create_journal(struct super_block *, struct ext4_super_block *,
 			       unsigned int);
-static void ext4_commit_super (struct super_block * sb,
-			       struct ext4_super_block * es,
-			       int sync);
-static void ext4_mark_recovery_complete(struct super_block * sb,
-					struct ext4_super_block * es);
-static void ext4_clear_journal_err(struct super_block * sb,
-				   struct ext4_super_block * es);
+static void ext4_commit_super(struct super_block *sb,
+			      struct ext4_super_block *es, int sync);
+static void ext4_mark_recovery_complete(struct super_block *sb,
+					struct ext4_super_block *es);
+static void ext4_clear_journal_err(struct super_block *sb,
+				   struct ext4_super_block *es);
 static int ext4_sync_fs(struct super_block *sb, int wait);
-static const char *ext4_decode_error(struct super_block * sb, int errno,
+static const char *ext4_decode_error(struct super_block *sb, int errno,
 				     char nbuf[16]);
-static int ext4_remount (struct super_block * sb, int * flags, char * data);
-static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf);
+static int ext4_remount(struct super_block *sb, int *flags, char *data);
+static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
 static void ext4_unlockfs(struct super_block *sb);
-static void ext4_write_super (struct super_block * sb);
+static void ext4_write_super(struct super_block *sb);
 static void ext4_write_super_lockfs(struct super_block *sb);
 
 
@@ -211,15 +210,15 @@ static void ext4_handle_error(struct super_block *sb)
 	if (sb->s_flags & MS_RDONLY)
 		return;
 
-	if (!test_opt (sb, ERRORS_CONT)) {
+	if (!test_opt(sb, ERRORS_CONT)) {
 		journal_t *journal = EXT4_SB(sb)->s_journal;
 
 		EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
 		if (journal)
 			jbd2_journal_abort(journal, -EIO);
 	}
-	if (test_opt (sb, ERRORS_RO)) {
-		printk (KERN_CRIT "Remounting filesystem read-only\n");
+	if (test_opt(sb, ERRORS_RO)) {
+		printk(KERN_CRIT "Remounting filesystem read-only\n");
 		sb->s_flags |= MS_RDONLY;
 	}
 	ext4_commit_super(sb, es, 1);
@@ -228,13 +227,13 @@ static void ext4_handle_error(struct super_block *sb)
 			sb->s_id);
 }
 
-void ext4_error (struct super_block * sb, const char * function,
-		 const char * fmt, ...)
+void ext4_error(struct super_block *sb, const char *function,
+		const char *fmt, ...)
 {
 	va_list args;
 
 	va_start(args, fmt);
-	printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function);
+	printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
 	vprintk(fmt, args);
 	printk("\n");
 	va_end(args);
@@ -242,7 +241,7 @@ void ext4_error (struct super_block * sb, const char * function,
 	ext4_handle_error(sb);
 }
 
-static const char *ext4_decode_error(struct super_block * sb, int errno,
+static const char *ext4_decode_error(struct super_block *sb, int errno,
 				     char nbuf[16])
 {
 	char *errstr = NULL;
@@ -278,8 +277,7 @@ static const char *ext4_decode_error(struct super_block * sb, int errno,
 /* __ext4_std_error decodes expected errors from journaling functions
  * automatically and invokes the appropriate error response.  */
 
-void __ext4_std_error (struct super_block * sb, const char * function,
-		       int errno)
+void __ext4_std_error(struct super_block *sb, const char *function, int errno)
 {
 	char nbuf[16];
 	const char *errstr;
@@ -292,8 +290,8 @@ void __ext4_std_error (struct super_block * sb, const char * function,
 		return;
 
 	errstr = ext4_decode_error(sb, errno, nbuf);
-	printk (KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n",
-		sb->s_id, function, errstr);
+	printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n",
+	       sb->s_id, function, errstr);
 
 	ext4_handle_error(sb);
 }
@@ -308,15 +306,15 @@ void __ext4_std_error (struct super_block * sb, const char * function,
  * case we take the easy way out and panic immediately.
  */
 
-void ext4_abort (struct super_block * sb, const char * function,
-		 const char * fmt, ...)
+void ext4_abort(struct super_block *sb, const char *function,
+		const char *fmt, ...)
 {
 	va_list args;
 
-	printk (KERN_CRIT "ext4_abort called.\n");
+	printk(KERN_CRIT "ext4_abort called.\n");
 
 	va_start(args, fmt);
-	printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function);
+	printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
 	vprintk(fmt, args);
 	printk("\n");
 	va_end(args);
@@ -334,8 +332,8 @@ void ext4_abort (struct super_block * sb, const char * function,
 	jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
 }
 
-void ext4_warning (struct super_block * sb, const char * function,
-		   const char * fmt, ...)
+void ext4_warning(struct super_block *sb, const char *function,
+		  const char *fmt, ...)
 {
 	va_list args;
 
@@ -496,7 +494,7 @@ static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
 	}
 }
 
-static void ext4_put_super (struct super_block * sb)
+static void ext4_put_super(struct super_block *sb)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_super_block *es = sbi->s_es;
@@ -647,7 +645,8 @@ static void ext4_clear_inode(struct inode *inode)
 				       &EXT4_I(inode)->jinode);
 }
 
-static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb)
+static inline void ext4_show_quota_options(struct seq_file *seq,
+					   struct super_block *sb)
 {
 #if defined(CONFIG_QUOTA)
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -822,8 +821,8 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
 }
 
 #ifdef CONFIG_QUOTA
-#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group")
-#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
+#define QTYPE2NAME(t) ((t) == USRQUOTA?"user":"group")
+#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
 
 static int ext4_dquot_initialize(struct inode *inode, int type);
 static int ext4_dquot_drop(struct inode *inode);
@@ -991,12 +990,12 @@ static ext4_fsblk_t get_sb_block(void **data)
 	return sb_block;
 }
 
-static int parse_options (char *options, struct super_block *sb,
-			  unsigned int *inum, unsigned long *journal_devnum,
-			  ext4_fsblk_t *n_blocks_count, int is_remount)
+static int parse_options(char *options, struct super_block *sb,
+			 unsigned int *inum, unsigned long *journal_devnum,
+			 ext4_fsblk_t *n_blocks_count, int is_remount)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	char * p;
+	char *p;
 	substring_t args[MAX_OPT_ARGS];
 	int data_opt = 0;
 	int option;
@@ -1009,7 +1008,7 @@ static int parse_options (char *options, struct super_block *sb,
 	if (!options)
 		return 1;
 
-	while ((p = strsep (&options, ",")) != NULL) {
+	while ((p = strsep(&options, ",")) != NULL) {
 		int token;
 		if (!*p)
 			continue;
@@ -1017,16 +1016,16 @@ static int parse_options (char *options, struct super_block *sb,
 		token = match_token(p, tokens, args);
 		switch (token) {
 		case Opt_bsd_df:
-			clear_opt (sbi->s_mount_opt, MINIX_DF);
+			clear_opt(sbi->s_mount_opt, MINIX_DF);
 			break;
 		case Opt_minix_df:
-			set_opt (sbi->s_mount_opt, MINIX_DF);
+			set_opt(sbi->s_mount_opt, MINIX_DF);
 			break;
 		case Opt_grpid:
-			set_opt (sbi->s_mount_opt, GRPID);
+			set_opt(sbi->s_mount_opt, GRPID);
 			break;
 		case Opt_nogrpid:
-			clear_opt (sbi->s_mount_opt, GRPID);
+			clear_opt(sbi->s_mount_opt, GRPID);
 			break;
 		case Opt_resuid:
 			if (match_int(&args[0], &option))
@@ -1043,41 +1042,41 @@ static int parse_options (char *options, struct super_block *sb,
 			/* *sb_block = match_int(&args[0]); */
 			break;
 		case Opt_err_panic:
-			clear_opt (sbi->s_mount_opt, ERRORS_CONT);
-			clear_opt (sbi->s_mount_opt, ERRORS_RO);
-			set_opt (sbi->s_mount_opt, ERRORS_PANIC);
+			clear_opt(sbi->s_mount_opt, ERRORS_CONT);
+			clear_opt(sbi->s_mount_opt, ERRORS_RO);
+			set_opt(sbi->s_mount_opt, ERRORS_PANIC);
 			break;
 		case Opt_err_ro:
-			clear_opt (sbi->s_mount_opt, ERRORS_CONT);
-			clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
-			set_opt (sbi->s_mount_opt, ERRORS_RO);
+			clear_opt(sbi->s_mount_opt, ERRORS_CONT);
+			clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
+			set_opt(sbi->s_mount_opt, ERRORS_RO);
 			break;
 		case Opt_err_cont:
-			clear_opt (sbi->s_mount_opt, ERRORS_RO);
-			clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
-			set_opt (sbi->s_mount_opt, ERRORS_CONT);
+			clear_opt(sbi->s_mount_opt, ERRORS_RO);
+			clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
+			set_opt(sbi->s_mount_opt, ERRORS_CONT);
 			break;
 		case Opt_nouid32:
-			set_opt (sbi->s_mount_opt, NO_UID32);
+			set_opt(sbi->s_mount_opt, NO_UID32);
 			break;
 		case Opt_nocheck:
-			clear_opt (sbi->s_mount_opt, CHECK);
+			clear_opt(sbi->s_mount_opt, CHECK);
 			break;
 		case Opt_debug:
-			set_opt (sbi->s_mount_opt, DEBUG);
+			set_opt(sbi->s_mount_opt, DEBUG);
 			break;
 		case Opt_oldalloc:
-			set_opt (sbi->s_mount_opt, OLDALLOC);
+			set_opt(sbi->s_mount_opt, OLDALLOC);
 			break;
 		case Opt_orlov:
-			clear_opt (sbi->s_mount_opt, OLDALLOC);
+			clear_opt(sbi->s_mount_opt, OLDALLOC);
 			break;
 #ifdef CONFIG_EXT4DEV_FS_XATTR
 		case Opt_user_xattr:
-			set_opt (sbi->s_mount_opt, XATTR_USER);
+			set_opt(sbi->s_mount_opt, XATTR_USER);
 			break;
 		case Opt_nouser_xattr:
-			clear_opt (sbi->s_mount_opt, XATTR_USER);
+			clear_opt(sbi->s_mount_opt, XATTR_USER);
 			break;
 #else
 		case Opt_user_xattr:
@@ -1115,7 +1114,7 @@ static int parse_options (char *options, struct super_block *sb,
 				       "journal on remount\n");
 				return 0;
 			}
-			set_opt (sbi->s_mount_opt, UPDATE_JOURNAL);
+			set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
 			break;
 		case Opt_journal_inum:
 			if (is_remount) {
@@ -1145,7 +1144,7 @@ static int parse_options (char *options, struct super_block *sb,
 			set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
 			break;
 		case Opt_noload:
-			set_opt (sbi->s_mount_opt, NOLOAD);
+			set_opt(sbi->s_mount_opt, NOLOAD);
 			break;
 		case Opt_commit:
 			if (match_int(&args[0], &option))
@@ -1331,7 +1330,7 @@ set_qf_format:
 					"on this filesystem, use tune2fs\n");
 				return 0;
 			}
-			set_opt (sbi->s_mount_opt, EXTENTS);
+			set_opt(sbi->s_mount_opt, EXTENTS);
 			break;
 		case Opt_noextents:
 			/*
@@ -1348,7 +1347,7 @@ set_qf_format:
 						"-o noextents options\n");
 				return 0;
 			}
-			clear_opt (sbi->s_mount_opt, EXTENTS);
+			clear_opt(sbi->s_mount_opt, EXTENTS);
 			break;
 		case Opt_i_version:
 			set_opt(sbi->s_mount_opt, I_VERSION);
@@ -1374,9 +1373,9 @@ set_qf_format:
 			set_opt(sbi->s_mount_opt, DELALLOC);
 			break;
 		default:
-			printk (KERN_ERR
-				"EXT4-fs: Unrecognized mount option \"%s\" "
-				"or missing value\n", p);
+			printk(KERN_ERR
+			       "EXT4-fs: Unrecognized mount option \"%s\" "
+			       "or missing value\n", p);
 			return 0;
 		}
 	}
@@ -1423,31 +1422,31 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
 	int res = 0;
 
 	if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
-		printk (KERN_ERR "EXT4-fs warning: revision level too high, "
-			"forcing read-only mode\n");
+		printk(KERN_ERR "EXT4-fs warning: revision level too high, "
+		       "forcing read-only mode\n");
 		res = MS_RDONLY;
 	}
 	if (read_only)
 		return res;
 	if (!(sbi->s_mount_state & EXT4_VALID_FS))
-		printk (KERN_WARNING "EXT4-fs warning: mounting unchecked fs, "
-			"running e2fsck is recommended\n");
+		printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, "
+		       "running e2fsck is recommended\n");
 	else if ((sbi->s_mount_state & EXT4_ERROR_FS))
-		printk (KERN_WARNING
-			"EXT4-fs warning: mounting fs with errors, "
-			"running e2fsck is recommended\n");
+		printk(KERN_WARNING
+		       "EXT4-fs warning: mounting fs with errors, "
+		       "running e2fsck is recommended\n");
 	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
 		 le16_to_cpu(es->s_mnt_count) >=
 		 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
-		printk (KERN_WARNING
-			"EXT4-fs warning: maximal mount count reached, "
-			"running e2fsck is recommended\n");
+		printk(KERN_WARNING
+		       "EXT4-fs warning: maximal mount count reached, "
+		       "running e2fsck is recommended\n");
 	else if (le32_to_cpu(es->s_checkinterval) &&
 		(le32_to_cpu(es->s_lastcheck) +
 			le32_to_cpu(es->s_checkinterval) <= get_seconds()))
-		printk (KERN_WARNING
-			"EXT4-fs warning: checktime reached, "
-			"running e2fsck is recommended\n");
+		printk(KERN_WARNING
+		       "EXT4-fs warning: checktime reached, "
+		       "running e2fsck is recommended\n");
 #if 0
 		/* @@@ We _will_ want to clear the valid bit if we find
 		 * inconsistencies, to force a fsck at reboot.  But for
@@ -1596,16 +1595,14 @@ static int ext4_check_descriptors(struct super_block *sb)
 				(EXT4_BLOCKS_PER_GROUP(sb) - 1);
 
 		block_bitmap = ext4_block_bitmap(sb, gdp);
-		if (block_bitmap < first_block || block_bitmap > last_block)
-		{
+		if (block_bitmap < first_block || block_bitmap > last_block) {
 			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
 			       "Block bitmap for group %lu not in group "
 			       "(block %llu)!", i, block_bitmap);
 			return 0;
 		}
 		inode_bitmap = ext4_inode_bitmap(sb, gdp);
-		if (inode_bitmap < first_block || inode_bitmap > last_block)
-		{
+		if (inode_bitmap < first_block || inode_bitmap > last_block) {
 			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
 			       "Inode bitmap for group %lu not in group "
 			       "(block %llu)!", i, inode_bitmap);
@@ -1613,8 +1610,7 @@ static int ext4_check_descriptors(struct super_block *sb)
 		}
 		inode_table = ext4_inode_table(sb, gdp);
 		if (inode_table < first_block ||
-		    inode_table + sbi->s_itb_per_group - 1 > last_block)
-		{
+		    inode_table + sbi->s_itb_per_group - 1 > last_block) {
 			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
 			       "Inode table for group %lu not in group "
 			       "(block %llu)!", i, inode_table);
@@ -1635,7 +1631,7 @@ static int ext4_check_descriptors(struct super_block *sb)
 	}
 
 	ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
-	sbi->s_es->s_free_inodes_count=cpu_to_le32(ext4_count_free_inodes(sb));
+	sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
 	return 1;
 }
 
@@ -1656,8 +1652,8 @@ static int ext4_check_descriptors(struct super_block *sb)
  * e2fsck was run on this filesystem, and it must have already done the orphan
  * inode cleanup for us, so we can safely abort without any further action.
  */
-static void ext4_orphan_cleanup (struct super_block * sb,
-				 struct ext4_super_block * es)
+static void ext4_orphan_cleanup(struct super_block *sb,
+				struct ext4_super_block *es)
 {
 	unsigned int s_flags = sb->s_flags;
 	int nr_orphans = 0, nr_truncates = 0;
@@ -1734,7 +1730,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
 		iput(inode);  /* The delete magic happens here! */
 	}
 
-#define PLURAL(x) (x), ((x)==1) ? "" : "s"
+#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
 
 	if (nr_orphans)
 		printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n",
@@ -1901,12 +1897,12 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
 	return 0;
 }
 
-static int ext4_fill_super (struct super_block *sb, void *data, int silent)
+static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 				__releases(kernel_lock)
 				__acquires(kernel_lock)
 
 {
-	struct buffer_head * bh;
+	struct buffer_head *bh;
 	struct ext4_super_block *es = NULL;
 	struct ext4_sb_info *sbi;
 	ext4_fsblk_t block;
@@ -1955,7 +1951,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 	}
 
 	if (!(bh = sb_bread(sb, logical_sb_block))) {
-		printk (KERN_ERR "EXT4-fs: unable to read superblock\n");
+		printk(KERN_ERR "EXT4-fs: unable to read superblock\n");
 		goto out_fail;
 	}
 	/*
@@ -2028,8 +2024,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 	set_opt(sbi->s_mount_opt, DELALLOC);
 
 
-	if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
-			    NULL, 0))
+	if (!parse_options((char *) data, sb, &journal_inum, &journal_devnum,
+			   NULL, 0))
 		goto failed_mount;
 
 	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
@@ -2104,7 +2100,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 			goto failed_mount;
 		}
 
-		brelse (bh);
+		brelse(bh);
 		logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
 		offset = do_div(logical_sb_block, blocksize);
 		bh = sb_bread(sb, logical_sb_block);
@@ -2116,8 +2112,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 		es = (struct ext4_super_block *)(((char *)bh->b_data) + offset);
 		sbi->s_es = es;
 		if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
-			printk (KERN_ERR
-				"EXT4-fs: Magic mismatch, very weird !\n");
+			printk(KERN_ERR
+			       "EXT4-fs: Magic mismatch, very weird !\n");
 			goto failed_mount;
 		}
 	}
@@ -2134,9 +2130,9 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 		if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
 		    (!is_power_of_2(sbi->s_inode_size)) ||
 		    (sbi->s_inode_size > blocksize)) {
-			printk (KERN_ERR
-				"EXT4-fs: unsupported inode size: %d\n",
-				sbi->s_inode_size);
+			printk(KERN_ERR
+			       "EXT4-fs: unsupported inode size: %d\n",
+			       sbi->s_inode_size);
 			goto failed_mount;
 		}
 		if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
@@ -2168,20 +2164,20 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 	sbi->s_mount_state = le16_to_cpu(es->s_state);
 	sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
 	sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
-	for (i=0; i < 4; i++)
+	for (i = 0; i < 4; i++)
 		sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
 	sbi->s_def_hash_version = es->s_def_hash_version;
 
 	if (sbi->s_blocks_per_group > blocksize * 8) {
-		printk (KERN_ERR
-			"EXT4-fs: #blocks per group too big: %lu\n",
-			sbi->s_blocks_per_group);
+		printk(KERN_ERR
+		       "EXT4-fs: #blocks per group too big: %lu\n",
+		       sbi->s_blocks_per_group);
 		goto failed_mount;
 	}
 	if (sbi->s_inodes_per_group > blocksize * 8) {
-		printk (KERN_ERR
-			"EXT4-fs: #inodes per group too big: %lu\n",
-			sbi->s_inodes_per_group);
+		printk(KERN_ERR
+		       "EXT4-fs: #inodes per group too big: %lu\n",
+		       sbi->s_inodes_per_group);
 		goto failed_mount;
 	}
 
@@ -2215,10 +2211,10 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 	sbi->s_groups_count = blocks_count;
 	db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
 		   EXT4_DESC_PER_BLOCK(sb);
-	sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
+	sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
 				    GFP_KERNEL);
 	if (sbi->s_group_desc == NULL) {
-		printk (KERN_ERR "EXT4-fs: not enough memory\n");
+		printk(KERN_ERR "EXT4-fs: not enough memory\n");
 		goto failed_mount;
 	}
 
@@ -2228,13 +2224,13 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 		block = descriptor_loc(sb, logical_sb_block, i);
 		sbi->s_group_desc[i] = sb_bread(sb, block);
 		if (!sbi->s_group_desc[i]) {
-			printk (KERN_ERR "EXT4-fs: "
-				"can't read group descriptor %d\n", i);
+			printk(KERN_ERR "EXT4-fs: "
+			       "can't read group descriptor %d\n", i);
 			db_count = i;
 			goto failed_mount2;
 		}
 	}
-	if (!ext4_check_descriptors (sb)) {
+	if (!ext4_check_descriptors(sb)) {
 		printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n");
 		goto failed_mount2;
 	}
@@ -2310,11 +2306,11 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 		    EXT4_SB(sb)->s_journal->j_failed_commit) {
 			printk(KERN_CRIT "EXT4-fs error (device %s): "
 			       "ext4_fill_super: Journal transaction "
-			       "%u is corrupt\n", sb->s_id, 
+			       "%u is corrupt\n", sb->s_id,
 			       EXT4_SB(sb)->s_journal->j_failed_commit);
-			if (test_opt (sb, ERRORS_RO)) {
-				printk (KERN_CRIT
-					"Mounting filesystem read-only\n");
+			if (test_opt(sb, ERRORS_RO)) {
+				printk(KERN_CRIT
+				       "Mounting filesystem read-only\n");
 				sb->s_flags |= MS_RDONLY;
 				EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 				es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
@@ -2334,9 +2330,9 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 			goto failed_mount3;
 	} else {
 		if (!silent)
-			printk (KERN_ERR
-				"ext4: No journal on filesystem on %s\n",
-				sb->s_id);
+			printk(KERN_ERR
+			       "ext4: No journal on filesystem on %s\n",
+			       sb->s_id);
 		goto failed_mount3;
 	}
 
@@ -2420,7 +2416,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 		goto failed_mount4;
 	}
 
-	ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY);
+	ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY);
 
 	/* determine the minimum size of new large inodes, if present */
 	if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
@@ -2459,12 +2455,12 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 	ext4_orphan_cleanup(sb, es);
 	EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
 	if (needs_recovery)
-		printk (KERN_INFO "EXT4-fs: recovery complete.\n");
+		printk(KERN_INFO "EXT4-fs: recovery complete.\n");
 	ext4_mark_recovery_complete(sb, es);
-	printk (KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n",
-		test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal":
-		test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
-		"writeback");
+	printk(KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n",
+	       test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal":
+	       test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
+	       "writeback");
 
 	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
 		printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
@@ -2577,14 +2573,14 @@ static journal_t *ext4_get_journal(struct super_block *sb,
 static journal_t *ext4_get_dev_journal(struct super_block *sb,
 				       dev_t j_dev)
 {
-	struct buffer_head * bh;
+	struct buffer_head *bh;
 	journal_t *journal;
 	ext4_fsblk_t start;
 	ext4_fsblk_t len;
 	int hblock, blocksize;
 	ext4_fsblk_t sb_block;
 	unsigned long offset;
-	struct ext4_super_block * es;
+	struct ext4_super_block *es;
 	struct block_device *bdev;
 
 	bdev = ext4_blkdev_get(j_dev);
@@ -2699,8 +2695,8 @@ static int ext4_load_journal(struct super_block *sb,
 					"unavailable, cannot proceed.\n");
 				return -EROFS;
 			}
-			printk (KERN_INFO "EXT4-fs: write access will "
-					"be enabled during recovery.\n");
+			printk(KERN_INFO "EXT4-fs: write access will "
+			       "be enabled during recovery.\n");
 		}
 	}
 
@@ -2753,8 +2749,8 @@ static int ext4_load_journal(struct super_block *sb,
 	return 0;
 }
 
-static int ext4_create_journal(struct super_block * sb,
-			       struct ext4_super_block * es,
+static int ext4_create_journal(struct super_block *sb,
+			       struct ext4_super_block *es,
 			       unsigned int journal_inum)
 {
 	journal_t *journal;
@@ -2795,9 +2791,8 @@ static int ext4_create_journal(struct super_block * sb,
 	return 0;
 }
 
-static void ext4_commit_super (struct super_block * sb,
-			       struct ext4_super_block * es,
-			       int sync)
+static void ext4_commit_super(struct super_block *sb,
+			      struct ext4_super_block *es, int sync)
 {
 	struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
 
@@ -2818,8 +2813,8 @@ static void ext4_commit_super (struct super_block * sb,
  * remounting) the filesystem readonly, then we will end up with a
  * consistent fs on disk.  Record that fact.
  */
-static void ext4_mark_recovery_complete(struct super_block * sb,
-					struct ext4_super_block * es)
+static void ext4_mark_recovery_complete(struct super_block *sb,
+					struct ext4_super_block *es)
 {
 	journal_t *journal = EXT4_SB(sb)->s_journal;
 
@@ -2841,8 +2836,8 @@ static void ext4_mark_recovery_complete(struct super_block * sb,
  * has recorded an error from a previous lifetime, move that error to the
  * main filesystem now.
  */
-static void ext4_clear_journal_err(struct super_block * sb,
-				   struct ext4_super_block * es)
+static void ext4_clear_journal_err(struct super_block *sb,
+				   struct ext4_super_block *es)
 {
 	journal_t *journal;
 	int j_errno;
@@ -2867,7 +2862,7 @@ static void ext4_clear_journal_err(struct super_block * sb,
 
 		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
-		ext4_commit_super (sb, es, 1);
+		ext4_commit_super(sb, es, 1);
 
 		jbd2_journal_clear_err(journal);
 	}
@@ -2900,7 +2895,7 @@ int ext4_force_commit(struct super_block *sb)
  * This implicitly triggers the writebehind on sync().
  */
 
-static void ext4_write_super (struct super_block * sb)
+static void ext4_write_super(struct super_block *sb)
 {
 	if (mutex_trylock(&sb->s_lock) != 0)
 		BUG();
@@ -2956,9 +2951,9 @@ static void ext4_unlockfs(struct super_block *sb)
 	}
 }
 
-static int ext4_remount (struct super_block * sb, int * flags, char * data)
+static int ext4_remount(struct super_block *sb, int *flags, char *data)
 {
-	struct ext4_super_block * es;
+	struct ext4_super_block *es;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	ext4_fsblk_t n_blocks_count = 0;
 	unsigned long old_sb_flags;
@@ -3086,7 +3081,7 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
 			sbi->s_mount_state = le16_to_cpu(es->s_state);
 			if ((err = ext4_group_extend(sb, es, n_blocks_count)))
 				goto restore_opts;
-			if (!ext4_setup_super (sb, es, 0))
+			if (!ext4_setup_super(sb, es, 0))
 				sb->s_flags &= ~MS_RDONLY;
 		}
 	}
@@ -3116,7 +3111,7 @@ restore_opts:
 	return err;
 }
 
-static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
+static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct super_block *sb = dentry->d_sb;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -3354,12 +3349,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
 	}
 	/* Journaling quota? */
 	if (EXT4_SB(sb)->s_qf_names[type]) {
-		/* Quotafile not of fs root? */
+		/* Quotafile not in fs root? */
 		if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
 			printk(KERN_WARNING
 				"EXT4-fs: Quota file not on filesystem root. "
 				"Journaled quota will not work.\n");
- 	}
+	}
 
 	/*
 	 * When we journal data on quota file, we have to flush journal to see
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 93c5fdcdad2..8954208b489 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1512,7 +1512,7 @@ static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header,
 	char *name = entry->e_name;
 	int n;
 
-	for (n=0; n < entry->e_name_len; n++) {
+	for (n = 0; n < entry->e_name_len; n++) {
 		hash = (hash << NAME_HASH_SHIFT) ^
 		       (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
 		       *name++;
-- 
cgit v1.2.3-70-g09d2


From 00b32b7fb671e797bdd2736524a497f18a8df7bf Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 26 Jul 2008 17:33:53 -0400
Subject: ext4: unexport jbd2_journal_update_superblock

Remove the unused EXPORT_SYMBOL(jbd2_journal_update_superblock).

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/journal.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index b26c6d9fe6a..8207a01c4ed 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -68,7 +68,6 @@ EXPORT_SYMBOL(jbd2_journal_set_features);
 EXPORT_SYMBOL(jbd2_journal_create);
 EXPORT_SYMBOL(jbd2_journal_load);
 EXPORT_SYMBOL(jbd2_journal_destroy);
-EXPORT_SYMBOL(jbd2_journal_update_superblock);
 EXPORT_SYMBOL(jbd2_journal_abort);
 EXPORT_SYMBOL(jbd2_journal_errno);
 EXPORT_SYMBOL(jbd2_journal_ack_err);
-- 
cgit v1.2.3-70-g09d2


From f9f6dce01905179d9a209cc1e69fe9047736c112 Mon Sep 17 00:00:00 2001
From: Barry Naujok <bnaujok@sgi.com>
Date: Thu, 17 Apr 2008 16:49:43 +1000
Subject: [XFS] Split xfs_dir2_leafn_lookup_int into its two pieces of
 functionality

SGI-PV: 976035
SGI-Modid: xfs-linux-melb:xfs-kern:30834a

Signed-off-by: Barry Naujok <bnaujok@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/xfs_dir2_node.c | 358 ++++++++++++++++++++++++++++---------------------
 1 file changed, 202 insertions(+), 156 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 8dade711f09..e29b7c63e19 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -387,28 +387,26 @@ xfs_dir2_leafn_lasthash(
 }
 
 /*
- * Look up a leaf entry in a node-format leaf block.
- * If this is an addname then the extrablk in state is a freespace block,
- * otherwise it's a data block.
+ * Look up a leaf entry for space to add a name in a node-format leaf block.
+ * The extrablk in state is a freespace block.
  */
-int
-xfs_dir2_leafn_lookup_int(
+STATIC int
+xfs_dir2_leafn_lookup_for_addname(
 	xfs_dabuf_t		*bp,		/* leaf buffer */
 	xfs_da_args_t		*args,		/* operation arguments */
 	int			*indexp,	/* out: leaf entry index */
 	xfs_da_state_t		*state)		/* state to fill in */
 {
-	xfs_dabuf_t		*curbp;		/* current data/free buffer */
-	xfs_dir2_db_t		curdb;		/* current data block number */
-	xfs_dir2_db_t		curfdb;		/* current free block number */
-	xfs_dir2_data_entry_t	*dep;		/* data block entry */
+	xfs_dabuf_t		*curbp = NULL;	/* current data/free buffer */
+	xfs_dir2_db_t		curdb = -1;	/* current data block number */
+	xfs_dir2_db_t		curfdb = -1;	/* current free block number */
 	xfs_inode_t		*dp;		/* incore directory inode */
 	int			error;		/* error return value */
 	int			fi;		/* free entry index */
-	xfs_dir2_free_t		*free=NULL;	/* free block structure */
+	xfs_dir2_free_t		*free = NULL;	/* free block structure */
 	int			index;		/* leaf entry index */
 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
-	int			length=0;	/* length of new data entry */
+	int			length;		/* length of new data entry */
 	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
 	xfs_mount_t		*mp;		/* filesystem mount point */
 	xfs_dir2_db_t		newdb;		/* new data block number */
@@ -431,33 +429,20 @@ xfs_dir2_leafn_lookup_int(
 	/*
 	 * Do we have a buffer coming in?
 	 */
-	if (state->extravalid)
+	if (state->extravalid) {
+		/* If so, it's a free block buffer, get the block number. */
 		curbp = state->extrablk.bp;
-	else
-		curbp = NULL;
-	/*
-	 * For addname, it's a free block buffer, get the block number.
-	 */
-	if (args->addname) {
-		curfdb = curbp ? state->extrablk.blkno : -1;
-		curdb = -1;
-		length = xfs_dir2_data_entsize(args->namelen);
-		if ((free = (curbp ? curbp->data : NULL)))
-			ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
-	}
-	/*
-	 * For others, it's a data block buffer, get the block number.
-	 */
-	else {
-		curfdb = -1;
-		curdb = curbp ? state->extrablk.blkno : -1;
+		curfdb = state->extrablk.blkno;
+		free = curbp->data;
+		ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
 	}
+	length = xfs_dir2_data_entsize(args->namelen);
 	/*
 	 * Loop over leaf entries with the right hash value.
 	 */
-	for (lep = &leaf->ents[index];
-	     index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval;
-	     lep++, index++) {
+	for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) &&
+				be32_to_cpu(lep->hashval) == args->hashval;
+				lep++, index++) {
 		/*
 		 * Skip stale leaf entries.
 		 */
@@ -471,158 +456,218 @@ xfs_dir2_leafn_lookup_int(
 		 * For addname, we're looking for a place to put the new entry.
 		 * We want to use a data block with an entry of equal
 		 * hash value to ours if there is one with room.
+		 *
+		 * If this block isn't the data block we already have
+		 * in hand, take a look at it.
 		 */
-		if (args->addname) {
+		if (newdb != curdb) {
+			curdb = newdb;
 			/*
-			 * If this block isn't the data block we already have
-			 * in hand, take a look at it.
+			 * Convert the data block to the free block
+			 * holding its freespace information.
 			 */
-			if (newdb != curdb) {
-				curdb = newdb;
-				/*
-				 * Convert the data block to the free block
-				 * holding its freespace information.
-				 */
-				newfdb = xfs_dir2_db_to_fdb(mp, newdb);
-				/*
-				 * If it's not the one we have in hand,
-				 * read it in.
-				 */
-				if (newfdb != curfdb) {
-					/*
-					 * If we had one before, drop it.
-					 */
-					if (curbp)
-						xfs_da_brelse(tp, curbp);
-					/*
-					 * Read the free block.
-					 */
-					if ((error = xfs_da_read_buf(tp, dp,
-							xfs_dir2_db_to_da(mp,
-								newfdb),
-							-1, &curbp,
-							XFS_DATA_FORK))) {
-						return error;
-					}
-					free = curbp->data;
-					ASSERT(be32_to_cpu(free->hdr.magic) ==
-					       XFS_DIR2_FREE_MAGIC);
-					ASSERT((be32_to_cpu(free->hdr.firstdb) %
-						XFS_DIR2_MAX_FREE_BESTS(mp)) ==
-					       0);
-					ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb);
-					ASSERT(curdb <
-					       be32_to_cpu(free->hdr.firstdb) +
-					       be32_to_cpu(free->hdr.nvalid));
-				}
-				/*
-				 * Get the index for our entry.
-				 */
-				fi = xfs_dir2_db_to_fdindex(mp, curdb);
-				/*
-				 * If it has room, return it.
-				 */
-				if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) {
-					XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
-							 XFS_ERRLEVEL_LOW, mp);
-					if (curfdb != newfdb)
-						xfs_da_brelse(tp, curbp);
-					return XFS_ERROR(EFSCORRUPTED);
-				}
-				curfdb = newfdb;
-				if (be16_to_cpu(free->bests[fi]) >= length) {
-					*indexp = index;
-					state->extravalid = 1;
-					state->extrablk.bp = curbp;
-					state->extrablk.blkno = curfdb;
-					state->extrablk.index = fi;
-					state->extrablk.magic =
-						XFS_DIR2_FREE_MAGIC;
-					ASSERT(args->oknoent);
-					return XFS_ERROR(ENOENT);
-				}
-			}
-		}
-		/*
-		 * Not adding a new entry, so we really want to find
-		 * the name given to us.
-		 */
-		else {
+			newfdb = xfs_dir2_db_to_fdb(mp, newdb);
 			/*
-			 * If it's a different data block, go get it.
+			 * If it's not the one we have in hand, read it in.
 			 */
-			if (newdb != curdb) {
+			if (newfdb != curfdb) {
 				/*
-				 * If we had a block before, drop it.
+				 * If we had one before, drop it.
 				 */
 				if (curbp)
 					xfs_da_brelse(tp, curbp);
 				/*
-				 * Read the data block.
+				 * Read the free block.
 				 */
-				if ((error =
-				    xfs_da_read_buf(tp, dp,
-					    xfs_dir2_db_to_da(mp, newdb), -1,
-					    &curbp, XFS_DATA_FORK))) {
+				error = xfs_da_read_buf(tp, dp,
+						xfs_dir2_db_to_da(mp, newfdb),
+						-1, &curbp, XFS_DATA_FORK);
+				if (error)
 					return error;
-				}
-				xfs_dir2_data_check(dp, curbp);
-				curdb = newdb;
+				free = curbp->data;
+				ASSERT(be32_to_cpu(free->hdr.magic) ==
+					XFS_DIR2_FREE_MAGIC);
+				ASSERT((be32_to_cpu(free->hdr.firstdb) %
+					XFS_DIR2_MAX_FREE_BESTS(mp)) == 0);
+				ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb);
+				ASSERT(curdb < be32_to_cpu(free->hdr.firstdb) +
+					be32_to_cpu(free->hdr.nvalid));
 			}
 			/*
-			 * Point to the data entry.
+			 * Get the index for our entry.
 			 */
-			dep = (xfs_dir2_data_entry_t *)
-			      ((char *)curbp->data +
-			       xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
+			fi = xfs_dir2_db_to_fdindex(mp, curdb);
 			/*
-			 * Compare the entry, return it if it matches.
+			 * If it has room, return it.
 			 */
-			if (dep->namelen == args->namelen &&
-			    dep->name[0] == args->name[0] &&
-			    memcmp(dep->name, args->name, args->namelen) == 0) {
-				args->inumber = be64_to_cpu(dep->inumber);
-				*indexp = index;
-				state->extravalid = 1;
-				state->extrablk.bp = curbp;
-				state->extrablk.blkno = curdb;
-				state->extrablk.index =
-					(int)((char *)dep -
-					      (char *)curbp->data);
-				state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
-				return XFS_ERROR(EEXIST);
+			if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) {
+				XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
+							XFS_ERRLEVEL_LOW, mp);
+				if (curfdb != newfdb)
+					xfs_da_brelse(tp, curbp);
+				return XFS_ERROR(EFSCORRUPTED);
 			}
+			curfdb = newfdb;
+			if (be16_to_cpu(free->bests[fi]) >= length)
+				goto out;
 		}
 	}
+	/* Didn't find any space */
+	fi = -1;
+out:
+	ASSERT(args->oknoent);
+	if (curbp) {
+		/* Giving back a free block. */
+		state->extravalid = 1;
+		state->extrablk.bp = curbp;
+		state->extrablk.index = fi;
+		state->extrablk.blkno = curfdb;
+		state->extrablk.magic = XFS_DIR2_FREE_MAGIC;
+	} else {
+		state->extravalid = 0;
+	}
 	/*
-	 * Didn't find a match.
-	 * If we are holding a buffer, give it back in case our caller
-	 * finds it useful.
+	 * Return the index, that will be the insertion point.
 	 */
-	if ((state->extravalid = (curbp != NULL))) {
-		state->extrablk.bp = curbp;
-		state->extrablk.index = -1;
+	*indexp = index;
+	return XFS_ERROR(ENOENT);
+}
+
+/*
+ * Look up a leaf entry in a node-format leaf block.
+ * The extrablk in state a data block.
+ */
+STATIC int
+xfs_dir2_leafn_lookup_for_entry(
+	xfs_dabuf_t		*bp,		/* leaf buffer */
+	xfs_da_args_t		*args,		/* operation arguments */
+	int			*indexp,	/* out: leaf entry index */
+	xfs_da_state_t		*state)		/* state to fill in */
+{
+	xfs_dabuf_t		*curbp = NULL;	/* current data/free buffer */
+	xfs_dir2_db_t		curdb = -1;	/* current data block number */
+	xfs_dir2_data_entry_t	*dep;		/* data block entry */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			error;		/* error return value */
+	int			di;		/* data entry index */
+	int			index;		/* leaf entry index */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	xfs_dir2_db_t		newdb;		/* new data block number */
+	xfs_trans_t		*tp;		/* transaction pointer */
+
+	dp = args->dp;
+	tp = args->trans;
+	mp = dp->i_mount;
+	leaf = bp->data;
+	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+#ifdef __KERNEL__
+	ASSERT(be16_to_cpu(leaf->hdr.count) > 0);
+#endif
+	xfs_dir2_leafn_check(dp, bp);
+	/*
+	 * Look up the hash value in the leaf entries.
+	 */
+	index = xfs_dir2_leaf_search_hash(args, bp);
+	/*
+	 * Do we have a buffer coming in?
+	 */
+	if (state->extravalid) {
+		curbp = state->extrablk.bp;
+		curdb = state->extrablk.blkno;
+	}
+	/*
+	 * Loop over leaf entries with the right hash value.
+	 */
+	for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) &&
+				be32_to_cpu(lep->hashval) == args->hashval;
+				lep++, index++) {
+		/*
+		 * Skip stale leaf entries.
+		 */
+		if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
+			continue;
+		/*
+		 * Pull the data block number from the entry.
+		 */
+		newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
 		/*
-		 * For addname, giving back a free block.
+		 * Not adding a new entry, so we really want to find
+		 * the name given to us.
+		 *
+		 * If it's a different data block, go get it.
 		 */
-		if (args->addname) {
-			state->extrablk.blkno = curfdb;
-			state->extrablk.magic = XFS_DIR2_FREE_MAGIC;
+		if (newdb != curdb) {
+			/*
+			 * If we had a block before, drop it.
+			 */
+			if (curbp)
+				xfs_da_brelse(tp, curbp);
+			/*
+			 * Read the data block.
+			 */
+			error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp,
+					newdb), -1, &curbp, XFS_DATA_FORK);
+			if (error)
+				return error;
+			xfs_dir2_data_check(dp, curbp);
+			curdb = newdb;
 		}
 		/*
-		 * For other callers, giving back a data block.
+		 * Point to the data entry.
 		 */
-		else {
-			state->extrablk.blkno = curdb;
-			state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
+		dep = (xfs_dir2_data_entry_t *)((char *)curbp->data +
+			xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
+		/*
+		 * Compare the entry, return it if it matches.
+		 */
+		if (dep->namelen == args->namelen && memcmp(dep->name,
+					args->name, args->namelen) == 0) {
+			args->inumber = be64_to_cpu(dep->inumber);
+			di = (int)((char *)dep - (char *)curbp->data);
+			error = EEXIST;
+			goto out;
 		}
 	}
+	/* Didn't find a match. */
+	error = ENOENT;
+	di = -1;
+	ASSERT(index == be16_to_cpu(leaf->hdr.count) || args->oknoent);
+out:
+	if (curbp) {
+		/* Giving back a data block. */
+		state->extravalid = 1;
+		state->extrablk.bp = curbp;
+		state->extrablk.index = di;
+		state->extrablk.blkno = curdb;
+		state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
+	} else {
+		state->extravalid = 0;
+	}
 	/*
-	 * Return the final index, that will be the insertion point.
+	 * Return the index, that will be the insertion point.
 	 */
 	*indexp = index;
-	ASSERT(index == be16_to_cpu(leaf->hdr.count) || args->oknoent);
-	return XFS_ERROR(ENOENT);
+	return XFS_ERROR(error);
+}
+
+/*
+ * Look up a leaf entry in a node-format leaf block.
+ * If this is an addname then the extrablk in state is a freespace block,
+ * otherwise it's a data block.
+ */
+int
+xfs_dir2_leafn_lookup_int(
+	xfs_dabuf_t		*bp,		/* leaf buffer */
+	xfs_da_args_t		*args,		/* operation arguments */
+	int			*indexp,	/* out: leaf entry index */
+	xfs_da_state_t		*state)		/* state to fill in */
+{
+	if (args->addname)
+		return xfs_dir2_leafn_lookup_for_addname(bp, args, indexp,
+							state);
+	return xfs_dir2_leafn_lookup_for_entry(bp, args, indexp, state);
 }
 
 /*
@@ -823,9 +868,10 @@ xfs_dir2_leafn_rebalance(
 	 */
 	if (!state->inleaf)
 		blk2->index = blk1->index - be16_to_cpu(leaf1->hdr.count);
-	
-	/* 
-	 * Finally sanity check just to make sure we are not returning a negative index 
+
+	/*
+	 * Finally sanity check just to make sure we are not returning a
+	 * negative index
 	 */
 	if(blk2->index < 0) {
 		state->inleaf = 1;
-- 
cgit v1.2.3-70-g09d2


From 7c12f296500e1157872ef45b3f3bb06b4b73f1c1 Mon Sep 17 00:00:00 2001
From: Tim Shimmin <tes@sgi.com>
Date: Wed, 30 Apr 2008 18:15:28 +1000
Subject: [XFS] Fix up noattr2 so that it will properly update the versionnum
 and features2 fields.

Previously, mounting with noattr2 failed to achieve anything because
although it cleared the attr2 mount flag, it would set it again as soon as
it processed the superblock fields. The fix now has an explicit noattr2
flag and uses it later to fix up the versionnum and features2 fields.

SGI-PV: 980021
SGI-Modid: xfs-linux-melb:xfs-kern:31003a

Signed-off-by: Tim Shimmin <tes@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/linux-2.6/xfs_super.c |  1 +
 fs/xfs/xfs_clnt.h            |  1 +
 fs/xfs/xfs_mount.c           | 12 +++++++++++-
 fs/xfs/xfs_mount.h           |  1 +
 fs/xfs/xfs_sb.h              |  7 +++++++
 fs/xfs/xfs_vfsops.c          |  9 ++++++++-
 6 files changed, 29 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 943381284e2..1b60e46f527 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -314,6 +314,7 @@ xfs_parseargs(
 			args->flags |= XFSMNT_ATTR2;
 		} else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
 			args->flags &= ~XFSMNT_ATTR2;
+			args->flags |= XFSMNT_NOATTR2;
 		} else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
 			args->flags2 |= XFSMNT2_FILESTREAMS;
 		} else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index d5d1e60ee22..d2ce5dd70d8 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -78,6 +78,7 @@ struct xfs_mount_args {
 #define XFSMNT_IOSIZE		0x00002000	/* optimize for I/O size */
 #define XFSMNT_OSYNCISOSYNC	0x00004000	/* o_sync is REALLY o_sync */
 						/* (osyncisdsync is default) */
+#define XFSMNT_NOATTR2		0x00008000	/* turn off ATTR2 EA format */
 #define XFSMNT_32BITINODES	0x00200000	/* restrict inodes to 32
 						 * bits of address space */
 #define XFSMNT_GQUOTA		0x00400000	/* group quota accounting */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index da3988453b7..361c7a755a0 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -994,9 +994,19 @@ xfs_mountfs(
 		 * Re-check for ATTR2 in case it was found in bad_features2
 		 * slot.
 		 */
-		if (xfs_sb_version_hasattr2(&mp->m_sb))
+		if (xfs_sb_version_hasattr2(&mp->m_sb) &&
+		   !(mp->m_flags & XFS_MOUNT_NOATTR2))
 			mp->m_flags |= XFS_MOUNT_ATTR2;
+	}
+
+	if (xfs_sb_version_hasattr2(&mp->m_sb) &&
+	   (mp->m_flags & XFS_MOUNT_NOATTR2)) {
+		xfs_sb_version_removeattr2(&mp->m_sb);
+		update_flags |= XFS_SB_FEATURES2;
 
+		/* update sb_versionnum for the clearing of the morebits */
+		if (!sbp->sb_features2)
+			update_flags |= XFS_SB_VERSIONNUM;
 	}
 
 	/*
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 63e0693a358..4aff0c125ad 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -378,6 +378,7 @@ typedef struct xfs_mount {
 						   counters */
 #define XFS_MOUNT_FILESTREAMS	(1ULL << 24)	/* enable the filestreams
 						   allocator */
+#define XFS_MOUNT_NOATTR2	(1ULL << 25)	/* disable use of attr2 format */
 
 
 /*
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index d904efe7f87..e3204a36a22 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -473,6 +473,13 @@ static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp)
 		((sbp)->sb_features2 | XFS_SB_VERSION2_ATTR2BIT)));
 }
 
+static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp)
+{
+	sbp->sb_features2 &= ~XFS_SB_VERSION2_ATTR2BIT;
+	if (!sbp->sb_features2)
+		sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT;
+}
+
 /*
  * end of superblock version macros
  */
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 30bacd8bb0e..bbc911720d8 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -284,6 +284,8 @@ xfs_start_flags(
 		mp->m_flags |= XFS_MOUNT_DIRSYNC;
 	if (ap->flags & XFSMNT_ATTR2)
 		mp->m_flags |= XFS_MOUNT_ATTR2;
+	if (ap->flags & XFSMNT_NOATTR2)
+		mp->m_flags |= XFS_MOUNT_NOATTR2;
 
 	if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
 		mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
@@ -346,7 +348,12 @@ xfs_finish_flags(
 		}
 	}
 
-	if (xfs_sb_version_hasattr2(&mp->m_sb))
+	/*
+	 * mkfs'ed attr2 will turn on attr2 mount unless explicitly
+	 * told by noattr2 to turn it off
+	 */
+	if (xfs_sb_version_hasattr2(&mp->m_sb) &&
+	    !(ap->flags & XFSMNT_NOATTR2))
 		mp->m_flags |= XFS_MOUNT_ATTR2;
 
 	/*
-- 
cgit v1.2.3-70-g09d2


From f0e2d93c29dc39ffd24cac180a19d48f700c0706 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Mon, 19 May 2008 16:31:57 +1000
Subject: [XFS] Remove unused arg from kmem_free()

kmem_free() function takes (ptr, size) arguments but doesn't actually use
second one.

This patch removes size argument from all callsites.

SGI-PV: 981498
SGI-Modid: xfs-linux-melb:xfs-kern:31050a

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/linux-2.6/kmem.c        |  4 ++--
 fs/xfs/linux-2.6/kmem.h        |  2 +-
 fs/xfs/linux-2.6/xfs_buf.c     |  9 ++++-----
 fs/xfs/linux-2.6/xfs_super.c   |  8 ++++----
 fs/xfs/quota/xfs_dquot_item.c  |  4 ++--
 fs/xfs/quota/xfs_qm.c          | 12 ++++++------
 fs/xfs/quota/xfs_qm_syscalls.c |  8 ++++----
 fs/xfs/support/ktrace.c        |  4 ++--
 fs/xfs/xfs_attr_leaf.c         | 18 +++++++++---------
 fs/xfs/xfs_bmap.c              |  2 +-
 fs/xfs/xfs_buf_item.c          |  8 ++++----
 fs/xfs/xfs_da_btree.c          | 22 +++++++++++-----------
 fs/xfs/xfs_dfrag.c             |  4 ++--
 fs/xfs/xfs_dir2.c              |  6 +++---
 fs/xfs/xfs_dir2_block.c        |  6 +++---
 fs/xfs/xfs_dir2_leaf.c         |  2 +-
 fs/xfs/xfs_dir2_sf.c           |  8 ++++----
 fs/xfs/xfs_error.c             |  5 ++---
 fs/xfs/xfs_extfree_item.c      |  6 ++----
 fs/xfs/xfs_inode.c             | 34 ++++++++++++++++------------------
 fs/xfs/xfs_inode_item.c        |  7 +++----
 fs/xfs/xfs_itable.c            |  6 +++---
 fs/xfs/xfs_log.c               |  4 ++--
 fs/xfs/xfs_log_recover.c       | 21 ++++++++-------------
 fs/xfs/xfs_mount.c             | 18 +++++++-----------
 fs/xfs/xfs_mru_cache.c         |  8 ++++----
 fs/xfs/xfs_rtalloc.c           |  2 +-
 fs/xfs/xfs_trans.c             |  4 ++--
 fs/xfs/xfs_trans_inode.c       |  2 +-
 fs/xfs/xfs_trans_item.c        |  8 ++++----
 fs/xfs/xfs_vfsops.c            |  8 ++++----
 31 files changed, 122 insertions(+), 138 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index 9b1bb17a050..69233a52f0a 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -90,7 +90,7 @@ kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize,
 }
 
 void
-kmem_free(void *ptr, size_t size)
+kmem_free(void *ptr)
 {
 	if (!is_vmalloc_addr(ptr)) {
 		kfree(ptr);
@@ -110,7 +110,7 @@ kmem_realloc(void *ptr, size_t newsize, size_t oldsize,
 		if (new)
 			memcpy(new, ptr,
 				((oldsize < newsize) ? oldsize : newsize));
-		kmem_free(ptr, oldsize);
+		kmem_free(ptr);
 	}
 	return new;
 }
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index a20683cf74d..a3c96207e60 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -58,7 +58,7 @@ extern void *kmem_alloc(size_t, unsigned int __nocast);
 extern void *kmem_zalloc(size_t, unsigned int __nocast);
 extern void *kmem_zalloc_greedy(size_t *, size_t, size_t, unsigned int __nocast);
 extern void *kmem_realloc(void *, size_t, size_t, unsigned int __nocast);
-extern void  kmem_free(void *, size_t);
+extern void  kmem_free(void *);
 
 /*
  * Zone interfaces
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 98e0e86093b..ed03c6d3c9c 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -310,8 +310,7 @@ _xfs_buf_free_pages(
 	xfs_buf_t	*bp)
 {
 	if (bp->b_pages != bp->b_page_array) {
-		kmem_free(bp->b_pages,
-			  bp->b_page_count * sizeof(struct page *));
+		kmem_free(bp->b_pages);
 	}
 }
 
@@ -1398,7 +1397,7 @@ STATIC void
 xfs_free_bufhash(
 	xfs_buftarg_t		*btp)
 {
-	kmem_free(btp->bt_hash, (1<<btp->bt_hashshift) * sizeof(xfs_bufhash_t));
+	kmem_free(btp->bt_hash);
 	btp->bt_hash = NULL;
 }
 
@@ -1444,7 +1443,7 @@ xfs_free_buftarg(
 	xfs_unregister_buftarg(btp);
 	kthread_stop(btp->bt_task);
 
-	kmem_free(btp, sizeof(*btp));
+	kmem_free(btp);
 }
 
 STATIC int
@@ -1575,7 +1574,7 @@ xfs_alloc_buftarg(
 	return btp;
 
 error:
-	kmem_free(btp, sizeof(*btp));
+	kmem_free(btp);
 	return NULL;
 }
 
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 1b60e46f527..5c7144bc310 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1074,7 +1074,7 @@ xfssyncd(
 			list_del(&work->w_list);
 			if (work == &mp->m_sync_work)
 				continue;
-			kmem_free(work, sizeof(struct bhv_vfs_sync_work));
+			kmem_free(work);
 		}
 	}
 
@@ -1222,7 +1222,7 @@ xfs_fs_remount(
 	error = xfs_parseargs(mp, options, args, 1);
 	if (!error)
 		error = xfs_mntupdate(mp, flags, args);
-	kmem_free(args, sizeof(*args));
+	kmem_free(args);
 	return -error;
 }
 
@@ -1369,7 +1369,7 @@ xfs_fs_fill_super(
 
 	xfs_itrace_exit(XFS_I(sb->s_root->d_inode));
 
-	kmem_free(args, sizeof(*args));
+	kmem_free(args);
 	return 0;
 
 fail_vnrele:
@@ -1384,7 +1384,7 @@ fail_unmount:
 	xfs_unmount(mp, 0, NULL);
 
 fail_vfsop:
-	kmem_free(args, sizeof(*args));
+	kmem_free(args);
 	return -error;
 }
 
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 36e05ca7841..08d2fc89e6a 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -576,8 +576,8 @@ xfs_qm_qoffend_logitem_committed(
 	 * xfs_trans_delete_ail() drops the AIL lock.
 	 */
 	xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs);
-	kmem_free(qfs, sizeof(xfs_qoff_logitem_t));
-	kmem_free(qfe, sizeof(xfs_qoff_logitem_t));
+	kmem_free(qfs);
+	kmem_free(qfe);
 	return (xfs_lsn_t)-1;
 }
 
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index d31cce1165c..cde5c508f0e 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -192,8 +192,8 @@ xfs_qm_destroy(
 		xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
 		xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
 	}
-	kmem_free(xqm->qm_usr_dqhtable, hsize * sizeof(xfs_dqhash_t));
-	kmem_free(xqm->qm_grp_dqhtable, hsize * sizeof(xfs_dqhash_t));
+	kmem_free(xqm->qm_usr_dqhtable);
+	kmem_free(xqm->qm_grp_dqhtable);
 	xqm->qm_usr_dqhtable = NULL;
 	xqm->qm_grp_dqhtable = NULL;
 	xqm->qm_dqhashmask = 0;
@@ -201,7 +201,7 @@ xfs_qm_destroy(
 #ifdef DEBUG
 	mutex_destroy(&qcheck_lock);
 #endif
-	kmem_free(xqm, sizeof(xfs_qm_t));
+	kmem_free(xqm);
 }
 
 /*
@@ -1134,7 +1134,7 @@ xfs_qm_init_quotainfo(
 	 * and change the superblock accordingly.
 	 */
 	if ((error = xfs_qm_init_quotainos(mp))) {
-		kmem_free(qinf, sizeof(xfs_quotainfo_t));
+		kmem_free(qinf);
 		mp->m_quotainfo = NULL;
 		return error;
 	}
@@ -1248,7 +1248,7 @@ xfs_qm_destroy_quotainfo(
 		qi->qi_gquotaip = NULL;
 	}
 	mutex_destroy(&qi->qi_quotaofflock);
-	kmem_free(qi, sizeof(xfs_quotainfo_t));
+	kmem_free(qi);
 	mp->m_quotainfo = NULL;
 }
 
@@ -1623,7 +1623,7 @@ xfs_qm_dqiterate(
 			break;
 	} while (nmaps > 0);
 
-	kmem_free(map, XFS_DQITER_MAP_SIZE * sizeof(*map));
+	kmem_free(map);
 
 	return error;
 }
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 768a3b27d2b..413671523cb 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -1449,14 +1449,14 @@ xfs_qm_internalqcheck(
 		for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) {
 			xfs_dqtest_cmp(d);
 			e = (xfs_dqtest_t *) d->HL_NEXT;
-			kmem_free(d, sizeof(xfs_dqtest_t));
+			kmem_free(d);
 			d = e;
 		}
 		h1 = &qmtest_gdqtab[i];
 		for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) {
 			xfs_dqtest_cmp(d);
 			e = (xfs_dqtest_t *) d->HL_NEXT;
-			kmem_free(d, sizeof(xfs_dqtest_t));
+			kmem_free(d);
 			d = e;
 		}
 	}
@@ -1467,8 +1467,8 @@ xfs_qm_internalqcheck(
 	} else {
 		cmn_err(CE_DEBUG, "******** quotacheck successful! ********");
 	}
-	kmem_free(qmtest_udqtab, qmtest_hashmask * sizeof(xfs_dqhash_t));
-	kmem_free(qmtest_gdqtab, qmtest_hashmask * sizeof(xfs_dqhash_t));
+	kmem_free(qmtest_udqtab);
+	kmem_free(qmtest_gdqtab);
 	mutex_unlock(&qcheck_lock);
 	return (qmtest_nfails);
 }
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c
index 0b75d302508..a34ef05489b 100644
--- a/fs/xfs/support/ktrace.c
+++ b/fs/xfs/support/ktrace.c
@@ -89,7 +89,7 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep)
 		if (sleep & KM_SLEEP)
 			panic("ktrace_alloc: NULL memory on KM_SLEEP request!");
 
-		kmem_free(ktp, sizeof(*ktp));
+		kmem_free(ktp);
 
 		return NULL;
 	}
@@ -126,7 +126,7 @@ ktrace_free(ktrace_t *ktp)
 	} else {
 		entries_size = (int)(ktp->kt_nentries * sizeof(ktrace_entry_t));
 
-		kmem_free(ktp->kt_entries, entries_size);
+		kmem_free(ktp->kt_entries);
 	}
 
 	kmem_zone_free(ktrace_hdr_zone, ktp);
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 303d41e4217..a85e9caf015 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -555,7 +555,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
 out:
 	if(bp)
 		xfs_da_buf_done(bp);
-	kmem_free(tmpbuffer, size);
+	kmem_free(tmpbuffer);
 	return(error);
 }
 
@@ -676,7 +676,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
 					     XFS_ERRLEVEL_LOW,
 					     context->dp->i_mount, sfe);
 			xfs_attr_trace_l_c("sf corrupted", context);
-			kmem_free(sbuf, sbsize);
+			kmem_free(sbuf);
 			return XFS_ERROR(EFSCORRUPTED);
 		}
 		if (!xfs_attr_namesp_match_overrides(context->flags, sfe->flags)) {
@@ -717,7 +717,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
 		}
 	}
 	if (i == nsbuf) {
-		kmem_free(sbuf, sbsize);
+		kmem_free(sbuf);
 		xfs_attr_trace_l_c("blk end", context);
 		return(0);
 	}
@@ -747,7 +747,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
 		cursor->offset++;
 	}
 
-	kmem_free(sbuf, sbsize);
+	kmem_free(sbuf);
 	xfs_attr_trace_l_c("sf E-O-F", context);
 	return(0);
 }
@@ -873,7 +873,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
 	error = 0;
 
 out:
-	kmem_free(tmpbuffer, XFS_LBSIZE(dp->i_mount));
+	kmem_free(tmpbuffer);
 	return(error);
 }
 
@@ -1271,7 +1271,7 @@ xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp)
 				be16_to_cpu(hdr_s->count), mp);
 	xfs_da_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1);
 
-	kmem_free(tmpbuffer, XFS_LBSIZE(mp));
+	kmem_free(tmpbuffer);
 }
 
 /*
@@ -1921,7 +1921,7 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 				be16_to_cpu(drop_hdr->count), mp);
 		}
 		memcpy((char *)save_leaf, (char *)tmp_leaf, state->blocksize);
-		kmem_free(tmpbuffer, state->blocksize);
+		kmem_free(tmpbuffer);
 	}
 
 	xfs_da_log_buf(state->args->trans, save_blk->bp, 0,
@@ -2451,7 +2451,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
 						(int)name_rmt->namelen,
 						valuelen,
 						(char*)args.value);
-				kmem_free(args.value, valuelen);
+				kmem_free(args.value);
 			}
 			else {
 				retval = context->put_listent(context,
@@ -2954,7 +2954,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
 			error = tmp;	/* save only the 1st errno */
 	}
 
-	kmem_free((xfs_caddr_t)list, size);
+	kmem_free((xfs_caddr_t)list);
 	return(error);
 }
 
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 53c259f5a5a..a612a90aae4 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5970,7 +5970,7 @@ unlock_and_return:
 	xfs_iunlock_map_shared(ip, lock);
 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
-	kmem_free(map, subnex * sizeof(*map));
+	kmem_free(map);
 
 	return error;
 }
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 53a71c62025..d86ca2c03a7 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -889,9 +889,9 @@ xfs_buf_item_relse(
 	}
 
 #ifdef XFS_TRANS_DEBUG
-	kmem_free(bip->bli_orig, XFS_BUF_COUNT(bp));
+	kmem_free(bip->bli_orig);
 	bip->bli_orig = NULL;
-	kmem_free(bip->bli_logged, XFS_BUF_COUNT(bp) / NBBY);
+	kmem_free(bip->bli_logged);
 	bip->bli_logged = NULL;
 #endif /* XFS_TRANS_DEBUG */
 
@@ -1138,9 +1138,9 @@ xfs_buf_iodone(
 	xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip);
 
 #ifdef XFS_TRANS_DEBUG
-	kmem_free(bip->bli_orig, XFS_BUF_COUNT(bp));
+	kmem_free(bip->bli_orig);
 	bip->bli_orig = NULL;
-	kmem_free(bip->bli_logged, XFS_BUF_COUNT(bp) / NBBY);
+	kmem_free(bip->bli_logged);
 	bip->bli_logged = NULL;
 #endif /* XFS_TRANS_DEBUG */
 
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 021a8f7e563..294780427ab 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1598,7 +1598,7 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
 					args->firstblock, args->total,
 					&mapp[mapi], &nmap, args->flist,
 					NULL))) {
-				kmem_free(mapp, sizeof(*mapp) * count);
+				kmem_free(mapp);
 				return error;
 			}
 			if (nmap < 1)
@@ -1620,11 +1620,11 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
 	    mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
 	    bno + count) {
 		if (mapp != &map)
-			kmem_free(mapp, sizeof(*mapp) * count);
+			kmem_free(mapp);
 		return XFS_ERROR(ENOSPC);
 	}
 	if (mapp != &map)
-		kmem_free(mapp, sizeof(*mapp) * count);
+		kmem_free(mapp);
 	*new_blkno = (xfs_dablk_t)bno;
 	return 0;
 }
@@ -2090,10 +2090,10 @@ xfs_da_do_buf(
 		}
 	}
 	if (bplist) {
-		kmem_free(bplist, sizeof(*bplist) * nmap);
+		kmem_free(bplist);
 	}
 	if (mapp != &map) {
-		kmem_free(mapp, sizeof(*mapp) * nfsb);
+		kmem_free(mapp);
 	}
 	if (bpp)
 		*bpp = rbp;
@@ -2102,11 +2102,11 @@ exit1:
 	if (bplist) {
 		for (i = 0; i < nbplist; i++)
 			xfs_trans_brelse(trans, bplist[i]);
-		kmem_free(bplist, sizeof(*bplist) * nmap);
+		kmem_free(bplist);
 	}
 exit0:
 	if (mapp != &map)
-		kmem_free(mapp, sizeof(*mapp) * nfsb);
+		kmem_free(mapp);
 	if (bpp)
 		*bpp = NULL;
 	return error;
@@ -2315,7 +2315,7 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf)
 	if (dabuf->dirty)
 		xfs_da_buf_clean(dabuf);
 	if (dabuf->nbuf > 1)
-		kmem_free(dabuf->data, BBTOB(dabuf->bbcount));
+		kmem_free(dabuf->data);
 #ifdef XFS_DABUF_DEBUG
 	{
 		spin_lock(&xfs_dabuf_global_lock);
@@ -2332,7 +2332,7 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf)
 	if (dabuf->nbuf == 1)
 		kmem_zone_free(xfs_dabuf_zone, dabuf);
 	else
-		kmem_free(dabuf, XFS_DA_BUF_SIZE(dabuf->nbuf));
+		kmem_free(dabuf);
 }
 
 /*
@@ -2403,7 +2403,7 @@ xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
 	for (i = 0; i < nbuf; i++)
 		xfs_trans_brelse(tp, bplist[i]);
 	if (bplist != &bp)
-		kmem_free(bplist, nbuf * sizeof(*bplist));
+		kmem_free(bplist);
 }
 
 /*
@@ -2429,7 +2429,7 @@ xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
 	for (i = 0; i < nbuf; i++)
 		xfs_trans_binval(tp, bplist[i]);
 	if (bplist != &bp)
-		kmem_free(bplist, nbuf * sizeof(*bplist));
+		kmem_free(bplist);
 }
 
 /*
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 5f3647cb988..2211e885ef2 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -116,7 +116,7 @@ xfs_swapext(
  out_put_file:
 	fput(file);
  out_free_sxp:
-	kmem_free(sxp, sizeof(xfs_swapext_t));
+	kmem_free(sxp);
  out:
 	return error;
 }
@@ -381,6 +381,6 @@ xfs_swap_extents(
 		xfs_iunlock(tip, lock_flags);
 	}
 	if (tempifp != NULL)
-		kmem_free(tempifp, sizeof(xfs_ifork_t));
+		kmem_free(tempifp);
 	return error;
 }
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 7cb26529766..0284af1734b 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -493,7 +493,7 @@ xfs_dir2_grow_inode(
 					args->firstblock, args->total,
 					&mapp[mapi], &nmap, args->flist,
 					NULL))) {
-				kmem_free(mapp, sizeof(*mapp) * count);
+				kmem_free(mapp);
 				return error;
 			}
 			if (nmap < 1)
@@ -525,14 +525,14 @@ xfs_dir2_grow_inode(
 	    mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
 	    bno + count) {
 		if (mapp != &map)
-			kmem_free(mapp, sizeof(*mapp) * count);
+			kmem_free(mapp);
 		return XFS_ERROR(ENOSPC);
 	}
 	/*
 	 * Done with the temporary mapping table.
 	 */
 	if (mapp != &map)
-		kmem_free(mapp, sizeof(*mapp) * count);
+		kmem_free(mapp);
 	*dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno);
 	/*
 	 * Update file's size if this is the data space and it grew.
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index fb5a556725b..e8a7aca5fe2 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -1071,7 +1071,7 @@ xfs_dir2_sf_to_block(
 	 */
 	error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno);
 	if (error) {
-		kmem_free(buf, buf_len);
+		kmem_free(buf);
 		return error;
 	}
 	/*
@@ -1079,7 +1079,7 @@ xfs_dir2_sf_to_block(
 	 */
 	error = xfs_dir2_data_init(args, blkno, &bp);
 	if (error) {
-		kmem_free(buf, buf_len);
+		kmem_free(buf);
 		return error;
 	}
 	block = bp->data;
@@ -1198,7 +1198,7 @@ xfs_dir2_sf_to_block(
 			sfep = xfs_dir2_sf_nextentry(sfp, sfep);
 	}
 	/* Done with the temporary buffer */
-	kmem_free(buf, buf_len);
+	kmem_free(buf);
 	/*
 	 * Sort the leaf entries by hash value.
 	 */
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index bc52b803d79..e33433408e4 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -1110,7 +1110,7 @@ xfs_dir2_leaf_getdents(
 		*offset = XFS_DIR2_MAX_DATAPTR;
 	else
 		*offset = xfs_dir2_byte_to_dataptr(mp, curoff);
-	kmem_free(map, map_size * sizeof(*map));
+	kmem_free(map);
 	if (bp)
 		xfs_da_brelse(NULL, bp);
 	return error;
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index 919d275a1ce..ca33bc62edc 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -255,7 +255,7 @@ xfs_dir2_block_to_sf(
 	xfs_dir2_sf_check(args);
 out:
 	xfs_trans_log_inode(args->trans, dp, logflags);
-	kmem_free(block, mp->m_dirblksize);
+	kmem_free(block);
 	return error;
 }
 
@@ -512,7 +512,7 @@ xfs_dir2_sf_addname_hard(
 		sfep = xfs_dir2_sf_nextentry(sfp, sfep);
 		memcpy(sfep, oldsfep, old_isize - nbytes);
 	}
-	kmem_free(buf, old_isize);
+	kmem_free(buf);
 	dp->i_d.di_size = new_isize;
 	xfs_dir2_sf_check(args);
 }
@@ -1174,7 +1174,7 @@ xfs_dir2_sf_toino4(
 	/*
 	 * Clean up the inode.
 	 */
-	kmem_free(buf, oldsize);
+	kmem_free(buf);
 	dp->i_d.di_size = newsize;
 	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
 }
@@ -1251,7 +1251,7 @@ xfs_dir2_sf_toino8(
 	/*
 	 * Clean up the inode.
 	 */
-	kmem_free(buf, oldsize);
+	kmem_free(buf);
 	dp->i_d.di_size = newsize;
 	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
 }
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 05e5365d3c3..7380a00644c 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -150,8 +150,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud)
 				xfs_etest[i]);
 			xfs_etest[i] = 0;
 			xfs_etest_fsid[i] = 0LL;
-			kmem_free(xfs_etest_fsname[i],
-				  strlen(xfs_etest_fsname[i]) + 1);
+			kmem_free(xfs_etest_fsname[i]);
 			xfs_etest_fsname[i] = NULL;
 		}
 	}
@@ -175,7 +174,7 @@ xfs_fs_vcmn_err(int level, xfs_mount_t *mp, char *fmt, va_list ap)
 		newfmt = kmem_alloc(len, KM_SLEEP);
 		sprintf(newfmt, "Filesystem \"%s\": %s", mp->m_fsname, fmt);
 		icmn_err(level, newfmt, ap);
-		kmem_free(newfmt, len);
+		kmem_free(newfmt);
 	} else {
 		icmn_err(level, fmt, ap);
 	}
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 132bd07b9bb..8aa28f751b2 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -41,8 +41,7 @@ xfs_efi_item_free(xfs_efi_log_item_t *efip)
 	int nexts = efip->efi_format.efi_nextents;
 
 	if (nexts > XFS_EFI_MAX_FAST_EXTENTS) {
-		kmem_free(efip, sizeof(xfs_efi_log_item_t) +
-				(nexts - 1) * sizeof(xfs_extent_t));
+		kmem_free(efip);
 	} else {
 		kmem_zone_free(xfs_efi_zone, efip);
 	}
@@ -374,8 +373,7 @@ xfs_efd_item_free(xfs_efd_log_item_t *efdp)
 	int nexts = efdp->efd_format.efd_nextents;
 
 	if (nexts > XFS_EFD_MAX_FAST_EXTENTS) {
-		kmem_free(efdp, sizeof(xfs_efd_log_item_t) +
-				(nexts - 1) * sizeof(xfs_extent_t));
+		kmem_free(efdp);
 	} else {
 		kmem_zone_free(xfs_efd_zone, efdp);
 	}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index e569bf5d6cf..4b21490334b 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2258,7 +2258,7 @@ xfs_ifree_cluster(
 		xfs_trans_binval(tp, bp);
 	}
 
-	kmem_free(ip_found, ninodes * sizeof(xfs_inode_t *));
+	kmem_free(ip_found);
 	xfs_put_perag(mp, pag);
 }
 
@@ -2470,7 +2470,7 @@ xfs_iroot_realloc(
 						     (int)new_size);
 		memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
 	}
-	kmem_free(ifp->if_broot, ifp->if_broot_bytes);
+	kmem_free(ifp->if_broot);
 	ifp->if_broot = new_broot;
 	ifp->if_broot_bytes = (int)new_size;
 	ASSERT(ifp->if_broot_bytes <=
@@ -2514,7 +2514,7 @@ xfs_idata_realloc(
 
 	if (new_size == 0) {
 		if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
-			kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes);
+			kmem_free(ifp->if_u1.if_data);
 		}
 		ifp->if_u1.if_data = NULL;
 		real_size = 0;
@@ -2529,7 +2529,7 @@ xfs_idata_realloc(
 			ASSERT(ifp->if_real_bytes != 0);
 			memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
 			      new_size);
-			kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes);
+			kmem_free(ifp->if_u1.if_data);
 			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
 		}
 		real_size = 0;
@@ -2636,7 +2636,7 @@ xfs_idestroy_fork(
 
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	if (ifp->if_broot != NULL) {
-		kmem_free(ifp->if_broot, ifp->if_broot_bytes);
+		kmem_free(ifp->if_broot);
 		ifp->if_broot = NULL;
 	}
 
@@ -2650,7 +2650,7 @@ xfs_idestroy_fork(
 		if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
 		    (ifp->if_u1.if_data != NULL)) {
 			ASSERT(ifp->if_real_bytes != 0);
-			kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes);
+			kmem_free(ifp->if_u1.if_data);
 			ifp->if_u1.if_data = NULL;
 			ifp->if_real_bytes = 0;
 		}
@@ -3058,7 +3058,7 @@ xfs_iflush_cluster(
 
 out_free:
 	read_unlock(&pag->pag_ici_lock);
-	kmem_free(ilist, ilist_size);
+	kmem_free(ilist);
 	return 0;
 
 
@@ -3102,7 +3102,7 @@ cluster_corrupt_out:
 	 * Unlocks the flush lock
 	 */
 	xfs_iflush_abort(iq);
-	kmem_free(ilist, ilist_size);
+	kmem_free(ilist);
 	return XFS_ERROR(EFSCORRUPTED);
 }
 
@@ -3836,7 +3836,7 @@ xfs_iext_add_indirect_multi(
 			erp = xfs_iext_irec_new(ifp, erp_idx);
 		}
 		memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
-		kmem_free(nex2_ep, byte_diff);
+		kmem_free(nex2_ep);
 		erp->er_extcount += nex2;
 		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
 	}
@@ -4112,7 +4112,7 @@ xfs_iext_direct_to_inline(
 	 */
 	memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
 		nextents * sizeof(xfs_bmbt_rec_t));
-	kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes);
+	kmem_free(ifp->if_u1.if_extents);
 	ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
 	ifp->if_real_bytes = 0;
 }
@@ -4186,7 +4186,7 @@ xfs_iext_indirect_to_direct(
 	ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
 
 	ep = ifp->if_u1.if_ext_irec->er_extbuf;
-	kmem_free(ifp->if_u1.if_ext_irec, sizeof(xfs_ext_irec_t));
+	kmem_free(ifp->if_u1.if_ext_irec);
 	ifp->if_flags &= ~XFS_IFEXTIREC;
 	ifp->if_u1.if_extents = ep;
 	ifp->if_bytes = size;
@@ -4212,7 +4212,7 @@ xfs_iext_destroy(
 		}
 		ifp->if_flags &= ~XFS_IFEXTIREC;
 	} else if (ifp->if_real_bytes) {
-		kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes);
+		kmem_free(ifp->if_u1.if_extents);
 	} else if (ifp->if_bytes) {
 		memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
 			sizeof(xfs_bmbt_rec_t));
@@ -4483,7 +4483,7 @@ xfs_iext_irec_remove(
 	if (erp->er_extbuf) {
 		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
 			-erp->er_extcount);
-		kmem_free(erp->er_extbuf, XFS_IEXT_BUFSZ);
+		kmem_free(erp->er_extbuf);
 	}
 	/* Compact extent records */
 	erp = ifp->if_u1.if_ext_irec;
@@ -4501,8 +4501,7 @@ xfs_iext_irec_remove(
 		xfs_iext_realloc_indirect(ifp,
 			nlists * sizeof(xfs_ext_irec_t));
 	} else {
-		kmem_free(ifp->if_u1.if_ext_irec,
-			sizeof(xfs_ext_irec_t));
+		kmem_free(ifp->if_u1.if_ext_irec);
 	}
 	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
 }
@@ -4571,7 +4570,7 @@ xfs_iext_irec_compact_pages(
 			 * so er_extoffs don't get modified in
 			 * xfs_iext_irec_remove.
 			 */
-			kmem_free(erp_next->er_extbuf, XFS_IEXT_BUFSZ);
+			kmem_free(erp_next->er_extbuf);
 			erp_next->er_extbuf = NULL;
 			xfs_iext_irec_remove(ifp, erp_idx + 1);
 			nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
@@ -4614,8 +4613,7 @@ xfs_iext_irec_compact_full(
 			 * so er_extoffs don't get modified in
 			 * xfs_iext_irec_remove.
 			 */
-			kmem_free(erp_next->er_extbuf,
-				erp_next->er_extcount * sizeof(xfs_bmbt_rec_t));
+			kmem_free(erp_next->er_extbuf);
 			erp_next->er_extbuf = NULL;
 			xfs_iext_irec_remove(ifp, erp_idx + 1);
 			erp = &ifp->if_u1.if_ext_irec[erp_idx];
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 167b33f1577..0eee08a32c2 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -686,7 +686,7 @@ xfs_inode_item_unlock(
 		ASSERT(ip->i_d.di_nextents > 0);
 		ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_DEXT);
 		ASSERT(ip->i_df.if_bytes > 0);
-		kmem_free(iip->ili_extents_buf, ip->i_df.if_bytes);
+		kmem_free(iip->ili_extents_buf);
 		iip->ili_extents_buf = NULL;
 	}
 	if (iip->ili_aextents_buf != NULL) {
@@ -694,7 +694,7 @@ xfs_inode_item_unlock(
 		ASSERT(ip->i_d.di_anextents > 0);
 		ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_AEXT);
 		ASSERT(ip->i_afp->if_bytes > 0);
-		kmem_free(iip->ili_aextents_buf, ip->i_afp->if_bytes);
+		kmem_free(iip->ili_aextents_buf);
 		iip->ili_aextents_buf = NULL;
 	}
 
@@ -957,8 +957,7 @@ xfs_inode_item_destroy(
 {
 #ifdef XFS_TRANS_DEBUG
 	if (ip->i_itemp->ili_root_size != 0) {
-		kmem_free(ip->i_itemp->ili_orig_root,
-			  ip->i_itemp->ili_root_size);
+		kmem_free(ip->i_itemp->ili_orig_root);
 	}
 #endif
 	kmem_zone_free(xfs_ili_zone, ip->i_itemp);
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 419de15aeb4..9a3ef9dcaeb 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -257,7 +257,7 @@ xfs_bulkstat_one(
 		*ubused = error;
 
  out_free:
-	kmem_free(buf, sizeof(*buf));
+	kmem_free(buf);
 	return error;
 }
 
@@ -708,7 +708,7 @@ xfs_bulkstat(
 	/*
 	 * Done, we're either out of filesystem or space to put the data.
 	 */
-	kmem_free(irbuf, irbsize);
+	kmem_free(irbuf);
 	*ubcountp = ubelem;
 	/*
 	 * Found some inodes, return them now and return the error next time.
@@ -914,7 +914,7 @@ xfs_inumbers(
 		}
 		*lastino = XFS_AGINO_TO_INO(mp, agno, agino);
 	}
-	kmem_free(buffer, bcount * sizeof(*buffer));
+	kmem_free(buffer);
 	if (cur)
 		xfs_btree_del_cursor(cur, (error ? XFS_BTREE_ERROR :
 					   XFS_BTREE_NOERROR));
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index ad3d26ddfe3..16a01abe249 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1570,7 +1570,7 @@ xlog_dealloc_log(xlog_t *log)
 		}
 #endif
 		next_iclog = iclog->ic_next;
-		kmem_free(iclog, sizeof(xlog_in_core_t));
+		kmem_free(iclog);
 		iclog = next_iclog;
 	}
 	freesema(&log->l_flushsema);
@@ -1587,7 +1587,7 @@ xlog_dealloc_log(xlog_t *log)
 	}
 #endif
 	log->l_mp->m_log = NULL;
-	kmem_free(log, sizeof(xlog_t));
+	kmem_free(log);
 }	/* xlog_dealloc_log */
 
 /*
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index e65ab4af095..9eb722ec744 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1715,8 +1715,7 @@ xlog_check_buffer_cancelled(
 					} else {
 						prevp->bc_next = bcp->bc_next;
 					}
-					kmem_free(bcp,
-						  sizeof(xfs_buf_cancel_t));
+					kmem_free(bcp);
 				}
 			}
 			return 1;
@@ -2519,7 +2518,7 @@ write_inode_buffer:
 
 error:
 	if (need_free)
-		kmem_free(in_f, sizeof(*in_f));
+		kmem_free(in_f);
 	return XFS_ERROR(error);
 }
 
@@ -2830,16 +2829,14 @@ xlog_recover_free_trans(
 		item = item->ri_next;
 		 /* Free the regions in the item. */
 		for (i = 0; i < free_item->ri_cnt; i++) {
-			kmem_free(free_item->ri_buf[i].i_addr,
-				  free_item->ri_buf[i].i_len);
+			kmem_free(free_item->ri_buf[i].i_addr);
 		}
 		/* Free the item itself */
-		kmem_free(free_item->ri_buf,
-			  (free_item->ri_total * sizeof(xfs_log_iovec_t)));
-		kmem_free(free_item, sizeof(xlog_recover_item_t));
+		kmem_free(free_item->ri_buf);
+		kmem_free(free_item);
 	} while (first_item != item);
 	/* Free the transaction recover structure */
-	kmem_free(trans, sizeof(xlog_recover_t));
+	kmem_free(trans);
 }
 
 STATIC int
@@ -3786,8 +3783,7 @@ xlog_do_log_recovery(
 	error = xlog_do_recovery_pass(log, head_blk, tail_blk,
 				      XLOG_RECOVER_PASS1);
 	if (error != 0) {
-		kmem_free(log->l_buf_cancel_table,
-			  XLOG_BC_TABLE_SIZE * sizeof(xfs_buf_cancel_t*));
+		kmem_free(log->l_buf_cancel_table);
 		log->l_buf_cancel_table = NULL;
 		return error;
 	}
@@ -3806,8 +3802,7 @@ xlog_do_log_recovery(
 	}
 #endif	/* DEBUG */
 
-	kmem_free(log->l_buf_cancel_table,
-		  XLOG_BC_TABLE_SIZE * sizeof(xfs_buf_cancel_t*));
+	kmem_free(log->l_buf_cancel_table);
 	log->l_buf_cancel_table = NULL;
 
 	return error;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 361c7a755a0..c63f410ccfa 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -161,11 +161,8 @@ xfs_mount_free(
 
 		for (agno = 0; agno < mp->m_maxagi; agno++)
 			if (mp->m_perag[agno].pagb_list)
-				kmem_free(mp->m_perag[agno].pagb_list,
-						sizeof(xfs_perag_busy_t) *
-							XFS_PAGB_NUM_SLOTS);
-		kmem_free(mp->m_perag,
-			  sizeof(xfs_perag_t) * mp->m_sb.sb_agcount);
+				kmem_free(mp->m_perag[agno].pagb_list);
+		kmem_free(mp->m_perag);
 	}
 
 	spinlock_destroy(&mp->m_ail_lock);
@@ -176,11 +173,11 @@ xfs_mount_free(
 		XFS_QM_DONE(mp);
 
 	if (mp->m_fsname != NULL)
-		kmem_free(mp->m_fsname, mp->m_fsname_len);
+		kmem_free(mp->m_fsname);
 	if (mp->m_rtname != NULL)
-		kmem_free(mp->m_rtname, strlen(mp->m_rtname) + 1);
+		kmem_free(mp->m_rtname);
 	if (mp->m_logname != NULL)
-		kmem_free(mp->m_logname, strlen(mp->m_logname) + 1);
+		kmem_free(mp->m_logname);
 
 	xfs_icsb_destroy_counters(mp);
 }
@@ -1265,9 +1262,8 @@ xfs_mountfs(
  error2:
 	for (agno = 0; agno < sbp->sb_agcount; agno++)
 		if (mp->m_perag[agno].pagb_list)
-			kmem_free(mp->m_perag[agno].pagb_list,
-			  sizeof(xfs_perag_busy_t) * XFS_PAGB_NUM_SLOTS);
-	kmem_free(mp->m_perag, sbp->sb_agcount * sizeof(xfs_perag_t));
+			kmem_free(mp->m_perag[agno].pagb_list);
+	kmem_free(mp->m_perag);
 	mp->m_perag = NULL;
 	/* FALLTHROUGH */
  error1:
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index a0b2c0a2589..26d14a1e0e1 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -382,9 +382,9 @@ xfs_mru_cache_create(
 
 exit:
 	if (err && mru && mru->lists)
-		kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists));
+		kmem_free(mru->lists);
 	if (err && mru)
-		kmem_free(mru, sizeof(*mru));
+		kmem_free(mru);
 
 	return err;
 }
@@ -424,8 +424,8 @@ xfs_mru_cache_destroy(
 
 	xfs_mru_cache_flush(mru);
 
-	kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists));
-	kmem_free(mru, sizeof(*mru));
+	kmem_free(mru->lists);
+	kmem_free(mru);
 }
 
 /*
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index a0dc6e5bc5b..bf87a591350 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -2062,7 +2062,7 @@ xfs_growfs_rt(
 	/*
 	 * Free the fake mp structure.
 	 */
-	kmem_free(nmp, sizeof(*nmp));
+	kmem_free(nmp);
 
 	return error;
 }
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 140386434aa..e4ebddd3c50 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -889,7 +889,7 @@ shut_us_down:
 
 	tp->t_commit_lsn = commit_lsn;
 	if (nvec > XFS_TRANS_LOGVEC_COUNT) {
-		kmem_free(log_vector, nvec * sizeof(xfs_log_iovec_t));
+		kmem_free(log_vector);
 	}
 
 	/*
@@ -1265,7 +1265,7 @@ xfs_trans_committed(
 		ASSERT(!XFS_LIC_ARE_ALL_FREE(licp));
 		xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag);
 		next_licp = licp->lic_next;
-		kmem_free(licp, sizeof(xfs_log_item_chunk_t));
+		kmem_free(licp);
 		licp = next_licp;
 	}
 
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 4c70bf5e998..2a1c0f071f9 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -291,7 +291,7 @@ xfs_trans_inode_broot_debug(
 	iip = ip->i_itemp;
 	if (iip->ili_root_size != 0) {
 		ASSERT(iip->ili_orig_root != NULL);
-		kmem_free(iip->ili_orig_root, iip->ili_root_size);
+		kmem_free(iip->ili_orig_root);
 		iip->ili_root_size = 0;
 		iip->ili_orig_root = NULL;
 	}
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c
index 66a09f0d894..db5c8359552 100644
--- a/fs/xfs/xfs_trans_item.c
+++ b/fs/xfs/xfs_trans_item.c
@@ -161,7 +161,7 @@ xfs_trans_free_item(xfs_trans_t	*tp, xfs_log_item_desc_t *lidp)
 			licpp = &((*licpp)->lic_next);
 		}
 		*licpp = licp->lic_next;
-		kmem_free(licp, sizeof(xfs_log_item_chunk_t));
+		kmem_free(licp);
 		tp->t_items_free -= XFS_LIC_NUM_SLOTS;
 	}
 }
@@ -314,7 +314,7 @@ xfs_trans_free_items(
 		ASSERT(!XFS_LIC_ARE_ALL_FREE(licp));
 		(void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN);
 		next_licp = licp->lic_next;
-		kmem_free(licp, sizeof(xfs_log_item_chunk_t));
+		kmem_free(licp);
 		licp = next_licp;
 	}
 
@@ -363,7 +363,7 @@ xfs_trans_unlock_items(xfs_trans_t *tp, xfs_lsn_t commit_lsn)
 		next_licp = licp->lic_next;
 		if (XFS_LIC_ARE_ALL_FREE(licp)) {
 			*licpp = next_licp;
-			kmem_free(licp, sizeof(xfs_log_item_chunk_t));
+			kmem_free(licp);
 			freed -= XFS_LIC_NUM_SLOTS;
 		} else {
 			licpp = &(licp->lic_next);
@@ -530,7 +530,7 @@ xfs_trans_free_busy(xfs_trans_t *tp)
 	lbcp = tp->t_busy.lbc_next;
 	while (lbcp != NULL) {
 		lbcq = lbcp->lbc_next;
-		kmem_free(lbcp, sizeof(xfs_log_busy_chunk_t));
+		kmem_free(lbcp);
 		lbcp = lbcq;
 	}
 
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index bbc911720d8..a005cebf504 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -639,7 +639,7 @@ out:
 		xfs_unmountfs(mp, credp);
 		xfs_qmops_put(mp);
 		xfs_dmops_put(mp);
-		kmem_free(mp, sizeof(xfs_mount_t));
+		kmem_free(mp);
 	}
 
 	return XFS_ERROR(error);
@@ -1055,7 +1055,7 @@ xfs_sync_inodes(
 
 		if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) {
 			XFS_MOUNT_IUNLOCK(mp);
-			kmem_free(ipointer, sizeof(xfs_iptr_t));
+			kmem_free(ipointer);
 			return 0;
 		}
 
@@ -1201,7 +1201,7 @@ xfs_sync_inodes(
 			}
 			XFS_MOUNT_IUNLOCK(mp);
 			ASSERT(ipointer_in == B_FALSE);
-			kmem_free(ipointer, sizeof(xfs_iptr_t));
+			kmem_free(ipointer);
 			return XFS_ERROR(error);
 		}
 
@@ -1231,7 +1231,7 @@ xfs_sync_inodes(
 
 	ASSERT(ipointer_in == B_FALSE);
 
-	kmem_free(ipointer, sizeof(xfs_iptr_t));
+	kmem_free(ipointer);
 	return XFS_ERROR(last_error);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 4f0e8a9816e78306bb821018613dbd2513184d8a Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Mon, 19 May 2008 16:34:04 +1000
Subject: [XFS] Remove unused Falgs parameter from xfs_qm_dqpurge()

SGI-PV: 981498
SGI-Modid: xfs-linux-melb:xfs-kern:31056a

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/quota/xfs_dquot.c | 3 +--
 fs/xfs/quota/xfs_dquot.h | 2 +-
 fs/xfs/quota/xfs_qm.c    | 2 +-
 3 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 85df3288efd..fc9f3fb39b7 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -1435,8 +1435,7 @@ xfs_dqlock2(
 /* ARGSUSED */
 int
 xfs_qm_dqpurge(
-	xfs_dquot_t	*dqp,
-	uint		flags)
+	xfs_dquot_t	*dqp)
 {
 	xfs_dqhash_t	*thishash;
 	xfs_mount_t	*mp = dqp->q_mount;
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index 5c371a92e3e..f7393bba4e9 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -164,7 +164,7 @@ extern void		xfs_qm_dqprint(xfs_dquot_t *);
 
 extern void		xfs_qm_dqdestroy(xfs_dquot_t *);
 extern int		xfs_qm_dqflush(xfs_dquot_t *, uint);
-extern int		xfs_qm_dqpurge(xfs_dquot_t *, uint);
+extern int		xfs_qm_dqpurge(xfs_dquot_t *);
 extern void		xfs_qm_dqunpin_wait(xfs_dquot_t *);
 extern int		xfs_qm_dqlock_nowait(xfs_dquot_t *);
 extern int		xfs_qm_dqflock_nowait(xfs_dquot_t *);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index cde5c508f0e..26370a3128f 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -631,7 +631,7 @@ xfs_qm_dqpurge_int(
 		 * freelist in INACTIVE state.
 		 */
 		nextdqp = dqp->MPL_NEXT;
-		nmisses += xfs_qm_dqpurge(dqp, flags);
+		nmisses += xfs_qm_dqpurge(dqp);
 		dqp = nextdqp;
 	}
 	xfs_qm_mplist_unlock(mp);
-- 
cgit v1.2.3-70-g09d2


From b41759cf11c84ad0d569c0ef200c449ad2cc24e3 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Mon, 19 May 2008 16:34:11 +1000
Subject: [XFS] Remove unused wbc parameter from xfs_start_page_writeback()

SGI-PV: 981498
SGI-Modid: xfs-linux-melb:xfs-kern:31057a

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/linux-2.6/xfs_aops.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index a55c3b26d84..0b211cba190 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -409,7 +409,6 @@ xfs_start_buffer_writeback(
 STATIC void
 xfs_start_page_writeback(
 	struct page		*page,
-	struct writeback_control *wbc,
 	int			clear_dirty,
 	int			buffers)
 {
@@ -858,7 +857,7 @@ xfs_convert_page(
 				done = 1;
 			}
 		}
-		xfs_start_page_writeback(page, wbc, !page_dirty, count);
+		xfs_start_page_writeback(page, !page_dirty, count);
 	}
 
 	return done;
@@ -1130,7 +1129,7 @@ xfs_page_state_convert(
 		SetPageUptodate(page);
 
 	if (startio)
-		xfs_start_page_writeback(page, wbc, 1, count);
+		xfs_start_page_writeback(page, 1, count);
 
 	if (ioend && iomap_valid) {
 		offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >>
-- 
cgit v1.2.3-70-g09d2


From d729eae8933cb3eb8edf1446532c178b66b293a9 Mon Sep 17 00:00:00 2001
From: Michael Nishimoto <miken@agami.com>
Date: Mon, 19 May 2008 16:34:20 +1000
Subject: [XFS] Ensure that 2 GiB xfs logs work properly.

We found this while experimenting with 2GiB xfs logs. The previous code
never assumed that xfs logs would ever get so large.

SGI-PV: 981502
SGI-Modid: xfs-linux-melb:xfs-kern:31058a

Signed-off-by: Michael Nishimoto <miken@agami.com>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/xfs_log.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 16a01abe249..6195cc88010 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -226,20 +226,24 @@ xlog_grant_sub_space(struct log *log, int bytes)
 static void
 xlog_grant_add_space_write(struct log *log, int bytes)
 {
-	log->l_grant_write_bytes += bytes;
-	if (log->l_grant_write_bytes > log->l_logsize) {
-		log->l_grant_write_bytes -= log->l_logsize;
+	int tmp = log->l_logsize - log->l_grant_write_bytes;
+	if (tmp > bytes)
+		log->l_grant_write_bytes += bytes;
+	else {
 		log->l_grant_write_cycle++;
+		log->l_grant_write_bytes = bytes - tmp;
 	}
 }
 
 static void
 xlog_grant_add_space_reserve(struct log *log, int bytes)
 {
-	log->l_grant_reserve_bytes += bytes;
-	if (log->l_grant_reserve_bytes > log->l_logsize) {
-		log->l_grant_reserve_bytes -= log->l_logsize;
+	int tmp = log->l_logsize - log->l_grant_reserve_bytes;
+	if (tmp > bytes)
+		log->l_grant_reserve_bytes += bytes;
+	else {
 		log->l_grant_reserve_cycle++;
+		log->l_grant_reserve_bytes = bytes - tmp;
 	}
 }
 
-- 
cgit v1.2.3-70-g09d2


From d748c62367eb630cc30b91d561a5362f597a0892 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@linux.intel.com>
Date: Mon, 19 May 2008 16:34:27 +1000
Subject: [XFS] Convert l_flushsema to a sv_t

The l_flushsema doesn't exactly have completion semantics, nor mutex
semantics. It's used as a list of tasks which are waiting to be notified
that a flush has completed. It was also being used in a way that was
potentially racy, depending on the semaphore implementation.

By using a sv_t instead of a semaphore we avoid the need for a separate
counter, since we know we just need to wake everything on the queue.

Original waitqueue implementation from Matthew Wilcox. Cleanup and
conversion to sv_t by Christoph Hellwig.

SGI-PV: 981507
SGI-Modid: xfs-linux-melb:xfs-kern:31059a

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/xfs_log.c      | 29 +++++++++++++----------------
 fs/xfs/xfs_log_priv.h |  6 ++----
 2 files changed, 15 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 6195cc88010..91b00a5686c 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1232,7 +1232,7 @@ xlog_alloc_log(xfs_mount_t	*mp,
 
 	spin_lock_init(&log->l_icloglock);
 	spin_lock_init(&log->l_grant_lock);
-	initnsema(&log->l_flushsema, 0, "ic-flush");
+	sv_init(&log->l_flush_wait, 0, "flush_wait");
 
 	/* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
 	ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
@@ -1577,7 +1577,6 @@ xlog_dealloc_log(xlog_t *log)
 		kmem_free(iclog);
 		iclog = next_iclog;
 	}
-	freesema(&log->l_flushsema);
 	spinlock_destroy(&log->l_icloglock);
 	spinlock_destroy(&log->l_grant_lock);
 
@@ -2101,6 +2100,7 @@ xlog_state_do_callback(
 	int		   funcdidcallbacks; /* flag: function did callbacks */
 	int		   repeats;	/* for issuing console warnings if
 					 * looping too many times */
+	int		   wake = 0;
 
 	spin_lock(&log->l_icloglock);
 	first_iclog = iclog = log->l_iclog;
@@ -2282,15 +2282,13 @@ xlog_state_do_callback(
 	}
 #endif
 
-	flushcnt = 0;
-	if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR)) {
-		flushcnt = log->l_flushcnt;
-		log->l_flushcnt = 0;
-	}
+	if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR))
+		wake = 1;
 	spin_unlock(&log->l_icloglock);
-	while (flushcnt--)
-		vsema(&log->l_flushsema);
-}	/* xlog_state_do_callback */
+
+	if (wake)
+		sv_broadcast(&log->l_flush_wait);
+}
 
 
 /*
@@ -2388,16 +2386,15 @@ restart:
 	}
 
 	iclog = log->l_iclog;
-	if (! (iclog->ic_state == XLOG_STATE_ACTIVE)) {
-		log->l_flushcnt++;
-		spin_unlock(&log->l_icloglock);
+	if (iclog->ic_state != XLOG_STATE_ACTIVE) {
 		xlog_trace_iclog(iclog, XLOG_TRACE_SLEEP_FLUSH);
 		XFS_STATS_INC(xs_log_noiclogs);
-		/* Ensure that log writes happen */
-		psema(&log->l_flushsema, PINOD);
+
+		/* Wait for log writes to have flushed */
+		sv_wait(&log->l_flush_wait, 0, &log->l_icloglock, 0);
 		goto restart;
 	}
-	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
+
 	head = &iclog->ic_header;
 
 	atomic_inc(&iclog->ic_refcnt);	/* prevents sync */
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 8952a392b5f..6245913196b 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -423,10 +423,8 @@ typedef struct log {
 	int			l_logBBsize;    /* size of log in BB chunks */
 
 	/* The following block of fields are changed while holding icloglock */
-	sema_t			l_flushsema ____cacheline_aligned_in_smp;
-						/* iclog flushing semaphore */
-	int			l_flushcnt;	/* # of procs waiting on this
-						 * sema */
+	sv_t			l_flush_wait ____cacheline_aligned_in_smp;
+						/* waiting for iclog flush */
 	int			l_covered_state;/* state of "covering disk
 						 * log entries" */
 	xlog_in_core_t		*l_iclog;       /* head log queue	*/
-- 
cgit v1.2.3-70-g09d2


From 911ee3de3d1cb6620e2ac4e0678ff434867e2644 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Mon, 19 May 2008 16:34:34 +1000
Subject: [XFS] Kill attr_capable checks as already done in xattr_permission.

No need for addition permission checks in the xattr handler,
fs/xattr.c:xattr_permission() already does them, and in fact slightly more
strict then what was in the attr_capable handlers.

SGI-PV: 981809
SGI-Modid: xfs-linux-melb:xfs-kern:31164a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/linux-2.6/xfs_iops.c | 13 +------------
 fs/xfs/xfs_attr.c           | 41 -----------------------------------------
 fs/xfs/xfs_attr.h           |  2 --
 3 files changed, 1 insertion(+), 55 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 5fc61c824bb..13b6cfd366c 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -739,15 +739,11 @@ xfs_vn_setxattr(
 	char		*attr = (char *)name;
 	attrnames_t	*namesp;
 	int		xflags = 0;
-	int		error;
 
 	namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
 	if (!namesp)
 		return -EOPNOTSUPP;
 	attr += namesp->attr_namelen;
-	error = namesp->attr_capable(vp, NULL);
-	if (error)
-		return error;
 
 	/* Convert Linux syscall to XFS internal ATTR flags */
 	if (flags & XATTR_CREATE)
@@ -769,15 +765,11 @@ xfs_vn_getxattr(
 	char		*attr = (char *)name;
 	attrnames_t	*namesp;
 	int		xflags = 0;
-	ssize_t		error;
 
 	namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
 	if (!namesp)
 		return -EOPNOTSUPP;
 	attr += namesp->attr_namelen;
-	error = namesp->attr_capable(vp, NULL);
-	if (error)
-		return error;
 
 	/* Convert Linux syscall to XFS internal ATTR flags */
 	if (!size) {
@@ -817,15 +809,12 @@ xfs_vn_removexattr(
 	char		*attr = (char *)name;
 	attrnames_t	*namesp;
 	int		xflags = 0;
-	int		error;
 
 	namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
 	if (!namesp)
 		return -EOPNOTSUPP;
 	attr += namesp->attr_namelen;
-	error = namesp->attr_capable(vp, NULL);
-	if (error)
-		return error;
+
 	xflags |= namesp->attr_flag;
 	return namesp->attr_remove(vp, attr, xflags);
 }
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index df151a85918..86d8619f279 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -2622,43 +2622,6 @@ attr_lookup_namespace(
 	return NULL;
 }
 
-/*
- * Some checks to prevent people abusing EAs to get over quota:
- * - Don't allow modifying user EAs on devices/symlinks;
- * - Don't allow modifying user EAs if sticky bit set;
- */
-STATIC int
-attr_user_capable(
-	bhv_vnode_t	*vp,
-	cred_t		*cred)
-{
-	struct inode	*inode = vn_to_inode(vp);
-
-	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-		return -EPERM;
-	if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) &&
-	    !capable(CAP_SYS_ADMIN))
-		return -EPERM;
-	if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
-	    (current_fsuid(cred) != inode->i_uid) && !capable(CAP_FOWNER))
-		return -EPERM;
-	return 0;
-}
-
-STATIC int
-attr_trusted_capable(
-	bhv_vnode_t	*vp,
-	cred_t		*cred)
-{
-	struct inode	*inode = vn_to_inode(vp);
-
-	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-		return -EPERM;
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-	return 0;
-}
-
 STATIC int
 attr_system_set(
 	bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
@@ -2709,7 +2672,6 @@ struct attrnames attr_system = {
 	.attr_get	= attr_system_get,
 	.attr_set	= attr_system_set,
 	.attr_remove	= attr_system_remove,
-	.attr_capable	= (attrcapable_t)fs_noerr,
 };
 
 struct attrnames attr_trusted = {
@@ -2719,7 +2681,6 @@ struct attrnames attr_trusted = {
 	.attr_get	= attr_generic_get,
 	.attr_set	= attr_generic_set,
 	.attr_remove	= attr_generic_remove,
-	.attr_capable	= attr_trusted_capable,
 };
 
 struct attrnames attr_secure = {
@@ -2729,7 +2690,6 @@ struct attrnames attr_secure = {
 	.attr_get	= attr_generic_get,
 	.attr_set	= attr_generic_set,
 	.attr_remove	= attr_generic_remove,
-	.attr_capable	= (attrcapable_t)fs_noerr,
 };
 
 struct attrnames attr_user = {
@@ -2738,7 +2698,6 @@ struct attrnames attr_user = {
 	.attr_get	= attr_generic_get,
 	.attr_set	= attr_generic_set,
 	.attr_remove	= attr_generic_remove,
-	.attr_capable	= attr_user_capable,
 };
 
 struct attrnames *attr_namespaces[] =
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index 6cfc9384fe3..9b96d171b75 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -42,7 +42,6 @@ typedef int (*attrset_t)(bhv_vnode_t *, char *, void *, size_t, int);
 typedef int (*attrget_t)(bhv_vnode_t *, char *, void *, size_t, int);
 typedef int (*attrremove_t)(bhv_vnode_t *, char *, int);
 typedef int (*attrexists_t)(bhv_vnode_t *);
-typedef int (*attrcapable_t)(bhv_vnode_t *, struct cred *);
 
 typedef struct attrnames {
 	char *		attr_name;
@@ -52,7 +51,6 @@ typedef struct attrnames {
 	attrset_t	attr_set;
 	attrremove_t	attr_remove;
 	attrexists_t	attr_exists;
-	attrcapable_t	attr_capable;
 } attrnames_t;
 
 #define ATTR_NAMECOUNT	4
-- 
cgit v1.2.3-70-g09d2


From 4b166de0a061e4e89d0741a5d080b141f11e2c9b Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Tue, 20 May 2008 11:30:27 +1000
Subject: [XFS] Update valid fields in xfs_mount_log_sb()

Recent changes to update the version number during mount (attr2 stuff)
failed to change the assert that checked for calid flags being changed on
mount. Clearly this path hasn't been exercised by the test code....

SGI-PV: 981950
SGI-Modid: xfs-linux-melb:xfs-kern:31183a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/xfs_mount.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index c63f410ccfa..b484ca32641 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1934,7 +1934,8 @@ xfs_mount_log_sb(
 	int		error;
 
 	ASSERT(fields & (XFS_SB_UNIT | XFS_SB_WIDTH | XFS_SB_UUID |
-			 XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2));
+			 XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2 |
+			 XFS_SB_VERSIONNUM));
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
 	error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
-- 
cgit v1.2.3-70-g09d2


From fa6adbe08825274a3803abb9aef365f939be7da5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Tue, 20 May 2008 11:30:33 +1000
Subject: [XFS] kill xfs_uuid_unmount

Quite useless wrapper that doesn't help making the code more readable.

SGI-PV: 981498
SGI-Modid: xfs-linux-melb:xfs-kern:31184a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/xfs_mount.c | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index b484ca32641..fca3f8af674 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -47,7 +47,6 @@
 
 STATIC int	xfs_mount_log_sb(xfs_mount_t *, __int64_t);
 STATIC int	xfs_uuid_mount(xfs_mount_t *);
-STATIC void	xfs_uuid_unmount(xfs_mount_t *mp);
 STATIC void	xfs_unmountfs_wait(xfs_mount_t *);
 
 
@@ -1268,7 +1267,7 @@ xfs_mountfs(
 	/* FALLTHROUGH */
  error1:
 	if (uuid_mounted)
-		xfs_uuid_unmount(mp);
+		uuid_table_remove(&mp->m_sb.sb_uuid);
 	xfs_freesb(mp);
 	return error;
 }
@@ -1349,7 +1348,7 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
 
 	xfs_unmountfs_close(mp, cr);
 	if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
-		xfs_uuid_unmount(mp);
+		uuid_table_remove(&mp->m_sb.sb_uuid);
 
 #if defined(DEBUG) || defined(INDUCE_IO_ERROR)
 	xfs_errortag_clearall(mp, 0);
@@ -1910,16 +1909,6 @@ xfs_uuid_mount(
 	return 0;
 }
 
-/*
- * Remove filesystem from the UUID table.
- */
-STATIC void
-xfs_uuid_unmount(
-	xfs_mount_t	*mp)
-{
-	uuid_table_remove(&mp->m_sb.sb_uuid);
-}
-
 /*
  * Used to log changes to the superblock unit and width fields which could
  * be altered by the mount options, as well as any potential sb_features2
-- 
cgit v1.2.3-70-g09d2


From 48b62a1a97f118a5a71ae9222bc6d3481d6b757b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Tue, 20 May 2008 11:30:39 +1000
Subject: [XFS] merge xfs_mntupdate into xfs_fs_remount

xfs_mntupdate already is completely Linux specific due to the VFS flags
passed in, so it might aswell be merged into xfs_fs_remount.

SGI-PV: 981498
SGI-Modid: xfs-linux-melb:xfs-kern:31185a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/linux-2.6/xfs_super.c | 23 +++++++++++++++++++++--
 fs/xfs/xfs_vfsops.c          | 24 ------------------------
 fs/xfs/xfs_vfsops.h          |  2 --
 3 files changed, 21 insertions(+), 28 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 5c7144bc310..a3ecdf44246 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -52,6 +52,7 @@
 #include "xfs_version.h"
 #include "xfs_log_priv.h"
 #include "xfs_trans_priv.h"
+#include "xfs_filestream.h"
 
 #include <linux/namei.h>
 #include <linux/init.h>
@@ -1220,8 +1221,26 @@ xfs_fs_remount(
 	int			error;
 
 	error = xfs_parseargs(mp, options, args, 1);
-	if (!error)
-		error = xfs_mntupdate(mp, flags, args);
+	if (error)
+		goto out_free_args;
+
+	if (!(*flags & MS_RDONLY)) {			/* rw/ro -> rw */
+		if (mp->m_flags & XFS_MOUNT_RDONLY)
+		mp->m_flags &= ~XFS_MOUNT_RDONLY;
+		if (args->flags & XFSMNT_BARRIER) {
+			mp->m_flags |= XFS_MOUNT_BARRIER;
+			xfs_mountfs_check_barriers(mp);
+		} else {
+			mp->m_flags &= ~XFS_MOUNT_BARRIER;
+		}
+	} else if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {	/* rw -> ro */
+		xfs_filestream_flush(mp);
+		xfs_sync(mp, SYNC_DATA_QUIESCE);
+		xfs_attr_quiesce(mp);
+		mp->m_flags |= XFS_MOUNT_RDONLY;
+	}
+
+ out_free_args:
 	kmem_free(args);
 	return -error;
 }
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index a005cebf504..e223aeab68b 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -701,30 +701,6 @@ xfs_attr_quiesce(
 	xfs_unmountfs_writesb(mp);
 }
 
-int
-xfs_mntupdate(
-	struct xfs_mount		*mp,
-	int				*flags,
-	struct xfs_mount_args		*args)
-{
-	if (!(*flags & MS_RDONLY)) {			/* rw/ro -> rw */
-		if (mp->m_flags & XFS_MOUNT_RDONLY)
-			mp->m_flags &= ~XFS_MOUNT_RDONLY;
-		if (args->flags & XFSMNT_BARRIER) {
-			mp->m_flags |= XFS_MOUNT_BARRIER;
-			xfs_mountfs_check_barriers(mp);
-		} else {
-			mp->m_flags &= ~XFS_MOUNT_BARRIER;
-		}
-	} else if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {	/* rw -> ro */
-		xfs_filestream_flush(mp);
-		xfs_sync(mp, SYNC_DATA_QUIESCE);
-		xfs_attr_quiesce(mp);
-		mp->m_flags |= XFS_MOUNT_RDONLY;
-	}
-	return 0;
-}
-
 /*
  * xfs_unmount_flush implements a set of flush operation on special
  * inodes, which are needed as a separate set of operations so that
diff --git a/fs/xfs/xfs_vfsops.h b/fs/xfs/xfs_vfsops.h
index 1688817c55e..995091f1949 100644
--- a/fs/xfs/xfs_vfsops.h
+++ b/fs/xfs/xfs_vfsops.h
@@ -11,8 +11,6 @@ struct xfs_mount_args;
 int xfs_mount(struct xfs_mount *mp, struct xfs_mount_args *args,
 		struct cred *credp);
 int xfs_unmount(struct xfs_mount *mp, int flags, struct cred *credp);
-int xfs_mntupdate(struct xfs_mount *mp, int *flags,
-		struct xfs_mount_args *args);
 int xfs_sync(struct xfs_mount *mp, int flags);
 void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
 		int lnnum);
-- 
cgit v1.2.3-70-g09d2


From 61436febae29085bffc7c291db03cbd709dc68a3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Tue, 20 May 2008 11:30:46 +1000
Subject: [XFS] kill xfs_igrow_start and xfs_igrow_finish

xfs_igrow_start just expands to xfs_zero_eof with two asserts that are
useless in the context of the only caller and some rather confusing
comments.

xfs_igrow_finish is just a few lines of code decorated again with useless
asserts and confusing comments.

Just kill those two and merge them into xfs_setattr.

SGI-PV: 981498
SGI-Modid: xfs-linux-melb:xfs-kern:31186a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/xfs_inode.c    | 61 ---------------------------------------------------
 fs/xfs/xfs_inode.h    |  3 ---
 fs/xfs/xfs_vnodeops.c | 15 ++++++++++---
 3 files changed, 12 insertions(+), 67 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 4b21490334b..199a36ac8e2 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1763,67 +1763,6 @@ xfs_itruncate_finish(
 	return 0;
 }
 
-
-/*
- * xfs_igrow_start
- *
- * Do the first part of growing a file: zero any data in the last
- * block that is beyond the old EOF.  We need to do this before
- * the inode is joined to the transaction to modify the i_size.
- * That way we can drop the inode lock and call into the buffer
- * cache to get the buffer mapping the EOF.
- */
-int
-xfs_igrow_start(
-	xfs_inode_t	*ip,
-	xfs_fsize_t	new_size,
-	cred_t		*credp)
-{
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
-	ASSERT(new_size > ip->i_size);
-
-	/*
-	 * Zero any pages that may have been created by
-	 * xfs_write_file() beyond the end of the file
-	 * and any blocks between the old and new file sizes.
-	 */
-	return xfs_zero_eof(ip, new_size, ip->i_size);
-}
-
-/*
- * xfs_igrow_finish
- *
- * This routine is called to extend the size of a file.
- * The inode must have both the iolock and the ilock locked
- * for update and it must be a part of the current transaction.
- * The xfs_igrow_start() function must have been called previously.
- * If the change_flag is not zero, the inode change timestamp will
- * be updated.
- */
-void
-xfs_igrow_finish(
-	xfs_trans_t	*tp,
-	xfs_inode_t	*ip,
-	xfs_fsize_t	new_size,
-	int		change_flag)
-{
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
-	ASSERT(ip->i_transp == tp);
-	ASSERT(new_size > ip->i_size);
-
-	/*
-	 * Update the file size.  Update the inode change timestamp
-	 * if change_flag set.
-	 */
-	ip->i_d.di_size = new_size;
-	ip->i_size = new_size;
-	if (change_flag)
-		xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
-	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
-}
-
-
 /*
  * This is called when the inode's link count goes to 0.
  * We place the on-disk inode on a list in the AGI.  It
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 0a999fee4f0..17a04b6321e 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -507,9 +507,6 @@ int		xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t);
 int		xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *,
 				     xfs_fsize_t, int, int);
 int		xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
-int		xfs_igrow_start(xfs_inode_t *, xfs_fsize_t, struct cred *);
-void		xfs_igrow_finish(struct xfs_trans *, xfs_inode_t *,
-				 xfs_fsize_t, int);
 
 void		xfs_idestroy_fork(xfs_inode_t *, int);
 void		xfs_idestroy(xfs_inode_t *);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index e475e3717eb..9b8b87fcd4e 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -444,7 +444,13 @@ xfs_setattr(
 		code = 0;
 		if ((vap->va_size > ip->i_size) &&
 		    (flags & ATTR_NOSIZETOK) == 0) {
-			code = xfs_igrow_start(ip, vap->va_size, credp);
+			/*
+			 * Do the first part of growing a file: zero any data
+			 * in the last block that is beyond the old EOF.  We
+			 * need to do this before the inode is joined to the
+			 * transaction to modify the i_size.
+			 */
+			code = xfs_zero_eof(ip, vap->va_size, ip->i_size);
 		}
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 
@@ -512,8 +518,11 @@ xfs_setattr(
 			timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
 
 		if (vap->va_size > ip->i_size) {
-			xfs_igrow_finish(tp, ip, vap->va_size,
-			    !(flags & ATTR_DMI));
+			ip->i_d.di_size = vap->va_size;
+			ip->i_size = vap->va_size;
+			if (!(flags & ATTR_DMI))
+				xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
+			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 		} else if ((vap->va_size <= ip->i_size) ||
 			   ((vap->va_size == 0) && ip->i_d.di_nextents)) {
 			/*
-- 
cgit v1.2.3-70-g09d2


From e48ad3160e5c5f5b952c7a7ed814f6f289a60100 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Tue, 20 May 2008 11:30:52 +1000
Subject: [XFS] merge xfs_unmount into xfs_fs_put_super / xfs_fs_fill_super

xfs_unmount is small and already pretty Linux specific, so merge it into
the callers. The real unmount path is simplified a little by doing a
WARN_ON on the xfs_unmount_flush retval directly instead of propagating
the error back to the caller, and the mout failure case in simplified
significantly by removing the forced shutdown case and all the dmapi
events that shouldn't be sent because the dmapi mount event hasn't been
sent by that time either.

SGI-PV: 981951
SGI-Modid: xfs-linux-melb:xfs-kern:31188a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/linux-2.6/xfs_super.c | 71 ++++++++++++++++++++++++++++++++++--
 fs/xfs/xfs_vfsops.c          | 87 --------------------------------------------
 fs/xfs/xfs_vfsops.h          |  1 -
 3 files changed, 67 insertions(+), 92 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index a3ecdf44246..28132e64395 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1087,14 +1087,61 @@ xfs_fs_put_super(
 	struct super_block	*sb)
 {
 	struct xfs_mount	*mp = XFS_M(sb);
+	struct xfs_inode	*rip = mp->m_rootip;
+	int			unmount_event_flags = 0;
 	int			error;
 
 	kthread_stop(mp->m_sync_task);
 
 	xfs_sync(mp, SYNC_ATTR | SYNC_DELWRI);
-	error = xfs_unmount(mp, 0, NULL);
-	if (error)
-		printk("XFS: unmount got error=%d\n", error);
+
+#ifdef HAVE_DMAPI
+	if (mp->m_flags & XFS_MOUNT_DMAPI) {
+		unmount_event_flags =
+			(mp->m_dmevmask & (1 << DM_EVENT_UNMOUNT)) ?
+				0 : DM_FLAGS_UNWANTED;
+		/*
+		 * Ignore error from dmapi here, first unmount is not allowed
+		 * to fail anyway, and second we wouldn't want to fail a
+		 * unmount because of dmapi.
+		 */
+		XFS_SEND_PREUNMOUNT(mp, rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL,
+				NULL, NULL, 0, 0, unmount_event_flags);
+	}
+#endif
+
+	/*
+	 * Blow away any referenced inode in the filestreams cache.
+	 * This can and will cause log traffic as inodes go inactive
+	 * here.
+	 */
+	xfs_filestream_unmount(mp);
+
+	XFS_bflush(mp->m_ddev_targp);
+	error = xfs_unmount_flush(mp, 0);
+	WARN_ON(error);
+
+	IRELE(rip);
+
+	/*
+	 * If we're forcing a shutdown, typically because of a media error,
+	 * we want to make sure we invalidate dirty pages that belong to
+	 * referenced vnodes as well.
+	 */
+	if (XFS_FORCED_SHUTDOWN(mp)) {
+		error = xfs_sync(mp, SYNC_WAIT | SYNC_CLOSE);
+		ASSERT(error != EFSCORRUPTED);
+	}
+
+	if (mp->m_flags & XFS_MOUNT_DMAPI) {
+		XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0,
+				unmount_event_flags);
+	}
+
+	xfs_unmountfs(mp, NULL);
+	xfs_qmops_put(mp);
+	xfs_dmops_put(mp);
+	kmem_free(mp);
 }
 
 STATIC void
@@ -1400,7 +1447,23 @@ fail_vnrele:
 	}
 
 fail_unmount:
-	xfs_unmount(mp, 0, NULL);
+	/*
+	 * Blow away any referenced inode in the filestreams cache.
+	 * This can and will cause log traffic as inodes go inactive
+	 * here.
+	 */
+	xfs_filestream_unmount(mp);
+
+	XFS_bflush(mp->m_ddev_targp);
+	error = xfs_unmount_flush(mp, 0);
+	WARN_ON(error);
+
+	IRELE(mp->m_rootip);
+
+	xfs_unmountfs(mp, NULL);
+	xfs_qmops_put(mp);
+	xfs_dmops_put(mp);
+	kmem_free(mp);
 
 fail_vfsop:
 	kmem_free(args);
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index e223aeab68b..bc34f90e7ee 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -558,93 +558,6 @@ error0:
 	return error;
 }
 
-int
-xfs_unmount(
-	xfs_mount_t	*mp,
-	int		flags,
-	cred_t		*credp)
-{
-	xfs_inode_t	*rip;
-	bhv_vnode_t	*rvp;
-	int		unmount_event_wanted = 0;
-	int		unmount_event_flags = 0;
-	int		xfs_unmountfs_needed = 0;
-	int		error;
-
-	rip = mp->m_rootip;
-	rvp = XFS_ITOV(rip);
-
-#ifdef HAVE_DMAPI
-	if (mp->m_flags & XFS_MOUNT_DMAPI) {
-		error = XFS_SEND_PREUNMOUNT(mp,
-				rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL,
-				NULL, NULL, 0, 0,
-				(mp->m_dmevmask & (1<<DM_EVENT_PREUNMOUNT))?
-					0:DM_FLAGS_UNWANTED);
-			if (error)
-				return XFS_ERROR(error);
-		unmount_event_wanted = 1;
-		unmount_event_flags = (mp->m_dmevmask & (1<<DM_EVENT_UNMOUNT))?
-					0 : DM_FLAGS_UNWANTED;
-	}
-#endif
-
-	/*
-	 * Blow away any referenced inode in the filestreams cache.
-	 * This can and will cause log traffic as inodes go inactive
-	 * here.
-	 */
-	xfs_filestream_unmount(mp);
-
-	XFS_bflush(mp->m_ddev_targp);
-	error = xfs_unmount_flush(mp, 0);
-	if (error)
-		goto out;
-
-	ASSERT(vn_count(rvp) == 1);
-
-	/*
-	 * Drop the reference count
-	 */
-	IRELE(rip);
-
-	/*
-	 * If we're forcing a shutdown, typically because of a media error,
-	 * we want to make sure we invalidate dirty pages that belong to
-	 * referenced vnodes as well.
-	 */
-	if (XFS_FORCED_SHUTDOWN(mp)) {
-		error = xfs_sync(mp, SYNC_WAIT | SYNC_CLOSE);
-		ASSERT(error != EFSCORRUPTED);
-	}
-	xfs_unmountfs_needed = 1;
-
-out:
-	/*	Send DMAPI event, if required.
-	 *	Then do xfs_unmountfs() if needed.
-	 *	Then return error (or zero).
-	 */
-	if (unmount_event_wanted) {
-		/* Note: mp structure must still exist for
-		 * XFS_SEND_UNMOUNT() call.
-		 */
-		XFS_SEND_UNMOUNT(mp, error == 0 ? rip : NULL,
-			DM_RIGHT_NULL, 0, error, unmount_event_flags);
-	}
-	if (xfs_unmountfs_needed) {
-		/*
-		 * Call common unmount function to flush to disk
-		 * and free the super block buffer & mount structures.
-		 */
-		xfs_unmountfs(mp, credp);
-		xfs_qmops_put(mp);
-		xfs_dmops_put(mp);
-		kmem_free(mp);
-	}
-
-	return XFS_ERROR(error);
-}
-
 STATIC void
 xfs_quiesce_fs(
 	xfs_mount_t		*mp)
diff --git a/fs/xfs/xfs_vfsops.h b/fs/xfs/xfs_vfsops.h
index 995091f1949..de64bb6542d 100644
--- a/fs/xfs/xfs_vfsops.h
+++ b/fs/xfs/xfs_vfsops.h
@@ -10,7 +10,6 @@ struct xfs_mount_args;
 
 int xfs_mount(struct xfs_mount *mp, struct xfs_mount_args *args,
 		struct cred *credp);
-int xfs_unmount(struct xfs_mount *mp, int flags, struct cred *credp);
 int xfs_sync(struct xfs_mount *mp, int flags);
 void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
 		int lnnum);
-- 
cgit v1.2.3-70-g09d2


From f8f15e42b408edce6ca9e9d8bd0d0e2078a39efd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Tue, 20 May 2008 11:30:59 +1000
Subject: [XFS] merge xfs_mount into xfs_fs_fill_super

xfs_mount is already pretty linux-specific so merge it into
xfs_fs_fill_super to allow for a more structured mount code in the next
patches. xfs_start_flags and xfs_finish_flags also move to xfs_super.c.

SGI-PV: 981951
SGI-Modid: xfs-linux-melb:xfs-kern:31189a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/linux-2.6/xfs_super.c | 360 ++++++++++++++++++++++++++++++++++++++++-
 fs/xfs/xfs_vfsops.c          | 369 -------------------------------------------
 fs/xfs/xfs_vfsops.h          |   2 -
 3 files changed, 355 insertions(+), 376 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 28132e64395..4b6ddf88d44 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1366,6 +1366,235 @@ xfs_fs_setxquota(
 				   Q_XSETPQLIM), id, (caddr_t)fdq);
 }
 
+/*
+ * This function fills in xfs_mount_t fields based on mount args.
+ * Note: the superblock has _not_ yet been read in.
+ */
+STATIC int
+xfs_start_flags(
+	struct xfs_mount_args	*ap,
+	struct xfs_mount	*mp)
+{
+	/* Values are in BBs */
+	if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
+		/*
+		 * At this point the superblock has not been read
+		 * in, therefore we do not know the block size.
+		 * Before the mount call ends we will convert
+		 * these to FSBs.
+		 */
+		mp->m_dalign = ap->sunit;
+		mp->m_swidth = ap->swidth;
+	}
+
+	if (ap->logbufs != -1 &&
+	    ap->logbufs != 0 &&
+	    (ap->logbufs < XLOG_MIN_ICLOGS ||
+	     ap->logbufs > XLOG_MAX_ICLOGS)) {
+		cmn_err(CE_WARN,
+			"XFS: invalid logbufs value: %d [not %d-%d]",
+			ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
+		return XFS_ERROR(EINVAL);
+	}
+	mp->m_logbufs = ap->logbufs;
+	if (ap->logbufsize != -1 &&
+	    ap->logbufsize !=  0 &&
+	    (ap->logbufsize < XLOG_MIN_RECORD_BSIZE ||
+	     ap->logbufsize > XLOG_MAX_RECORD_BSIZE ||
+	     !is_power_of_2(ap->logbufsize))) {
+		cmn_err(CE_WARN,
+	"XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
+			ap->logbufsize);
+		return XFS_ERROR(EINVAL);
+	}
+	mp->m_logbsize = ap->logbufsize;
+	mp->m_fsname_len = strlen(ap->fsname) + 1;
+	mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP);
+	strcpy(mp->m_fsname, ap->fsname);
+	if (ap->rtname[0]) {
+		mp->m_rtname = kmem_alloc(strlen(ap->rtname) + 1, KM_SLEEP);
+		strcpy(mp->m_rtname, ap->rtname);
+	}
+	if (ap->logname[0]) {
+		mp->m_logname = kmem_alloc(strlen(ap->logname) + 1, KM_SLEEP);
+		strcpy(mp->m_logname, ap->logname);
+	}
+
+	if (ap->flags & XFSMNT_WSYNC)
+		mp->m_flags |= XFS_MOUNT_WSYNC;
+#if XFS_BIG_INUMS
+	if (ap->flags & XFSMNT_INO64) {
+		mp->m_flags |= XFS_MOUNT_INO64;
+		mp->m_inoadd = XFS_INO64_OFFSET;
+	}
+#endif
+	if (ap->flags & XFSMNT_RETERR)
+		mp->m_flags |= XFS_MOUNT_RETERR;
+	if (ap->flags & XFSMNT_NOALIGN)
+		mp->m_flags |= XFS_MOUNT_NOALIGN;
+	if (ap->flags & XFSMNT_SWALLOC)
+		mp->m_flags |= XFS_MOUNT_SWALLOC;
+	if (ap->flags & XFSMNT_OSYNCISOSYNC)
+		mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
+	if (ap->flags & XFSMNT_32BITINODES)
+		mp->m_flags |= XFS_MOUNT_32BITINODES;
+
+	if (ap->flags & XFSMNT_IOSIZE) {
+		if (ap->iosizelog > XFS_MAX_IO_LOG ||
+		    ap->iosizelog < XFS_MIN_IO_LOG) {
+			cmn_err(CE_WARN,
+		"XFS: invalid log iosize: %d [not %d-%d]",
+				ap->iosizelog, XFS_MIN_IO_LOG,
+				XFS_MAX_IO_LOG);
+			return XFS_ERROR(EINVAL);
+		}
+
+		mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
+		mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
+	}
+
+	if (ap->flags & XFSMNT_IKEEP)
+		mp->m_flags |= XFS_MOUNT_IKEEP;
+	if (ap->flags & XFSMNT_DIRSYNC)
+		mp->m_flags |= XFS_MOUNT_DIRSYNC;
+	if (ap->flags & XFSMNT_ATTR2)
+		mp->m_flags |= XFS_MOUNT_ATTR2;
+	if (ap->flags & XFSMNT_NOATTR2)
+		mp->m_flags |= XFS_MOUNT_NOATTR2;
+
+	if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
+		mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
+
+	/*
+	 * no recovery flag requires a read-only mount
+	 */
+	if (ap->flags & XFSMNT_NORECOVERY) {
+		if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
+			cmn_err(CE_WARN,
+	"XFS: tried to mount a FS read-write without recovery!");
+			return XFS_ERROR(EINVAL);
+		}
+		mp->m_flags |= XFS_MOUNT_NORECOVERY;
+	}
+
+	if (ap->flags & XFSMNT_NOUUID)
+		mp->m_flags |= XFS_MOUNT_NOUUID;
+	if (ap->flags & XFSMNT_BARRIER)
+		mp->m_flags |= XFS_MOUNT_BARRIER;
+	else
+		mp->m_flags &= ~XFS_MOUNT_BARRIER;
+
+	if (ap->flags2 & XFSMNT2_FILESTREAMS)
+		mp->m_flags |= XFS_MOUNT_FILESTREAMS;
+
+	if (ap->flags & XFSMNT_DMAPI)
+		mp->m_flags |= XFS_MOUNT_DMAPI;
+	return 0;
+}
+
+/*
+ * This function fills in xfs_mount_t fields based on mount args.
+ * Note: the superblock _has_ now been read in.
+ */
+STATIC int
+xfs_finish_flags(
+	struct xfs_mount_args	*ap,
+	struct xfs_mount	*mp)
+{
+	int			ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
+
+	/* Fail a mount where the logbuf is smaller then the log stripe */
+	if (xfs_sb_version_haslogv2(&mp->m_sb)) {
+		if ((ap->logbufsize <= 0) &&
+		    (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
+			mp->m_logbsize = mp->m_sb.sb_logsunit;
+		} else if (ap->logbufsize > 0 &&
+			   ap->logbufsize < mp->m_sb.sb_logsunit) {
+			cmn_err(CE_WARN,
+	"XFS: logbuf size must be greater than or equal to log stripe size");
+			return XFS_ERROR(EINVAL);
+		}
+	} else {
+		/* Fail a mount if the logbuf is larger than 32K */
+		if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) {
+			cmn_err(CE_WARN,
+	"XFS: logbuf size for version 1 logs must be 16K or 32K");
+			return XFS_ERROR(EINVAL);
+		}
+	}
+
+	/*
+	 * mkfs'ed attr2 will turn on attr2 mount unless explicitly
+	 * told by noattr2 to turn it off
+	 */
+	if (xfs_sb_version_hasattr2(&mp->m_sb) &&
+	    !(ap->flags & XFSMNT_NOATTR2))
+		mp->m_flags |= XFS_MOUNT_ATTR2;
+
+	/*
+	 * prohibit r/w mounts of read-only filesystems
+	 */
+	if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
+		cmn_err(CE_WARN,
+	"XFS: cannot mount a read-only filesystem as read-write");
+		return XFS_ERROR(EROFS);
+	}
+
+	/*
+	 * check for shared mount.
+	 */
+	if (ap->flags & XFSMNT_SHARED) {
+		if (!xfs_sb_version_hasshared(&mp->m_sb))
+			return XFS_ERROR(EINVAL);
+
+		/*
+		 * For IRIX 6.5, shared mounts must have the shared
+		 * version bit set, have the persistent readonly
+		 * field set, must be version 0 and can only be mounted
+		 * read-only.
+		 */
+		if (!ronly || !(mp->m_sb.sb_flags & XFS_SBF_READONLY) ||
+		     (mp->m_sb.sb_shared_vn != 0))
+			return XFS_ERROR(EINVAL);
+
+		mp->m_flags |= XFS_MOUNT_SHARED;
+
+		/*
+		 * Shared XFS V0 can't deal with DMI.  Return EINVAL.
+		 */
+		if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI))
+			return XFS_ERROR(EINVAL);
+	}
+
+	if (ap->flags & XFSMNT_UQUOTA) {
+		mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
+		if (ap->flags & XFSMNT_UQUOTAENF)
+			mp->m_qflags |= XFS_UQUOTA_ENFD;
+	}
+
+	if (ap->flags & XFSMNT_GQUOTA) {
+		mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
+		if (ap->flags & XFSMNT_GQUOTAENF)
+			mp->m_qflags |= XFS_OQUOTA_ENFD;
+	} else if (ap->flags & XFSMNT_PQUOTA) {
+		mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
+		if (ap->flags & XFSMNT_PQUOTAENF)
+			mp->m_qflags |= XFS_OQUOTA_ENFD;
+	}
+
+	return 0;
+}
+
+/*
+ * The file system configurations are:
+ *	(1) device (partition) with data and internal log
+ *	(2) logical volume with data and log subvolumes.
+ *	(3) logical volume with data, log, and realtime subvolumes.
+ *
+ * We only have to handle opening the log and realtime volumes here if
+ * they are present.  The data subvolume has already been opened by
+ * get_sb_bdev() and is stored in vfsp->vfs_super->s_bdev.
+ */
 STATIC int
 xfs_fs_fill_super(
 	struct super_block	*sb,
@@ -1375,7 +1604,9 @@ xfs_fs_fill_super(
 	struct inode		*root;
 	struct xfs_mount	*mp = NULL;
 	struct xfs_mount_args	*args = xfs_args_allocate(sb, silent);
-	int			error;
+	struct block_device	*ddev = sb->s_bdev;
+	struct block_device	*logdev = NULL, *rtdev = NULL;
+	int			flags = 0, error;
 
 	mp = xfs_mount_init();
 
@@ -1398,10 +1629,114 @@ xfs_fs_fill_super(
 	sb->s_qcop = &xfs_quotactl_operations;
 	sb->s_op = &xfs_super_operations;
 
-	error = xfs_mount(mp, args, NULL);
+	error = xfs_dmops_get(mp, args);
+	if (error)
+		goto fail_vfsop;
+	error = xfs_qmops_get(mp, args);
 	if (error)
 		goto fail_vfsop;
 
+	if (args->flags & XFSMNT_QUIET)
+		flags |= XFS_MFSI_QUIET;
+
+	/*
+	 * Open real time and log devices - order is important.
+	 */
+	if (args->logname[0]) {
+		error = xfs_blkdev_get(mp, args->logname, &logdev);
+		if (error)
+			goto fail_vfsop;
+	}
+	if (args->rtname[0]) {
+		error = xfs_blkdev_get(mp, args->rtname, &rtdev);
+		if (error) {
+			xfs_blkdev_put(logdev);
+			goto fail_vfsop;
+		}
+
+		if (rtdev == ddev || rtdev == logdev) {
+			cmn_err(CE_WARN,
+	"XFS: Cannot mount filesystem with identical rtdev and ddev/logdev.");
+			xfs_blkdev_put(logdev);
+			xfs_blkdev_put(rtdev);
+			error = EINVAL;
+			goto fail_vfsop;
+		}
+	}
+
+	/*
+	 * Setup xfs_mount buffer target pointers
+	 */
+	error = ENOMEM;
+	mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0);
+	if (!mp->m_ddev_targp) {
+		xfs_blkdev_put(logdev);
+		xfs_blkdev_put(rtdev);
+		goto fail_vfsop;
+	}
+	if (rtdev) {
+		mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1);
+		if (!mp->m_rtdev_targp) {
+			xfs_blkdev_put(logdev);
+			xfs_blkdev_put(rtdev);
+			goto error0;
+		}
+	}
+	mp->m_logdev_targp = (logdev && logdev != ddev) ?
+				xfs_alloc_buftarg(logdev, 1) : mp->m_ddev_targp;
+	if (!mp->m_logdev_targp) {
+		xfs_blkdev_put(logdev);
+		xfs_blkdev_put(rtdev);
+		goto error0;
+	}
+
+	/*
+	 * Setup flags based on mount(2) options and then the superblock
+	 */
+	error = xfs_start_flags(args, mp);
+	if (error)
+		goto error1;
+	error = xfs_readsb(mp, flags);
+	if (error)
+		goto error1;
+	error = xfs_finish_flags(args, mp);
+	if (error)
+		goto error2;
+
+	/*
+	 * Setup xfs_mount buffer target pointers based on superblock
+	 */
+	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
+				    mp->m_sb.sb_sectsize);
+	if (!error && logdev && logdev != ddev) {
+		unsigned int	log_sector_size = BBSIZE;
+
+		if (xfs_sb_version_hassector(&mp->m_sb))
+			log_sector_size = mp->m_sb.sb_logsectsize;
+		error = xfs_setsize_buftarg(mp->m_logdev_targp,
+					    mp->m_sb.sb_blocksize,
+					    log_sector_size);
+	}
+	if (!error && rtdev)
+		error = xfs_setsize_buftarg(mp->m_rtdev_targp,
+					    mp->m_sb.sb_blocksize,
+					    mp->m_sb.sb_sectsize);
+	if (error)
+		goto error2;
+
+	if (mp->m_flags & XFS_MOUNT_BARRIER)
+		xfs_mountfs_check_barriers(mp);
+
+	error = xfs_filestream_mount(mp);
+	if (error)
+		goto error2;
+
+	error = xfs_mountfs(mp, flags);
+	if (error)
+		goto error2;
+
+	XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, args->mtpt, args->fsname);
+
 	sb->s_dirt = 1;
 	sb->s_magic = XFS_SB_MAGIC;
 	sb->s_blocksize = mp->m_sb.sb_blocksize;
@@ -1438,7 +1773,22 @@ xfs_fs_fill_super(
 	kmem_free(args);
 	return 0;
 
-fail_vnrele:
+ error2:
+	if (mp->m_sb_bp)
+		xfs_freesb(mp);
+ error1:
+	xfs_binval(mp->m_ddev_targp);
+	if (logdev && logdev != ddev)
+		xfs_binval(mp->m_logdev_targp);
+	if (rtdev)
+		xfs_binval(mp->m_rtdev_targp);
+ error0:
+	xfs_unmountfs_close(mp, NULL);
+	xfs_qmops_put(mp);
+	xfs_dmops_put(mp);
+	goto fail_vfsop;
+
+ fail_vnrele:
 	if (sb->s_root) {
 		dput(sb->s_root);
 		sb->s_root = NULL;
@@ -1446,7 +1796,7 @@ fail_vnrele:
 		iput(root);
 	}
 
-fail_unmount:
+ fail_unmount:
 	/*
 	 * Blow away any referenced inode in the filestreams cache.
 	 * This can and will cause log traffic as inodes go inactive
@@ -1465,7 +1815,7 @@ fail_unmount:
 	xfs_dmops_put(mp);
 	kmem_free(mp);
 
-fail_vfsop:
+ fail_vfsop:
 	kmem_free(args);
 	return -error;
 }
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index bc34f90e7ee..8b5a3376c2f 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -189,375 +189,6 @@ xfs_cleanup(void)
 	kmem_zone_destroy(xfs_log_ticket_zone);
 }
 
-/*
- * xfs_start_flags
- *
- * This function fills in xfs_mount_t fields based on mount args.
- * Note: the superblock has _not_ yet been read in.
- */
-STATIC int
-xfs_start_flags(
-	struct xfs_mount_args	*ap,
-	struct xfs_mount	*mp)
-{
-	/* Values are in BBs */
-	if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
-		/*
-		 * At this point the superblock has not been read
-		 * in, therefore we do not know the block size.
-		 * Before the mount call ends we will convert
-		 * these to FSBs.
-		 */
-		mp->m_dalign = ap->sunit;
-		mp->m_swidth = ap->swidth;
-	}
-
-	if (ap->logbufs != -1 &&
-	    ap->logbufs != 0 &&
-	    (ap->logbufs < XLOG_MIN_ICLOGS ||
-	     ap->logbufs > XLOG_MAX_ICLOGS)) {
-		cmn_err(CE_WARN,
-			"XFS: invalid logbufs value: %d [not %d-%d]",
-			ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
-		return XFS_ERROR(EINVAL);
-	}
-	mp->m_logbufs = ap->logbufs;
-	if (ap->logbufsize != -1 &&
-	    ap->logbufsize !=  0 &&
-	    (ap->logbufsize < XLOG_MIN_RECORD_BSIZE ||
-	     ap->logbufsize > XLOG_MAX_RECORD_BSIZE ||
-	     !is_power_of_2(ap->logbufsize))) {
-		cmn_err(CE_WARN,
-	"XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
-			ap->logbufsize);
-		return XFS_ERROR(EINVAL);
-	}
-	mp->m_logbsize = ap->logbufsize;
-	mp->m_fsname_len = strlen(ap->fsname) + 1;
-	mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP);
-	strcpy(mp->m_fsname, ap->fsname);
-	if (ap->rtname[0]) {
-		mp->m_rtname = kmem_alloc(strlen(ap->rtname) + 1, KM_SLEEP);
-		strcpy(mp->m_rtname, ap->rtname);
-	}
-	if (ap->logname[0]) {
-		mp->m_logname = kmem_alloc(strlen(ap->logname) + 1, KM_SLEEP);
-		strcpy(mp->m_logname, ap->logname);
-	}
-
-	if (ap->flags & XFSMNT_WSYNC)
-		mp->m_flags |= XFS_MOUNT_WSYNC;
-#if XFS_BIG_INUMS
-	if (ap->flags & XFSMNT_INO64) {
-		mp->m_flags |= XFS_MOUNT_INO64;
-		mp->m_inoadd = XFS_INO64_OFFSET;
-	}
-#endif
-	if (ap->flags & XFSMNT_RETERR)
-		mp->m_flags |= XFS_MOUNT_RETERR;
-	if (ap->flags & XFSMNT_NOALIGN)
-		mp->m_flags |= XFS_MOUNT_NOALIGN;
-	if (ap->flags & XFSMNT_SWALLOC)
-		mp->m_flags |= XFS_MOUNT_SWALLOC;
-	if (ap->flags & XFSMNT_OSYNCISOSYNC)
-		mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
-	if (ap->flags & XFSMNT_32BITINODES)
-		mp->m_flags |= XFS_MOUNT_32BITINODES;
-
-	if (ap->flags & XFSMNT_IOSIZE) {
-		if (ap->iosizelog > XFS_MAX_IO_LOG ||
-		    ap->iosizelog < XFS_MIN_IO_LOG) {
-			cmn_err(CE_WARN,
-		"XFS: invalid log iosize: %d [not %d-%d]",
-				ap->iosizelog, XFS_MIN_IO_LOG,
-				XFS_MAX_IO_LOG);
-			return XFS_ERROR(EINVAL);
-		}
-
-		mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
-		mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
-	}
-
-	if (ap->flags & XFSMNT_IKEEP)
-		mp->m_flags |= XFS_MOUNT_IKEEP;
-	if (ap->flags & XFSMNT_DIRSYNC)
-		mp->m_flags |= XFS_MOUNT_DIRSYNC;
-	if (ap->flags & XFSMNT_ATTR2)
-		mp->m_flags |= XFS_MOUNT_ATTR2;
-	if (ap->flags & XFSMNT_NOATTR2)
-		mp->m_flags |= XFS_MOUNT_NOATTR2;
-
-	if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
-		mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
-
-	/*
-	 * no recovery flag requires a read-only mount
-	 */
-	if (ap->flags & XFSMNT_NORECOVERY) {
-		if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
-			cmn_err(CE_WARN,
-	"XFS: tried to mount a FS read-write without recovery!");
-			return XFS_ERROR(EINVAL);
-		}
-		mp->m_flags |= XFS_MOUNT_NORECOVERY;
-	}
-
-	if (ap->flags & XFSMNT_NOUUID)
-		mp->m_flags |= XFS_MOUNT_NOUUID;
-	if (ap->flags & XFSMNT_BARRIER)
-		mp->m_flags |= XFS_MOUNT_BARRIER;
-	else
-		mp->m_flags &= ~XFS_MOUNT_BARRIER;
-
-	if (ap->flags2 & XFSMNT2_FILESTREAMS)
-		mp->m_flags |= XFS_MOUNT_FILESTREAMS;
-
-	if (ap->flags & XFSMNT_DMAPI)
-		mp->m_flags |= XFS_MOUNT_DMAPI;
-	return 0;
-}
-
-/*
- * This function fills in xfs_mount_t fields based on mount args.
- * Note: the superblock _has_ now been read in.
- */
-STATIC int
-xfs_finish_flags(
-	struct xfs_mount_args	*ap,
-	struct xfs_mount	*mp)
-{
-	int			ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
-
-	/* Fail a mount where the logbuf is smaller then the log stripe */
-	if (xfs_sb_version_haslogv2(&mp->m_sb)) {
-		if ((ap->logbufsize <= 0) &&
-		    (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
-			mp->m_logbsize = mp->m_sb.sb_logsunit;
-		} else if (ap->logbufsize > 0 &&
-			   ap->logbufsize < mp->m_sb.sb_logsunit) {
-			cmn_err(CE_WARN,
-	"XFS: logbuf size must be greater than or equal to log stripe size");
-			return XFS_ERROR(EINVAL);
-		}
-	} else {
-		/* Fail a mount if the logbuf is larger than 32K */
-		if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) {
-			cmn_err(CE_WARN,
-	"XFS: logbuf size for version 1 logs must be 16K or 32K");
-			return XFS_ERROR(EINVAL);
-		}
-	}
-
-	/*
-	 * mkfs'ed attr2 will turn on attr2 mount unless explicitly
-	 * told by noattr2 to turn it off
-	 */
-	if (xfs_sb_version_hasattr2(&mp->m_sb) &&
-	    !(ap->flags & XFSMNT_NOATTR2))
-		mp->m_flags |= XFS_MOUNT_ATTR2;
-
-	/*
-	 * prohibit r/w mounts of read-only filesystems
-	 */
-	if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
-		cmn_err(CE_WARN,
-	"XFS: cannot mount a read-only filesystem as read-write");
-		return XFS_ERROR(EROFS);
-	}
-
-	/*
-	 * check for shared mount.
-	 */
-	if (ap->flags & XFSMNT_SHARED) {
-		if (!xfs_sb_version_hasshared(&mp->m_sb))
-			return XFS_ERROR(EINVAL);
-
-		/*
-		 * For IRIX 6.5, shared mounts must have the shared
-		 * version bit set, have the persistent readonly
-		 * field set, must be version 0 and can only be mounted
-		 * read-only.
-		 */
-		if (!ronly || !(mp->m_sb.sb_flags & XFS_SBF_READONLY) ||
-		     (mp->m_sb.sb_shared_vn != 0))
-			return XFS_ERROR(EINVAL);
-
-		mp->m_flags |= XFS_MOUNT_SHARED;
-
-		/*
-		 * Shared XFS V0 can't deal with DMI.  Return EINVAL.
-		 */
-		if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI))
-			return XFS_ERROR(EINVAL);
-	}
-
-	if (ap->flags & XFSMNT_UQUOTA) {
-		mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
-		if (ap->flags & XFSMNT_UQUOTAENF)
-			mp->m_qflags |= XFS_UQUOTA_ENFD;
-	}
-
-	if (ap->flags & XFSMNT_GQUOTA) {
-		mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
-		if (ap->flags & XFSMNT_GQUOTAENF)
-			mp->m_qflags |= XFS_OQUOTA_ENFD;
-	} else if (ap->flags & XFSMNT_PQUOTA) {
-		mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
-		if (ap->flags & XFSMNT_PQUOTAENF)
-			mp->m_qflags |= XFS_OQUOTA_ENFD;
-	}
-
-	return 0;
-}
-
-/*
- * xfs_mount
- *
- * The file system configurations are:
- *	(1) device (partition) with data and internal log
- *	(2) logical volume with data and log subvolumes.
- *	(3) logical volume with data, log, and realtime subvolumes.
- *
- * We only have to handle opening the log and realtime volumes here if
- * they are present.  The data subvolume has already been opened by
- * get_sb_bdev() and is stored in vfsp->vfs_super->s_bdev.
- */
-int
-xfs_mount(
-	struct xfs_mount	*mp,
-	struct xfs_mount_args	*args,
-	cred_t			*credp)
-{
-	struct block_device	*ddev, *logdev, *rtdev;
-	int			flags = 0, error;
-
-	ddev = mp->m_super->s_bdev;
-	logdev = rtdev = NULL;
-
-	error = xfs_dmops_get(mp, args);
-	if (error)
-		return error;
-	error = xfs_qmops_get(mp, args);
-	if (error)
-		return error;
-
-	if (args->flags & XFSMNT_QUIET)
-		flags |= XFS_MFSI_QUIET;
-
-	/*
-	 * Open real time and log devices - order is important.
-	 */
-	if (args->logname[0]) {
-		error = xfs_blkdev_get(mp, args->logname, &logdev);
-		if (error)
-			return error;
-	}
-	if (args->rtname[0]) {
-		error = xfs_blkdev_get(mp, args->rtname, &rtdev);
-		if (error) {
-			xfs_blkdev_put(logdev);
-			return error;
-		}
-
-		if (rtdev == ddev || rtdev == logdev) {
-			cmn_err(CE_WARN,
-	"XFS: Cannot mount filesystem with identical rtdev and ddev/logdev.");
-			xfs_blkdev_put(logdev);
-			xfs_blkdev_put(rtdev);
-			return EINVAL;
-		}
-	}
-
-	/*
-	 * Setup xfs_mount buffer target pointers
-	 */
-	error = ENOMEM;
-	mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0);
-	if (!mp->m_ddev_targp) {
-		xfs_blkdev_put(logdev);
-		xfs_blkdev_put(rtdev);
-		return error;
-	}
-	if (rtdev) {
-		mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1);
-		if (!mp->m_rtdev_targp) {
-			xfs_blkdev_put(logdev);
-			xfs_blkdev_put(rtdev);
-			goto error0;
-		}
-	}
-	mp->m_logdev_targp = (logdev && logdev != ddev) ?
-				xfs_alloc_buftarg(logdev, 1) : mp->m_ddev_targp;
-	if (!mp->m_logdev_targp) {
-		xfs_blkdev_put(logdev);
-		xfs_blkdev_put(rtdev);
-		goto error0;
-	}
-
-	/*
-	 * Setup flags based on mount(2) options and then the superblock
-	 */
-	error = xfs_start_flags(args, mp);
-	if (error)
-		goto error1;
-	error = xfs_readsb(mp, flags);
-	if (error)
-		goto error1;
-	error = xfs_finish_flags(args, mp);
-	if (error)
-		goto error2;
-
-	/*
-	 * Setup xfs_mount buffer target pointers based on superblock
-	 */
-	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
-				    mp->m_sb.sb_sectsize);
-	if (!error && logdev && logdev != ddev) {
-		unsigned int	log_sector_size = BBSIZE;
-
-		if (xfs_sb_version_hassector(&mp->m_sb))
-			log_sector_size = mp->m_sb.sb_logsectsize;
-		error = xfs_setsize_buftarg(mp->m_logdev_targp,
-					    mp->m_sb.sb_blocksize,
-					    log_sector_size);
-	}
-	if (!error && rtdev)
-		error = xfs_setsize_buftarg(mp->m_rtdev_targp,
-					    mp->m_sb.sb_blocksize,
-					    mp->m_sb.sb_sectsize);
-	if (error)
-		goto error2;
-
-	if (mp->m_flags & XFS_MOUNT_BARRIER)
-		xfs_mountfs_check_barriers(mp);
-
-	if ((error = xfs_filestream_mount(mp)))
-		goto error2;
-
-	error = xfs_mountfs(mp, flags);
-	if (error)
-		goto error2;
-
-	XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, args->mtpt, args->fsname);
-
-	return 0;
-
-error2:
-	if (mp->m_sb_bp)
-		xfs_freesb(mp);
-error1:
-	xfs_binval(mp->m_ddev_targp);
-	if (logdev && logdev != ddev)
-		xfs_binval(mp->m_logdev_targp);
-	if (rtdev)
-		xfs_binval(mp->m_rtdev_targp);
-error0:
-	xfs_unmountfs_close(mp, credp);
-	xfs_qmops_put(mp);
-	xfs_dmops_put(mp);
-	return error;
-}
-
 STATIC void
 xfs_quiesce_fs(
 	xfs_mount_t		*mp)
diff --git a/fs/xfs/xfs_vfsops.h b/fs/xfs/xfs_vfsops.h
index de64bb6542d..a74b05087da 100644
--- a/fs/xfs/xfs_vfsops.h
+++ b/fs/xfs/xfs_vfsops.h
@@ -8,8 +8,6 @@ struct kstatfs;
 struct xfs_mount;
 struct xfs_mount_args;
 
-int xfs_mount(struct xfs_mount *mp, struct xfs_mount_args *args,
-		struct cred *credp);
 int xfs_sync(struct xfs_mount *mp, int flags);
 void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
 		int lnnum);
-- 
cgit v1.2.3-70-g09d2


From af15b8953a60d336aade96a2c162abffdba75ec9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Tue, 20 May 2008 11:31:05 +1000
Subject: [XFS] don't call xfs_freesb from xfs_mountfs failure case

Freeing of the superblock is already handled in the caller, and that is
more symmetric with the mount path, too.

SGI-PV: 981951
SGI-Modid: xfs-linux-melb:xfs-kern:31192a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/xfs_mount.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index fca3f8af674..ee5df5fae82 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1268,7 +1268,6 @@ xfs_mountfs(
  error1:
 	if (uuid_mounted)
 		uuid_table_remove(&mp->m_sb.sb_uuid);
-	xfs_freesb(mp);
 	return error;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 19f354d4c3f4c48bf6b2a86227d8e3050e5f7d50 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Tue, 20 May 2008 11:31:13 +1000
Subject: [XFS] sort out opening and closing of the block devices

Currently closing the rt/log block device is done in the wrong spot, and
far too early. So revampt it:

- xfs_blkdev_put moved out of xfs_free_buftarg into the caller so that

it is done after tearing down the buftarg completely.

- call to xfs_unmountfs_close moved from xfs_mountfs into caller so

that it's done after tearing down the filesystem completely.

- xfs_unmountfs_close is renamed to xfs_close_devices and made static

in xfs_super.c

- opening of the block devices is split into a helper xfs_open_devices

that is symetric in use to xfs_close_devices

- xfs_unmountfs can now lose struct cred

- error handling around device opening sanitized in xfs_fs_fill_super

SGI-PV: 981951
SGI-Modid: xfs-linux-melb:xfs-kern:31193a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/linux-2.6/xfs_buf.c   |   5 +-
 fs/xfs/linux-2.6/xfs_buf.h   |   2 +-
 fs/xfs/linux-2.6/xfs_super.c | 192 +++++++++++++++++++++++++++----------------
 fs/xfs/linux-2.6/xfs_super.h |   3 -
 fs/xfs/xfs_mount.c           |  13 +--
 fs/xfs/xfs_mount.h           |   3 +-
 6 files changed, 123 insertions(+), 95 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index ed03c6d3c9c..9cc8f021309 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1427,13 +1427,10 @@ xfs_unregister_buftarg(
 
 void
 xfs_free_buftarg(
-	xfs_buftarg_t		*btp,
-	int			external)
+	xfs_buftarg_t		*btp)
 {
 	xfs_flush_buftarg(btp, 1);
 	xfs_blkdev_issue_flush(btp);
-	if (external)
-		xfs_blkdev_put(btp->bt_bdev);
 	xfs_free_bufhash(btp);
 	iput(btp->bt_mapping->host);
 
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index f948ec7ba9a..29d1d4adc07 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -429,7 +429,7 @@ static inline void xfs_bdwrite(void *mp, xfs_buf_t *bp)
  *	Handling of buftargs.
  */
 extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);
-extern void xfs_free_buftarg(xfs_buftarg_t *, int);
+extern void xfs_free_buftarg(xfs_buftarg_t *);
 extern void xfs_wait_buftarg(xfs_buftarg_t *);
 extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
 extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 4b6ddf88d44..055faa06ca2 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -766,6 +766,103 @@ xfs_blkdev_issue_flush(
 	blkdev_issue_flush(buftarg->bt_bdev, NULL);
 }
 
+STATIC void
+xfs_close_devices(
+	struct xfs_mount	*mp)
+{
+	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
+		xfs_free_buftarg(mp->m_logdev_targp);
+		xfs_blkdev_put(mp->m_logdev_targp->bt_bdev);
+	}
+	if (mp->m_rtdev_targp) {
+		xfs_free_buftarg(mp->m_rtdev_targp);
+		xfs_blkdev_put(mp->m_rtdev_targp->bt_bdev);
+	}
+	xfs_free_buftarg(mp->m_ddev_targp);
+}
+
+/*
+ * The file system configurations are:
+ *	(1) device (partition) with data and internal log
+ *	(2) logical volume with data and log subvolumes.
+ *	(3) logical volume with data, log, and realtime subvolumes.
+ *
+ * We only have to handle opening the log and realtime volumes here if
+ * they are present.  The data subvolume has already been opened by
+ * get_sb_bdev() and is stored in sb->s_bdev.
+ */
+STATIC int
+xfs_open_devices(
+	struct xfs_mount	*mp,
+	struct xfs_mount_args	*args)
+{
+	struct block_device	*ddev = mp->m_super->s_bdev;
+	struct block_device	*logdev = NULL, *rtdev = NULL;
+	int			error;
+
+	/*
+	 * Open real time and log devices - order is important.
+	 */
+	if (args->logname[0]) {
+		error = xfs_blkdev_get(mp, args->logname, &logdev);
+		if (error)
+			goto out;
+	}
+
+	if (args->rtname[0]) {
+		error = xfs_blkdev_get(mp, args->rtname, &rtdev);
+		if (error)
+			goto out_close_logdev;
+
+		if (rtdev == ddev || rtdev == logdev) {
+			cmn_err(CE_WARN,
+	"XFS: Cannot mount filesystem with identical rtdev and ddev/logdev.");
+			error = EINVAL;
+			goto out_close_rtdev;
+		}
+	}
+
+	/*
+	 * Setup xfs_mount buffer target pointers
+	 */
+	error = ENOMEM;
+	mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0);
+	if (!mp->m_ddev_targp)
+		goto out_close_rtdev;
+
+	if (rtdev) {
+		mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1);
+		if (!mp->m_rtdev_targp)
+			goto out_free_ddev_targ;
+	}
+
+	if (logdev && logdev != ddev) {
+		mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1);
+		if (!mp->m_logdev_targp)
+			goto out_free_rtdev_targ;
+	} else {
+		mp->m_logdev_targp = mp->m_ddev_targp;
+	}
+
+	return 0;
+
+ out_free_rtdev_targ:
+	if (mp->m_rtdev_targp)
+		xfs_free_buftarg(mp->m_rtdev_targp);
+ out_free_ddev_targ:
+	xfs_free_buftarg(mp->m_ddev_targp);
+ out_close_rtdev:
+	if (rtdev)
+		xfs_blkdev_put(rtdev);
+ out_close_logdev:
+	if (logdev && logdev != ddev)
+		xfs_blkdev_put(logdev);
+ out:
+	return error;
+}
+
+
+
 /*
  * XFS AIL push thread support
  */
@@ -1138,7 +1235,8 @@ xfs_fs_put_super(
 				unmount_event_flags);
 	}
 
-	xfs_unmountfs(mp, NULL);
+	xfs_unmountfs(mp);
+	xfs_close_devices(mp);
 	xfs_qmops_put(mp);
 	xfs_dmops_put(mp);
 	kmem_free(mp);
@@ -1585,16 +1683,6 @@ xfs_finish_flags(
 	return 0;
 }
 
-/*
- * The file system configurations are:
- *	(1) device (partition) with data and internal log
- *	(2) logical volume with data and log subvolumes.
- *	(3) logical volume with data, log, and realtime subvolumes.
- *
- * We only have to handle opening the log and realtime volumes here if
- * they are present.  The data subvolume has already been opened by
- * get_sb_bdev() and is stored in vfsp->vfs_super->s_bdev.
- */
 STATIC int
 xfs_fs_fill_super(
 	struct super_block	*sb,
@@ -1604,8 +1692,6 @@ xfs_fs_fill_super(
 	struct inode		*root;
 	struct xfs_mount	*mp = NULL;
 	struct xfs_mount_args	*args = xfs_args_allocate(sb, silent);
-	struct block_device	*ddev = sb->s_bdev;
-	struct block_device	*logdev = NULL, *rtdev = NULL;
 	int			flags = 0, error;
 
 	mp = xfs_mount_init();
@@ -1634,61 +1720,14 @@ xfs_fs_fill_super(
 		goto fail_vfsop;
 	error = xfs_qmops_get(mp, args);
 	if (error)
-		goto fail_vfsop;
+		goto out_put_dmops;
 
 	if (args->flags & XFSMNT_QUIET)
 		flags |= XFS_MFSI_QUIET;
 
-	/*
-	 * Open real time and log devices - order is important.
-	 */
-	if (args->logname[0]) {
-		error = xfs_blkdev_get(mp, args->logname, &logdev);
-		if (error)
-			goto fail_vfsop;
-	}
-	if (args->rtname[0]) {
-		error = xfs_blkdev_get(mp, args->rtname, &rtdev);
-		if (error) {
-			xfs_blkdev_put(logdev);
-			goto fail_vfsop;
-		}
-
-		if (rtdev == ddev || rtdev == logdev) {
-			cmn_err(CE_WARN,
-	"XFS: Cannot mount filesystem with identical rtdev and ddev/logdev.");
-			xfs_blkdev_put(logdev);
-			xfs_blkdev_put(rtdev);
-			error = EINVAL;
-			goto fail_vfsop;
-		}
-	}
-
-	/*
-	 * Setup xfs_mount buffer target pointers
-	 */
-	error = ENOMEM;
-	mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0);
-	if (!mp->m_ddev_targp) {
-		xfs_blkdev_put(logdev);
-		xfs_blkdev_put(rtdev);
-		goto fail_vfsop;
-	}
-	if (rtdev) {
-		mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1);
-		if (!mp->m_rtdev_targp) {
-			xfs_blkdev_put(logdev);
-			xfs_blkdev_put(rtdev);
-			goto error0;
-		}
-	}
-	mp->m_logdev_targp = (logdev && logdev != ddev) ?
-				xfs_alloc_buftarg(logdev, 1) : mp->m_ddev_targp;
-	if (!mp->m_logdev_targp) {
-		xfs_blkdev_put(logdev);
-		xfs_blkdev_put(rtdev);
-		goto error0;
-	}
+	error = xfs_open_devices(mp, args);
+	if (error)
+		goto out_put_qmops;
 
 	/*
 	 * Setup flags based on mount(2) options and then the superblock
@@ -1708,7 +1747,9 @@ xfs_fs_fill_super(
 	 */
 	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
 				    mp->m_sb.sb_sectsize);
-	if (!error && logdev && logdev != ddev) {
+	if (error)
+		goto error2;
+	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
 		unsigned int	log_sector_size = BBSIZE;
 
 		if (xfs_sb_version_hassector(&mp->m_sb))
@@ -1716,13 +1757,16 @@ xfs_fs_fill_super(
 		error = xfs_setsize_buftarg(mp->m_logdev_targp,
 					    mp->m_sb.sb_blocksize,
 					    log_sector_size);
+		if (error)
+			goto error2;
 	}
-	if (!error && rtdev)
+	if (mp->m_rtdev_targp) {
 		error = xfs_setsize_buftarg(mp->m_rtdev_targp,
 					    mp->m_sb.sb_blocksize,
 					    mp->m_sb.sb_sectsize);
-	if (error)
-		goto error2;
+		if (error)
+			goto error2;
+	}
 
 	if (mp->m_flags & XFS_MOUNT_BARRIER)
 		xfs_mountfs_check_barriers(mp);
@@ -1778,13 +1822,14 @@ xfs_fs_fill_super(
 		xfs_freesb(mp);
  error1:
 	xfs_binval(mp->m_ddev_targp);
-	if (logdev && logdev != ddev)
+	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
 		xfs_binval(mp->m_logdev_targp);
-	if (rtdev)
+	if (mp->m_rtdev_targp)
 		xfs_binval(mp->m_rtdev_targp);
- error0:
-	xfs_unmountfs_close(mp, NULL);
+	xfs_close_devices(mp);
+ out_put_qmops:
 	xfs_qmops_put(mp);
+ out_put_dmops:
 	xfs_dmops_put(mp);
 	goto fail_vfsop;
 
@@ -1810,7 +1855,8 @@ xfs_fs_fill_super(
 
 	IRELE(mp->m_rootip);
 
-	xfs_unmountfs(mp, NULL);
+	xfs_unmountfs(mp);
+	xfs_close_devices(mp);
 	xfs_qmops_put(mp);
 	xfs_dmops_put(mp);
 	kmem_free(mp);
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 3efb7c6d330..212bdc7a789 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -107,9 +107,6 @@ extern void xfs_initialize_vnode(struct xfs_mount *mp, bhv_vnode_t *vp,
 extern void xfs_flush_inode(struct xfs_inode *);
 extern void xfs_flush_device(struct xfs_inode *);
 
-extern int  xfs_blkdev_get(struct xfs_mount *, const char *,
-				struct block_device **);
-extern void xfs_blkdev_put(struct block_device *);
 extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
 
 extern const struct export_operations xfs_export_operations;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index ee5df5fae82..c67f8a9ae41 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1278,7 +1278,7 @@ xfs_mountfs(
  * log and makes sure that incore structures are freed.
  */
 int
-xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
+xfs_unmountfs(xfs_mount_t *mp)
 {
 	__uint64_t	resblks;
 	int		error = 0;
@@ -1345,7 +1345,6 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
 	 */
 	ASSERT(mp->m_inodes == NULL);
 
-	xfs_unmountfs_close(mp, cr);
 	if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
 		uuid_table_remove(&mp->m_sb.sb_uuid);
 
@@ -1356,16 +1355,6 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
 	return 0;
 }
 
-void
-xfs_unmountfs_close(xfs_mount_t *mp, struct cred *cr)
-{
-	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
-		xfs_free_buftarg(mp->m_logdev_targp, 1);
-	if (mp->m_rtdev_targp)
-		xfs_free_buftarg(mp->m_rtdev_targp, 1);
-	xfs_free_buftarg(mp->m_ddev_targp, 0);
-}
-
 STATIC void
 xfs_unmountfs_wait(xfs_mount_t *mp)
 {
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 4aff0c125ad..5acde7fd9c0 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -518,8 +518,7 @@ extern void	xfs_mount_free(xfs_mount_t *mp);
 extern int	xfs_mountfs(xfs_mount_t *mp, int);
 extern void	xfs_mountfs_check_barriers(xfs_mount_t *mp);
 
-extern int	xfs_unmountfs(xfs_mount_t *, struct cred *);
-extern void	xfs_unmountfs_close(xfs_mount_t *, struct cred *);
+extern int	xfs_unmountfs(xfs_mount_t *);
 extern int	xfs_unmountfs_writesb(xfs_mount_t *);
 extern int	xfs_unmount_flush(xfs_mount_t *, int);
 extern int	xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
-- 
cgit v1.2.3-70-g09d2


From e34b562c6bbffc3c466251ffa1d2adaf163db566 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Tue, 20 May 2008 15:10:36 +1000
Subject: [XFS] add xfs_setup_devices helper

Split setting the block and sector size out of xfs_fs_fill_super into a
small helper to make xfs_fs_fill_super more readable.

SGI-PV: 981951
SGI-Modid: xfs-linux-melb:xfs-kern:31194a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/linux-2.6/xfs_super.c | 58 ++++++++++++++++++++++++++------------------
 1 file changed, 35 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 055faa06ca2..613370f9a4b 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -861,7 +861,41 @@ xfs_open_devices(
 	return error;
 }
 
+/*
+ * Setup xfs_mount buffer target pointers based on superblock
+ */
+STATIC int
+xfs_setup_devices(
+	struct xfs_mount	*mp)
+{
+	int			error;
 
+	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
+				    mp->m_sb.sb_sectsize);
+	if (error)
+		return error;
+
+	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
+		unsigned int	log_sector_size = BBSIZE;
+
+		if (xfs_sb_version_hassector(&mp->m_sb))
+			log_sector_size = mp->m_sb.sb_logsectsize;
+		error = xfs_setsize_buftarg(mp->m_logdev_targp,
+					    mp->m_sb.sb_blocksize,
+					    log_sector_size);
+		if (error)
+			return error;
+	}
+	if (mp->m_rtdev_targp) {
+		error = xfs_setsize_buftarg(mp->m_rtdev_targp,
+					    mp->m_sb.sb_blocksize,
+					    mp->m_sb.sb_sectsize);
+		if (error)
+			return error;
+	}
+
+	return 0;
+}
 
 /*
  * XFS AIL push thread support
@@ -1742,31 +1776,9 @@ xfs_fs_fill_super(
 	if (error)
 		goto error2;
 
-	/*
-	 * Setup xfs_mount buffer target pointers based on superblock
-	 */
-	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
-				    mp->m_sb.sb_sectsize);
+	error = xfs_setup_devices(mp);
 	if (error)
 		goto error2;
-	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
-		unsigned int	log_sector_size = BBSIZE;
-
-		if (xfs_sb_version_hassector(&mp->m_sb))
-			log_sector_size = mp->m_sb.sb_logsectsize;
-		error = xfs_setsize_buftarg(mp->m_logdev_targp,
-					    mp->m_sb.sb_blocksize,
-					    log_sector_size);
-		if (error)
-			goto error2;
-	}
-	if (mp->m_rtdev_targp) {
-		error = xfs_setsize_buftarg(mp->m_rtdev_targp,
-					    mp->m_sb.sb_blocksize,
-					    mp->m_sb.sb_sectsize);
-		if (error)
-			goto error2;
-	}
 
 	if (mp->m_flags & XFS_MOUNT_BARRIER)
 		xfs_mountfs_check_barriers(mp);
-- 
cgit v1.2.3-70-g09d2


From bdd907bab78419f34113c51470192945741b839e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Tue, 20 May 2008 15:10:44 +1000
Subject: [XFS] allow xfs_args_allocate to fail

Switch xfs_args_allocate to kzalloc and handle failures.

SGI-PV: 981951
SGI-Modid: xfs-linux-melb:xfs-kern:31195a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/linux-2.6/xfs_super.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 613370f9a4b..d13d883d003 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -75,7 +75,10 @@ xfs_args_allocate(
 {
 	struct xfs_mount_args	*args;
 
-	args = kmem_zalloc(sizeof(struct xfs_mount_args), KM_SLEEP);
+	args = kzalloc(sizeof(struct xfs_mount_args), GFP_KERNEL);
+	if (!args)
+		return NULL;
+
 	args->logbufs = args->logbufsize = -1;
 	strncpy(args->fsname, sb->s_id, MAXNAMELEN);
 
@@ -1396,9 +1399,13 @@ xfs_fs_remount(
 	char			*options)
 {
 	struct xfs_mount	*mp = XFS_M(sb);
-	struct xfs_mount_args	*args = xfs_args_allocate(sb, 0);
+	struct xfs_mount_args	*args;
 	int			error;
 
+	args = xfs_args_allocate(sb, 0);
+	if (!args)
+		return -ENOMEM;
+
 	error = xfs_parseargs(mp, options, args, 1);
 	if (error)
 		goto out_free_args;
@@ -1420,7 +1427,7 @@ xfs_fs_remount(
 	}
 
  out_free_args:
-	kmem_free(args);
+	kfree(args);
 	return -error;
 }
 
@@ -1725,9 +1732,13 @@ xfs_fs_fill_super(
 {
 	struct inode		*root;
 	struct xfs_mount	*mp = NULL;
-	struct xfs_mount_args	*args = xfs_args_allocate(sb, silent);
+	struct xfs_mount_args	*args;
 	int			flags = 0, error;
 
+	args = xfs_args_allocate(sb, silent);
+	if (!args)
+		return -ENOMEM;
+
 	mp = xfs_mount_init();
 
 	INIT_LIST_HEAD(&mp->m_sync_list);
@@ -1826,7 +1837,7 @@ xfs_fs_fill_super(
 
 	xfs_itrace_exit(XFS_I(sb->s_root->d_inode));
 
-	kmem_free(args);
+	kfree(args);
 	return 0;
 
  error2:
@@ -1874,7 +1885,7 @@ xfs_fs_fill_super(
 	kmem_free(mp);
 
  fail_vfsop:
-	kmem_free(args);
+	kfree(args);
 	return -error;
 }
 
-- 
cgit v1.2.3-70-g09d2


From c962fb7902669a48a2c613649c1f03865c0ffd1e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Tue, 20 May 2008 15:10:52 +1000
Subject: [XFS] kill xfs_mount_init

xfs_mount_init is inlined into xfs_fs_fill_super and allocation switched
to kzalloc. Plug a leak of the mount structure for most early mount
failures. Move xfs_icsb_init_counters to as late as possible in the mount
path and make sure to undo it so that no stale hotplug cpu notifiers are
left around on mount failures.

SGI-PV: 981951
SGI-Modid: xfs-linux-melb:xfs-kern:31196a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/linux-2.6/xfs_super.c | 37 +++++++++++++++++++++++--------------
 fs/xfs/xfs_mount.c           | 30 ++----------------------------
 fs/xfs/xfs_mount.h           |  8 ++++----
 3 files changed, 29 insertions(+), 46 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index d13d883d003..fe52e9276aa 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1273,10 +1273,11 @@ xfs_fs_put_super(
 	}
 
 	xfs_unmountfs(mp);
+	xfs_icsb_destroy_counters(mp);
 	xfs_close_devices(mp);
 	xfs_qmops_put(mp);
 	xfs_dmops_put(mp);
-	kmem_free(mp);
+	kfree(mp);
 }
 
 STATIC void
@@ -1733,14 +1734,20 @@ xfs_fs_fill_super(
 	struct inode		*root;
 	struct xfs_mount	*mp = NULL;
 	struct xfs_mount_args	*args;
-	int			flags = 0, error;
+	int			flags = 0, error = ENOMEM;
 
 	args = xfs_args_allocate(sb, silent);
 	if (!args)
 		return -ENOMEM;
 
-	mp = xfs_mount_init();
+	mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
+	if (!mp)
+		goto out_free_args;
 
+	spin_lock_init(&mp->m_sb_lock);
+	mutex_init(&mp->m_ilock);
+	mutex_init(&mp->m_growlock);
+	atomic_set(&mp->m_active_trans, 0);
 	INIT_LIST_HEAD(&mp->m_sync_list);
 	spin_lock_init(&mp->m_sync_lock);
 	init_waitqueue_head(&mp->m_wait_single_sync_task);
@@ -1753,7 +1760,7 @@ xfs_fs_fill_super(
 
 	error = xfs_parseargs(mp, (char *)data, args, 0);
 	if (error)
-		goto fail_vfsop;
+		goto out_free_mp;
 
 	sb_min_blocksize(sb, BBSIZE);
 	sb->s_export_op = &xfs_export_operations;
@@ -1762,7 +1769,7 @@ xfs_fs_fill_super(
 
 	error = xfs_dmops_get(mp, args);
 	if (error)
-		goto fail_vfsop;
+		goto out_free_mp;
 	error = xfs_qmops_get(mp, args);
 	if (error)
 		goto out_put_dmops;
@@ -1774,6 +1781,9 @@ xfs_fs_fill_super(
 	if (error)
 		goto out_put_qmops;
 
+	if (xfs_icsb_init_counters(mp))
+		mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
+
 	/*
 	 * Setup flags based on mount(2) options and then the superblock
 	 */
@@ -1849,12 +1859,18 @@ xfs_fs_fill_super(
 		xfs_binval(mp->m_logdev_targp);
 	if (mp->m_rtdev_targp)
 		xfs_binval(mp->m_rtdev_targp);
+ out_destroy_counters:
+	xfs_icsb_destroy_counters(mp);
 	xfs_close_devices(mp);
  out_put_qmops:
 	xfs_qmops_put(mp);
  out_put_dmops:
 	xfs_dmops_put(mp);
-	goto fail_vfsop;
+ out_free_mp:
+	kfree(mp);
+ out_free_args:
+	kfree(args);
+	return -error;
 
  fail_vnrele:
 	if (sb->s_root) {
@@ -1879,14 +1895,7 @@ xfs_fs_fill_super(
 	IRELE(mp->m_rootip);
 
 	xfs_unmountfs(mp);
-	xfs_close_devices(mp);
-	xfs_qmops_put(mp);
-	xfs_dmops_put(mp);
-	kmem_free(mp);
-
- fail_vfsop:
-	kfree(args);
-	return -error;
+	goto out_destroy_counters;
 }
 
 STATIC int
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index c67f8a9ae41..1bfaa204f68 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -51,7 +51,6 @@ STATIC void	xfs_unmountfs_wait(xfs_mount_t *);
 
 
 #ifdef HAVE_PERCPU_SB
-STATIC void	xfs_icsb_destroy_counters(xfs_mount_t *);
 STATIC void	xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
 						int);
 STATIC void	xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
@@ -62,7 +61,6 @@ STATIC void	xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
 
 #else
 
-#define xfs_icsb_destroy_counters(mp)			do { } while (0)
 #define xfs_icsb_balance_counter(mp, a, b)		do { } while (0)
 #define xfs_icsb_balance_counter_locked(mp, a, b)	do { } while (0)
 #define xfs_icsb_modify_counters(mp, a, b, c)		do { } while (0)
@@ -124,34 +122,12 @@ static const struct {
     { sizeof(xfs_sb_t),			 0 }
 };
 
-/*
- * Return a pointer to an initialized xfs_mount structure.
- */
-xfs_mount_t *
-xfs_mount_init(void)
-{
-	xfs_mount_t *mp;
-
-	mp = kmem_zalloc(sizeof(xfs_mount_t), KM_SLEEP);
-
-	if (xfs_icsb_init_counters(mp)) {
-		mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
-	}
-
-	spin_lock_init(&mp->m_sb_lock);
-	mutex_init(&mp->m_ilock);
-	mutex_init(&mp->m_growlock);
-	atomic_set(&mp->m_active_trans, 0);
-
-	return mp;
-}
-
 /*
  * Free up the resources associated with a mount structure.  Assume that
  * the structure was initially zeroed, so we can tell which fields got
  * initialized.
  */
-void
+STATIC void
 xfs_mount_free(
 	xfs_mount_t	*mp)
 {
@@ -177,8 +153,6 @@ xfs_mount_free(
 		kmem_free(mp->m_rtname);
 	if (mp->m_logname != NULL)
 		kmem_free(mp->m_logname);
-
-	xfs_icsb_destroy_counters(mp);
 }
 
 /*
@@ -2093,7 +2067,7 @@ xfs_icsb_reinit_counters(
 	xfs_icsb_unlock(mp);
 }
 
-STATIC void
+void
 xfs_icsb_destroy_counters(
 	xfs_mount_t	*mp)
 {
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 5acde7fd9c0..96d8791e9e5 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -210,12 +210,14 @@ typedef struct xfs_icsb_cnts {
 
 extern int	xfs_icsb_init_counters(struct xfs_mount *);
 extern void	xfs_icsb_reinit_counters(struct xfs_mount *);
+extern void	xfs_icsb_destroy_counters(struct xfs_mount *);
 extern void	xfs_icsb_sync_counters(struct xfs_mount *, int);
 extern void	xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
 
 #else
-#define xfs_icsb_init_counters(mp)	(0)
-#define xfs_icsb_reinit_counters(mp)	do { } while (0)
+#define xfs_icsb_init_counters(mp)		(0)
+#define xfs_icsb_destroy_counters(mp)		do { } while (0)
+#define xfs_icsb_reinit_counters(mp)		do { } while (0)
 #define xfs_icsb_sync_counters(mp, flags)	do { } while (0)
 #define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
 #endif
@@ -511,10 +513,8 @@ typedef struct xfs_mod_sb {
 #define	XFS_MOUNT_ILOCK(mp)	mutex_lock(&((mp)->m_ilock))
 #define	XFS_MOUNT_IUNLOCK(mp)	mutex_unlock(&((mp)->m_ilock))
 
-extern xfs_mount_t *xfs_mount_init(void);
 extern void	xfs_mod_sb(xfs_trans_t *, __int64_t);
 extern int	xfs_log_sbcount(xfs_mount_t *, uint);
-extern void	xfs_mount_free(xfs_mount_t *mp);
 extern int	xfs_mountfs(xfs_mount_t *mp, int);
 extern void	xfs_mountfs_check_barriers(xfs_mount_t *mp);
 
-- 
cgit v1.2.3-70-g09d2


From 95db4e21b72603217f0bcafa4da9ee01fc1d2389 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Tue, 20 May 2008 15:10:58 +1000
Subject: [XFS] kill calls to xfs_binval in the mount error path

xfs_binval aka xfs_flush_buftarg is the first thing done in
xfs_free_buftarg, so there is no need to have duplicated calls just before
xfs_free_buftarg in the mount failure path.

SGI-PV: 981951
SGI-Modid: xfs-linux-melb:xfs-kern:31197a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/linux-2.6/xfs_super.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index fe52e9276aa..d2155b1de10 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1789,10 +1789,10 @@ xfs_fs_fill_super(
 	 */
 	error = xfs_start_flags(args, mp);
 	if (error)
-		goto error1;
+		goto out_destroy_counters;
 	error = xfs_readsb(mp, flags);
 	if (error)
-		goto error1;
+		goto out_destroy_counters;
 	error = xfs_finish_flags(args, mp);
 	if (error)
 		goto error2;
@@ -1853,12 +1853,6 @@ xfs_fs_fill_super(
  error2:
 	if (mp->m_sb_bp)
 		xfs_freesb(mp);
- error1:
-	xfs_binval(mp->m_ddev_targp);
-	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
-		xfs_binval(mp->m_logdev_targp);
-	if (mp->m_rtdev_targp)
-		xfs_binval(mp->m_rtdev_targp);
  out_destroy_counters:
 	xfs_icsb_destroy_counters(mp);
 	xfs_close_devices(mp);
-- 
cgit v1.2.3-70-g09d2


From effa2eda3ab9c013585349b8afd305dc5decf771 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Tue, 20 May 2008 15:11:05 +1000
Subject: [XFS] rename error2 goto label in xfs_fs_fill_super

SGI-PV: 981951
SGI-Modid: xfs-linux-melb:xfs-kern:31198a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/linux-2.6/xfs_super.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index d2155b1de10..4c662d63d62 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1795,22 +1795,22 @@ xfs_fs_fill_super(
 		goto out_destroy_counters;
 	error = xfs_finish_flags(args, mp);
 	if (error)
-		goto error2;
+		goto out_free_sb;
 
 	error = xfs_setup_devices(mp);
 	if (error)
-		goto error2;
+		goto out_free_sb;
 
 	if (mp->m_flags & XFS_MOUNT_BARRIER)
 		xfs_mountfs_check_barriers(mp);
 
 	error = xfs_filestream_mount(mp);
 	if (error)
-		goto error2;
+		goto out_free_sb;
 
 	error = xfs_mountfs(mp, flags);
 	if (error)
-		goto error2;
+		goto out_free_sb;
 
 	XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, args->mtpt, args->fsname);
 
@@ -1850,9 +1850,8 @@ xfs_fs_fill_super(
 	kfree(args);
 	return 0;
 
- error2:
-	if (mp->m_sb_bp)
-		xfs_freesb(mp);
+ out_free_sb:
+	xfs_freesb(mp);
  out_destroy_counters:
 	xfs_icsb_destroy_counters(mp);
 	xfs_close_devices(mp);
-- 
cgit v1.2.3-70-g09d2


From 120226c11a6277d3e761393f0995c55218fabebb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Tue, 20 May 2008 15:11:11 +1000
Subject: [XFS] add missing call to xfs_filestream_unmount on xfs_mountfs
 failure

SGI-PV: 981951
SGI-Modid: xfs-linux-melb:xfs-kern:31199a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/linux-2.6/xfs_super.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 4c662d63d62..41eea24a46e 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1810,7 +1810,7 @@ xfs_fs_fill_super(
 
 	error = xfs_mountfs(mp, flags);
 	if (error)
-		goto out_free_sb;
+		goto out_filestream_unmount;
 
 	XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, args->mtpt, args->fsname);
 
@@ -1850,6 +1850,8 @@ xfs_fs_fill_super(
 	kfree(args);
 	return 0;
 
+ out_filestream_unmount:
+	xfs_filestream_unmount(mp);
  out_free_sb:
 	xfs_freesb(mp);
  out_destroy_counters:
-- 
cgit v1.2.3-70-g09d2


From 68f34d5107dbace3d14a1c2f060fc8941894879c Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sandeen.net>
Date: Tue, 20 May 2008 15:11:17 +1000
Subject: [XFS]

de-duplicate calls to xfs_attr_trace_enter

Every call to xfs_attr_trace_enter() shares the exact same 16 args in the
middle... just send in the context pointer and let the next level down
split it into the ktrace.

Compile tested only.

SGI-PV: 976035
SGI-Modid: xfs-linux-melb:xfs-kern:31200a

Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
Signed-off-by: Niv Sardi <xaiki@sgi.com>
Signed-off-by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/xfs_attr.c    | 106 ++++++++++++---------------------------------------
 fs/xfs/xfs_attr_sf.h |  10 ++---
 2 files changed, 28 insertions(+), 88 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 86d8619f279..5e5dbe62b19 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -2300,23 +2300,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
 void
 xfs_attr_trace_l_c(char *where, struct xfs_attr_list_context *context)
 {
-	xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_C, where,
-		(__psunsigned_t)context->dp,
-		(__psunsigned_t)context->cursor->hashval,
-		(__psunsigned_t)context->cursor->blkno,
-		(__psunsigned_t)context->cursor->offset,
-		(__psunsigned_t)context->alist,
-		(__psunsigned_t)context->bufsize,
-		(__psunsigned_t)context->count,
-		(__psunsigned_t)context->firstu,
-		(__psunsigned_t)
-			((context->count > 0) &&
-			!(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
-				? (ATTR_ENTRY(context->alist,
-					      context->count-1)->a_valuelen)
-				: 0,
-		(__psunsigned_t)context->dupcnt,
-		(__psunsigned_t)context->flags,
+	xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_C, where, context,
 		(__psunsigned_t)NULL,
 		(__psunsigned_t)NULL,
 		(__psunsigned_t)NULL);
@@ -2329,23 +2313,7 @@ void
 xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context,
 			 struct xfs_da_intnode *node)
 {
-	xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CN, where,
-		(__psunsigned_t)context->dp,
-		(__psunsigned_t)context->cursor->hashval,
-		(__psunsigned_t)context->cursor->blkno,
-		(__psunsigned_t)context->cursor->offset,
-		(__psunsigned_t)context->alist,
-		(__psunsigned_t)context->bufsize,
-		(__psunsigned_t)context->count,
-		(__psunsigned_t)context->firstu,
-		(__psunsigned_t)
-			((context->count > 0) &&
-			!(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
-				? (ATTR_ENTRY(context->alist,
-					      context->count-1)->a_valuelen)
-				: 0,
-		(__psunsigned_t)context->dupcnt,
-		(__psunsigned_t)context->flags,
+	xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CN, where, context,
 		(__psunsigned_t)be16_to_cpu(node->hdr.count),
 		(__psunsigned_t)be32_to_cpu(node->btree[0].hashval),
 		(__psunsigned_t)be32_to_cpu(node->btree[
@@ -2359,23 +2327,7 @@ void
 xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context,
 			  struct xfs_da_node_entry *btree)
 {
-	xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CB, where,
-		(__psunsigned_t)context->dp,
-		(__psunsigned_t)context->cursor->hashval,
-		(__psunsigned_t)context->cursor->blkno,
-		(__psunsigned_t)context->cursor->offset,
-		(__psunsigned_t)context->alist,
-		(__psunsigned_t)context->bufsize,
-		(__psunsigned_t)context->count,
-		(__psunsigned_t)context->firstu,
-		(__psunsigned_t)
-			((context->count > 0) &&
-			!(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
-				? (ATTR_ENTRY(context->alist,
-					      context->count-1)->a_valuelen)
-				: 0,
-		(__psunsigned_t)context->dupcnt,
-		(__psunsigned_t)context->flags,
+	xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CB, where, context,
 		(__psunsigned_t)be32_to_cpu(btree->hashval),
 		(__psunsigned_t)be32_to_cpu(btree->before),
 		(__psunsigned_t)NULL);
@@ -2388,23 +2340,7 @@ void
 xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
 			      struct xfs_attr_leafblock *leaf)
 {
-	xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CL, where,
-		(__psunsigned_t)context->dp,
-		(__psunsigned_t)context->cursor->hashval,
-		(__psunsigned_t)context->cursor->blkno,
-		(__psunsigned_t)context->cursor->offset,
-		(__psunsigned_t)context->alist,
-		(__psunsigned_t)context->bufsize,
-		(__psunsigned_t)context->count,
-		(__psunsigned_t)context->firstu,
-		(__psunsigned_t)
-			((context->count > 0) &&
-			!(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
-				? (ATTR_ENTRY(context->alist,
-					      context->count-1)->a_valuelen)
-				: 0,
-		(__psunsigned_t)context->dupcnt,
-		(__psunsigned_t)context->flags,
+	xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CL, where, context,
 		(__psunsigned_t)be16_to_cpu(leaf->hdr.count),
 		(__psunsigned_t)be32_to_cpu(leaf->entries[0].hashval),
 		(__psunsigned_t)be32_to_cpu(leaf->entries[
@@ -2417,22 +2353,30 @@ xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
  */
 void
 xfs_attr_trace_enter(int type, char *where,
-			 __psunsigned_t a2, __psunsigned_t a3,
-			 __psunsigned_t a4, __psunsigned_t a5,
-			 __psunsigned_t a6, __psunsigned_t a7,
-			 __psunsigned_t a8, __psunsigned_t a9,
-			 __psunsigned_t a10, __psunsigned_t a11,
-			 __psunsigned_t a12, __psunsigned_t a13,
-			 __psunsigned_t a14, __psunsigned_t a15)
+			 struct xfs_attr_list_context *context,
+			 __psunsigned_t a13, __psunsigned_t a14,
+			 __psunsigned_t a15)
 {
 	ASSERT(xfs_attr_trace_buf);
 	ktrace_enter(xfs_attr_trace_buf, (void *)((__psunsigned_t)type),
-					 (void *)where,
-					 (void *)a2,  (void *)a3,  (void *)a4,
-					 (void *)a5,  (void *)a6,  (void *)a7,
-					 (void *)a8,  (void *)a9,  (void *)a10,
-					 (void *)a11, (void *)a12, (void *)a13,
-					 (void *)a14, (void *)a15);
+		(void *)((__psunsigned_t)where),
+		(void *)((__psunsigned_t)context->dp),
+		(void *)((__psunsigned_t)context->cursor->hashval),
+		(void *)((__psunsigned_t)context->cursor->blkno),
+		(void *)((__psunsigned_t)context->cursor->offset),
+		(void *)((__psunsigned_t)context->alist),
+		(void *)((__psunsigned_t)context->bufsize),
+		(void *)((__psunsigned_t)context->count),
+		(void *)((__psunsigned_t)context->firstu),
+		(void *)((__psunsigned_t)
+			(((context->count > 0) &&
+			!(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
+				? (ATTR_ENTRY(context->alist,
+					      context->count-1)->a_valuelen)
+				: 0)),
+		(void *)((__psunsigned_t)context->dupcnt),
+		(void *)((__psunsigned_t)context->flags),
+		(void *)a13, (void *)a14, (void *)a15);
 }
 #endif	/* XFS_ATTR_TRACE */
 
diff --git a/fs/xfs/xfs_attr_sf.h b/fs/xfs/xfs_attr_sf.h
index f67f917803b..ea22839caed 100644
--- a/fs/xfs/xfs_attr_sf.h
+++ b/fs/xfs/xfs_attr_sf.h
@@ -97,13 +97,9 @@ void xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context,
 void xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
 			      struct xfs_attr_leafblock *leaf);
 void xfs_attr_trace_enter(int type, char *where,
-			     __psunsigned_t a2, __psunsigned_t a3,
-			     __psunsigned_t a4, __psunsigned_t a5,
-			     __psunsigned_t a6, __psunsigned_t a7,
-			     __psunsigned_t a8, __psunsigned_t a9,
-			     __psunsigned_t a10, __psunsigned_t a11,
-			     __psunsigned_t a12, __psunsigned_t a13,
-			     __psunsigned_t a14, __psunsigned_t a15);
+			     struct xfs_attr_list_context *context,
+			     __psunsigned_t a13, __psunsigned_t a14,
+			     __psunsigned_t a15);
 #else
 #define	xfs_attr_trace_l_c(w,c)
 #define	xfs_attr_trace_l_cn(w,c,n)
-- 
cgit v1.2.3-70-g09d2


From 5163f95a08cbf058ae16452c2242c5600fedc32e Mon Sep 17 00:00:00 2001
From: Barry Naujok <bnaujok@sgi.com>
Date: Wed, 21 May 2008 16:41:01 +1000
Subject: [XFS] Name operation vector for hash and compare

Adds two pieces of functionality for the basis of case-insensitive support
in XFS:

1. A comparison result enumerated type: xfs_dacmp. It represents an

exact match, case-insensitive match or no match at all. This patch

only implements different and exact results.

2. xfs_nameops vector for specifying how to perform the hash generation

of filenames and comparision methods. In this patch the hash vector

points to the existing xfs_da_hashname function and the comparison

method does a length compare, and if the same, does a memcmp and

return the xfs_dacmp result.

All filename functions that use the hash (create, lookup remove, rename,
etc) now use the xfs_nameops.hashname function and all directory lookup
functions also use the xfs_nameops.compname function.

The lookup functions also handle case-insensitive results even though the
default comparison function cannot return that. And important aspect of
the lookup functions is that an exact match always has precedence over a
case-insensitive. So while a case-insensitive match is found, we have to
keep looking just in case there is an exact match. In the meantime, the
info for the first case-insensitive match is retained if no exact match is
found.

SGI-PV: 981519
SGI-Modid: xfs-linux-melb:xfs-kern:31205a

Signed-off-by: Barry Naujok <bnaujok@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
---
 fs/xfs/xfs_da_btree.c   | 22 ++++++++++++++++++
 fs/xfs/xfs_da_btree.h   | 22 ++++++++++++++++++
 fs/xfs/xfs_dir2.c       | 12 ++++++----
 fs/xfs/xfs_dir2_block.c | 33 +++++++++++++++++++-------
 fs/xfs/xfs_dir2_data.c  |  5 +++-
 fs/xfs/xfs_dir2_leaf.c  | 60 +++++++++++++++++++++++++++++++++--------------
 fs/xfs/xfs_dir2_node.c  | 23 +++++++++++-------
 fs/xfs/xfs_dir2_sf.c    | 62 ++++++++++++++++++++++++++++---------------------
 fs/xfs/xfs_mount.h      |  2 ++
 9 files changed, 174 insertions(+), 67 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 294780427ab..ae4b18c7726 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1530,6 +1530,28 @@ xfs_da_hashname(const uchar_t *name, int namelen)
 	}
 }
 
+enum xfs_dacmp
+xfs_da_compname(
+	struct xfs_da_args *args,
+	const char 	*name,
+	int 		len)
+{
+	return (args->namelen == len && memcmp(args->name, name, len) == 0) ?
+					XFS_CMP_EXACT : XFS_CMP_DIFFERENT;
+}
+
+static xfs_dahash_t
+xfs_default_hashname(
+	struct xfs_name	*name)
+{
+	return xfs_da_hashname(name->name, name->len);
+}
+
+const struct xfs_nameops xfs_default_nameops = {
+	.hashname	= xfs_default_hashname,
+	.compname	= xfs_da_compname
+};
+
 /*
  * Add a block to the btree ahead of the file.
  * Return the new block number to the caller.
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 7facf86f74f..e64c6924996 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -98,6 +98,15 @@ typedef struct xfs_da_node_entry xfs_da_node_entry_t;
  * Btree searching and modification structure definitions.
  *========================================================================*/
 
+/*
+ * Search comparison results
+ */
+enum xfs_dacmp {
+	XFS_CMP_DIFFERENT,	/* names are completely different */
+	XFS_CMP_EXACT,		/* names are exactly the same */
+	XFS_CMP_CASE		/* names are same but differ in case */
+};
+
 /*
  * Structure to ease passing around component names.
  */
@@ -127,6 +136,7 @@ typedef struct xfs_da_args {
 	unsigned char	rename;		/* T/F: this is an atomic rename op */
 	unsigned char	addname;	/* T/F: this is an add operation */
 	unsigned char	oknoent;	/* T/F: ok to return ENOENT, else die */
+	enum xfs_dacmp	cmpresult;	/* name compare result for lookups */
 } xfs_da_args_t;
 
 /*
@@ -201,6 +211,14 @@ typedef struct xfs_da_state {
 		(uint)(XFS_DA_LOGOFF(BASE, ADDR)), \
 		(uint)(XFS_DA_LOGOFF(BASE, ADDR)+(SIZE)-1)
 
+/*
+ * Name ops for directory and/or attr name operations
+ */
+struct xfs_nameops {
+	xfs_dahash_t	(*hashname)(struct xfs_name *);
+	enum xfs_dacmp	(*compname)(struct xfs_da_args *, const char *, int);
+};
+
 
 #ifdef __KERNEL__
 /*========================================================================
@@ -249,6 +267,10 @@ int	xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
 					  xfs_dabuf_t *dead_buf);
 
 uint xfs_da_hashname(const uchar_t *name_string, int name_length);
+enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args,
+				const char *name, int len);
+
+
 xfs_da_state_t *xfs_da_state_alloc(void);
 void xfs_da_state_free(xfs_da_state_t *state);
 
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 0284af1734b..675899bb704 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -65,6 +65,7 @@ xfs_dir_mount(
 		(mp->m_dirblksize - (uint)sizeof(xfs_da_node_hdr_t)) /
 		(uint)sizeof(xfs_da_node_entry_t);
 	mp->m_dir_magicpct = (mp->m_dirblksize * 37) / 100;
+	mp->m_dirnameops = &xfs_default_nameops;
 }
 
 /*
@@ -164,7 +165,7 @@ xfs_dir_createname(
 
 	args.name = name->name;
 	args.namelen = name->len;
-	args.hashval = xfs_da_hashname(name->name, name->len);
+	args.hashval = dp->i_mount->m_dirnameops->hashname(name);
 	args.inumber = inum;
 	args.dp = dp;
 	args.firstblock = first;
@@ -210,11 +211,12 @@ xfs_dir_lookup(
 
 	args.name = name->name;
 	args.namelen = name->len;
-	args.hashval = xfs_da_hashname(name->name, name->len);
+	args.hashval = dp->i_mount->m_dirnameops->hashname(name);
 	args.dp = dp;
 	args.whichfork = XFS_DATA_FORK;
 	args.trans = tp;
 	args.oknoent = 1;
+	args.cmpresult = XFS_CMP_DIFFERENT;
 
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
 		rval = xfs_dir2_sf_lookup(&args);
@@ -257,7 +259,7 @@ xfs_dir_removename(
 
 	args.name = name->name;
 	args.namelen = name->len;
-	args.hashval = xfs_da_hashname(name->name, name->len);
+	args.hashval = dp->i_mount->m_dirnameops->hashname(name);
 	args.inumber = ino;
 	args.dp = dp;
 	args.firstblock = first;
@@ -340,7 +342,7 @@ xfs_dir_replace(
 
 	args.name = name->name;
 	args.namelen = name->len;
-	args.hashval = xfs_da_hashname(name->name, name->len);
+	args.hashval = dp->i_mount->m_dirnameops->hashname(name);
 	args.inumber = inum;
 	args.dp = dp;
 	args.firstblock = first;
@@ -388,7 +390,7 @@ xfs_dir_canenter(
 
 	args.name = name->name;
 	args.namelen = name->len;
-	args.hashval = xfs_da_hashname(name->name, name->len);
+	args.hashval = dp->i_mount->m_dirnameops->hashname(name);
 	args.dp = dp;
 	args.whichfork = XFS_DATA_FORK;
 	args.trans = tp;
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index e8a7aca5fe2..98588491cb0 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -643,6 +643,7 @@ xfs_dir2_block_lookup_int(
 	int			mid;		/* binary search current idx */
 	xfs_mount_t		*mp;		/* filesystem mount point */
 	xfs_trans_t		*tp;		/* transaction pointer */
+	enum xfs_dacmp		cmp;		/* comparison result */
 
 	dp = args->dp;
 	tp = args->trans;
@@ -697,20 +698,31 @@ xfs_dir2_block_lookup_int(
 		dep = (xfs_dir2_data_entry_t *)
 			((char *)block + xfs_dir2_dataptr_to_off(mp, addr));
 		/*
-		 * Compare, if it's right give back buffer & entry number.
+		 * Compare name and if it's an exact match, return the index
+		 * and buffer. If it's the first case-insensitive match, store
+		 * the index and buffer and continue looking for an exact match.
 		 */
-		if (dep->namelen == args->namelen &&
-		    dep->name[0] == args->name[0] &&
-		    memcmp(dep->name, args->name, args->namelen) == 0) {
+		cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
+		if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
+			args->cmpresult = cmp;
 			*bpp = bp;
 			*entno = mid;
-			return 0;
+			if (cmp == XFS_CMP_EXACT)
+				return 0;
 		}
-	} while (++mid < be32_to_cpu(btp->count) && be32_to_cpu(blp[mid].hashval) == hash);
+	} while (++mid < be32_to_cpu(btp->count) &&
+			be32_to_cpu(blp[mid].hashval) == hash);
+
+	ASSERT(args->oknoent);
+	/*
+	 * Here, we can only be doing a lookup (not a rename or replace).
+	 * If a case-insensitive match was found earlier, return success.
+	 */
+	if (args->cmpresult == XFS_CMP_CASE)
+		return 0;
 	/*
 	 * No match, release the buffer and return ENOENT.
 	 */
-	ASSERT(args->oknoent);
 	xfs_da_brelse(tp, bp);
 	return XFS_ERROR(ENOENT);
 }
@@ -1033,6 +1045,7 @@ xfs_dir2_sf_to_block(
 	xfs_dir2_sf_t		*sfp;		/* shortform structure */
 	__be16			*tagp;		/* end of data entry */
 	xfs_trans_t		*tp;		/* transaction pointer */
+	struct xfs_name		name;
 
 	xfs_dir2_trace_args("sf_to_block", args);
 	dp = args->dp;
@@ -1187,8 +1200,10 @@ xfs_dir2_sf_to_block(
 		tagp = xfs_dir2_data_entry_tag_p(dep);
 		*tagp = cpu_to_be16((char *)dep - (char *)block);
 		xfs_dir2_data_log_entry(tp, bp, dep);
-		blp[2 + i].hashval = cpu_to_be32(xfs_da_hashname(
-					(char *)sfep->name, sfep->namelen));
+		name.name = sfep->name;
+		name.len = sfep->namelen;
+		blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops->
+							hashname(&name));
 		blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
 						 (char *)dep - (char *)block));
 		offset = (int)((char *)(tagp + 1) - (char *)block);
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index fb8c9e08b23..498f8d69433 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -65,6 +65,7 @@ xfs_dir2_data_check(
 	xfs_mount_t		*mp;		/* filesystem mount point */
 	char			*p;		/* current data position */
 	int			stale;		/* count of stale leaves */
+	struct xfs_name		name;
 
 	mp = dp->i_mount;
 	d = bp->data;
@@ -140,7 +141,9 @@ xfs_dir2_data_check(
 			addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
 				(xfs_dir2_data_aoff_t)
 				((char *)dep - (char *)d));
-			hash = xfs_da_hashname((char *)dep->name, dep->namelen);
+			name.name = dep->name;
+			name.len = dep->namelen;
+			hash = mp->m_dirnameops->hashname(&name);
 			for (i = 0; i < be32_to_cpu(btp->count); i++) {
 				if (be32_to_cpu(lep[i].address) == addr &&
 				    be32_to_cpu(lep[i].hashval) == hash)
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index e33433408e4..b52903bc0b1 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -1331,6 +1331,8 @@ xfs_dir2_leaf_lookup_int(
 	xfs_mount_t		*mp;		/* filesystem mount point */
 	xfs_dir2_db_t		newdb;		/* new data block number */
 	xfs_trans_t		*tp;		/* transaction pointer */
+	xfs_dabuf_t		*cbp;		/* case match data buffer */
+	enum xfs_dacmp		cmp;		/* name compare result */
 
 	dp = args->dp;
 	tp = args->trans;
@@ -1354,9 +1356,11 @@ xfs_dir2_leaf_lookup_int(
 	 * Loop over all the entries with the right hash value
 	 * looking to match the name.
 	 */
+	cbp = NULL;
 	for (lep = &leaf->ents[index], dbp = NULL, curdb = -1;
-	     index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval;
-	     lep++, index++) {
+				index < be16_to_cpu(leaf->hdr.count) &&
+				be32_to_cpu(lep->hashval) == args->hashval;
+				lep++, index++) {
 		/*
 		 * Skip over stale leaf entries.
 		 */
@@ -1371,12 +1375,12 @@ xfs_dir2_leaf_lookup_int(
 		 * need to pitch the old one and read the new one.
 		 */
 		if (newdb != curdb) {
-			if (dbp)
+			if (dbp != cbp)
 				xfs_da_brelse(tp, dbp);
-			if ((error =
-			    xfs_da_read_buf(tp, dp,
-				    xfs_dir2_db_to_da(mp, newdb), -1, &dbp,
-				    XFS_DATA_FORK))) {
+			error = xfs_da_read_buf(tp, dp,
+						xfs_dir2_db_to_da(mp, newdb),
+						-1, &dbp, XFS_DATA_FORK);
+			if (error) {
 				xfs_da_brelse(tp, lbp);
 				return error;
 			}
@@ -1386,24 +1390,46 @@ xfs_dir2_leaf_lookup_int(
 		/*
 		 * Point to the data entry.
 		 */
-		dep = (xfs_dir2_data_entry_t *)
-		      ((char *)dbp->data +
-		       xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
+		dep = (xfs_dir2_data_entry_t *)((char *)dbp->data +
+			xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
 		/*
-		 * If it matches then return it.
+		 * Compare name and if it's an exact match, return the index
+		 * and buffer. If it's the first case-insensitive match, store
+		 * the index and buffer and continue looking for an exact match.
 		 */
-		if (dep->namelen == args->namelen &&
-		    dep->name[0] == args->name[0] &&
-		    memcmp(dep->name, args->name, args->namelen) == 0) {
-			*dbpp = dbp;
+		cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
+		if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
+			args->cmpresult = cmp;
 			*indexp = index;
-			return 0;
+			/*
+			 * case exact match: release the stored CI buffer if it
+			 * exists and return the current buffer.
+			 */
+			if (cmp == XFS_CMP_EXACT) {
+				if (cbp && cbp != dbp)
+					xfs_da_brelse(tp, cbp);
+				*dbpp = dbp;
+				return 0;
+			}
+			cbp = dbp;
 		}
 	}
+	ASSERT(args->oknoent);
+	/*
+	 * Here, we can only be doing a lookup (not a rename or replace).
+	 * If a case-insensitive match was found earlier, release the current
+	 * buffer and return the stored CI matching buffer.
+	 */
+	if (args->cmpresult == XFS_CMP_CASE) {
+		if (cbp != dbp)
+			xfs_da_brelse(tp, dbp);
+		*dbpp = cbp;
+		return 0;
+	}
 	/*
 	 * No match found, return ENOENT.
 	 */
-	ASSERT(args->oknoent);
+	ASSERT(cbp == NULL);
 	if (dbp)
 		xfs_da_brelse(tp, dbp);
 	xfs_da_brelse(tp, lbp);
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index e29b7c63e19..fedf8f976a1 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -556,6 +556,7 @@ xfs_dir2_leafn_lookup_for_entry(
 	xfs_mount_t		*mp;		/* filesystem mount point */
 	xfs_dir2_db_t		newdb;		/* new data block number */
 	xfs_trans_t		*tp;		/* transaction pointer */
+	enum xfs_dacmp		cmp;		/* comparison result */
 
 	dp = args->dp;
 	tp = args->trans;
@@ -620,17 +621,21 @@ xfs_dir2_leafn_lookup_for_entry(
 		dep = (xfs_dir2_data_entry_t *)((char *)curbp->data +
 			xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
 		/*
-		 * Compare the entry, return it if it matches.
+		 * Compare the entry and if it's an exact match, return
+		 * EEXIST immediately. If it's the first case-insensitive
+		 * match, store the inode number and continue looking.
 		 */
-		if (dep->namelen == args->namelen && memcmp(dep->name,
-					args->name, args->namelen) == 0) {
+		cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
+		if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
+			args->cmpresult = cmp;
 			args->inumber = be64_to_cpu(dep->inumber);
 			di = (int)((char *)dep - (char *)curbp->data);
 			error = EEXIST;
-			goto out;
+			if (cmp == XFS_CMP_EXACT)
+				goto out;
 		}
 	}
-	/* Didn't find a match. */
+	/* Didn't find an exact match. */
 	error = ENOENT;
 	di = -1;
 	ASSERT(index == be16_to_cpu(leaf->hdr.count) || args->oknoent);
@@ -1813,6 +1818,8 @@ xfs_dir2_node_lookup(
 	error = xfs_da_node_lookup_int(state, &rval);
 	if (error)
 		rval = error;
+	else if (rval == ENOENT && args->cmpresult == XFS_CMP_CASE)
+		rval = EEXIST;	/* a case-insensitive match was found */
 	/*
 	 * Release the btree blocks and leaf block.
 	 */
@@ -1856,9 +1863,8 @@ xfs_dir2_node_removename(
 	 * Look up the entry we're deleting, set up the cursor.
 	 */
 	error = xfs_da_node_lookup_int(state, &rval);
-	if (error) {
+	if (error)
 		rval = error;
-	}
 	/*
 	 * Didn't find it, upper layer screwed up.
 	 */
@@ -1875,9 +1881,8 @@ xfs_dir2_node_removename(
 	 */
 	error = xfs_dir2_leafn_remove(args, blk->bp, blk->index,
 		&state->extrablk, &rval);
-	if (error) {
+	if (error)
 		return error;
-	}
 	/*
 	 * Fix the hash values up the btree.
 	 */
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index ca33bc62edc..dcd09cada43 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -814,6 +814,7 @@ xfs_dir2_sf_lookup(
 	int			i;		/* entry index */
 	xfs_dir2_sf_entry_t	*sfep;		/* shortform directory entry */
 	xfs_dir2_sf_t		*sfp;		/* shortform structure */
+	enum xfs_dacmp		cmp;		/* comparison result */
 
 	xfs_dir2_trace_args("sf_lookup", args);
 	xfs_dir2_sf_check(args);
@@ -836,6 +837,7 @@ xfs_dir2_sf_lookup(
 	 */
 	if (args->namelen == 1 && args->name[0] == '.') {
 		args->inumber = dp->i_ino;
+		args->cmpresult = XFS_CMP_EXACT;
 		return XFS_ERROR(EEXIST);
 	}
 	/*
@@ -844,27 +846,39 @@ xfs_dir2_sf_lookup(
 	if (args->namelen == 2 &&
 	    args->name[0] == '.' && args->name[1] == '.') {
 		args->inumber = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
+		args->cmpresult = XFS_CMP_EXACT;
 		return XFS_ERROR(EEXIST);
 	}
 	/*
 	 * Loop over all the entries trying to match ours.
 	 */
-	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
-	     i < sfp->hdr.count;
-	     i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
-		if (sfep->namelen == args->namelen &&
-		    sfep->name[0] == args->name[0] &&
-		    memcmp(args->name, sfep->name, args->namelen) == 0) {
-			args->inumber =
-				xfs_dir2_sf_get_inumber(sfp,
-					xfs_dir2_sf_inumberp(sfep));
-			return XFS_ERROR(EEXIST);
+	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count;
+				i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
+		/*
+		 * Compare name and if it's an exact match, return the inode
+		 * number. If it's the first case-insensitive match, store the
+		 * inode number and continue looking for an exact match.
+		 */
+		cmp = dp->i_mount->m_dirnameops->compname(args, sfep->name,
+								sfep->namelen);
+		if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
+			args->cmpresult = cmp;
+			args->inumber = xfs_dir2_sf_get_inumber(sfp,
+						xfs_dir2_sf_inumberp(sfep));
+			if (cmp == XFS_CMP_EXACT)
+				return XFS_ERROR(EEXIST);
 		}
 	}
+	ASSERT(args->oknoent);
+	/*
+	 * Here, we can only be doing a lookup (not a rename or replace).
+	 * If a case-insensitive match was found earlier, return "found".
+	 */
+	if (args->cmpresult == XFS_CMP_CASE)
+		return XFS_ERROR(EEXIST);
 	/*
 	 * Didn't find it.
 	 */
-	ASSERT(args->oknoent);
 	return XFS_ERROR(ENOENT);
 }
 
@@ -904,24 +918,21 @@ xfs_dir2_sf_removename(
 	 * Loop over the old directory entries.
 	 * Find the one we're deleting.
 	 */
-	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
-	     i < sfp->hdr.count;
-	     i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
-		if (sfep->namelen == args->namelen &&
-		    sfep->name[0] == args->name[0] &&
-		    memcmp(sfep->name, args->name, args->namelen) == 0) {
+	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count;
+				i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
+		if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
+								XFS_CMP_EXACT) {
 			ASSERT(xfs_dir2_sf_get_inumber(sfp,
-					xfs_dir2_sf_inumberp(sfep)) ==
-				args->inumber);
+						xfs_dir2_sf_inumberp(sfep)) ==
+								args->inumber);
 			break;
 		}
 	}
 	/*
 	 * Didn't find it.
 	 */
-	if (i == sfp->hdr.count) {
+	if (i == sfp->hdr.count)
 		return XFS_ERROR(ENOENT);
-	}
 	/*
 	 * Calculate sizes.
 	 */
@@ -1042,11 +1053,10 @@ xfs_dir2_sf_replace(
 	 */
 	else {
 		for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
-		     i < sfp->hdr.count;
-		     i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
-			if (sfep->namelen == args->namelen &&
-			    sfep->name[0] == args->name[0] &&
-			    memcmp(args->name, sfep->name, args->namelen) == 0) {
+				i < sfp->hdr.count;
+				i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
+			if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
+								XFS_CMP_EXACT) {
 #if XFS_BIG_INUMS || defined(DEBUG)
 				ino = xfs_dir2_sf_get_inumber(sfp,
 					xfs_dir2_sf_inumberp(sfep));
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 96d8791e9e5..2a75f1703b3 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -61,6 +61,7 @@ struct xfs_bmap_free;
 struct xfs_extdelta;
 struct xfs_swapext;
 struct xfs_mru_cache;
+struct xfs_nameops;
 
 /*
  * Prototypes and functions for the Data Migration subsystem.
@@ -315,6 +316,7 @@ typedef struct xfs_mount {
 	__uint8_t		m_inode_quiesce;/* call quiesce on new inodes.
 						   field governed by m_ilock */
 	__uint8_t		m_sectbb_log;	/* sectlog - BBSHIFT */
+	const struct xfs_nameops *m_dirnameops;	/* vector of dir name ops */
 	int			m_dirblksize;	/* directory block sz--bytes */
 	int			m_dirblkfsbs;	/* directory block sz--fsbs */
 	xfs_dablk_t		m_dirdatablk;	/* blockno of dir data v2 */
-- 
cgit v1.2.3-70-g09d2


From 6a178100abf01282eb697ab62b6086b2886dfc00 Mon Sep 17 00:00:00 2001
From: Barry Naujok <bnaujok@sgi.com>
Date: Wed, 21 May 2008 16:42:05 +1000
Subject: [XFS] Add op_flags field and helpers to xfs_da_args

The end of the xfs_da_args structure has 4 unsigned char fields for
true/false information on directory and attr operations using the
xfs_da_args structure.

The following converts these 4 into a op_flags field that uses the first 4
bits for these fields and allows expansion for future operation
information (eg. case-insensitive lookup request).

SGI-PV: 981520
SGI-Modid: xfs-linux-melb:xfs-kern:31206a

Signed-off-by: Barry Naujok <bnaujok@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
---
 fs/xfs/xfs_attr.c       | 11 +++++------
 fs/xfs/xfs_attr_leaf.c  | 20 +++++++++++---------
 fs/xfs/xfs_da_btree.c   |  2 +-
 fs/xfs/xfs_da_btree.h   | 13 +++++++++----
 fs/xfs/xfs_dir2.c       | 14 ++++++++------
 fs/xfs/xfs_dir2_block.c | 10 +++++-----
 fs/xfs/xfs_dir2_leaf.c  | 15 ++++++++-------
 fs/xfs/xfs_dir2_node.c  | 16 +++++++++-------
 fs/xfs/xfs_dir2_sf.c    |  8 ++++----
 fs/xfs/xfs_dir2_trace.c | 20 +++++++++++---------
 10 files changed, 71 insertions(+), 58 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 5e5dbe62b19..557dad611de 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -241,8 +241,7 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
 	args.firstblock = &firstblock;
 	args.flist = &flist;
 	args.whichfork = XFS_ATTR_FORK;
-	args.addname = 1;
-	args.oknoent = 1;
+	args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
 
 	/*
 	 * Determine space new attribute will use, and if it would be
@@ -974,7 +973,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
 			xfs_da_brelse(args->trans, bp);
 			return(retval);
 		}
-		args->rename = 1;			/* an atomic rename */
+		args->op_flags |= XFS_DA_OP_RENAME;	/* an atomic rename */
 		args->blkno2 = args->blkno;		/* set 2nd entry info*/
 		args->index2 = args->index;
 		args->rmtblkno2 = args->rmtblkno;
@@ -1054,7 +1053,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
 	 * so that one disappears and one appears atomically.  Then we
 	 * must remove the "old" attribute/value pair.
 	 */
-	if (args->rename) {
+	if (args->op_flags & XFS_DA_OP_RENAME) {
 		/*
 		 * In a separate transaction, set the incomplete flag on the
 		 * "old" attr and clear the incomplete flag on the "new" attr.
@@ -1307,7 +1306,7 @@ restart:
 	} else if (retval == EEXIST) {
 		if (args->flags & ATTR_CREATE)
 			goto out;
-		args->rename = 1;			/* atomic rename op */
+		args->op_flags |= XFS_DA_OP_RENAME;	/* atomic rename op */
 		args->blkno2 = args->blkno;		/* set 2nd entry info*/
 		args->index2 = args->index;
 		args->rmtblkno2 = args->rmtblkno;
@@ -1425,7 +1424,7 @@ restart:
 	 * so that one disappears and one appears atomically.  Then we
 	 * must remove the "old" attribute/value pair.
 	 */
-	if (args->rename) {
+	if (args->op_flags & XFS_DA_OP_RENAME) {
 		/*
 		 * In a separate transaction, set the incomplete flag on the
 		 * "old" attr and clear the incomplete flag on the "new" attr.
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index a85e9caf015..cb345e6e485 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -369,9 +369,10 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
 	 * Fix up the start offset of the attribute fork
 	 */
 	totsize -= size;
-	if (totsize == sizeof(xfs_attr_sf_hdr_t) && !args->addname &&
-	    (mp->m_flags & XFS_MOUNT_ATTR2) && 
-	    (dp->i_d.di_format != XFS_DINODE_FMT_BTREE)) {
+	if (totsize == sizeof(xfs_attr_sf_hdr_t) &&
+				!(args->op_flags & XFS_DA_OP_ADDNAME) &&
+				(mp->m_flags & XFS_MOUNT_ATTR2) &&
+				(dp->i_d.di_format != XFS_DINODE_FMT_BTREE)) {
 		/*
 		 * Last attribute now removed, revert to original
 		 * inode format making all literal area available
@@ -389,9 +390,10 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
 		xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
 		dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
 		ASSERT(dp->i_d.di_forkoff);
-		ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || args->addname ||
-			!(mp->m_flags & XFS_MOUNT_ATTR2) ||
-			dp->i_d.di_format == XFS_DINODE_FMT_BTREE);
+		ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) ||
+				(args->op_flags & XFS_DA_OP_ADDNAME) ||
+				!(mp->m_flags & XFS_MOUNT_ATTR2) ||
+				dp->i_d.di_format == XFS_DINODE_FMT_BTREE);
 		dp->i_afp->if_ext_max =
 			XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
 		dp->i_df.if_ext_max =
@@ -531,7 +533,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
 	nargs.total = args->total;
 	nargs.whichfork = XFS_ATTR_FORK;
 	nargs.trans = args->trans;
-	nargs.oknoent = 1;
+	nargs.op_flags = XFS_DA_OP_OKNOENT;
 
 	sfe = &sf->list[0];
 	for (i = 0; i < sf->hdr.count; i++) {
@@ -853,7 +855,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
 	nargs.total = args->total;
 	nargs.whichfork = XFS_ATTR_FORK;
 	nargs.trans = args->trans;
-	nargs.oknoent = 1;
+	nargs.op_flags = XFS_DA_OP_OKNOENT;
 	entry = &leaf->entries[0];
 	for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) {
 		if (entry->flags & XFS_ATTR_INCOMPLETE)
@@ -1155,7 +1157,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
 	entry->hashval = cpu_to_be32(args->hashval);
 	entry->flags = tmp ? XFS_ATTR_LOCAL : 0;
 	entry->flags |= XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags);
-	if (args->rename) {
+	if (args->op_flags & XFS_DA_OP_RENAME) {
 		entry->flags |= XFS_ATTR_INCOMPLETE;
 		if ((args->blkno2 == args->blkno) &&
 		    (args->index2 <= args->index)) {
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index ae4b18c7726..edc0aef4e51 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1431,7 +1431,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
 	}
 	if (level < 0) {
 		*result = XFS_ERROR(ENOENT);	/* we're out of our tree */
-		ASSERT(args->oknoent);
+		ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
 		return(0);
 	}
 
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index e64c6924996..8face64c11f 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -132,13 +132,18 @@ typedef struct xfs_da_args {
 	int		index2;		/* index of 2nd attr in blk */
 	xfs_dablk_t	rmtblkno2;	/* remote attr value starting blkno */
 	int		rmtblkcnt2;	/* remote attr value block count */
-	unsigned char	justcheck;	/* T/F: check for ok with no space */
-	unsigned char	rename;		/* T/F: this is an atomic rename op */
-	unsigned char	addname;	/* T/F: this is an add operation */
-	unsigned char	oknoent;	/* T/F: ok to return ENOENT, else die */
+	int		op_flags;	/* operation flags */
 	enum xfs_dacmp	cmpresult;	/* name compare result for lookups */
 } xfs_da_args_t;
 
+/*
+ * Operation flags:
+ */
+#define XFS_DA_OP_JUSTCHECK	0x0001	/* check for ok with no space */
+#define XFS_DA_OP_RENAME	0x0002	/* this is an atomic rename op */
+#define XFS_DA_OP_ADDNAME	0x0004	/* this is an add operation */
+#define XFS_DA_OP_OKNOENT	0x0008	/* lookup/add op, ENOENT ok, else die */
+
 /*
  * Structure to describe buffer(s) for a block.
  * This is needed in the directory version 2 format case, when
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 675899bb704..3387acd3e47 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -46,6 +46,8 @@
 
 struct xfs_name xfs_name_dotdot = {"..", 2};
 
+extern const struct xfs_nameops xfs_default_nameops;
+
 void
 xfs_dir_mount(
 	xfs_mount_t	*mp)
@@ -173,8 +175,7 @@ xfs_dir_createname(
 	args.total = total;
 	args.whichfork = XFS_DATA_FORK;
 	args.trans = tp;
-	args.justcheck = 0;
-	args.addname = args.oknoent = 1;
+	args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
 
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
 		rval = xfs_dir2_sf_addname(&args);
@@ -215,7 +216,7 @@ xfs_dir_lookup(
 	args.dp = dp;
 	args.whichfork = XFS_DATA_FORK;
 	args.trans = tp;
-	args.oknoent = 1;
+	args.op_flags = XFS_DA_OP_OKNOENT;
 	args.cmpresult = XFS_CMP_DIFFERENT;
 
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
@@ -267,7 +268,7 @@ xfs_dir_removename(
 	args.total = total;
 	args.whichfork = XFS_DATA_FORK;
 	args.trans = tp;
-	args.justcheck = args.addname = args.oknoent = 0;
+	args.op_flags = 0;
 
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
 		rval = xfs_dir2_sf_removename(&args);
@@ -350,7 +351,7 @@ xfs_dir_replace(
 	args.total = total;
 	args.whichfork = XFS_DATA_FORK;
 	args.trans = tp;
-	args.justcheck = args.addname = args.oknoent = 0;
+	args.op_flags = 0;
 
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
 		rval = xfs_dir2_sf_replace(&args);
@@ -394,7 +395,8 @@ xfs_dir_canenter(
 	args.dp = dp;
 	args.whichfork = XFS_DATA_FORK;
 	args.trans = tp;
-	args.justcheck = args.addname = args.oknoent = 1;
+	args.op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
+							XFS_DA_OP_OKNOENT;
 
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
 		rval = xfs_dir2_sf_addname(&args);
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 98588491cb0..dee225918db 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -215,7 +215,7 @@ xfs_dir2_block_addname(
 	/*
 	 * If this isn't a real add, we're done with the buffer.
 	 */
-	if (args->justcheck)
+	if (args->op_flags & XFS_DA_OP_JUSTCHECK)
 		xfs_da_brelse(tp, bp);
 	/*
 	 * If we don't have space for the new entry & leaf ...
@@ -225,7 +225,7 @@ xfs_dir2_block_addname(
 		 * Not trying to actually do anything, or don't have
 		 * a space reservation: return no-space.
 		 */
-		if (args->justcheck || args->total == 0)
+		if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
 			return XFS_ERROR(ENOSPC);
 		/*
 		 * Convert to the next larger format.
@@ -240,7 +240,7 @@ xfs_dir2_block_addname(
 	/*
 	 * Just checking, and it would work, so say so.
 	 */
-	if (args->justcheck)
+	if (args->op_flags & XFS_DA_OP_JUSTCHECK)
 		return 0;
 	needlog = needscan = 0;
 	/*
@@ -674,7 +674,7 @@ xfs_dir2_block_lookup_int(
 		else
 			high = mid - 1;
 		if (low > high) {
-			ASSERT(args->oknoent);
+			ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
 			xfs_da_brelse(tp, bp);
 			return XFS_ERROR(ENOENT);
 		}
@@ -713,7 +713,7 @@ xfs_dir2_block_lookup_int(
 	} while (++mid < be32_to_cpu(btp->count) &&
 			be32_to_cpu(blp[mid].hashval) == hash);
 
-	ASSERT(args->oknoent);
+	ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
 	/*
 	 * Here, we can only be doing a lookup (not a rename or replace).
 	 * If a case-insensitive match was found earlier, return success.
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index b52903bc0b1..2ebbed4f1b0 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -263,20 +263,21 @@ xfs_dir2_leaf_addname(
 	 * If we don't have enough free bytes but we can make enough
 	 * by compacting out stale entries, we'll do that.
 	 */
-	if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] < needbytes &&
-	    be16_to_cpu(leaf->hdr.stale) > 1) {
+	if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <
+				needbytes && be16_to_cpu(leaf->hdr.stale) > 1) {
 		compact = 1;
 	}
 	/*
 	 * Otherwise if we don't have enough free bytes we need to
 	 * convert to node form.
 	 */
-	else if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <
-		 needbytes) {
+	else if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(
+						leaf->hdr.count)] < needbytes) {
 		/*
 		 * Just checking or no space reservation, give up.
 		 */
-		if (args->justcheck || args->total == 0) {
+		if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
+							args->total == 0) {
 			xfs_da_brelse(tp, lbp);
 			return XFS_ERROR(ENOSPC);
 		}
@@ -301,7 +302,7 @@ xfs_dir2_leaf_addname(
 	 * If just checking, then it will fit unless we needed to allocate
 	 * a new data block.
 	 */
-	if (args->justcheck) {
+	if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
 		xfs_da_brelse(tp, lbp);
 		return use_block == -1 ? XFS_ERROR(ENOSPC) : 0;
 	}
@@ -1414,7 +1415,7 @@ xfs_dir2_leaf_lookup_int(
 			cbp = dbp;
 		}
 	}
-	ASSERT(args->oknoent);
+	ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
 	/*
 	 * Here, we can only be doing a lookup (not a rename or replace).
 	 * If a case-insensitive match was found earlier, release the current
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index fedf8f976a1..c71cff85950 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -226,7 +226,7 @@ xfs_dir2_leafn_add(
 	ASSERT(index == be16_to_cpu(leaf->hdr.count) ||
 	       be32_to_cpu(leaf->ents[index].hashval) >= args->hashval);
 
-	if (args->justcheck)
+	if (args->op_flags & XFS_DA_OP_JUSTCHECK)
 		return 0;
 
 	/*
@@ -515,7 +515,7 @@ xfs_dir2_leafn_lookup_for_addname(
 	/* Didn't find any space */
 	fi = -1;
 out:
-	ASSERT(args->oknoent);
+	ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
 	if (curbp) {
 		/* Giving back a free block. */
 		state->extravalid = 1;
@@ -638,7 +638,8 @@ xfs_dir2_leafn_lookup_for_entry(
 	/* Didn't find an exact match. */
 	error = ENOENT;
 	di = -1;
-	ASSERT(index == be16_to_cpu(leaf->hdr.count) || args->oknoent);
+	ASSERT(index == be16_to_cpu(leaf->hdr.count) ||
+					(args->op_flags & XFS_DA_OP_OKNOENT));
 out:
 	if (curbp) {
 		/* Giving back a data block. */
@@ -669,7 +670,7 @@ xfs_dir2_leafn_lookup_int(
 	int			*indexp,	/* out: leaf entry index */
 	xfs_da_state_t		*state)		/* state to fill in */
 {
-	if (args->addname)
+	if (args->op_flags & XFS_DA_OP_ADDNAME)
 		return xfs_dir2_leafn_lookup_for_addname(bp, args, indexp,
 							state);
 	return xfs_dir2_leafn_lookup_for_entry(bp, args, indexp, state);
@@ -1383,7 +1384,7 @@ xfs_dir2_node_addname(
 		/*
 		 * It worked, fix the hash values up the btree.
 		 */
-		if (!args->justcheck)
+		if (!(args->op_flags & XFS_DA_OP_JUSTCHECK))
 			xfs_da_fixhashpath(state, &state->path);
 	} else {
 		/*
@@ -1566,7 +1567,8 @@ xfs_dir2_node_addname_int(
 		/*
 		 * Not allowed to allocate, return failure.
 		 */
-		if (args->justcheck || args->total == 0) {
+		if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
+							args->total == 0) {
 			/*
 			 * Drop the freespace buffer unless it came from our
 			 * caller.
@@ -1712,7 +1714,7 @@ xfs_dir2_node_addname_int(
 		/*
 		 * If just checking, we succeeded.
 		 */
-		if (args->justcheck) {
+		if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
 			if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
 				xfs_da_buf_done(fbp);
 			return 0;
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index dcd09cada43..9409fd3e565 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -332,7 +332,7 @@ xfs_dir2_sf_addname(
 		/*
 		 * Just checking or no space reservation, it doesn't fit.
 		 */
-		if (args->justcheck || args->total == 0)
+		if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
 			return XFS_ERROR(ENOSPC);
 		/*
 		 * Convert to block form then add the name.
@@ -345,7 +345,7 @@ xfs_dir2_sf_addname(
 	/*
 	 * Just checking, it fits.
 	 */
-	if (args->justcheck)
+	if (args->op_flags & XFS_DA_OP_JUSTCHECK)
 		return 0;
 	/*
 	 * Do it the easy way - just add it at the end.
@@ -869,7 +869,7 @@ xfs_dir2_sf_lookup(
 				return XFS_ERROR(EEXIST);
 		}
 	}
-	ASSERT(args->oknoent);
+	ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
 	/*
 	 * Here, we can only be doing a lookup (not a rename or replace).
 	 * If a case-insensitive match was found earlier, return "found".
@@ -1071,7 +1071,7 @@ xfs_dir2_sf_replace(
 		 * Didn't find it.
 		 */
 		if (i == sfp->hdr.count) {
-			ASSERT(args->oknoent);
+			ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
 #if XFS_BIG_INUMS
 			if (i8elevated)
 				xfs_dir2_sf_toino4(args);
diff --git a/fs/xfs/xfs_dir2_trace.c b/fs/xfs/xfs_dir2_trace.c
index f3fb2ffd6f5..6cc7c0c681a 100644
--- a/fs/xfs/xfs_dir2_trace.c
+++ b/fs/xfs/xfs_dir2_trace.c
@@ -85,7 +85,8 @@ xfs_dir2_trace_args(
 		(void *)((unsigned long)(args->inumber >> 32)),
 		(void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
 		(void *)args->dp, (void *)args->trans,
-		(void *)(unsigned long)args->justcheck, NULL, NULL);
+		(void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
+		NULL, NULL);
 }
 
 void
@@ -100,7 +101,7 @@ xfs_dir2_trace_args_b(
 		(void *)((unsigned long)(args->inumber >> 32)),
 		(void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
 		(void *)args->dp, (void *)args->trans,
-		(void *)(unsigned long)args->justcheck,
+		(void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
 		(void *)(bp ? bp->bps[0] : NULL), NULL);
 }
 
@@ -117,7 +118,7 @@ xfs_dir2_trace_args_bb(
 		(void *)((unsigned long)(args->inumber >> 32)),
 		(void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
 		(void *)args->dp, (void *)args->trans,
-		(void *)(unsigned long)args->justcheck,
+		(void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
 		(void *)(lbp ? lbp->bps[0] : NULL),
 		(void *)(dbp ? dbp->bps[0] : NULL));
 }
@@ -157,8 +158,8 @@ xfs_dir2_trace_args_db(
 		(void *)((unsigned long)(args->inumber >> 32)),
 		(void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
 		(void *)args->dp, (void *)args->trans,
-		(void *)(unsigned long)args->justcheck, (void *)(long)db,
-		(void *)dbp);
+		(void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
+		(void *)(long)db, (void *)dbp);
 }
 
 void
@@ -173,7 +174,7 @@ xfs_dir2_trace_args_i(
 		(void *)((unsigned long)(args->inumber >> 32)),
 		(void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
 		(void *)args->dp, (void *)args->trans,
-		(void *)(unsigned long)args->justcheck,
+		(void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
 		(void *)((unsigned long)(i >> 32)),
 		(void *)((unsigned long)(i & 0xFFFFFFFF)));
 }
@@ -190,7 +191,8 @@ xfs_dir2_trace_args_s(
 		(void *)((unsigned long)(args->inumber >> 32)),
 		(void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
 		(void *)args->dp, (void *)args->trans,
-		(void *)(unsigned long)args->justcheck, (void *)(long)s, NULL);
+		(void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
+		(void *)(long)s, NULL);
 }
 
 void
@@ -208,7 +210,7 @@ xfs_dir2_trace_args_sb(
 		(void *)((unsigned long)(args->inumber >> 32)),
 		(void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
 		(void *)args->dp, (void *)args->trans,
-		(void *)(unsigned long)args->justcheck, (void *)(long)s,
-		(void *)dbp);
+		(void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
+		(void *)(long)s, (void *)dbp);
 }
 #endif	/* XFS_DIR2_TRACE */
-- 
cgit v1.2.3-70-g09d2


From 9403540c0653122ca34884a180439ddbfcbcb524 Mon Sep 17 00:00:00 2001
From: Barry Naujok <bnaujok@sgi.com>
Date: Wed, 21 May 2008 16:50:46 +1000
Subject: dcache: Add case-insensitive support d_ci_add() routine

This add a dcache entry to the dcache for lookup, but changing the name
that is associated with the entry rather than the one passed in to the
lookup routine.

First, it sees if the case-exact match already exists in the dcache and
uses it if one exists. Otherwise, it allocates a new node with the new
name and splices it into the dcache.

Original code from ntfs_lookup in fs/ntfs/namei.c by Anton Altaparmakov.

Signed-off-by: Barry Naujok <bnaujok@sgi.com>
Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
Acked-by: Christoph Hellwig <hch@infradead.org>
---
 fs/dcache.c            | 102 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/dcache.h |   1 +
 2 files changed, 103 insertions(+)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index f2584d22cb4..101663d15e9 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1220,6 +1220,107 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
 	return new;
 }
 
+/**
+ * d_add_ci - lookup or allocate new dentry with case-exact name
+ * @inode:  the inode case-insensitive lookup has found
+ * @dentry: the negative dentry that was passed to the parent's lookup func
+ * @name:   the case-exact name to be associated with the returned dentry
+ *
+ * This is to avoid filling the dcache with case-insensitive names to the
+ * same inode, only the actual correct case is stored in the dcache for
+ * case-insensitive filesystems.
+ *
+ * For a case-insensitive lookup match and if the the case-exact dentry
+ * already exists in in the dcache, use it and return it.
+ *
+ * If no entry exists with the exact case name, allocate new dentry with
+ * the exact case, and return the spliced entry.
+ */
+struct dentry *d_add_ci(struct inode *inode, struct dentry *dentry,
+			struct qstr *name)
+{
+	int error;
+	struct dentry *found;
+	struct dentry *new;
+
+	/* Does a dentry matching the name exist already? */
+	found = d_hash_and_lookup(dentry->d_parent, name);
+	/* If not, create it now and return */
+	if (!found) {
+		new = d_alloc(dentry->d_parent, name);
+		if (!new) {
+			error = -ENOMEM;
+			goto err_out;
+		}
+		found = d_splice_alias(inode, new);
+		if (found) {
+			dput(new);
+			return found;
+		}
+		return new;
+	}
+	/* Matching dentry exists, check if it is negative. */
+	if (found->d_inode) {
+		if (unlikely(found->d_inode != inode)) {
+			/* This can't happen because bad inodes are unhashed. */
+			BUG_ON(!is_bad_inode(inode));
+			BUG_ON(!is_bad_inode(found->d_inode));
+		}
+		/*
+		 * Already have the inode and the dentry attached, decrement
+		 * the reference count to balance the iget() done
+		 * earlier on.  We found the dentry using d_lookup() so it
+		 * cannot be disconnected and thus we do not need to worry
+		 * about any NFS/disconnectedness issues here.
+		 */
+		iput(inode);
+		return found;
+	}
+	/*
+	 * Negative dentry: instantiate it unless the inode is a directory and
+	 * has a 'disconnected' dentry (i.e. IS_ROOT and DCACHE_DISCONNECTED),
+	 * in which case d_move() that in place of the found dentry.
+	 */
+	if (!S_ISDIR(inode->i_mode)) {
+		/* Not a directory; everything is easy. */
+		d_instantiate(found, inode);
+		return found;
+	}
+	spin_lock(&dcache_lock);
+	if (list_empty(&inode->i_dentry)) {
+		/*
+		 * Directory without a 'disconnected' dentry; we need to do
+		 * d_instantiate() by hand because it takes dcache_lock which
+		 * we already hold.
+		 */
+		list_add(&found->d_alias, &inode->i_dentry);
+		found->d_inode = inode;
+		spin_unlock(&dcache_lock);
+		security_d_instantiate(found, inode);
+		return found;
+	}
+	/*
+	 * Directory with a 'disconnected' dentry; get a reference to the
+	 * 'disconnected' dentry.
+	 */
+	new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
+	dget_locked(new);
+	spin_unlock(&dcache_lock);
+	/* Do security vodoo. */
+	security_d_instantiate(found, inode);
+	/* Move new in place of found. */
+	d_move(new, found);
+	/* Balance the iget() we did above. */
+	iput(inode);
+	/* Throw away found. */
+	dput(found);
+	/* Use new as the actual dentry. */
+	return new;
+
+err_out:
+	iput(inode);
+	return ERR_PTR(error);
+}
 
 /**
  * d_lookup - search for a dentry
@@ -2254,6 +2355,7 @@ EXPORT_SYMBOL(d_path);
 EXPORT_SYMBOL(d_prune_aliases);
 EXPORT_SYMBOL(d_rehash);
 EXPORT_SYMBOL(d_splice_alias);
+EXPORT_SYMBOL(d_add_ci);
 EXPORT_SYMBOL(d_validate);
 EXPORT_SYMBOL(dget_locked);
 EXPORT_SYMBOL(dput);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 98202c672fd..07aa198f19e 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -230,6 +230,7 @@ extern void d_delete(struct dentry *);
 extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
 extern struct dentry * d_alloc_anon(struct inode *);
 extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
+extern struct dentry * d_add_ci(struct inode *, struct dentry *, struct qstr *);
 extern void shrink_dcache_sb(struct super_block *);
 extern void shrink_dcache_parent(struct dentry *);
 extern void shrink_dcache_for_umount(struct super_block *);
-- 
cgit v1.2.3-70-g09d2


From 384f3ced07efdddf6838f6527366089d37843c94 Mon Sep 17 00:00:00 2001
From: Barry Naujok <bnaujok@sgi.com>
Date: Wed, 21 May 2008 16:58:22 +1000
Subject: [XFS] Return case-insensitive match for dentry cache

This implements the code to store the actual filename found during a
lookup in the dentry cache and to avoid multiple entries in the dcache
pointing to the same inode.

To avoid polluting the dcache, we implement a new directory inode
operations for lookup. xfs_vn_ci_lookup() stores the correct case name in
the dcache.

The "actual name" is only allocated and returned for a case- insensitive
match and not an actual match.

Another unusual interaction with the dcache is not storing negative
dentries like other filesystems doing a d_add(dentry, NULL) when an ENOENT
is returned. During the VFS lookup, if a dentry returned has no inode,
dput is called and ENOENT is returned. By not doing a d_add, this actually
removes it completely from the dcache to be reused. create/rename have to
be modified to support unhashed dentries being passed in.

SGI-PV: 981521
SGI-Modid: xfs-linux-melb:xfs-kern:31208a

Signed-off-by: Barry Naujok <bnaujok@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
---
 fs/xfs/linux-2.6/xfs_export.c |  2 +-
 fs/xfs/linux-2.6/xfs_iops.c   | 57 ++++++++++++++++++++++++++++++++++++++++++-
 fs/xfs/linux-2.6/xfs_iops.h   |  1 +
 fs/xfs/xfs_da_btree.h         |  1 +
 fs/xfs/xfs_dir2.c             | 40 ++++++++++++++++++++++++++++--
 fs/xfs/xfs_dir2.h             |  6 ++++-
 fs/xfs/xfs_dir2_block.c       |  9 ++++---
 fs/xfs/xfs_dir2_leaf.c        |  5 ++--
 fs/xfs/xfs_dir2_node.c        | 16 ++++++++----
 fs/xfs/xfs_dir2_sf.c          | 17 +++++++------
 fs/xfs/xfs_vnodeops.c         | 19 +++++++++++----
 fs/xfs/xfs_vnodeops.h         |  2 +-
 12 files changed, 146 insertions(+), 29 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index c672b3238b1..987fe84f7b1 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -215,7 +215,7 @@ xfs_fs_get_parent(
 	struct xfs_inode	*cip;
 	struct dentry		*parent;
 
-	error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip);
+	error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL);
 	if (unlikely(error))
 		return ERR_PTR(-error);
 
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 13b6cfd366c..9f0f8ee8d44 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -382,7 +382,7 @@ xfs_vn_lookup(
 		return ERR_PTR(-ENAMETOOLONG);
 
 	xfs_dentry_to_name(&name, dentry);
-	error = xfs_lookup(XFS_I(dir), &name, &cip);
+	error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
 	if (unlikely(error)) {
 		if (unlikely(error != ENOENT))
 			return ERR_PTR(-error);
@@ -393,6 +393,42 @@ xfs_vn_lookup(
 	return d_splice_alias(cip->i_vnode, dentry);
 }
 
+STATIC struct dentry *
+xfs_vn_ci_lookup(
+	struct inode	*dir,
+	struct dentry	*dentry,
+	struct nameidata *nd)
+{
+	struct xfs_inode *ip;
+	struct xfs_name	xname;
+	struct xfs_name ci_name;
+	struct qstr	dname;
+	int		error;
+
+	if (dentry->d_name.len >= MAXNAMELEN)
+		return ERR_PTR(-ENAMETOOLONG);
+
+	xfs_dentry_to_name(&xname, dentry);
+	error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
+	if (unlikely(error)) {
+		if (unlikely(error != ENOENT))
+			return ERR_PTR(-error);
+		d_add(dentry, NULL);
+		return NULL;
+	}
+
+	/* if exact match, just splice and exit */
+	if (!ci_name.name)
+		return d_splice_alias(ip->i_vnode, dentry);
+
+	/* else case-insensitive match... */
+	dname.name = ci_name.name;
+	dname.len = ci_name.len;
+	dentry = d_add_ci(ip->i_vnode, dentry, &dname);
+	kmem_free(ci_name.name);
+	return dentry;
+}
+
 STATIC int
 xfs_vn_link(
 	struct dentry	*old_dentry,
@@ -892,6 +928,25 @@ const struct inode_operations xfs_dir_inode_operations = {
 	.removexattr		= xfs_vn_removexattr,
 };
 
+const struct inode_operations xfs_dir_ci_inode_operations = {
+	.create			= xfs_vn_create,
+	.lookup			= xfs_vn_ci_lookup,
+	.link			= xfs_vn_link,
+	.unlink			= xfs_vn_unlink,
+	.symlink		= xfs_vn_symlink,
+	.mkdir			= xfs_vn_mkdir,
+	.rmdir			= xfs_vn_rmdir,
+	.mknod			= xfs_vn_mknod,
+	.rename			= xfs_vn_rename,
+	.permission		= xfs_vn_permission,
+	.getattr		= xfs_vn_getattr,
+	.setattr		= xfs_vn_setattr,
+	.setxattr		= xfs_vn_setxattr,
+	.getxattr		= xfs_vn_getxattr,
+	.listxattr		= xfs_vn_listxattr,
+	.removexattr		= xfs_vn_removexattr,
+};
+
 const struct inode_operations xfs_symlink_inode_operations = {
 	.readlink		= generic_readlink,
 	.follow_link		= xfs_vn_follow_link,
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
index 14d0deb7aff..3b4df5863e4 100644
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -20,6 +20,7 @@
 
 extern const struct inode_operations xfs_inode_operations;
 extern const struct inode_operations xfs_dir_inode_operations;
+extern const struct inode_operations xfs_dir_ci_inode_operations;
 extern const struct inode_operations xfs_symlink_inode_operations;
 
 extern const struct file_operations xfs_file_operations;
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 8face64c11f..8be0b00ede9 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -143,6 +143,7 @@ typedef struct xfs_da_args {
 #define XFS_DA_OP_RENAME	0x0002	/* this is an atomic rename op */
 #define XFS_DA_OP_ADDNAME	0x0004	/* this is an add operation */
 #define XFS_DA_OP_OKNOENT	0x0008	/* lookup/add op, ENOENT ok, else die */
+#define XFS_DA_OP_CILOOKUP	0x0010	/* lookup to return CI name if found */
 
 /*
  * Structure to describe buffer(s) for a block.
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 3387acd3e47..882609c699c 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -192,15 +192,44 @@ xfs_dir_createname(
 	return rval;
 }
 
+/*
+ * If doing a CI lookup and case-insensitive match, dup actual name into
+ * args.value. Return EEXIST for success (ie. name found) or an error.
+ */
+int
+xfs_dir_cilookup_result(
+	struct xfs_da_args *args,
+	const char	*name,
+	int		len)
+{
+	if (args->cmpresult == XFS_CMP_DIFFERENT)
+		return ENOENT;
+	if (args->cmpresult != XFS_CMP_CASE ||
+					!(args->op_flags & XFS_DA_OP_CILOOKUP))
+		return EEXIST;
+
+	args->value = kmem_alloc(len, KM_MAYFAIL);
+	if (!args->value)
+		return ENOMEM;
+
+	memcpy(args->value, name, len);
+	args->valuelen = len;
+	return EEXIST;
+}
+
 /*
  * Lookup a name in a directory, give back the inode number.
+ * If ci_name is not NULL, returns the actual name in ci_name if it differs
+ * to name, or ci_name->name is set to NULL for an exact match.
  */
+
 int
 xfs_dir_lookup(
 	xfs_trans_t	*tp,
 	xfs_inode_t	*dp,
 	struct xfs_name	*name,
-	xfs_ino_t	*inum)		/* out: inode number */
+	xfs_ino_t	*inum,		/* out: inode number */
+	struct xfs_name *ci_name)	/* out: actual name if CI match */
 {
 	xfs_da_args_t	args;
 	int		rval;
@@ -217,6 +246,8 @@ xfs_dir_lookup(
 	args.whichfork = XFS_DATA_FORK;
 	args.trans = tp;
 	args.op_flags = XFS_DA_OP_OKNOENT;
+	if (ci_name)
+		args.op_flags |= XFS_DA_OP_CILOOKUP;
 	args.cmpresult = XFS_CMP_DIFFERENT;
 
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
@@ -233,8 +264,13 @@ xfs_dir_lookup(
 		rval = xfs_dir2_node_lookup(&args);
 	if (rval == EEXIST)
 		rval = 0;
-	if (rval == 0)
+	if (!rval) {
 		*inum = args.inumber;
+		if (ci_name) {
+			ci_name->name = args.value;
+			ci_name->len = args.valuelen;
+		}
+	}
 	return rval;
 }
 
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index 6392f939029..1d9ef96f33a 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -74,7 +74,8 @@ extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp,
 				xfs_fsblock_t *first,
 				struct xfs_bmap_free *flist, xfs_extlen_t tot);
 extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp,
-				struct xfs_name *name, xfs_ino_t *inum);
+				struct xfs_name *name, xfs_ino_t *inum,
+				struct xfs_name *ci_name);
 extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp,
 				struct xfs_name *name, xfs_ino_t ino,
 				xfs_fsblock_t *first,
@@ -99,4 +100,7 @@ extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp,
 extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
 				struct xfs_dabuf *bp);
 
+extern int xfs_dir_cilookup_result(struct xfs_da_args *args, const char *name,
+				int len);
+
 #endif	/* __XFS_DIR2_H__ */
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index dee225918db..e2fa0a1d8e9 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -610,14 +610,15 @@ xfs_dir2_block_lookup(
 	/*
 	 * Get the offset from the leaf entry, to point to the data.
 	 */
-	dep = (xfs_dir2_data_entry_t *)
-	      ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
+	dep = (xfs_dir2_data_entry_t *)((char *)block +
+		xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
 	/*
-	 * Fill in inode number, release the block.
+	 * Fill in inode number, CI name if appropriate, release the block.
 	 */
 	args->inumber = be64_to_cpu(dep->inumber);
+	error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
 	xfs_da_brelse(args->trans, bp);
-	return XFS_ERROR(EEXIST);
+	return XFS_ERROR(error);
 }
 
 /*
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 2ebbed4f1b0..f110242d6df 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -1299,12 +1299,13 @@ xfs_dir2_leaf_lookup(
 	      ((char *)dbp->data +
 	       xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
 	/*
-	 * Return the found inode number.
+	 * Return the found inode number & CI name if appropriate
 	 */
 	args->inumber = be64_to_cpu(dep->inumber);
+	error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
 	xfs_da_brelse(tp, dbp);
 	xfs_da_brelse(tp, lbp);
-	return XFS_ERROR(EEXIST);
+	return XFS_ERROR(error);
 }
 
 /*
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index c71cff85950..1b543022346 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -549,7 +549,7 @@ xfs_dir2_leafn_lookup_for_entry(
 	xfs_dir2_data_entry_t	*dep;		/* data block entry */
 	xfs_inode_t		*dp;		/* incore directory inode */
 	int			error;		/* error return value */
-	int			di;		/* data entry index */
+	int			di = -1;	/* data entry index */
 	int			index;		/* leaf entry index */
 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
 	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
@@ -577,6 +577,7 @@ xfs_dir2_leafn_lookup_for_entry(
 	if (state->extravalid) {
 		curbp = state->extrablk.bp;
 		curdb = state->extrablk.blkno;
+		di = state->extrablk.index;
 	}
 	/*
 	 * Loop over leaf entries with the right hash value.
@@ -637,7 +638,6 @@ xfs_dir2_leafn_lookup_for_entry(
 	}
 	/* Didn't find an exact match. */
 	error = ENOENT;
-	di = -1;
 	ASSERT(index == be16_to_cpu(leaf->hdr.count) ||
 					(args->op_flags & XFS_DA_OP_OKNOENT));
 out:
@@ -652,7 +652,7 @@ out:
 		state->extravalid = 0;
 	}
 	/*
-	 * Return the index, that will be the insertion point.
+	 * Return the index, that will be the deletion point for remove/replace.
 	 */
 	*indexp = index;
 	return XFS_ERROR(error);
@@ -1820,8 +1820,14 @@ xfs_dir2_node_lookup(
 	error = xfs_da_node_lookup_int(state, &rval);
 	if (error)
 		rval = error;
-	else if (rval == ENOENT && args->cmpresult == XFS_CMP_CASE)
-		rval = EEXIST;	/* a case-insensitive match was found */
+	else if (rval == ENOENT && args->cmpresult == XFS_CMP_CASE) {
+		/* If a CI match, dup the actual name and return EEXIST */
+		xfs_dir2_data_entry_t	*dep;
+
+		dep = (xfs_dir2_data_entry_t *)((char *)state->extrablk.bp->
+						data + state->extrablk.index);
+		rval = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
+	}
 	/*
 	 * Release the btree blocks and leaf block.
 	 */
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index 9409fd3e565..b46af0013ec 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -812,9 +812,11 @@ xfs_dir2_sf_lookup(
 {
 	xfs_inode_t		*dp;		/* incore directory inode */
 	int			i;		/* entry index */
+	int			error;
 	xfs_dir2_sf_entry_t	*sfep;		/* shortform directory entry */
 	xfs_dir2_sf_t		*sfp;		/* shortform structure */
 	enum xfs_dacmp		cmp;		/* comparison result */
+	xfs_dir2_sf_entry_t	*ci_sfep;	/* case-insens. entry */
 
 	xfs_dir2_trace_args("sf_lookup", args);
 	xfs_dir2_sf_check(args);
@@ -852,6 +854,7 @@ xfs_dir2_sf_lookup(
 	/*
 	 * Loop over all the entries trying to match ours.
 	 */
+	ci_sfep = NULL;
 	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count;
 				i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
 		/*
@@ -867,19 +870,19 @@ xfs_dir2_sf_lookup(
 						xfs_dir2_sf_inumberp(sfep));
 			if (cmp == XFS_CMP_EXACT)
 				return XFS_ERROR(EEXIST);
+			ci_sfep = sfep;
 		}
 	}
 	ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
 	/*
 	 * Here, we can only be doing a lookup (not a rename or replace).
-	 * If a case-insensitive match was found earlier, return "found".
+	 * If a case-insensitive match was not found, return ENOENT.
 	 */
-	if (args->cmpresult == XFS_CMP_CASE)
-		return XFS_ERROR(EEXIST);
-	/*
-	 * Didn't find it.
-	 */
-	return XFS_ERROR(ENOENT);
+	if (!ci_sfep)
+		return XFS_ERROR(ENOENT);
+	/* otherwise process the CI match as required by the caller */
+	error = xfs_dir_cilookup_result(args, ci_sfep->name, ci_sfep->namelen);
+	return XFS_ERROR(error);
 }
 
 /*
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 9b8b87fcd4e..b6a065eb25a 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1610,12 +1610,18 @@ xfs_inactive(
 	return VN_INACTIVE_CACHE;
 }
 
-
+/*
+ * Lookups up an inode from "name". If ci_name is not NULL, then a CI match
+ * is allowed, otherwise it has to be an exact match. If a CI match is found,
+ * ci_name->name will point to a the actual name (caller must free) or
+ * will be set to NULL if an exact match is found.
+ */
 int
 xfs_lookup(
 	xfs_inode_t		*dp,
 	struct xfs_name		*name,
-	xfs_inode_t		**ipp)
+	xfs_inode_t		**ipp,
+	struct xfs_name		*ci_name)
 {
 	xfs_ino_t		inum;
 	int			error;
@@ -1627,7 +1633,7 @@ xfs_lookup(
 		return XFS_ERROR(EIO);
 
 	lock_mode = xfs_ilock_map_shared(dp);
-	error = xfs_dir_lookup(NULL, dp, name, &inum);
+	error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
 	xfs_iunlock_map_shared(dp, lock_mode);
 
 	if (error)
@@ -1635,12 +1641,15 @@ xfs_lookup(
 
 	error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0);
 	if (error)
-		goto out;
+		goto out_free_name;
 
 	xfs_itrace_ref(*ipp);
 	return 0;
 
- out:
+out_free_name:
+	if (ci_name)
+		kmem_free(ci_name->name);
+out:
 	*ipp = NULL;
 	return error;
 }
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 57335ba4ce5..7e9a8b241f2 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -22,7 +22,7 @@ int xfs_fsync(struct xfs_inode *ip);
 int xfs_release(struct xfs_inode *ip);
 int xfs_inactive(struct xfs_inode *ip);
 int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
-		struct xfs_inode **ipp);
+		struct xfs_inode **ipp, struct xfs_name *ci_name);
 int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode,
 		xfs_dev_t rdev, struct xfs_inode **ipp, struct cred *credp);
 int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
-- 
cgit v1.2.3-70-g09d2


From 189f4bf22bdc3c2402b038016d11fd3cb1c89f07 Mon Sep 17 00:00:00 2001
From: Barry Naujok <bnaujok@sgi.com>
Date: Wed, 21 May 2008 16:58:55 +1000
Subject: [XFS] XFS: ASCII case-insensitive support

Implement ASCII case-insensitive support. It's primary purpose is for
supporting existing filesystems that already use this case-insensitive
mode migrated from IRIX. But, if you only need ASCII-only case-insensitive
support (ie. English only) and will never use another language, then this
mode is perfectly adequate.

ASCII-CI is implemented by generating hashes based on lower-case letters
and doing lower-case compares. It implements a new xfs_nameops vector for
doing the hashes and comparisons for all filename operations.

To create a filesystem with this CI mode, use: # mkfs.xfs -n version=ci
<device>

SGI-PV: 981516
SGI-Modid: xfs-linux-melb:xfs-kern:31209a

Signed-off-by: Barry Naujok <bnaujok@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
---
 fs/xfs/linux-2.6/xfs_linux.h |  1 +
 fs/xfs/linux-2.6/xfs_super.c |  5 ++++-
 fs/xfs/xfs_dir2.c            | 51 +++++++++++++++++++++++++++++++++++++++++++-
 fs/xfs/xfs_fs.h              |  1 +
 fs/xfs/xfs_fsops.c           |  4 +++-
 fs/xfs/xfs_sb.h              | 10 ++++++++-
 6 files changed, 68 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 4edc46915b5..aded57321b1 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -76,6 +76,7 @@
 #include <linux/log2.h>
 #include <linux/spinlock.h>
 #include <linux/random.h>
+#include <linux/ctype.h>
 
 #include <asm/page.h>
 #include <asm/div64.h>
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 41eea24a46e..cce59cc6e74 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -569,7 +569,10 @@ xfs_set_inodeops(
 		inode->i_mapping->a_ops = &xfs_address_space_operations;
 		break;
 	case S_IFDIR:
-		inode->i_op = &xfs_dir_inode_operations;
+		if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
+			inode->i_op = &xfs_dir_ci_inode_operations;
+		else
+			inode->i_op = &xfs_dir_inode_operations;
 		inode->i_fop = &xfs_dir_file_operations;
 		break;
 	case S_IFLNK:
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 882609c699c..b445ec31476 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -48,6 +48,52 @@ struct xfs_name xfs_name_dotdot = {"..", 2};
 
 extern const struct xfs_nameops xfs_default_nameops;
 
+/*
+ * ASCII case-insensitive (ie. A-Z) support for directories that was
+ * used in IRIX.
+ */
+STATIC xfs_dahash_t
+xfs_ascii_ci_hashname(
+	struct xfs_name	*name)
+{
+	xfs_dahash_t	hash;
+	int		i;
+
+	for (i = 0, hash = 0; i < name->len; i++)
+		hash = tolower(name->name[i]) ^ rol32(hash, 7);
+
+	return hash;
+}
+
+STATIC enum xfs_dacmp
+xfs_ascii_ci_compname(
+	struct xfs_da_args *args,
+	const char	*name,
+	int 		len)
+{
+	enum xfs_dacmp	result;
+	int		i;
+
+	if (args->namelen != len)
+		return XFS_CMP_DIFFERENT;
+
+	result = XFS_CMP_EXACT;
+	for (i = 0; i < len; i++) {
+		if (args->name[i] == name[i])
+			continue;
+		if (tolower(args->name[i]) != tolower(name[i]))
+			return XFS_CMP_DIFFERENT;
+		result = XFS_CMP_CASE;
+	}
+
+	return result;
+}
+
+static struct xfs_nameops xfs_ascii_ci_nameops = {
+	.hashname	= xfs_ascii_ci_hashname,
+	.compname	= xfs_ascii_ci_compname,
+};
+
 void
 xfs_dir_mount(
 	xfs_mount_t	*mp)
@@ -67,7 +113,10 @@ xfs_dir_mount(
 		(mp->m_dirblksize - (uint)sizeof(xfs_da_node_hdr_t)) /
 		(uint)sizeof(xfs_da_node_entry_t);
 	mp->m_dir_magicpct = (mp->m_dirblksize * 37) / 100;
-	mp->m_dirnameops = &xfs_default_nameops;
+	if (xfs_sb_version_hasasciici(&mp->m_sb))
+		mp->m_dirnameops = &xfs_ascii_ci_nameops;
+	else
+		mp->m_dirnameops = &xfs_default_nameops;
 }
 
 /*
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 3bed6433d05..6ca749897c5 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -239,6 +239,7 @@ typedef struct xfs_fsop_resblks {
 #define XFS_FSOP_GEOM_FLAGS_LOGV2	0x0100	/* log format version 2	*/
 #define XFS_FSOP_GEOM_FLAGS_SECTOR	0x0200	/* sector sizes >1BB	*/
 #define XFS_FSOP_GEOM_FLAGS_ATTR2	0x0400	/* inline attributes rework */
+#define XFS_FSOP_GEOM_FLAGS_DIRV2CI	0x1000	/* ASCII only CI names */
 #define XFS_FSOP_GEOM_FLAGS_LAZYSB	0x4000	/* lazy superblock counters */
 
 
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 381ebda4f7b..84583cf73db 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -95,6 +95,8 @@ xfs_fs_geometry(
 				XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) |
 			(xfs_sb_version_hassector(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_SECTOR : 0) |
+			(xfs_sb_version_hasasciici(&mp->m_sb) ?
+				XFS_FSOP_GEOM_FLAGS_DIRV2CI : 0) |
 			(xfs_sb_version_haslazysbcount(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) |
 			(xfs_sb_version_hasattr2(&mp->m_sb) ?
@@ -625,7 +627,7 @@ xfs_fs_goingdown(
 			xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
 			thaw_bdev(sb->s_bdev, sb);
 		}
-	
+
 		break;
 	}
 	case XFS_FSOP_GOING_FLAGS_LOGFLUSH:
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index e3204a36a22..3f8cf1587f4 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -46,10 +46,12 @@ struct xfs_mount;
 #define XFS_SB_VERSION_SECTORBIT	0x0800
 #define	XFS_SB_VERSION_EXTFLGBIT	0x1000
 #define	XFS_SB_VERSION_DIRV2BIT		0x2000
+#define	XFS_SB_VERSION_BORGBIT		0x4000	/* ASCII only case-insens. */
 #define	XFS_SB_VERSION_MOREBITSBIT	0x8000
 #define	XFS_SB_VERSION_OKSASHFBITS	\
 	(XFS_SB_VERSION_EXTFLGBIT | \
-	 XFS_SB_VERSION_DIRV2BIT)
+	 XFS_SB_VERSION_DIRV2BIT | \
+	 XFS_SB_VERSION_BORGBIT)
 #define	XFS_SB_VERSION_OKREALFBITS	\
 	(XFS_SB_VERSION_ATTRBIT | \
 	 XFS_SB_VERSION_NLINKBIT | \
@@ -437,6 +439,12 @@ static inline int xfs_sb_version_hassector(xfs_sb_t *sbp)
 		((sbp)->sb_versionnum & XFS_SB_VERSION_SECTORBIT);
 }
 
+static inline int xfs_sb_version_hasasciici(xfs_sb_t *sbp)
+{
+	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+		(sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT);
+}
+
 static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
 {
 	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
-- 
cgit v1.2.3-70-g09d2


From d3689d7687dbbc46c5004557d53349f6952fbc93 Mon Sep 17 00:00:00 2001
From: Barry Naujok <bnaujok@sgi.com>
Date: Wed, 21 May 2008 18:38:40 +1000
Subject: [XFS] kmem_free and kmem_realloc to use const void *

SGI-PV: 981498
SGI-Modid: xfs-linux-melb:xfs-kern:31212a

Signed-off-by: Barry Naujok <bnaujok@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
---
 fs/xfs/linux-2.6/kmem.c | 4 ++--
 fs/xfs/linux-2.6/kmem.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index 69233a52f0a..1cd3b55ee3d 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -90,7 +90,7 @@ kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize,
 }
 
 void
-kmem_free(void *ptr)
+kmem_free(const void *ptr)
 {
 	if (!is_vmalloc_addr(ptr)) {
 		kfree(ptr);
@@ -100,7 +100,7 @@ kmem_free(void *ptr)
 }
 
 void *
-kmem_realloc(void *ptr, size_t newsize, size_t oldsize,
+kmem_realloc(const void *ptr, size_t newsize, size_t oldsize,
 	     unsigned int __nocast flags)
 {
 	void	*new;
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index a3c96207e60..af6843c7ee4 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -57,8 +57,8 @@ kmem_flags_convert(unsigned int __nocast flags)
 extern void *kmem_alloc(size_t, unsigned int __nocast);
 extern void *kmem_zalloc(size_t, unsigned int __nocast);
 extern void *kmem_zalloc_greedy(size_t *, size_t, size_t, unsigned int __nocast);
-extern void *kmem_realloc(void *, size_t, size_t, unsigned int __nocast);
-extern void  kmem_free(void *);
+extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast);
+extern void  kmem_free(const void *);
 
 /*
  * Zone interfaces
-- 
cgit v1.2.3-70-g09d2


From 866d5dc974682c6247d5fde94dbc6545f864e7d7 Mon Sep 17 00:00:00 2001
From: Barry Naujok <bnaujok@sgi.com>
Date: Thu, 22 May 2008 17:21:40 +1000
Subject: [XFS] Remove d_add call for an ENOENT lookup return code

SGI-PV: 981521
SGI-Modid: xfs-linux-melb:xfs-kern:31214a

Signed-off-by: Barry Naujok <bnaujok@sgi.com>
Signed-off-by: David Chinner <dgc@sgi.com>
---
 fs/xfs/linux-2.6/xfs_iops.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 9f0f8ee8d44..62330f28395 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -413,7 +413,11 @@ xfs_vn_ci_lookup(
 	if (unlikely(error)) {
 		if (unlikely(error != ENOENT))
 			return ERR_PTR(-error);
-		d_add(dentry, NULL);
+		/*
+		 * call d_add(dentry, NULL) here when d_drop_negative_children
+		 * is called in xfs_vn_mknod (ie. allow negative dentries
+		 * with CI filesystems).
+		 */
 		return NULL;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 87affd08bc9c741b99053cabb908cf54a135a0fa Mon Sep 17 00:00:00 2001
From: Barry Naujok <bnaujok@sgi.com>
Date: Tue, 3 Jun 2008 11:59:18 +1000
Subject: [XFS] Zero uninitialised xfs_da_args structure in xfs_dir2.c

Fixes a problem in the xfs_dir2_remove and xfs_dir2_replace paths which
intenally call directory format specific lookup funtions that assume
args->cmpresult is zeroed.

SGI-PV: 982606
SGI-Modid: xfs-linux-melb:xfs-kern:31268a

Signed-off-by: Barry Naujok <bnaujok@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
---
 fs/xfs/xfs_dir2.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index b445ec31476..80e0dc51361 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -214,6 +214,7 @@ xfs_dir_createname(
 		return rval;
 	XFS_STATS_INC(xs_dir_create);
 
+	memset(&args, 0, sizeof(xfs_da_args_t));
 	args.name = name->name;
 	args.namelen = name->len;
 	args.hashval = dp->i_mount->m_dirnameops->hashname(name);
@@ -286,8 +287,8 @@ xfs_dir_lookup(
 
 	ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
 	XFS_STATS_INC(xs_dir_lookup);
-	memset(&args, 0, sizeof(xfs_da_args_t));
 
+	memset(&args, 0, sizeof(xfs_da_args_t));
 	args.name = name->name;
 	args.namelen = name->len;
 	args.hashval = dp->i_mount->m_dirnameops->hashname(name);
@@ -297,7 +298,6 @@ xfs_dir_lookup(
 	args.op_flags = XFS_DA_OP_OKNOENT;
 	if (ci_name)
 		args.op_flags |= XFS_DA_OP_CILOOKUP;
-	args.cmpresult = XFS_CMP_DIFFERENT;
 
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
 		rval = xfs_dir2_sf_lookup(&args);
@@ -343,6 +343,7 @@ xfs_dir_removename(
 	ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
 	XFS_STATS_INC(xs_dir_remove);
 
+	memset(&args, 0, sizeof(xfs_da_args_t));
 	args.name = name->name;
 	args.namelen = name->len;
 	args.hashval = dp->i_mount->m_dirnameops->hashname(name);
@@ -353,7 +354,6 @@ xfs_dir_removename(
 	args.total = total;
 	args.whichfork = XFS_DATA_FORK;
 	args.trans = tp;
-	args.op_flags = 0;
 
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
 		rval = xfs_dir2_sf_removename(&args);
@@ -426,6 +426,7 @@ xfs_dir_replace(
 	if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
 		return rval;
 
+	memset(&args, 0, sizeof(xfs_da_args_t));
 	args.name = name->name;
 	args.namelen = name->len;
 	args.hashval = dp->i_mount->m_dirnameops->hashname(name);
@@ -436,7 +437,6 @@ xfs_dir_replace(
 	args.total = total;
 	args.whichfork = XFS_DATA_FORK;
 	args.trans = tp;
-	args.op_flags = 0;
 
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
 		rval = xfs_dir2_sf_replace(&args);
@@ -472,8 +472,8 @@ xfs_dir_canenter(
 		return 0;
 
 	ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
-	memset(&args, 0, sizeof(xfs_da_args_t));
 
+	memset(&args, 0, sizeof(xfs_da_args_t));
 	args.name = name->name;
 	args.namelen = name->len;
 	args.hashval = dp->i_mount->m_dirnameops->hashname(name);
-- 
cgit v1.2.3-70-g09d2


From d532506cd8b59543b376e155508f88a03a81dad1 Mon Sep 17 00:00:00 2001
From: Barry Naujok <bnaujok@sgi.com>
Date: Mon, 16 Jun 2008 12:07:41 +1000
Subject: [XFS] Invalidate dentry in unlink/rmdir if in case-insensitive mode

The vfs_unlink/d_delete functionality in the Linux VFS make the
dentry negative if it is the only inode being referenced. Case-insensitive
mode doesn't work with negative dentries, so if using CI-mode, invalidate
the dentry on unlink/rmdir.

SGI-PV: 983102
SGI-Modid: xfs-linux-melb:xfs-kern:31308a

Signed-off-by: Barry Naujok <bnaujok@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
---
 fs/xfs/linux-2.6/xfs_iops.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 62330f28395..190ed61bcd4 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -475,6 +475,13 @@ xfs_vn_unlink(
 	if (likely(!error)) {
 		xfs_validate_fields(dir);	/* size needs update */
 		xfs_validate_fields(inode);
+		/*
+		 * With unlink, the VFS makes the dentry "negative": no inode,
+		 * but still hashed. This is incompatible with case-insensitive
+		 * mode, so invalidate (unhash) the dentry in CI-mode.
+		 */
+		if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb))
+			d_invalidate(dentry);
 	}
 	return -error;
 }
@@ -531,6 +538,13 @@ xfs_vn_rmdir(
 	if (likely(!error)) {
 		xfs_validate_fields(inode);
 		xfs_validate_fields(dir);
+		/*
+		 * With rmdir, the VFS makes the dentry "negative": no inode,
+		 * but still hashed. This is incompatible with case-insensitive
+		 * mode, so invalidate (unhash) the dentry in CI-mode.
+		 */
+		if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb))
+			d_invalidate(dentry);
 	}
 	return -error;
 }
-- 
cgit v1.2.3-70-g09d2


From 0ec585163ac81e329bde25fb6311a043a1c63952 Mon Sep 17 00:00:00 2001
From: Lachlan McIlroy <lachlan@redback.melbourne.sgi.com>
Date: Mon, 23 Jun 2008 13:23:01 +1000
Subject: [XFS] Use the generic xattr methods.

Use the generic set, get and removexattr methods and supply the s_xattr
array with fine-grained handlers. All XFS/Linux highlevel attr handling is
rewritten from scratch and placed into fs/xfs/linux-2.6/xfs_xattr.c so
that it's separated from the generic low-level code.

SGI-PV: 982343

SGI-Modid: xfs-linux-melb:xfs-kern:31234a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/Makefile              |   3 +-
 fs/xfs/linux-2.6/xfs_iops.c  | 118 +++----------------
 fs/xfs/linux-2.6/xfs_iops.h  |   1 +
 fs/xfs/linux-2.6/xfs_super.c |   1 +
 fs/xfs/linux-2.6/xfs_super.h |   1 +
 fs/xfs/xfs_attr.c            | 272 -------------------------------------------
 fs/xfs/xfs_attr.h            |  17 ---
 7 files changed, 18 insertions(+), 395 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 36ec614e699..737c9a42536 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -106,7 +106,8 @@ xfs-y				+= $(addprefix $(XFS_LINUX)/, \
 				   xfs_iops.o \
 				   xfs_lrw.o \
 				   xfs_super.o \
-				   xfs_vnode.o)
+				   xfs_vnode.o \
+				   xfs_xattr.o)
 
 # Objects in support/
 xfs-y				+= $(addprefix support/, \
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 190ed61bcd4..3ae80155de3 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -275,7 +275,7 @@ xfs_vn_mknod(
 	struct xfs_inode *ip = NULL;
 	xfs_acl_t	*default_acl = NULL;
 	struct xfs_name	name;
-	attrexists_t	test_default_acl = _ACL_DEFAULT_EXISTS;
+	int (*test_default_acl)(struct inode *) = _ACL_DEFAULT_EXISTS;
 	int		error;
 
 	/*
@@ -781,98 +781,6 @@ xfs_vn_truncate(
 	WARN_ON(error);
 }
 
-STATIC int
-xfs_vn_setxattr(
-	struct dentry	*dentry,
-	const char	*name,
-	const void	*data,
-	size_t		size,
-	int		flags)
-{
-	bhv_vnode_t	*vp = vn_from_inode(dentry->d_inode);
-	char		*attr = (char *)name;
-	attrnames_t	*namesp;
-	int		xflags = 0;
-
-	namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
-	if (!namesp)
-		return -EOPNOTSUPP;
-	attr += namesp->attr_namelen;
-
-	/* Convert Linux syscall to XFS internal ATTR flags */
-	if (flags & XATTR_CREATE)
-		xflags |= ATTR_CREATE;
-	if (flags & XATTR_REPLACE)
-		xflags |= ATTR_REPLACE;
-	xflags |= namesp->attr_flag;
-	return namesp->attr_set(vp, attr, (void *)data, size, xflags);
-}
-
-STATIC ssize_t
-xfs_vn_getxattr(
-	struct dentry	*dentry,
-	const char	*name,
-	void		*data,
-	size_t		size)
-{
-	bhv_vnode_t	*vp = vn_from_inode(dentry->d_inode);
-	char		*attr = (char *)name;
-	attrnames_t	*namesp;
-	int		xflags = 0;
-
-	namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
-	if (!namesp)
-		return -EOPNOTSUPP;
-	attr += namesp->attr_namelen;
-
-	/* Convert Linux syscall to XFS internal ATTR flags */
-	if (!size) {
-		xflags |= ATTR_KERNOVAL;
-		data = NULL;
-	}
-	xflags |= namesp->attr_flag;
-	return namesp->attr_get(vp, attr, (void *)data, size, xflags);
-}
-
-STATIC ssize_t
-xfs_vn_listxattr(
-	struct dentry		*dentry,
-	char			*data,
-	size_t			size)
-{
-	bhv_vnode_t		*vp = vn_from_inode(dentry->d_inode);
-	int			error, xflags = ATTR_KERNAMELS;
-	ssize_t			result;
-
-	if (!size)
-		xflags |= ATTR_KERNOVAL;
-	xflags |= capable(CAP_SYS_ADMIN) ? ATTR_KERNFULLS : ATTR_KERNORMALS;
-
-	error = attr_generic_list(vp, data, size, xflags, &result);
-	if (error < 0)
-		return error;
-	return result;
-}
-
-STATIC int
-xfs_vn_removexattr(
-	struct dentry	*dentry,
-	const char	*name)
-{
-	bhv_vnode_t	*vp = vn_from_inode(dentry->d_inode);
-	char		*attr = (char *)name;
-	attrnames_t	*namesp;
-	int		xflags = 0;
-
-	namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
-	if (!namesp)
-		return -EOPNOTSUPP;
-	attr += namesp->attr_namelen;
-
-	xflags |= namesp->attr_flag;
-	return namesp->attr_remove(vp, attr, xflags);
-}
-
 STATIC long
 xfs_vn_fallocate(
 	struct inode	*inode,
@@ -920,10 +828,10 @@ const struct inode_operations xfs_inode_operations = {
 	.truncate		= xfs_vn_truncate,
 	.getattr		= xfs_vn_getattr,
 	.setattr		= xfs_vn_setattr,
-	.setxattr		= xfs_vn_setxattr,
-	.getxattr		= xfs_vn_getxattr,
+	.setxattr		= generic_setxattr,
+	.getxattr		= generic_getxattr,
+	.removexattr		= generic_removexattr,
 	.listxattr		= xfs_vn_listxattr,
-	.removexattr		= xfs_vn_removexattr,
 	.fallocate		= xfs_vn_fallocate,
 };
 
@@ -940,10 +848,10 @@ const struct inode_operations xfs_dir_inode_operations = {
 	.permission		= xfs_vn_permission,
 	.getattr		= xfs_vn_getattr,
 	.setattr		= xfs_vn_setattr,
-	.setxattr		= xfs_vn_setxattr,
-	.getxattr		= xfs_vn_getxattr,
+	.setxattr		= generic_setxattr,
+	.getxattr		= generic_getxattr,
+	.removexattr		= generic_removexattr,
 	.listxattr		= xfs_vn_listxattr,
-	.removexattr		= xfs_vn_removexattr,
 };
 
 const struct inode_operations xfs_dir_ci_inode_operations = {
@@ -959,10 +867,10 @@ const struct inode_operations xfs_dir_ci_inode_operations = {
 	.permission		= xfs_vn_permission,
 	.getattr		= xfs_vn_getattr,
 	.setattr		= xfs_vn_setattr,
-	.setxattr		= xfs_vn_setxattr,
-	.getxattr		= xfs_vn_getxattr,
+	.setxattr		= generic_setxattr,
+	.getxattr		= generic_getxattr,
+	.removexattr		= generic_removexattr,
 	.listxattr		= xfs_vn_listxattr,
-	.removexattr		= xfs_vn_removexattr,
 };
 
 const struct inode_operations xfs_symlink_inode_operations = {
@@ -972,8 +880,8 @@ const struct inode_operations xfs_symlink_inode_operations = {
 	.permission		= xfs_vn_permission,
 	.getattr		= xfs_vn_getattr,
 	.setattr		= xfs_vn_setattr,
-	.setxattr		= xfs_vn_setxattr,
-	.getxattr		= xfs_vn_getxattr,
+	.setxattr		= generic_setxattr,
+	.getxattr		= generic_getxattr,
+	.removexattr		= generic_removexattr,
 	.listxattr		= xfs_vn_listxattr,
-	.removexattr		= xfs_vn_removexattr,
 };
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
index 3b4df5863e4..d97ba934a2a 100644
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -27,6 +27,7 @@ extern const struct file_operations xfs_file_operations;
 extern const struct file_operations xfs_dir_file_operations;
 extern const struct file_operations xfs_invis_file_operations;
 
+extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
 
 struct xfs_inode;
 extern void xfs_ichgtime(struct xfs_inode *, int);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index cce59cc6e74..967603c4699 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1766,6 +1766,7 @@ xfs_fs_fill_super(
 		goto out_free_mp;
 
 	sb_min_blocksize(sb, BBSIZE);
+	sb->s_xattr = xfs_xattr_handlers;
 	sb->s_export_op = &xfs_export_operations;
 	sb->s_qcop = &xfs_quotactl_operations;
 	sb->s_op = &xfs_super_operations;
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 212bdc7a789..b7d13da01bd 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -110,6 +110,7 @@ extern void xfs_flush_device(struct xfs_inode *);
 extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
 
 extern const struct export_operations xfs_export_operations;
+extern struct xattr_handler *xfs_xattr_handlers[];
 
 #define XFS_M(sb)		((struct xfs_mount *)((sb)->s_fs_info))
 
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 557dad611de..9d91af4929b 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -57,11 +57,6 @@
  * Provide the external interfaces to manage attribute lists.
  */
 
-#define ATTR_SYSCOUNT	2
-static struct attrnames posix_acl_access;
-static struct attrnames posix_acl_default;
-static struct attrnames *attr_system_names[ATTR_SYSCOUNT];
-
 /*========================================================================
  * Function prototypes for the kernel.
  *========================================================================*/
@@ -2378,270 +2373,3 @@ xfs_attr_trace_enter(int type, char *where,
 		(void *)a13, (void *)a14, (void *)a15);
 }
 #endif	/* XFS_ATTR_TRACE */
-
-
-/*========================================================================
- * System (pseudo) namespace attribute interface routines.
- *========================================================================*/
-
-STATIC int
-posix_acl_access_set(
-	bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
-	return xfs_acl_vset(vp, data, size, _ACL_TYPE_ACCESS);
-}
-
-STATIC int
-posix_acl_access_remove(
-	bhv_vnode_t *vp, char *name, int xflags)
-{
-	return xfs_acl_vremove(vp, _ACL_TYPE_ACCESS);
-}
-
-STATIC int
-posix_acl_access_get(
-	bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
-	return xfs_acl_vget(vp, data, size, _ACL_TYPE_ACCESS);
-}
-
-STATIC int
-posix_acl_access_exists(
-	bhv_vnode_t *vp)
-{
-	return xfs_acl_vhasacl_access(vp);
-}
-
-STATIC int
-posix_acl_default_set(
-	bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
-	return xfs_acl_vset(vp, data, size, _ACL_TYPE_DEFAULT);
-}
-
-STATIC int
-posix_acl_default_get(
-	bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
-	return xfs_acl_vget(vp, data, size, _ACL_TYPE_DEFAULT);
-}
-
-STATIC int
-posix_acl_default_remove(
-	bhv_vnode_t *vp, char *name, int xflags)
-{
-	return xfs_acl_vremove(vp, _ACL_TYPE_DEFAULT);
-}
-
-STATIC int
-posix_acl_default_exists(
-	bhv_vnode_t *vp)
-{
-	return xfs_acl_vhasacl_default(vp);
-}
-
-static struct attrnames posix_acl_access = {
-	.attr_name	= "posix_acl_access",
-	.attr_namelen	= sizeof("posix_acl_access") - 1,
-	.attr_get	= posix_acl_access_get,
-	.attr_set	= posix_acl_access_set,
-	.attr_remove	= posix_acl_access_remove,
-	.attr_exists	= posix_acl_access_exists,
-};
-
-static struct attrnames posix_acl_default = {
-	.attr_name	= "posix_acl_default",
-	.attr_namelen	= sizeof("posix_acl_default") - 1,
-	.attr_get	= posix_acl_default_get,
-	.attr_set	= posix_acl_default_set,
-	.attr_remove	= posix_acl_default_remove,
-	.attr_exists	= posix_acl_default_exists,
-};
-
-static struct attrnames *attr_system_names[] =
-	{ &posix_acl_access, &posix_acl_default };
-
-
-/*========================================================================
- * Namespace-prefix-style attribute name interface routines.
- *========================================================================*/
-
-STATIC int
-attr_generic_set(
-	bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
-	return -xfs_attr_set(xfs_vtoi(vp), name, data, size, xflags);
-}
-
-STATIC int
-attr_generic_get(
-	bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
-	int	error, asize = size;
-
-	error = xfs_attr_get(xfs_vtoi(vp), name, data, &asize, xflags);
-	if (!error)
-		return asize;
-	return -error;
-}
-
-STATIC int
-attr_generic_remove(
-	bhv_vnode_t *vp, char *name, int xflags)
-{
-	return -xfs_attr_remove(xfs_vtoi(vp), name, xflags);
-}
-
-STATIC int
-attr_generic_listadd(
-	attrnames_t		*prefix,
-	attrnames_t		*namesp,
-	void			*data,
-	size_t			size,
-	ssize_t			*result)
-{
-	char			*p = data + *result;
-
-	*result += prefix->attr_namelen;
-	*result += namesp->attr_namelen + 1;
-	if (!size)
-		return 0;
-	if (*result > size)
-		return -ERANGE;
-	strcpy(p, prefix->attr_name);
-	p += prefix->attr_namelen;
-	strcpy(p, namesp->attr_name);
-	p += namesp->attr_namelen + 1;
-	return 0;
-}
-
-STATIC int
-attr_system_list(
-	bhv_vnode_t		*vp,
-	void			*data,
-	size_t			size,
-	ssize_t			*result)
-{
-	attrnames_t		*namesp;
-	int			i, error = 0;
-
-	for (i = 0; i < ATTR_SYSCOUNT; i++) {
-		namesp = attr_system_names[i];
-		if (!namesp->attr_exists || !namesp->attr_exists(vp))
-			continue;
-		error = attr_generic_listadd(&attr_system, namesp,
-						data, size, result);
-		if (error)
-			break;
-	}
-	return error;
-}
-
-int
-attr_generic_list(
-	bhv_vnode_t *vp, void *data, size_t size, int xflags, ssize_t *result)
-{
-	attrlist_cursor_kern_t	cursor = { 0 };
-	int			error;
-
-	error = xfs_attr_list(xfs_vtoi(vp), data, size, xflags, &cursor);
-	if (error > 0)
-		return -error;
-	*result = -error;
-	return attr_system_list(vp, data, size, result);
-}
-
-attrnames_t *
-attr_lookup_namespace(
-	char			*name,
-	struct attrnames	**names,
-	int			nnames)
-{
-	int			i;
-
-	for (i = 0; i < nnames; i++)
-		if (!strncmp(name, names[i]->attr_name, names[i]->attr_namelen))
-			return names[i];
-	return NULL;
-}
-
-STATIC int
-attr_system_set(
-	bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
-	attrnames_t	*namesp;
-	int		error;
-
-	if (xflags & ATTR_CREATE)
-		return -EINVAL;
-
-	namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
-	if (!namesp)
-		return -EOPNOTSUPP;
-	error = namesp->attr_set(vp, name, data, size, xflags);
-	if (!error)
-		error = vn_revalidate(vp);
-	return error;
-}
-
-STATIC int
-attr_system_get(
-	bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
-	attrnames_t	*namesp;
-
-	namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
-	if (!namesp)
-		return -EOPNOTSUPP;
-	return namesp->attr_get(vp, name, data, size, xflags);
-}
-
-STATIC int
-attr_system_remove(
-	bhv_vnode_t *vp, char *name, int xflags)
-{
-	attrnames_t	*namesp;
-
-	namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
-	if (!namesp)
-		return -EOPNOTSUPP;
-	return namesp->attr_remove(vp, name, xflags);
-}
-
-struct attrnames attr_system = {
-	.attr_name	= "system.",
-	.attr_namelen	= sizeof("system.") - 1,
-	.attr_flag	= ATTR_SYSTEM,
-	.attr_get	= attr_system_get,
-	.attr_set	= attr_system_set,
-	.attr_remove	= attr_system_remove,
-};
-
-struct attrnames attr_trusted = {
-	.attr_name	= "trusted.",
-	.attr_namelen	= sizeof("trusted.") - 1,
-	.attr_flag	= ATTR_ROOT,
-	.attr_get	= attr_generic_get,
-	.attr_set	= attr_generic_set,
-	.attr_remove	= attr_generic_remove,
-};
-
-struct attrnames attr_secure = {
-	.attr_name	= "security.",
-	.attr_namelen	= sizeof("security.") - 1,
-	.attr_flag	= ATTR_SECURE,
-	.attr_get	= attr_generic_get,
-	.attr_set	= attr_generic_set,
-	.attr_remove	= attr_generic_remove,
-};
-
-struct attrnames attr_user = {
-	.attr_name	= "user.",
-	.attr_namelen	= sizeof("user.") - 1,
-	.attr_get	= attr_generic_get,
-	.attr_set	= attr_generic_set,
-	.attr_remove	= attr_generic_remove,
-};
-
-struct attrnames *attr_namespaces[] =
-	{ &attr_system, &attr_trusted, &attr_secure, &attr_user };
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index 9b96d171b75..c1f7d43e5ec 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -38,30 +38,14 @@
 struct cred;
 struct xfs_attr_list_context;
 
-typedef int (*attrset_t)(bhv_vnode_t *, char *, void *, size_t, int);
-typedef int (*attrget_t)(bhv_vnode_t *, char *, void *, size_t, int);
-typedef int (*attrremove_t)(bhv_vnode_t *, char *, int);
-typedef int (*attrexists_t)(bhv_vnode_t *);
-
 typedef struct attrnames {
 	char *		attr_name;
 	unsigned int	attr_namelen;
-	unsigned int	attr_flag;
-	attrget_t	attr_get;
-	attrset_t	attr_set;
-	attrremove_t	attr_remove;
-	attrexists_t	attr_exists;
 } attrnames_t;
 
-#define ATTR_NAMECOUNT	4
 extern struct attrnames attr_user;
 extern struct attrnames attr_secure;
-extern struct attrnames attr_system;
 extern struct attrnames attr_trusted;
-extern struct attrnames *attr_namespaces[ATTR_NAMECOUNT];
-
-extern attrnames_t *attr_lookup_namespace(char *, attrnames_t **, int);
-extern int attr_generic_list(bhv_vnode_t *, void *, size_t, int, ssize_t *);
 
 #define ATTR_DONTFOLLOW	0x0001	/* -- unused, from IRIX -- */
 #define ATTR_ROOT	0x0002	/* use attrs in root (trusted) namespace */
@@ -69,7 +53,6 @@ extern int attr_generic_list(bhv_vnode_t *, void *, size_t, int, ssize_t *);
 #define ATTR_SECURE	0x0008	/* use attrs in security namespace */
 #define ATTR_CREATE	0x0010	/* pure create: fail if attr already exists */
 #define ATTR_REPLACE	0x0020	/* pure set: fail if attr does not exist */
-#define ATTR_SYSTEM	0x0100	/* use attrs in system (pseudo) namespace */
 
 #define ATTR_KERNACCESS	0x0400	/* [kernel] iaccess, inode held io-locked */
 #define ATTR_KERNOTIME	0x1000	/* [kernel] don't update inode timestamps */
-- 
cgit v1.2.3-70-g09d2


From ae23a5e87dbbf4657a82e1ff8ebc52ab50361c14 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sandeen.net>
Date: Mon, 23 Jun 2008 13:23:32 +1000
Subject: [XFS] Pack some shortform dir2 structures for the ARM old ABI
 architecture.

This should fix the longstanding issues with xfs and old ABI arm boxes,
which lead to various asserts and xfs shutdowns, and for which an
(incorrect) patch has been floating around for years.

I've verified this patch by comparing the on-disk structure layouts using
pahole from the dwarves package, as well as running through a bit of xfsqa
under qemu-arm, modified so that the check/repair phase after each test
actually executes check/repair from the x86 host, on the filesystem
populated by the arm emulator. Thus far it all looks good.

There are 2 other structures with extra padding at the end, but they don't
seem to cause trouble. I suppose they could be packed as well:
xfs_dir2_data_unused_t and xfs_dir2_sf_t.

Note that userspace needs a similar treatment, and any filesystems which
were running with the previous rogue "fix" will now see corruption (either
in the kernel, or during xfs_repair) with this fix properly in place; it
may be worth teaching xfs_repair to identify and fix that specific issue.

SGI-PV: 982930

SGI-Modid: xfs-linux-melb:xfs-kern:31280a

Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
Signed-off-by: Tim Shimmin <tes@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/linux-2.6/xfs_linux.h | 7 +++++++
 fs/xfs/xfs_dir2_sf.h         | 6 +++---
 2 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index aded57321b1..4d45d9351a6 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -300,4 +300,11 @@ static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
 	return x;
 }
 
+/* ARM old ABI has some weird alignment/padding */
+#if defined(__arm__) && !defined(__ARM_EABI__)
+#define __arch_pack __attribute__((packed))
+#else
+#define __arch_pack
+#endif
+
 #endif /* __XFS_LINUX__ */
diff --git a/fs/xfs/xfs_dir2_sf.h b/fs/xfs/xfs_dir2_sf.h
index 005629d702d..deecc9d238f 100644
--- a/fs/xfs/xfs_dir2_sf.h
+++ b/fs/xfs/xfs_dir2_sf.h
@@ -62,7 +62,7 @@ typedef union {
  * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t.
  * Only need 16 bits, this is the byte offset into the single block form.
  */
-typedef struct { __uint8_t i[2]; } xfs_dir2_sf_off_t;
+typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t;
 
 /*
  * The parent directory has a dedicated field, and the self-pointer must
@@ -76,14 +76,14 @@ typedef struct xfs_dir2_sf_hdr {
 	__uint8_t		count;		/* count of entries */
 	__uint8_t		i8count;	/* count of 8-byte inode #s */
 	xfs_dir2_inou_t		parent;		/* parent dir inode number */
-} xfs_dir2_sf_hdr_t;
+} __arch_pack xfs_dir2_sf_hdr_t;
 
 typedef struct xfs_dir2_sf_entry {
 	__uint8_t		namelen;	/* actual name length */
 	xfs_dir2_sf_off_t	offset;		/* saved offset */
 	__uint8_t		name[1];	/* name, variable size */
 	xfs_dir2_inou_t		inumber;	/* inode number, var. offset */
-} xfs_dir2_sf_entry_t;
+} __arch_pack xfs_dir2_sf_entry_t; 
 
 typedef struct xfs_dir2_sf {
 	xfs_dir2_sf_hdr_t	hdr;		/* shortform header */
-- 
cgit v1.2.3-70-g09d2


From caf8aabdbc6849de772850d26d3dbe35e8f63bff Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Mon, 23 Jun 2008 13:23:41 +1000
Subject: [XFS] Factor out code for whether inode has attributes or not.

SGI-PV: 983394

SGI-Modid: xfs-linux-melb:xfs-kern:31323a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/xfs_attr.c | 51 +++++++++++++++++++++++----------------------------
 1 file changed, 23 insertions(+), 28 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 9d91af4929b..49fac8d6db1 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -111,6 +111,17 @@ xfs_attr_name_to_xname(
 	return 0;
 }
 
+STATIC int
+xfs_inode_hasattr(
+	struct xfs_inode	*ip)
+{
+	if (!XFS_IFORK_Q(ip) ||
+	    (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
+	     ip->i_d.di_anextents == 0))
+		return 0;
+	return 1;
+}
+
 /*========================================================================
  * Overall external interface routines.
  *========================================================================*/
@@ -122,10 +133,8 @@ xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name,
 	xfs_da_args_t   args;
 	int             error;
 
-	if ((XFS_IFORK_Q(ip) == 0) ||
-	    (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
-	     ip->i_d.di_anextents == 0))
-		return(ENOATTR);
+	if (!xfs_inode_hasattr(ip))
+		return ENOATTR;
 
 	/*
 	 * Fill in the arg structure for this request.
@@ -143,11 +152,7 @@ xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name,
 	/*
 	 * Decide on what work routines to call based on the inode size.
 	 */
-	if (XFS_IFORK_Q(ip) == 0 ||
-	    (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
-	     ip->i_d.di_anextents == 0)) {
-		error = XFS_ERROR(ENOATTR);
-	} else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
+	if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
 		error = xfs_attr_shortform_getvalue(&args);
 	} else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) {
 		error = xfs_attr_leaf_get(&args);
@@ -523,9 +528,7 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
 	/*
 	 * Decide on what work routines to call based on the inode size.
 	 */
-	if (XFS_IFORK_Q(dp) == 0 ||
-	    (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
-	     dp->i_d.di_anextents == 0)) {
+	if (!xfs_inode_hasattr(dp)) {
 		error = XFS_ERROR(ENOATTR);
 		goto out;
 	}
@@ -595,11 +598,9 @@ xfs_attr_remove(
 		return error;
 
 	xfs_ilock(dp, XFS_ILOCK_SHARED);
-	if (XFS_IFORK_Q(dp) == 0 ||
-		   (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
-		    dp->i_d.di_anextents == 0)) {
+	if (!xfs_inode_hasattr(dp)) {
 		xfs_iunlock(dp, XFS_ILOCK_SHARED);
-		return(XFS_ERROR(ENOATTR));
+		return XFS_ERROR(ENOATTR);
 	}
 	xfs_iunlock(dp, XFS_ILOCK_SHARED);
 
@@ -615,9 +616,7 @@ xfs_attr_list_int(xfs_attr_list_context_t *context)
 	/*
 	 * Decide on what work routines to call based on the inode size.
 	 */
-	if (XFS_IFORK_Q(dp) == 0 ||
-	    (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
-	     dp->i_d.di_anextents == 0)) {
+	if (!xfs_inode_hasattr(dp)) {
 		error = 0;
 	} else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
 		error = xfs_attr_shortform_list(context);
@@ -810,12 +809,10 @@ xfs_attr_inactive(xfs_inode_t *dp)
 	ASSERT(! XFS_NOT_DQATTACHED(mp, dp));
 
 	xfs_ilock(dp, XFS_ILOCK_SHARED);
-	if ((XFS_IFORK_Q(dp) == 0) ||
-	    (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) ||
-	    (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
-	     dp->i_d.di_anextents == 0)) {
+	if (!xfs_inode_hasattr(dp) ||
+	    dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
 		xfs_iunlock(dp, XFS_ILOCK_SHARED);
-		return(0);
+		return 0;
 	}
 	xfs_iunlock(dp, XFS_ILOCK_SHARED);
 
@@ -848,10 +845,8 @@ xfs_attr_inactive(xfs_inode_t *dp)
 	/*
 	 * Decide on what work routines to call based on the inode size.
 	 */
-	if ((XFS_IFORK_Q(dp) == 0) ||
-	    (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) ||
-	    (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
-	     dp->i_d.di_anextents == 0)) {
+	if (!xfs_inode_hasattr(dp) ||
+	    dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
 		error = 0;
 		goto out;
 	}
-- 
cgit v1.2.3-70-g09d2


From ad9b463aa206b8c8f0bab378cf7c090c1a9a8e34 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Mon, 23 Jun 2008 13:23:48 +1000
Subject: [XFS] Switches xfs_vn_listxattr to set it's put_listent callback
 directly and not go through xfs_attr_list.

SGI-PV: 983395

SGI-Modid: xfs-linux-melb:xfs-kern:31324a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/xfs_acl.c       |   3 +-
 fs/xfs/xfs_attr.c      | 139 ++++++++++++++++---------------------------------
 fs/xfs/xfs_attr.h      |  55 ++++++++++---------
 fs/xfs/xfs_attr_leaf.c |  61 +++-------------------
 fs/xfs/xfs_attr_leaf.h |  29 +----------
 5 files changed, 84 insertions(+), 203 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index ebee3a4f703..93057af2fe3 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -341,8 +341,7 @@ xfs_acl_iaccess(
 
 	/* If the file has no ACL return -1. */
 	rval = sizeof(xfs_acl_t);
-	if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval,
-					ATTR_ROOT | ATTR_KERNACCESS)) {
+	if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval, ATTR_ROOT)) {
 		_ACL_FREE(acl);
 		return -1;
 	}
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 49fac8d6db1..78de80e3caa 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -16,8 +16,6 @@
  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-#include <linux/capability.h>
-
 #include "xfs.h"
 #include "xfs_fs.h"
 #include "xfs_types.h"
@@ -607,12 +605,20 @@ xfs_attr_remove(
 	return xfs_attr_remove_int(dp, &xname, flags);
 }
 
-STATIC int
+int
 xfs_attr_list_int(xfs_attr_list_context_t *context)
 {
 	int error;
 	xfs_inode_t *dp = context->dp;
 
+	XFS_STATS_INC(xs_attr_list);
+
+	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
+		return EIO;
+
+	xfs_ilock(dp, XFS_ILOCK_SHARED);
+	xfs_attr_trace_l_c("syscall start", context);
+
 	/*
 	 * Decide on what work routines to call based on the inode size.
 	 */
@@ -625,6 +631,10 @@ xfs_attr_list_int(xfs_attr_list_context_t *context)
 	} else {
 		error = xfs_attr_node_list(context);
 	}
+
+	xfs_iunlock(dp, XFS_ILOCK_SHARED);
+	xfs_attr_trace_l_c("syscall end", context);
+
 	return error;
 }
 
@@ -641,74 +651,50 @@ xfs_attr_list_int(xfs_attr_list_context_t *context)
  */
 /*ARGSUSED*/
 STATIC int
-xfs_attr_put_listent(xfs_attr_list_context_t *context, attrnames_t *namesp,
+xfs_attr_put_listent(xfs_attr_list_context_t *context, int flags,
 		     char *name, int namelen,
 		     int valuelen, char *value)
 {
+	struct attrlist *alist = (struct attrlist *)context->alist;
 	attrlist_ent_t *aep;
 	int arraytop;
 
 	ASSERT(!(context->flags & ATTR_KERNOVAL));
 	ASSERT(context->count >= 0);
 	ASSERT(context->count < (ATTR_MAX_VALUELEN/8));
-	ASSERT(context->firstu >= sizeof(*context->alist));
+	ASSERT(context->firstu >= sizeof(*alist));
 	ASSERT(context->firstu <= context->bufsize);
 
-	arraytop = sizeof(*context->alist) +
-			context->count * sizeof(context->alist->al_offset[0]);
+	/*
+	 * Only list entries in the right namespace.
+	 */
+	if (((context->flags & ATTR_SECURE) == 0) !=
+	    ((flags & XFS_ATTR_SECURE) == 0))
+		return 0;
+	if (((context->flags & ATTR_ROOT) == 0) !=
+	    ((flags & XFS_ATTR_ROOT) == 0))
+		return 0;
+
+	arraytop = sizeof(*alist) +
+			context->count * sizeof(alist->al_offset[0]);
 	context->firstu -= ATTR_ENTSIZE(namelen);
 	if (context->firstu < arraytop) {
 		xfs_attr_trace_l_c("buffer full", context);
-		context->alist->al_more = 1;
+		alist->al_more = 1;
 		context->seen_enough = 1;
 		return 1;
 	}
 
-	aep = (attrlist_ent_t *)&(((char *)context->alist)[ context->firstu ]);
+	aep = (attrlist_ent_t *)&context->alist[context->firstu];
 	aep->a_valuelen = valuelen;
 	memcpy(aep->a_name, name, namelen);
-	aep->a_name[ namelen ] = 0;
-	context->alist->al_offset[ context->count++ ] = context->firstu;
-	context->alist->al_count = context->count;
+	aep->a_name[namelen] = 0;
+	alist->al_offset[context->count++] = context->firstu;
+	alist->al_count = context->count;
 	xfs_attr_trace_l_c("add", context);
 	return 0;
 }
 
-STATIC int
-xfs_attr_kern_list(xfs_attr_list_context_t *context, attrnames_t *namesp,
-		     char *name, int namelen,
-		     int valuelen, char *value)
-{
-	char *offset;
-	int arraytop;
-
-	ASSERT(context->count >= 0);
-
-	arraytop = context->count + namesp->attr_namelen + namelen + 1;
-	if (arraytop > context->firstu) {
-		context->count = -1;	/* insufficient space */
-		return 1;
-	}
-	offset = (char *)context->alist + context->count;
-	strncpy(offset, namesp->attr_name, namesp->attr_namelen);
-	offset += namesp->attr_namelen;
-	strncpy(offset, name, namelen);			/* real name */
-	offset += namelen;
-	*offset = '\0';
-	context->count += namesp->attr_namelen + namelen + 1;
-	return 0;
-}
-
-/*ARGSUSED*/
-STATIC int
-xfs_attr_kern_list_sizes(xfs_attr_list_context_t *context, attrnames_t *namesp,
-		     char *name, int namelen,
-		     int valuelen, char *value)
-{
-	context->count += namesp->attr_namelen + namelen + 1;
-	return 0;
-}
-
 /*
  * Generate a list of extended attribute names and optionally
  * also value lengths.  Positive return value follows the XFS
@@ -725,10 +711,9 @@ xfs_attr_list(
 	attrlist_cursor_kern_t *cursor)
 {
 	xfs_attr_list_context_t context;
+	struct attrlist *alist;
 	int error;
 
-	XFS_STATS_INC(xs_attr_list);
-
 	/*
 	 * Validate the cursor.
 	 */
@@ -749,52 +734,23 @@ xfs_attr_list(
 	/*
 	 * Initialize the output buffer.
 	 */
+	memset(&context, 0, sizeof(context));
 	context.dp = dp;
 	context.cursor = cursor;
-	context.count = 0;
-	context.dupcnt = 0;
 	context.resynch = 1;
 	context.flags = flags;
-	context.seen_enough = 0;
-	context.alist = (attrlist_t *)buffer;
-	context.put_value = 0;
-
-	if (flags & ATTR_KERNAMELS) {
-		context.bufsize = bufsize;
-		context.firstu = context.bufsize;
-		if (flags & ATTR_KERNOVAL)
-			context.put_listent = xfs_attr_kern_list_sizes;
-		else
-			context.put_listent = xfs_attr_kern_list;
-	} else {
-		context.bufsize = (bufsize & ~(sizeof(int)-1));  /* align */
-		context.firstu = context.bufsize;
-		context.alist->al_count = 0;
-		context.alist->al_more = 0;
-		context.alist->al_offset[0] = context.bufsize;
-		context.put_listent = xfs_attr_put_listent;
-	}
-
-	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
-		return EIO;
+	context.alist = buffer;
+	context.bufsize = (bufsize & ~(sizeof(int)-1));  /* align */
+	context.firstu = context.bufsize;
+	context.put_listent = xfs_attr_put_listent;
 
-	xfs_ilock(dp, XFS_ILOCK_SHARED);
-	xfs_attr_trace_l_c("syscall start", &context);
+	alist = (struct attrlist *)context.alist;
+	alist->al_count = 0;
+	alist->al_more = 0;
+	alist->al_offset[0] = context.bufsize;
 
 	error = xfs_attr_list_int(&context);
-
-	xfs_iunlock(dp, XFS_ILOCK_SHARED);
-	xfs_attr_trace_l_c("syscall end", &context);
-
-	if (context.flags & (ATTR_KERNOVAL|ATTR_KERNAMELS)) {
-		/* must return negated buffer size or the error */
-		if (context.count < 0)
-			error = XFS_ERROR(ERANGE);
-		else
-			error = -context.count;
-	} else
-		ASSERT(error >= 0);
-
+	ASSERT(error >= 0);
 	return error;
 }
 
@@ -2357,12 +2313,7 @@ xfs_attr_trace_enter(int type, char *where,
 		(void *)((__psunsigned_t)context->bufsize),
 		(void *)((__psunsigned_t)context->count),
 		(void *)((__psunsigned_t)context->firstu),
-		(void *)((__psunsigned_t)
-			(((context->count > 0) &&
-			!(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
-				? (ATTR_ENTRY(context->alist,
-					      context->count-1)->a_valuelen)
-				: 0)),
+		NULL,
 		(void *)((__psunsigned_t)context->dupcnt),
 		(void *)((__psunsigned_t)context->flags),
 		(void *)a13, (void *)a14, (void *)a15);
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index c1f7d43e5ec..41469434f41 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -18,9 +18,11 @@
 #ifndef __XFS_ATTR_H__
 #define	__XFS_ATTR_H__
 
+struct xfs_inode;
+struct xfs_da_args;
+struct xfs_attr_list_context;
+
 /*
- * xfs_attr.h
- *
  * Large attribute lists are structured around Btrees where all the data
  * elements are in the leaf nodes.  Attribute names are hashed into an int,
  * then that int is used as the index into the Btree.  Since the hashval
@@ -35,17 +37,6 @@
  * External interfaces
  *========================================================================*/
 
-struct cred;
-struct xfs_attr_list_context;
-
-typedef struct attrnames {
-	char *		attr_name;
-	unsigned int	attr_namelen;
-} attrnames_t;
-
-extern struct attrnames attr_user;
-extern struct attrnames attr_secure;
-extern struct attrnames attr_trusted;
 
 #define ATTR_DONTFOLLOW	0x0001	/* -- unused, from IRIX -- */
 #define ATTR_ROOT	0x0002	/* use attrs in root (trusted) namespace */
@@ -54,14 +45,8 @@ extern struct attrnames attr_trusted;
 #define ATTR_CREATE	0x0010	/* pure create: fail if attr already exists */
 #define ATTR_REPLACE	0x0020	/* pure set: fail if attr does not exist */
 
-#define ATTR_KERNACCESS	0x0400	/* [kernel] iaccess, inode held io-locked */
 #define ATTR_KERNOTIME	0x1000	/* [kernel] don't update inode timestamps */
 #define ATTR_KERNOVAL	0x2000	/* [kernel] get attr size only, not value */
-#define ATTR_KERNAMELS	0x4000	/* [kernel] list attr names (simple list) */
-
-#define ATTR_KERNORMALS	0x0800	/* [kernel] normal attr list: user+secure */
-#define ATTR_KERNROOTLS	0x8000	/* [kernel] include root in the attr list */
-#define ATTR_KERNFULLS	(ATTR_KERNORMALS|ATTR_KERNROOTLS)
 
 /*
  * The maximum size (into the kernel or returned from the kernel) of an
@@ -129,20 +114,40 @@ typedef struct attrlist_cursor_kern {
 
 
 /*========================================================================
- * Function prototypes for the kernel.
+ * Structure used to pass context around among the routines.
  *========================================================================*/
 
-struct xfs_inode;
-struct attrlist_cursor_kern;
-struct xfs_da_args;
+
+typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int,
+				      char *, int, int, char *);
+
+typedef struct xfs_attr_list_context {
+	struct xfs_inode		*dp;		/* inode */
+	struct attrlist_cursor_kern	*cursor;	/* position in list */
+	char				*alist;		/* output buffer */
+	int				seen_enough;	/* T/F: seen enough of list? */
+	int				count;		/* num used entries */
+	int				dupcnt;		/* count dup hashvals seen */
+	int				bufsize;	/* total buffer size */
+	int				firstu;		/* first used byte in buffer */
+	int				flags;		/* from VOP call */
+	int				resynch;	/* T/F: resynch with cursor */
+	int				put_value;	/* T/F: need value for listent */
+	put_listent_func_t		put_listent;	/* list output fmt function */
+	int				index;		/* index into output buffer */
+} xfs_attr_list_context_t;
+
+
+/*========================================================================
+ * Function prototypes for the kernel.
+ *========================================================================*/
 
 /*
  * Overall external interface routines.
  */
 int xfs_attr_inactive(struct xfs_inode *dp);
-
-int xfs_attr_shortform_getvalue(struct xfs_da_args *);
 int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int);
 int xfs_attr_rmtval_get(struct xfs_da_args *args);
+int xfs_attr_list_int(struct xfs_attr_list_context *);
 
 #endif	/* __XFS_ATTR_H__ */
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index cb345e6e485..23ef5d7c87e 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -94,13 +94,6 @@ STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
  * Namespace helper routines
  *========================================================================*/
 
-STATIC_INLINE attrnames_t *
-xfs_attr_flags_namesp(int flags)
-{
-	return ((flags & XFS_ATTR_SECURE) ? &attr_secure:
-		  ((flags & XFS_ATTR_ROOT) ? &attr_trusted : &attr_user));
-}
-
 /*
  * If namespace bits don't match return 0.
  * If all match then return 1.
@@ -111,25 +104,6 @@ xfs_attr_namesp_match(int arg_flags, int ondisk_flags)
 	return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags);
 }
 
-/*
- * If namespace bits don't match and we don't have an override for it
- * then return 0.
- * If all match or are overridable then return 1.
- */
-STATIC_INLINE int
-xfs_attr_namesp_match_overrides(int arg_flags, int ondisk_flags)
-{
-	if (((arg_flags & ATTR_SECURE) == 0) !=
-	    ((ondisk_flags & XFS_ATTR_SECURE) == 0) &&
-	    !(arg_flags & ATTR_KERNORMALS))
-		return 0;
-	if (((arg_flags & ATTR_ROOT) == 0) !=
-	    ((ondisk_flags & XFS_ATTR_ROOT) == 0) &&
-	    !(arg_flags & ATTR_KERNROOTLS))
-		return 0;
-	return 1;
-}
-
 
 /*========================================================================
  * External routines when attribute fork size < XFS_LITINO(mp).
@@ -626,15 +600,8 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
 	    (XFS_ISRESET_CURSOR(cursor) &&
              (dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize)) {
 		for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
-			attrnames_t	*namesp;
-
-			if (!xfs_attr_namesp_match_overrides(context->flags, sfe->flags)) {
-				sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
-				continue;
-			}
-			namesp = xfs_attr_flags_namesp(sfe->flags);
 			error = context->put_listent(context,
-					   namesp,
+					   sfe->flags,
 					   (char *)sfe->nameval,
 					   (int)sfe->namelen,
 					   (int)sfe->valuelen,
@@ -681,10 +648,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
 			kmem_free(sbuf);
 			return XFS_ERROR(EFSCORRUPTED);
 		}
-		if (!xfs_attr_namesp_match_overrides(context->flags, sfe->flags)) {
-			sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
-			continue;
-		}
+
 		sbp->entno = i;
 		sbp->hash = xfs_da_hashname((char *)sfe->nameval, sfe->namelen);
 		sbp->name = (char *)sfe->nameval;
@@ -728,16 +692,12 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
 	 * Loop putting entries into the user buffer.
 	 */
 	for ( ; i < nsbuf; i++, sbp++) {
-		attrnames_t	*namesp;
-
-		namesp = xfs_attr_flags_namesp(sbp->flags);
-
 		if (cursor->hashval != sbp->hash) {
 			cursor->hashval = sbp->hash;
 			cursor->offset = 0;
 		}
 		error = context->put_listent(context,
-					namesp,
+					sbp->flags,
 					sbp->name,
 					sbp->namelen,
 					sbp->valuelen,
@@ -2402,8 +2362,6 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
 	 */
 	retval = 0;
 	for (  ; (i < be16_to_cpu(leaf->hdr.count)); entry++, i++) {
-		attrnames_t *namesp;
-
 		if (be32_to_cpu(entry->hashval) != cursor->hashval) {
 			cursor->hashval = be32_to_cpu(entry->hashval);
 			cursor->offset = 0;
@@ -2411,17 +2369,13 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
 
 		if (entry->flags & XFS_ATTR_INCOMPLETE)
 			continue;		/* skip incomplete entries */
-		if (!xfs_attr_namesp_match_overrides(context->flags, entry->flags))
-			continue;
-
-		namesp = xfs_attr_flags_namesp(entry->flags);
 
 		if (entry->flags & XFS_ATTR_LOCAL) {
 			xfs_attr_leaf_name_local_t *name_loc =
 				XFS_ATTR_LEAF_NAME_LOCAL(leaf, i);
 
 			retval = context->put_listent(context,
-						namesp,
+						entry->flags,
 						(char *)name_loc->nameval,
 						(int)name_loc->namelen,
 						be16_to_cpu(name_loc->valuelen),
@@ -2448,16 +2402,15 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
 				if (retval)
 					return retval;
 				retval = context->put_listent(context,
-						namesp,
+						entry->flags,
 						(char *)name_rmt->name,
 						(int)name_rmt->namelen,
 						valuelen,
 						(char*)args.value);
 				kmem_free(args.value);
-			}
-			else {
+			} else {
 				retval = context->put_listent(context,
-						namesp,
+						entry->flags,
 						(char *)name_rmt->name,
 						(int)name_rmt->namelen,
 						valuelen,
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index 040f732ce1e..5ecf437b782 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -30,7 +30,7 @@
 
 struct attrlist;
 struct attrlist_cursor_kern;
-struct attrnames;
+struct xfs_attr_list_context;
 struct xfs_dabuf;
 struct xfs_da_args;
 struct xfs_da_state;
@@ -204,33 +204,6 @@ static inline int xfs_attr_leaf_entsize_local_max(int bsize)
 	return (((bsize) >> 1) + ((bsize) >> 2));
 }
 
-
-/*========================================================================
- * Structure used to pass context around among the routines.
- *========================================================================*/
-
-
-struct xfs_attr_list_context;
-
-typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, struct attrnames *,
-				      char *, int, int, char *);
-
-typedef struct xfs_attr_list_context {
-	struct xfs_inode		*dp;		/* inode */
-	struct attrlist_cursor_kern	*cursor;	/* position in list */
-	struct attrlist			*alist;		/* output buffer */
-	int				seen_enough;	/* T/F: seen enough of list? */
-	int				count;		/* num used entries */
-	int				dupcnt;		/* count dup hashvals seen */
-	int				bufsize;	/* total buffer size */
-	int				firstu;		/* first used byte in buffer */
-	int				flags;		/* from VOP call */
-	int				resynch;	/* T/F: resynch with cursor */
-	int				put_value;	/* T/F: need value for listent */
-	put_listent_func_t		put_listent;	/* list output fmt function */
-	int				index;		/* index into output buffer */
-} xfs_attr_list_context_t;
-
 /*
  * Used to keep a list of "remote value" extents when unlinking an inode.
  */
-- 
cgit v1.2.3-70-g09d2


From 7f871d5d1b9b126c1a0cece737a37c6980c988e3 Mon Sep 17 00:00:00 2001
From: Lachlan McIlroy <lachlan@sgi.com>
Date: Mon, 23 Jun 2008 13:23:57 +1000
Subject: [XFS] make inode reclaim wait for log I/O to complete

During a forced shutdown a xfs inode can be destroyed before log I/O
involving that inode is complete. We need to wait for the inode to be
unpinned before tearing it down. Version 2 cleans up the code a bit by
relying on xfs_iflush() to do the unpinning and forced shutdown check.

SGI-PV: 981240

SGI-Modid: xfs-linux-melb:xfs-kern:31326a

Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
Signed-off-by: David Chinner <dgc@sgi.com>
---
 fs/xfs/xfs_inode.c    |  2 --
 fs/xfs/xfs_vnodeops.c | 30 ++++++++----------------------
 2 files changed, 8 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 199a36ac8e2..fcb1dcc6f03 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3082,8 +3082,6 @@ xfs_iflush(
 	 * flush lock and do nothing.
 	 */
 	if (xfs_inode_clean(ip)) {
-		ASSERT((iip != NULL) ?
-			 !(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1);
 		xfs_ifunlock(ip);
 		return 0;
 	}
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index b6a065eb25a..d76565bfcb7 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -3260,7 +3260,6 @@ xfs_finish_reclaim(
 {
 	xfs_perag_t	*pag = xfs_get_perag(ip->i_mount, ip->i_ino);
 	bhv_vnode_t	*vp = XFS_ITOV_NULL(ip);
-	int		error;
 
 	if (vp && VN_BAD(vp))
 		goto reclaim;
@@ -3303,29 +3302,16 @@ xfs_finish_reclaim(
 		xfs_iflock(ip);
 	}
 
-	if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-		if (ip->i_update_core ||
-		    ((ip->i_itemp != NULL) &&
-		     (ip->i_itemp->ili_format.ilf_fields != 0))) {
-			error = xfs_iflush(ip, sync_mode);
-			/*
-			 * If we hit an error, typically because of filesystem
-			 * shutdown, we don't need to let vn_reclaim to know
-			 * because we're gonna reclaim the inode anyway.
-			 */
-			if (error) {
-				xfs_iunlock(ip, XFS_ILOCK_EXCL);
-				goto reclaim;
-			}
-			xfs_iflock(ip); /* synchronize with xfs_iflush_done */
-		}
-
-		ASSERT(ip->i_update_core == 0);
-		ASSERT(ip->i_itemp == NULL ||
-		       ip->i_itemp->ili_format.ilf_fields == 0);
+	/*
+	 * In the case of a forced shutdown we rely on xfs_iflush() to
+	 * wait for the inode to be unpinned before returning an error.
+	 */
+	if (xfs_iflush(ip, sync_mode) == 0) {
+		/* synchronize with xfs_iflush_done */
+		xfs_iflock(ip);
+		xfs_ifunlock(ip);
 	}
 
-	xfs_ifunlock(ip);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 
  reclaim:
-- 
cgit v1.2.3-70-g09d2


From 6278debdf95b100a516b803f90d6f11b41c34171 Mon Sep 17 00:00:00 2001
From: Lachlan McIlroy <lachlan@sgi.com>
Date: Mon, 23 Jun 2008 13:25:02 +1000
Subject: [XFS] fix extent corruption in xfs_iext_irec_compact_full()

This function is used to compact the indirect extent list by moving
extents from one page to the previous to fill them up. After we move some
extents to an earlier page we need to shuffle the remaining extents to the
start of the page. The actual bug here is the second argument to memmove()
needs to index past the extents, that were copied to the previous page,
and move the remaining extents. For pages that are already full (ie
ext_avail == 0) the compaction code has no net effect so don't do it.

SGI-PV: 983337

SGI-Modid: xfs-linux-melb:xfs-kern:31332a

Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
---
 fs/xfs/xfs_inode.c | 70 ++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 47 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index fcb1dcc6f03..bedc6616317 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -4532,39 +4532,63 @@ xfs_iext_irec_compact_full(
 	int		nlists;			/* number of irec's (ex lists) */
 
 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+
 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
 	erp = ifp->if_u1.if_ext_irec;
 	ep = &erp->er_extbuf[erp->er_extcount];
 	erp_next = erp + 1;
 	ep_next = erp_next->er_extbuf;
+
 	while (erp_idx < nlists - 1) {
+		/*
+		 * Check how many extent records are available in this irec.
+		 * If there is none skip the whole exercise.
+		 */
 		ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
-		ext_diff = MIN(ext_avail, erp_next->er_extcount);
-		memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t));
-		erp->er_extcount += ext_diff;
-		erp_next->er_extcount -= ext_diff;
-		/* Remove next page */
-		if (erp_next->er_extcount == 0) {
+		if (ext_avail) {
+
 			/*
-			 * Free page before removing extent record
-			 * so er_extoffs don't get modified in
-			 * xfs_iext_irec_remove.
+			 * Copy over as many as possible extent records into
+			 * the previous page.
 			 */
-			kmem_free(erp_next->er_extbuf);
-			erp_next->er_extbuf = NULL;
-			xfs_iext_irec_remove(ifp, erp_idx + 1);
-			erp = &ifp->if_u1.if_ext_irec[erp_idx];
-			nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-		/* Update next page */
-		} else {
-			/* Move rest of page up to become next new page */
-			memmove(erp_next->er_extbuf, ep_next,
-				erp_next->er_extcount * sizeof(xfs_bmbt_rec_t));
-			ep_next = erp_next->er_extbuf;
-			memset(&ep_next[erp_next->er_extcount], 0,
-				(XFS_LINEAR_EXTS - erp_next->er_extcount) *
-				sizeof(xfs_bmbt_rec_t));
+			ext_diff = MIN(ext_avail, erp_next->er_extcount);
+			memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t));
+			erp->er_extcount += ext_diff;
+			erp_next->er_extcount -= ext_diff;
+
+			/*
+			 * If the next irec is empty now we can simply
+			 * remove it.
+			 */
+			if (erp_next->er_extcount == 0) {
+				/*
+				 * Free page before removing extent record
+				 * so er_extoffs don't get modified in
+				 * xfs_iext_irec_remove.
+				 */
+				kmem_free(erp_next->er_extbuf);
+				erp_next->er_extbuf = NULL;
+				xfs_iext_irec_remove(ifp, erp_idx + 1);
+				erp = &ifp->if_u1.if_ext_irec[erp_idx];
+				nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+
+			/*
+			 * If the next irec is not empty move up the content
+			 * that has not been copied to the previous page to
+			 * the beggining of this one.
+			 */
+			} else {
+				memmove(erp_next->er_extbuf, &ep_next[ext_diff],
+					erp_next->er_extcount *
+					sizeof(xfs_bmbt_rec_t));
+				ep_next = erp_next->er_extbuf;
+				memset(&ep_next[erp_next->er_extcount], 0,
+					(XFS_LINEAR_EXTS -
+						erp_next->er_extcount) *
+					sizeof(xfs_bmbt_rec_t));
+			}
 		}
+
 		if (erp->er_extcount == XFS_LINEAR_EXTS) {
 			erp_idx++;
 			if (erp_idx < nlists)
-- 
cgit v1.2.3-70-g09d2


From 61f10fad1947116055c694321d9d8f21152c0582 Mon Sep 17 00:00:00 2001
From: Tim Shimmin <tes@sgi.com>
Date: Mon, 23 Jun 2008 13:25:09 +1000
Subject: [XFS] Fix up warning for xfs_vn_listxatt's call of list_one_attr()
 with context count of ssize_t versus int. Change context count to be ssize_t.

SGI-PV: 983395

SGI-Modid: xfs-linux-melb:xfs-kern:31333a

Signed-off-by: Tim Shimmin <tes@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/xfs_attr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index 41469434f41..3115dcc6723 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -126,7 +126,7 @@ typedef struct xfs_attr_list_context {
 	struct attrlist_cursor_kern	*cursor;	/* position in list */
 	char				*alist;		/* output buffer */
 	int				seen_enough;	/* T/F: seen enough of list? */
-	int				count;		/* num used entries */
+	ssize_t				count;		/* num used entries */
 	int				dupcnt;		/* count dup hashvals seen */
 	int				bufsize;	/* total buffer size */
 	int				firstu;		/* first used byte in buffer */
-- 
cgit v1.2.3-70-g09d2


From 8f112e3bc3508afc8d1612868d178359446c08fd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Mon, 23 Jun 2008 13:25:17 +1000
Subject: [XFS] Merge xfs_rmdir into xfs_remove

xfs_remove and xfs_rmdir are almost the same with a little more work
performed in xfs_rmdir due to the . and .. entries. This patch merges
xfs_rmdir into xfs_remove and performs these actions conditionally.

Also clean up the error handling which was a nightmare in both versions
before.

SGI-PV: 981498

SGI-Modid: xfs-linux-melb:xfs-kern:31335a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Barry Naujok <bnaujok@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/linux-2.6/xfs_iops.c |  54 +++-----
 fs/xfs/xfs_vnodeops.c       | 324 ++++++++++++--------------------------------
 fs/xfs/xfs_vnodeops.h       |   2 -
 3 files changed, 102 insertions(+), 278 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 3ae80155de3..1f89c19cd4c 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -245,8 +245,7 @@ STATIC void
 xfs_cleanup_inode(
 	struct inode	*dir,
 	struct inode	*inode,
-	struct dentry	*dentry,
-	int		mode)
+	struct dentry	*dentry)
 {
 	struct xfs_name	teardown;
 
@@ -257,10 +256,7 @@ xfs_cleanup_inode(
 	 */
 	xfs_dentry_to_name(&teardown, dentry);
 
-	if (S_ISDIR(mode))
-		xfs_rmdir(XFS_I(dir), &teardown, XFS_I(inode));
-	else
-		xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
+	xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
 	iput(inode);
 }
 
@@ -342,7 +338,7 @@ xfs_vn_mknod(
 	return -error;
 
  out_cleanup_inode:
-	xfs_cleanup_inode(dir, inode, dentry, mode);
+	xfs_cleanup_inode(dir, inode, dentry);
  out_free_acl:
 	if (default_acl)
 		_ACL_FREE(default_acl);
@@ -518,37 +514,11 @@ xfs_vn_symlink(
 	return 0;
 
  out_cleanup_inode:
-	xfs_cleanup_inode(dir, inode, dentry, 0);
+	xfs_cleanup_inode(dir, inode, dentry);
  out:
 	return -error;
 }
 
-STATIC int
-xfs_vn_rmdir(
-	struct inode	*dir,
-	struct dentry	*dentry)
-{
-	struct inode	*inode = dentry->d_inode;
-	struct xfs_name	name;
-	int		error;
-
-	xfs_dentry_to_name(&name, dentry);
-
-	error = xfs_rmdir(XFS_I(dir), &name, XFS_I(inode));
-	if (likely(!error)) {
-		xfs_validate_fields(inode);
-		xfs_validate_fields(dir);
-		/*
-		 * With rmdir, the VFS makes the dentry "negative": no inode,
-		 * but still hashed. This is incompatible with case-insensitive
-		 * mode, so invalidate (unhash) the dentry in CI-mode.
-		 */
-		if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb))
-			d_invalidate(dentry);
-	}
-	return -error;
-}
-
 STATIC int
 xfs_vn_rename(
 	struct inode	*odir,
@@ -842,7 +812,13 @@ const struct inode_operations xfs_dir_inode_operations = {
 	.unlink			= xfs_vn_unlink,
 	.symlink		= xfs_vn_symlink,
 	.mkdir			= xfs_vn_mkdir,
-	.rmdir			= xfs_vn_rmdir,
+	/*
+	 * Yes, XFS uses the same method for rmdir and unlink.
+	 *
+	 * There are some subtile differences deeper in the code,
+	 * but we use S_ISDIR to check for those.
+	 */
+	.rmdir			= xfs_vn_unlink,
 	.mknod			= xfs_vn_mknod,
 	.rename			= xfs_vn_rename,
 	.permission		= xfs_vn_permission,
@@ -861,7 +837,13 @@ const struct inode_operations xfs_dir_ci_inode_operations = {
 	.unlink			= xfs_vn_unlink,
 	.symlink		= xfs_vn_symlink,
 	.mkdir			= xfs_vn_mkdir,
-	.rmdir			= xfs_vn_rmdir,
+	/*
+	 * Yes, XFS uses the same method for rmdir and unlink.
+	 *
+	 * There are some subtile differences deeper in the code,
+	 * but we use S_ISDIR to check for those.
+	 */
+	.rmdir			= xfs_vn_unlink,
 	.mknod			= xfs_vn_mknod,
 	.rename			= xfs_vn_rename,
 	.permission		= xfs_vn_permission,
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index d76565bfcb7..8297a8c5af9 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -2116,13 +2116,6 @@ again:
 #endif
 }
 
-#ifdef	DEBUG
-#define	REMOVE_DEBUG_TRACE(x)	{remove_which_error_return = (x);}
-int remove_which_error_return = 0;
-#else /* ! DEBUG */
-#define	REMOVE_DEBUG_TRACE(x)
-#endif	/* ! DEBUG */
-
 int
 xfs_remove(
 	xfs_inode_t             *dp,
@@ -2131,6 +2124,7 @@ xfs_remove(
 {
 	xfs_mount_t		*mp = dp->i_mount;
 	xfs_trans_t             *tp = NULL;
+	int			is_dir = S_ISDIR(ip->i_d.di_mode);
 	int                     error = 0;
 	xfs_bmap_free_t         free_list;
 	xfs_fsblock_t           first_block;
@@ -2138,8 +2132,10 @@ xfs_remove(
 	int			committed;
 	int			link_zero;
 	uint			resblks;
+	uint			log_count;
 
 	xfs_itrace_entry(dp);
+	xfs_itrace_entry(ip);
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
@@ -2152,19 +2148,23 @@ xfs_remove(
 			return error;
 	}
 
-	xfs_itrace_entry(ip);
-	xfs_itrace_ref(ip);
-
 	error = XFS_QM_DQATTACH(mp, dp, 0);
-	if (!error)
-		error = XFS_QM_DQATTACH(mp, ip, 0);
-	if (error) {
-		REMOVE_DEBUG_TRACE(__LINE__);
+	if (error)
+		goto std_return;
+
+	error = XFS_QM_DQATTACH(mp, ip, 0);
+	if (error)
 		goto std_return;
-	}
 
-	tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
+	if (is_dir) {
+		tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
+		log_count = XFS_DEFAULT_LOG_COUNT;
+	} else {
+		tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
+		log_count = XFS_REMOVE_LOG_COUNT;
+	}
 	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
+
 	/*
 	 * We try to get the real space reservation first,
 	 * allowing for directory btree deletion(s) implying
@@ -2176,25 +2176,21 @@ xfs_remove(
 	 */
 	resblks = XFS_REMOVE_SPACE_RES(mp);
 	error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0,
-			XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT);
+				  XFS_TRANS_PERM_LOG_RES, log_count);
 	if (error == ENOSPC) {
 		resblks = 0;
 		error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
-				XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT);
+					  XFS_TRANS_PERM_LOG_RES, log_count);
 	}
 	if (error) {
 		ASSERT(error != ENOSPC);
-		REMOVE_DEBUG_TRACE(__LINE__);
-		xfs_trans_cancel(tp, 0);
-		return error;
+		cancel_flags = 0;
+		goto out_trans_cancel;
 	}
 
 	error = xfs_lock_dir_and_entry(dp, ip);
-	if (error) {
-		REMOVE_DEBUG_TRACE(__LINE__);
-		xfs_trans_cancel(tp, cancel_flags);
-		goto std_return;
-	}
+	if (error)
+		goto out_trans_cancel;
 
 	/*
 	 * At this point, we've gotten both the directory and the entry
@@ -2206,6 +2202,21 @@ xfs_remove(
 	IHOLD(dp);
 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 
+	/*
+	 * If we're removing a directory perform some additional validation.
+	 */
+	if (is_dir) {
+		ASSERT(ip->i_d.di_nlink >= 2);
+		if (ip->i_d.di_nlink != 2) {
+			error = XFS_ERROR(ENOTEMPTY);
+			goto out_trans_cancel;
+		}
+		if (!xfs_dir_isempty(ip)) {
+			error = XFS_ERROR(ENOTEMPTY);
+			goto out_trans_cancel;
+		}
+	}
+
 	/*
 	 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry.
 	 */
@@ -2214,39 +2225,64 @@ xfs_remove(
 					&first_block, &free_list, resblks);
 	if (error) {
 		ASSERT(error != ENOENT);
-		REMOVE_DEBUG_TRACE(__LINE__);
-		goto error1;
+		goto out_bmap_cancel;
 	}
 	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
 
+	/*
+	 * Bump the in memory generation count on the parent
+	 * directory so that other can know that it has changed.
+	 */
 	dp->i_gen++;
 	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
 
-	error = xfs_droplink(tp, ip);
-	if (error) {
-		REMOVE_DEBUG_TRACE(__LINE__);
-		goto error1;
+	if (is_dir) {
+		/*
+		 * Drop the link from ip's "..".
+		 */
+		error = xfs_droplink(tp, dp);
+		if (error)
+			goto out_bmap_cancel;
+
+		/*
+		 * Drop the link from dp to ip.
+		 */
+		error = xfs_droplink(tp, ip);
+		if (error)
+			goto out_bmap_cancel;
+	} else {
+		/*
+		 * When removing a non-directory we need to log the parent
+		 * inode here for the i_gen update.  For a directory this is
+		 * done implicitly by the xfs_droplink call for the ".." entry.
+		 */
+		xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
 	}
 
-	/* Determine if this is the last link while
+	/*
+	 * Drop the "." link from ip to self.
+	 */
+	error = xfs_droplink(tp, ip);
+	if (error)
+		goto out_bmap_cancel;
+
+	/*
+	 * Determine if this is the last link while
 	 * we are in the transaction.
 	 */
-	link_zero = (ip)->i_d.di_nlink==0;
+	link_zero = (ip->i_d.di_nlink == 0);
 
 	/*
 	 * If this is a synchronous mount, make sure that the
 	 * remove transaction goes to disk before returning to
 	 * the user.
 	 */
-	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
+	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
 		xfs_trans_set_sync(tp);
-	}
 
 	error = xfs_bmap_finish(&tp, &free_list, &committed);
-	if (error) {
-		REMOVE_DEBUG_TRACE(__LINE__);
-		goto error_rele;
-	}
+	if (error)
+		goto out_bmap_cancel;
 
 	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
 	if (error)
@@ -2258,38 +2294,26 @@ xfs_remove(
 	 * will get killed on last close in xfs_close() so we don't
 	 * have to worry about that.
 	 */
-	if (link_zero && xfs_inode_is_filestream(ip))
+	if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
 		xfs_filestream_deassociate(ip);
 
 	xfs_itrace_exit(ip);
+	xfs_itrace_exit(dp);
 
-/*	Fall through to std_return with error = 0 */
  std_return:
 	if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
-		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE,
-				dp, DM_RIGHT_NULL,
-				NULL, DM_RIGHT_NULL,
-				name->name, NULL, ip->i_d.di_mode, error, 0);
+		XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, dp, DM_RIGHT_NULL,
+				NULL, DM_RIGHT_NULL, name->name, NULL,
+				ip->i_d.di_mode, error, 0);
 	}
-	return error;
 
- error1:
-	xfs_bmap_cancel(&free_list);
-	cancel_flags |= XFS_TRANS_ABORT;
-	xfs_trans_cancel(tp, cancel_flags);
-	goto std_return;
+	return error;
 
- error_rele:
-	/*
-	 * In this case make sure to not release the inode until after
-	 * the current transaction is aborted.  Releasing it beforehand
-	 * can cause us to go to xfs_inactive and start a recursive
-	 * transaction which can easily deadlock with the current one.
-	 */
+ out_bmap_cancel:
 	xfs_bmap_cancel(&free_list);
 	cancel_flags |= XFS_TRANS_ABORT;
+ out_trans_cancel:
 	xfs_trans_cancel(tp, cancel_flags);
-
 	goto std_return;
 }
 
@@ -2655,186 +2679,6 @@ std_return:
 	goto std_return;
 }
 
-int
-xfs_rmdir(
-	xfs_inode_t             *dp,
-	struct xfs_name		*name,
-	xfs_inode_t		*cdp)
-{
-	xfs_mount_t		*mp = dp->i_mount;
-	xfs_trans_t             *tp;
-	int                     error;
-	xfs_bmap_free_t         free_list;
-	xfs_fsblock_t           first_block;
-	int			cancel_flags;
-	int			committed;
-	int			last_cdp_link;
-	uint			resblks;
-
-	xfs_itrace_entry(dp);
-
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return XFS_ERROR(EIO);
-
-	if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) {
-		error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE,
-					dp, DM_RIGHT_NULL,
-					NULL, DM_RIGHT_NULL, name->name,
-					NULL, cdp->i_d.di_mode, 0, 0);
-		if (error)
-			return XFS_ERROR(error);
-	}
-
-	/*
-	 * Get the dquots for the inodes.
-	 */
-	error = XFS_QM_DQATTACH(mp, dp, 0);
-	if (!error)
-		error = XFS_QM_DQATTACH(mp, cdp, 0);
-	if (error) {
-		REMOVE_DEBUG_TRACE(__LINE__);
-		goto std_return;
-	}
-
-	tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
-	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
-	/*
-	 * We try to get the real space reservation first,
-	 * allowing for directory btree deletion(s) implying
-	 * possible bmap insert(s).  If we can't get the space
-	 * reservation then we use 0 instead, and avoid the bmap
-	 * btree insert(s) in the directory code by, if the bmap
-	 * insert tries to happen, instead trimming the LAST
-	 * block from the directory.
-	 */
-	resblks = XFS_REMOVE_SPACE_RES(mp);
-	error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0,
-			XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT);
-	if (error == ENOSPC) {
-		resblks = 0;
-		error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
-				XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT);
-	}
-	if (error) {
-		ASSERT(error != ENOSPC);
-		cancel_flags = 0;
-		goto error_return;
-	}
-	XFS_BMAP_INIT(&free_list, &first_block);
-
-	/*
-	 * Now lock the child directory inode and the parent directory
-	 * inode in the proper order.  This will take care of validating
-	 * that the directory entry for the child directory inode has
-	 * not changed while we were obtaining a log reservation.
-	 */
-	error = xfs_lock_dir_and_entry(dp, cdp);
-	if (error) {
-		xfs_trans_cancel(tp, cancel_flags);
-		goto std_return;
-	}
-
-	IHOLD(dp);
-	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
-
-	IHOLD(cdp);
-	xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL);
-
-	ASSERT(cdp->i_d.di_nlink >= 2);
-	if (cdp->i_d.di_nlink != 2) {
-		error = XFS_ERROR(ENOTEMPTY);
-		goto error_return;
-	}
-	if (!xfs_dir_isempty(cdp)) {
-		error = XFS_ERROR(ENOTEMPTY);
-		goto error_return;
-	}
-
-	error = xfs_dir_removename(tp, dp, name, cdp->i_ino,
-					&first_block, &free_list, resblks);
-	if (error)
-		goto error1;
-
-	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-
-	/*
-	 * Bump the in memory generation count on the parent
-	 * directory so that other can know that it has changed.
-	 */
-	dp->i_gen++;
-
-	/*
-	 * Drop the link from cdp's "..".
-	 */
-	error = xfs_droplink(tp, dp);
-	if (error) {
-		goto error1;
-	}
-
-	/*
-	 * Drop the link from dp to cdp.
-	 */
-	error = xfs_droplink(tp, cdp);
-	if (error) {
-		goto error1;
-	}
-
-	/*
-	 * Drop the "." link from cdp to self.
-	 */
-	error = xfs_droplink(tp, cdp);
-	if (error) {
-		goto error1;
-	}
-
-	/* Determine these before committing transaction */
-	last_cdp_link = (cdp)->i_d.di_nlink==0;
-
-	/*
-	 * If this is a synchronous mount, make sure that the
-	 * rmdir transaction goes to disk before returning to
-	 * the user.
-	 */
-	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
-		xfs_trans_set_sync(tp);
-	}
-
-	error = xfs_bmap_finish (&tp, &free_list, &committed);
-	if (error) {
-		xfs_bmap_cancel(&free_list);
-		xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
-				 XFS_TRANS_ABORT));
-		goto std_return;
-	}
-
-	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-	if (error) {
-		goto std_return;
-	}
-
-
-	/* Fall through to std_return with error = 0 or the errno
-	 * from xfs_trans_commit. */
- std_return:
-	if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
-		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE,
-					dp, DM_RIGHT_NULL,
-					NULL, DM_RIGHT_NULL,
-					name->name, NULL, cdp->i_d.di_mode,
-					error, 0);
-	}
-	return error;
-
- error1:
-	xfs_bmap_cancel(&free_list);
-	cancel_flags |= XFS_TRANS_ABORT;
-	/* FALLTHROUGH */
-
- error_return:
-	xfs_trans_cancel(tp, cancel_flags);
-	goto std_return;
-}
-
 int
 xfs_symlink(
 	xfs_inode_t		*dp,
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 7e9a8b241f2..454fa9a3e52 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -31,8 +31,6 @@ int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
 		struct xfs_name *target_name);
 int xfs_mkdir(struct xfs_inode *dp, struct xfs_name *dir_name,
 		mode_t mode, struct xfs_inode **ipp, struct cred *credp);
-int xfs_rmdir(struct xfs_inode *dp, struct xfs_name *name,
-		struct xfs_inode *cdp);
 int xfs_readdir(struct xfs_inode	*dp, void *dirent, size_t bufsize,
 		       xfs_off_t *offset, filldir_t filldir);
 int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
-- 
cgit v1.2.3-70-g09d2


From e5700704b2b0853c059e424284cceeff3032ea28 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Mon, 23 Jun 2008 13:25:25 +1000
Subject: [XFS] Don't update i_size for directories and special files

The core kernel uses vfs_getattr to look at the inode size and similar
attributes, so there is no need to keep i_size uptodate for directories or
special files. This means we can remove xfs_validate_fields because the
I/O path already keeps i_size uptodate for regular files.

SGI-PV: 981498

SGI-Modid: xfs-linux-melb:xfs-kern:31336a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Barry Naujok <bnaujok@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/linux-2.6/xfs_iops.c | 61 ++++++++++-----------------------------------
 1 file changed, 13 insertions(+), 48 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 1f89c19cd4c..7b42569968f 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -181,23 +181,6 @@ xfs_ichgtime_fast(
 		mark_inode_dirty_sync(inode);
 }
 
-
-/*
- * Pull the link count and size up from the xfs inode to the linux inode
- */
-STATIC void
-xfs_validate_fields(
-	struct inode		*inode)
-{
-	struct xfs_inode	*ip = XFS_I(inode);
-	loff_t size;
-
-	/* we're under i_sem so i_size can't change under us */
-	size = XFS_ISIZE(ip);
-	if (i_size_read(inode) != size)
-		i_size_write(inode, size);
-}
-
 /*
  * Hook in SELinux.  This is not quite correct yet, what we really need
  * here (as we do for default ACLs) is a mechanism by which creation of
@@ -331,10 +314,7 @@ xfs_vn_mknod(
 	}
 
 
-	if (S_ISDIR(mode))
-		xfs_validate_fields(inode);
 	d_instantiate(dentry, inode);
-	xfs_validate_fields(dir);
 	return -error;
 
  out_cleanup_inode:
@@ -450,7 +430,6 @@ xfs_vn_link(
 	}
 
 	xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED);
-	xfs_validate_fields(inode);
 	d_instantiate(dentry, inode);
 	return 0;
 }
@@ -460,26 +439,23 @@ xfs_vn_unlink(
 	struct inode	*dir,
 	struct dentry	*dentry)
 {
-	struct inode	*inode;
 	struct xfs_name	name;
 	int		error;
 
-	inode = dentry->d_inode;
 	xfs_dentry_to_name(&name, dentry);
 
-	error = xfs_remove(XFS_I(dir), &name, XFS_I(inode));
-	if (likely(!error)) {
-		xfs_validate_fields(dir);	/* size needs update */
-		xfs_validate_fields(inode);
-		/*
-		 * With unlink, the VFS makes the dentry "negative": no inode,
-		 * but still hashed. This is incompatible with case-insensitive
-		 * mode, so invalidate (unhash) the dentry in CI-mode.
-		 */
-		if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb))
-			d_invalidate(dentry);
-	}
-	return -error;
+	error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
+	if (error)
+		return error;
+
+	/*
+	 * With unlink, the VFS makes the dentry "negative": no inode,
+	 * but still hashed. This is incompatible with case-insensitive
+	 * mode, so invalidate (unhash) the dentry in CI-mode.
+	 */
+	if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb))
+		d_invalidate(dentry);
+	return 0;
 }
 
 STATIC int
@@ -509,8 +485,6 @@ xfs_vn_symlink(
 		goto out_cleanup_inode;
 
 	d_instantiate(dentry, inode);
-	xfs_validate_fields(dir);
-	xfs_validate_fields(inode);
 	return 0;
 
  out_cleanup_inode:
@@ -529,22 +503,13 @@ xfs_vn_rename(
 	struct inode	*new_inode = ndentry->d_inode;
 	struct xfs_name	oname;
 	struct xfs_name	nname;
-	int		error;
 
 	xfs_dentry_to_name(&oname, odentry);
 	xfs_dentry_to_name(&nname, ndentry);
 
-	error = xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
+	return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
 			   XFS_I(ndir), &nname, new_inode ?
 			   			XFS_I(new_inode) : NULL);
-	if (likely(!error)) {
-		if (new_inode)
-			xfs_validate_fields(new_inode);
-		xfs_validate_fields(odir);
-		if (ndir != odir)
-			xfs_validate_fields(ndir);
-	}
-	return -error;
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 90bb7ab077a63facbe3aa0b9e3763a0cb956a4c1 Mon Sep 17 00:00:00 2001
From: Barry Naujok <bnaujok@sgi.com>
Date: Mon, 23 Jun 2008 13:25:38 +1000
Subject: [XFS] Fix returning case-preserved name with CI node form directories

xfs_dir2_node_lookup() calls xfs_da_node_lookup_int() which iterates
through leaf blocks containing the matching hash value for the name being
looked up. Inside xfs_da_node_lookup_int(), it calls the
xfs_dir2_leafn_lookup_for_entry() for each leaf block.
xfs_dir2_leafn_lookup_for_entry() iterates through each matching
hash/offset pair doing a name comparison to find the matching dirent.

For CI mode, the state->extrablk retains the details of the block that has
the CI match so xfs_dir2_node_lookup() can return the case-preserved name.

The original implementation didn't retain the xfs_da_buf_t properly, so
the lookup was returning a bogus name to be stored in the dentry.

In the case of unlink, the bad name was passed and in debug mode, ASSERTed
when it can't find the entry.

SGI-PV: 983284

SGI-Modid: xfs-linux-melb:xfs-kern:31337a

Signed-off-by: Barry Naujok <bnaujok@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/xfs_dir2_node.c | 69 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 43 insertions(+), 26 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 1b543022346..fa6c3a5ddbc 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -549,7 +549,6 @@ xfs_dir2_leafn_lookup_for_entry(
 	xfs_dir2_data_entry_t	*dep;		/* data block entry */
 	xfs_inode_t		*dp;		/* incore directory inode */
 	int			error;		/* error return value */
-	int			di = -1;	/* data entry index */
 	int			index;		/* leaf entry index */
 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
 	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
@@ -577,7 +576,6 @@ xfs_dir2_leafn_lookup_for_entry(
 	if (state->extravalid) {
 		curbp = state->extrablk.bp;
 		curdb = state->extrablk.blkno;
-		di = state->extrablk.index;
 	}
 	/*
 	 * Loop over leaf entries with the right hash value.
@@ -602,17 +600,27 @@ xfs_dir2_leafn_lookup_for_entry(
 		 */
 		if (newdb != curdb) {
 			/*
-			 * If we had a block before, drop it.
+			 * If we had a block before that we aren't saving
+			 * for a CI name, drop it
 			 */
-			if (curbp)
+			if (curbp && (args->cmpresult == XFS_CMP_DIFFERENT ||
+						curdb != state->extrablk.blkno))
 				xfs_da_brelse(tp, curbp);
 			/*
-			 * Read the data block.
+			 * If needing the block that is saved with a CI match,
+			 * use it otherwise read in the new data block.
 			 */
-			error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp,
-					newdb), -1, &curbp, XFS_DATA_FORK);
-			if (error)
-				return error;
+			if (args->cmpresult != XFS_CMP_DIFFERENT &&
+					newdb == state->extrablk.blkno) {
+				ASSERT(state->extravalid);
+				curbp = state->extrablk.bp;
+			} else {
+				error = xfs_da_read_buf(tp, dp,
+						xfs_dir2_db_to_da(mp, newdb),
+						-1, &curbp, XFS_DATA_FORK);
+				if (error)
+					return error;
+			}
 			xfs_dir2_data_check(dp, curbp);
 			curdb = newdb;
 		}
@@ -624,38 +632,47 @@ xfs_dir2_leafn_lookup_for_entry(
 		/*
 		 * Compare the entry and if it's an exact match, return
 		 * EEXIST immediately. If it's the first case-insensitive
-		 * match, store the inode number and continue looking.
+		 * match, store the block & inode number and continue looking.
 		 */
 		cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
 		if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
+			/* If there is a CI match block, drop it */
+			if (args->cmpresult != XFS_CMP_DIFFERENT &&
+						curdb != state->extrablk.blkno)
+				xfs_da_brelse(tp, state->extrablk.bp);
 			args->cmpresult = cmp;
 			args->inumber = be64_to_cpu(dep->inumber);
-			di = (int)((char *)dep - (char *)curbp->data);
-			error = EEXIST;
+			*indexp = index;
+			state->extravalid = 1;
+			state->extrablk.bp = curbp;
+			state->extrablk.blkno = curdb;
+			state->extrablk.index = (int)((char *)dep -
+							(char *)curbp->data);
+			state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
 			if (cmp == XFS_CMP_EXACT)
-				goto out;
+				return XFS_ERROR(EEXIST);
 		}
 	}
-	/* Didn't find an exact match. */
-	error = ENOENT;
 	ASSERT(index == be16_to_cpu(leaf->hdr.count) ||
 					(args->op_flags & XFS_DA_OP_OKNOENT));
-out:
 	if (curbp) {
-		/* Giving back a data block. */
-		state->extravalid = 1;
-		state->extrablk.bp = curbp;
-		state->extrablk.index = di;
-		state->extrablk.blkno = curdb;
-		state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
+		if (args->cmpresult == XFS_CMP_DIFFERENT) {
+			/* Giving back last used data block. */
+			state->extravalid = 1;
+			state->extrablk.bp = curbp;
+			state->extrablk.index = -1;
+			state->extrablk.blkno = curdb;
+			state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
+		} else {
+			/* If the curbp is not the CI match block, drop it */
+			if (state->extrablk.bp != curbp)
+				xfs_da_brelse(tp, curbp);
+		}
 	} else {
 		state->extravalid = 0;
 	}
-	/*
-	 * Return the index, that will be the deletion point for remove/replace.
-	 */
 	*indexp = index;
-	return XFS_ERROR(error);
+	return XFS_ERROR(ENOENT);
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 6bd8fc8a55cba263bab0b1c24786e95c5a2dc720 Mon Sep 17 00:00:00 2001
From: Lachlan McIlroy <lachlan@sgi.com>
Date: Mon, 23 Jun 2008 13:25:46 +1000
Subject: [XFS] Convert ASSERTs to XFS_WANT_CORRUPTED_GOTOs

ASSERTs are no good to us on a non-debug build so use
XFS_WANT_CORRUPTED_GOTOs to report extent btree corruption ASAP.

SGI-PV: 983500

SGI-Modid: xfs-linux-melb:xfs-kern:31338a

Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
---
 fs/xfs/xfs_bmap.c | 101 +++++++++++++++++++++++++++---------------------------
 1 file changed, 51 insertions(+), 50 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index a612a90aae4..c21e01a9b2d 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -428,7 +428,8 @@ xfs_bmap_add_attrfork_btree(
 		cur->bc_private.b.firstblock = *firstblock;
 		if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
 			goto error0;
-		ASSERT(stat == 1);	/* must be at least one entry */
+		/* must be at least one entry */
+		XFS_WANT_CORRUPTED_GOTO(stat == 1, error0);
 		if ((error = xfs_bmbt_newroot(cur, flags, &stat)))
 			goto error0;
 		if (stat == 0) {
@@ -816,13 +817,13 @@ xfs_bmap_add_extent_delay_real(
 					RIGHT.br_startblock,
 					RIGHT.br_blockcount, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_delete(cur, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_decrement(cur, 0, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
 					LEFT.br_startblock,
 					LEFT.br_blockcount +
@@ -860,7 +861,7 @@ xfs_bmap_add_extent_delay_real(
 					LEFT.br_startblock, LEFT.br_blockcount,
 					&i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
 					LEFT.br_startblock,
 					LEFT.br_blockcount +
@@ -895,7 +896,7 @@ xfs_bmap_add_extent_delay_real(
 					RIGHT.br_startblock,
 					RIGHT.br_blockcount, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
 					new->br_startblock,
 					PREV.br_blockcount +
@@ -928,11 +929,11 @@ xfs_bmap_add_extent_delay_real(
 					new->br_startblock, new->br_blockcount,
 					&i)))
 				goto done;
-			ASSERT(i == 0);
+			XFS_WANT_CORRUPTED_GOTO(i == 0, done);
 			cur->bc_rec.b.br_state = XFS_EXT_NORM;
 			if ((error = xfs_bmbt_insert(cur, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 		}
 		*dnew = 0;
 		/* DELTA: The in-core extent described by new changed type. */
@@ -963,7 +964,7 @@ xfs_bmap_add_extent_delay_real(
 					LEFT.br_startblock, LEFT.br_blockcount,
 					&i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
 					LEFT.br_startblock,
 					LEFT.br_blockcount +
@@ -1004,11 +1005,11 @@ xfs_bmap_add_extent_delay_real(
 					new->br_startblock, new->br_blockcount,
 					&i)))
 				goto done;
-			ASSERT(i == 0);
+			XFS_WANT_CORRUPTED_GOTO(i == 0, done);
 			cur->bc_rec.b.br_state = XFS_EXT_NORM;
 			if ((error = xfs_bmbt_insert(cur, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 		}
 		if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
 		    ip->i_d.di_nextents > ip->i_df.if_ext_max) {
@@ -1054,7 +1055,7 @@ xfs_bmap_add_extent_delay_real(
 					RIGHT.br_startblock,
 					RIGHT.br_blockcount, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_update(cur, new->br_startoff,
 					new->br_startblock,
 					new->br_blockcount +
@@ -1094,11 +1095,11 @@ xfs_bmap_add_extent_delay_real(
 					new->br_startblock, new->br_blockcount,
 					&i)))
 				goto done;
-			ASSERT(i == 0);
+			XFS_WANT_CORRUPTED_GOTO(i == 0, done);
 			cur->bc_rec.b.br_state = XFS_EXT_NORM;
 			if ((error = xfs_bmbt_insert(cur, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 		}
 		if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
 		    ip->i_d.di_nextents > ip->i_df.if_ext_max) {
@@ -1149,11 +1150,11 @@ xfs_bmap_add_extent_delay_real(
 					new->br_startblock, new->br_blockcount,
 					&i)))
 				goto done;
-			ASSERT(i == 0);
+			XFS_WANT_CORRUPTED_GOTO(i == 0, done);
 			cur->bc_rec.b.br_state = XFS_EXT_NORM;
 			if ((error = xfs_bmbt_insert(cur, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 		}
 		if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
 		    ip->i_d.di_nextents > ip->i_df.if_ext_max) {
@@ -1377,19 +1378,19 @@ xfs_bmap_add_extent_unwritten_real(
 					RIGHT.br_startblock,
 					RIGHT.br_blockcount, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_delete(cur, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_decrement(cur, 0, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_delete(cur, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_decrement(cur, 0, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
 				LEFT.br_startblock,
 				LEFT.br_blockcount + PREV.br_blockcount +
@@ -1426,13 +1427,13 @@ xfs_bmap_add_extent_unwritten_real(
 					PREV.br_startblock, PREV.br_blockcount,
 					&i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_delete(cur, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_decrement(cur, 0, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
 				LEFT.br_startblock,
 				LEFT.br_blockcount + PREV.br_blockcount,
@@ -1469,13 +1470,13 @@ xfs_bmap_add_extent_unwritten_real(
 					RIGHT.br_startblock,
 					RIGHT.br_blockcount, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_delete(cur, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_decrement(cur, 0, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_update(cur, new->br_startoff,
 				new->br_startblock,
 				new->br_blockcount + RIGHT.br_blockcount,
@@ -1508,7 +1509,7 @@ xfs_bmap_add_extent_unwritten_real(
 					new->br_startblock, new->br_blockcount,
 					&i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_update(cur, new->br_startoff,
 				new->br_startblock, new->br_blockcount,
 				newext)))
@@ -1549,7 +1550,7 @@ xfs_bmap_add_extent_unwritten_real(
 					PREV.br_startblock, PREV.br_blockcount,
 					&i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_update(cur,
 				PREV.br_startoff + new->br_blockcount,
 				PREV.br_startblock + new->br_blockcount,
@@ -1596,7 +1597,7 @@ xfs_bmap_add_extent_unwritten_real(
 					PREV.br_startblock, PREV.br_blockcount,
 					&i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_update(cur,
 				PREV.br_startoff + new->br_blockcount,
 				PREV.br_startblock + new->br_blockcount,
@@ -1606,7 +1607,7 @@ xfs_bmap_add_extent_unwritten_real(
 			cur->bc_rec.b = *new;
 			if ((error = xfs_bmbt_insert(cur, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 		}
 		/* DELTA: One in-core extent is split in two. */
 		temp = PREV.br_startoff;
@@ -1640,7 +1641,7 @@ xfs_bmap_add_extent_unwritten_real(
 					PREV.br_startblock,
 					PREV.br_blockcount, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
 				PREV.br_startblock,
 				PREV.br_blockcount - new->br_blockcount,
@@ -1682,7 +1683,7 @@ xfs_bmap_add_extent_unwritten_real(
 					PREV.br_startblock, PREV.br_blockcount,
 					&i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
 				PREV.br_startblock,
 				PREV.br_blockcount - new->br_blockcount,
@@ -1692,11 +1693,11 @@ xfs_bmap_add_extent_unwritten_real(
 					new->br_startblock, new->br_blockcount,
 					&i)))
 				goto done;
-			ASSERT(i == 0);
+			XFS_WANT_CORRUPTED_GOTO(i == 0, done);
 			cur->bc_rec.b.br_state = XFS_EXT_NORM;
 			if ((error = xfs_bmbt_insert(cur, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 		}
 		/* DELTA: One in-core extent is split in two. */
 		temp = PREV.br_startoff;
@@ -1732,7 +1733,7 @@ xfs_bmap_add_extent_unwritten_real(
 					PREV.br_startblock, PREV.br_blockcount,
 					&i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			/* new right extent - oldext */
 			if ((error = xfs_bmbt_update(cur, r[1].br_startoff,
 				r[1].br_startblock, r[1].br_blockcount,
@@ -1744,15 +1745,15 @@ xfs_bmap_add_extent_unwritten_real(
 			cur->bc_rec.b = PREV;
 			if ((error = xfs_bmbt_insert(cur, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_increment(cur, 0, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			/* new middle extent - newext */
 			cur->bc_rec.b = *new;
 			if ((error = xfs_bmbt_insert(cur, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 		}
 		/* DELTA: One in-core extent is split in three. */
 		temp = PREV.br_startoff;
@@ -2097,13 +2098,13 @@ xfs_bmap_add_extent_hole_real(
 					right.br_startblock,
 					right.br_blockcount, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_delete(cur, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_decrement(cur, 0, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_update(cur, left.br_startoff,
 					left.br_startblock,
 					left.br_blockcount +
@@ -2139,7 +2140,7 @@ xfs_bmap_add_extent_hole_real(
 					left.br_startblock,
 					left.br_blockcount, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_update(cur, left.br_startoff,
 					left.br_startblock,
 					left.br_blockcount +
@@ -2174,7 +2175,7 @@ xfs_bmap_add_extent_hole_real(
 					right.br_startblock,
 					right.br_blockcount, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_update(cur, new->br_startoff,
 					new->br_startblock,
 					new->br_blockcount +
@@ -2208,11 +2209,11 @@ xfs_bmap_add_extent_hole_real(
 					new->br_startblock,
 					new->br_blockcount, &i)))
 				goto done;
-			ASSERT(i == 0);
+			XFS_WANT_CORRUPTED_GOTO(i == 0, done);
 			cur->bc_rec.b.br_state = new->br_state;
 			if ((error = xfs_bmbt_insert(cur, &i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 		}
 		/* DELTA: A new extent was added in a hole. */
 		temp = new->br_startoff;
@@ -3131,7 +3132,7 @@ xfs_bmap_del_extent(
 					got.br_startblock, got.br_blockcount,
 					&i)))
 				goto done;
-			ASSERT(i == 1);
+			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 		}
 		da_old = da_new = 0;
 	} else {
@@ -3164,7 +3165,7 @@ xfs_bmap_del_extent(
 		}
 		if ((error = xfs_bmbt_delete(cur, &i)))
 			goto done;
-		ASSERT(i == 1);
+		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 		break;
 
 	case 2:
@@ -3268,7 +3269,7 @@ xfs_bmap_del_extent(
 							got.br_startblock,
 							temp, &i)))
 						goto done;
-					ASSERT(i == 1);
+					XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 					/*
 					 * Update the btree record back
 					 * to the original value.
@@ -3289,7 +3290,7 @@ xfs_bmap_del_extent(
 					error = XFS_ERROR(ENOSPC);
 					goto done;
 				}
-				ASSERT(i == 1);
+				XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			} else
 				flags |= XFS_ILOG_FEXT(whichfork);
 			XFS_IFORK_NEXT_SET(ip, whichfork,
-- 
cgit v1.2.3-70-g09d2


From ddea2d5246b4ffbe49bbfb700aa3dbe717eb0915 Mon Sep 17 00:00:00 2001
From: Lachlan McIlroy <lachlan@sgi.com>
Date: Mon, 23 Jun 2008 13:25:53 +1000
Subject: [XFS] Always reset btree cursor after an insert

After a btree insert operation a cursor can be invalid due to block splits
and a maybe a new root block. We reset the cursor in xfs_bmbt_insert() in
the cases where we think we need to but it isn't enough as we still see
assertions. Just do what we do elsewhere and reset the cursor
unconditionally. Also remove the fix to revalidate the original cursor in
xfs_bmbt_insert().

SGI-PV: 983336

SGI-Modid: xfs-linux-melb:xfs-kern:31342a

Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
Signed-off-by: David Chinner <dgc@sgi.com>
---
 fs/xfs/xfs_bmap.c       | 13 ++++++++++---
 fs/xfs/xfs_bmap_btree.c | 38 ++++----------------------------------
 2 files changed, 14 insertions(+), 37 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index c21e01a9b2d..cf4dee01983 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -1746,11 +1746,18 @@ xfs_bmap_add_extent_unwritten_real(
 			if ((error = xfs_bmbt_insert(cur, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-			if ((error = xfs_bmbt_increment(cur, 0, &i)))
+			/*
+			 * Reset the cursor to the position of the new extent
+			 * we are about to insert as we can't trust it after
+			 * the previous insert.
+			 */
+			if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+					new->br_startblock, new->br_blockcount,
+					&i)))
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+			XFS_WANT_CORRUPTED_GOTO(i == 0, done);
 			/* new middle extent - newext */
-			cur->bc_rec.b = *new;
+			cur->bc_rec.b.br_state = new->br_state;
 			if ((error = xfs_bmbt_insert(cur, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 4f0e849d973..4aa2f11ba56 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -2029,22 +2029,8 @@ xfs_bmbt_increment(
  * Insert the current record at the point referenced by cur.
  *
  * A multi-level split of the tree on insert will invalidate the original
- * cursor. It appears, however, that some callers assume that the cursor is
- * always valid. Hence if we do a multi-level split we need to revalidate the
- * cursor.
- *
- * When a split occurs, we will see a new cursor returned. Use that as a
- * trigger to determine if we need to revalidate the original cursor. If we get
- * a split, then use the original irec to lookup up the path of the record we
- * just inserted.
- *
- * Note that the fact that the btree root is in the inode means that we can
- * have the level of the tree change without a "split" occurring at the root
- * level. What happens is that the root is migrated to an allocated block and
- * the inode root is pointed to it. This means a single split can change the
- * level of the tree (level 2 -> level 3) and invalidate the old cursor. Hence
- * the level change should be accounted as a split so as to correctly trigger a
- * revalidation of the old cursor.
+ * cursor.  All callers of this function should assume that the cursor is
+ * no longer valid and revalidate it.
  */
 int					/* error */
 xfs_bmbt_insert(
@@ -2057,14 +2043,11 @@ xfs_bmbt_insert(
 	xfs_fsblock_t	nbno;
 	xfs_btree_cur_t	*ncur;
 	xfs_bmbt_rec_t	nrec;
-	xfs_bmbt_irec_t	oirec;		/* original irec */
 	xfs_btree_cur_t	*pcur;
-	int		splits = 0;
 
 	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
 	level = 0;
 	nbno = NULLFSBLOCK;
-	oirec = cur->bc_rec.b;
 	xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b);
 	ncur = NULL;
 	pcur = cur;
@@ -2073,13 +2056,11 @@ xfs_bmbt_insert(
 				&i))) {
 			if (pcur != cur)
 				xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
-			goto error0;
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
 		}
 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 		if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) {
-			/* allocating a new root is effectively a split */
-			if (cur->bc_nlevels != pcur->bc_nlevels)
-				splits++;
 			cur->bc_nlevels = pcur->bc_nlevels;
 			cur->bc_private.b.allocated +=
 				pcur->bc_private.b.allocated;
@@ -2093,21 +2074,10 @@ xfs_bmbt_insert(
 			xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
 		}
 		if (ncur) {
-			splits++;
 			pcur = ncur;
 			ncur = NULL;
 		}
 	} while (nbno != NULLFSBLOCK);
-
-	if (splits > 1) {
-		/* revalidate the old cursor as we had a multi-level split */
-		error = xfs_bmbt_lookup_eq(cur, oirec.br_startoff,
-				oirec.br_startblock, oirec.br_blockcount, &i);
-		if (error)
-			goto error0;
-		ASSERT(i == 1);
-	}
-
 	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
 	*stat = i;
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From f9e09f095f323948b26ba09638d2eb3b0578d094 Mon Sep 17 00:00:00 2001
From: Lachlan McIlroy <lachlan@redback.melbourne.sgi.com>
Date: Mon, 23 Jun 2008 13:34:09 +1000
Subject: [XFS] Use the generic xattr methods.

Add missing file fs/xfs/linux-2.6/xfs_xattr.c

SGI-PV: 982343

SGI-Modid: xfs-linux-melb:xfs-kern:31234a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/linux-2.6/xfs_xattr.c | 333 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 333 insertions(+)
 create mode 100644 fs/xfs/linux-2.6/xfs_xattr.c

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
new file mode 100644
index 00000000000..b4acb68fc9f
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_xattr.c
@@ -0,0 +1,333 @@
+/*
+ * Copyright (C) 2008 Christoph Hellwig.
+ * Portions Copyright (C) 2000-2008 Silicon Graphics, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include "xfs.h"
+#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_attr.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_acl.h"
+#include "xfs_vnodeops.h"
+
+#include <linux/posix_acl_xattr.h>
+#include <linux/xattr.h>
+
+
+/*
+ * ACL handling.  Should eventually be moved into xfs_acl.c
+ */
+
+static int
+xfs_decode_acl(const char *name)
+{
+	if (strcmp(name, "posix_acl_access") == 0)
+		return _ACL_TYPE_ACCESS;
+	else if (strcmp(name, "posix_acl_default") == 0)
+		return _ACL_TYPE_DEFAULT;
+	return -EINVAL;
+}
+
+/*
+ * Get system extended attributes which at the moment only
+ * includes Posix ACLs.
+ */
+static int
+xfs_xattr_system_get(struct inode *inode, const char *name,
+		void *buffer, size_t size)
+{
+	int acl;
+
+	acl = xfs_decode_acl(name);
+	if (acl < 0)
+		return acl;
+
+	return xfs_acl_vget(inode, buffer, size, acl);
+}
+
+static int
+xfs_xattr_system_set(struct inode *inode, const char *name,
+		const void *value, size_t size, int flags)
+{
+	int error, acl;
+
+	acl = xfs_decode_acl(name);
+	if (acl < 0)
+		return acl;
+	if (flags & XATTR_CREATE)
+		return -EINVAL;
+
+	if (!value)
+		return xfs_acl_vremove(inode, acl);
+
+	error = xfs_acl_vset(inode, (void *)value, size, acl);
+	if (!error)
+		vn_revalidate(inode);
+	return error;
+}
+
+static struct xattr_handler xfs_xattr_system_handler = {
+	.prefix	= XATTR_SYSTEM_PREFIX,
+	.get	= xfs_xattr_system_get,
+	.set	= xfs_xattr_system_set,
+};
+
+
+/*
+ * Real xattr handling.  The only difference between the namespaces is
+ * a flag passed to the low-level attr code.
+ */
+
+static int
+__xfs_xattr_get(struct inode *inode, const char *name,
+		void *value, size_t size, int xflags)
+{
+	struct xfs_inode *ip = XFS_I(inode);
+	int error, asize = size;
+
+	if (strcmp(name, "") == 0)
+		return -EINVAL;
+
+	/* Convert Linux syscall to XFS internal ATTR flags */
+	if (!size) {
+		xflags |= ATTR_KERNOVAL;
+		value = NULL;
+	}
+
+	error = -xfs_attr_get(ip, name, value, &asize, xflags);
+	if (error)
+		return error;
+	return asize;
+}
+
+static int
+__xfs_xattr_set(struct inode *inode, const char *name, const void *value,
+		size_t size, int flags, int xflags)
+{
+	struct xfs_inode *ip = XFS_I(inode);
+
+	if (strcmp(name, "") == 0)
+		return -EINVAL;
+
+	/* Convert Linux syscall to XFS internal ATTR flags */
+	if (flags & XATTR_CREATE)
+		xflags |= ATTR_CREATE;
+	if (flags & XATTR_REPLACE)
+		xflags |= ATTR_REPLACE;
+
+	if (!value)
+		return -xfs_attr_remove(ip, name, xflags);
+	return -xfs_attr_set(ip, name, (void *)value, size, xflags);
+}
+
+static int
+xfs_xattr_user_get(struct inode *inode, const char *name,
+		void *value, size_t size)
+{
+	return __xfs_xattr_get(inode, name, value, size, 0);
+}
+
+static int
+xfs_xattr_user_set(struct inode *inode, const char *name,
+		const void *value, size_t size, int flags)
+{
+	return __xfs_xattr_set(inode, name, value, size, flags, 0);
+}
+
+static struct xattr_handler xfs_xattr_user_handler = {
+	.prefix	= XATTR_USER_PREFIX,
+	.get	= xfs_xattr_user_get,
+	.set	= xfs_xattr_user_set,
+};
+
+
+static int
+xfs_xattr_trusted_get(struct inode *inode, const char *name,
+		void *value, size_t size)
+{
+	return __xfs_xattr_get(inode, name, value, size, ATTR_ROOT);
+}
+
+static int
+xfs_xattr_trusted_set(struct inode *inode, const char *name,
+		const void *value, size_t size, int flags)
+{
+	return __xfs_xattr_set(inode, name, value, size, flags, ATTR_ROOT);
+}
+
+static struct xattr_handler xfs_xattr_trusted_handler = {
+	.prefix	= XATTR_TRUSTED_PREFIX,
+	.get	= xfs_xattr_trusted_get,
+	.set	= xfs_xattr_trusted_set,
+};
+
+
+static int
+xfs_xattr_secure_get(struct inode *inode, const char *name,
+		void *value, size_t size)
+{
+	return __xfs_xattr_get(inode, name, value, size, ATTR_SECURE);
+}
+
+static int
+xfs_xattr_secure_set(struct inode *inode, const char *name,
+		const void *value, size_t size, int flags)
+{
+	return __xfs_xattr_set(inode, name, value, size, flags, ATTR_SECURE);
+}
+
+static struct xattr_handler xfs_xattr_security_handler = {
+	.prefix	= XATTR_SECURITY_PREFIX,
+	.get	= xfs_xattr_secure_get,
+	.set	= xfs_xattr_secure_set,
+};
+
+
+struct xattr_handler *xfs_xattr_handlers[] = {
+	&xfs_xattr_user_handler,
+	&xfs_xattr_trusted_handler,
+	&xfs_xattr_security_handler,
+	&xfs_xattr_system_handler,
+	NULL
+};
+
+static unsigned int xfs_xattr_prefix_len(int flags)
+{
+	if (flags & XFS_ATTR_SECURE)
+		return sizeof("security");
+	else if (flags & XFS_ATTR_ROOT)
+		return sizeof("trusted");
+	else
+		return sizeof("user");
+}
+
+static const char *xfs_xattr_prefix(int flags)
+{
+	if (flags & XFS_ATTR_SECURE)
+		return xfs_xattr_security_handler.prefix;
+	else if (flags & XFS_ATTR_ROOT)
+		return xfs_xattr_trusted_handler.prefix;
+	else
+		return xfs_xattr_user_handler.prefix;
+}
+
+static int
+xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags,
+		char *name, int namelen, int valuelen, char *value)
+{
+	unsigned int prefix_len = xfs_xattr_prefix_len(flags);
+	char *offset;
+	int arraytop;
+
+	ASSERT(context->count >= 0);
+
+	/*
+	 * Only show root namespace entries if we are actually allowed to
+	 * see them.
+	 */
+	if ((flags & XFS_ATTR_ROOT) && !capable(CAP_SYS_ADMIN))
+		return 0;
+
+	arraytop = context->count + prefix_len + namelen + 1;
+	if (arraytop > context->firstu) {
+		context->count = -1;	/* insufficient space */
+		return 1;
+	}
+	offset = (char *)context->alist + context->count;
+	strncpy(offset, xfs_xattr_prefix(flags), prefix_len);
+	offset += prefix_len;
+	strncpy(offset, name, namelen);			/* real name */
+	offset += namelen;
+	*offset = '\0';
+	context->count += prefix_len + namelen + 1;
+	return 0;
+}
+
+static int
+xfs_xattr_put_listent_sizes(struct xfs_attr_list_context *context, int flags,
+		char *name, int namelen, int valuelen, char *value)
+{
+	context->count += xfs_xattr_prefix_len(flags) + namelen + 1;
+	return 0;
+}
+
+static int
+list_one_attr(const char *name, const size_t len, void *data,
+		size_t size, ssize_t *result)
+{
+	char *p = data + *result;
+
+	*result += len;
+	if (!size)
+		return 0;
+	if (*result > size)
+		return -ERANGE;
+
+	strcpy(p, name);
+	return 0;
+}
+
+ssize_t
+xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
+{
+	struct xfs_attr_list_context context;
+	struct attrlist_cursor_kern cursor = { 0 };
+	struct inode		*inode = dentry->d_inode;
+	int			error;
+
+	/*
+	 * First read the regular on-disk attributes.
+	 */
+	memset(&context, 0, sizeof(context));
+	context.dp = XFS_I(inode);
+	context.cursor = &cursor;
+	context.resynch = 1;
+	context.alist = data;
+	context.bufsize = size;
+	context.firstu = context.bufsize;
+
+	if (size)
+		context.put_listent = xfs_xattr_put_listent;
+	else
+		context.put_listent = xfs_xattr_put_listent_sizes;
+
+	xfs_attr_list_int(&context);
+	if (context.count < 0)
+		return -ERANGE;
+
+	/*
+	 * Then add the two synthetic ACL attributes.
+	 */
+	if (xfs_acl_vhasacl_access(inode)) {
+		error = list_one_attr(POSIX_ACL_XATTR_ACCESS,
+				strlen(POSIX_ACL_XATTR_ACCESS) + 1,
+				data, size, &context.count);
+		if (error)
+			return error;
+	}
+
+	if (xfs_acl_vhasacl_default(inode)) {
+		error = list_one_attr(POSIX_ACL_XATTR_DEFAULT,
+				strlen(POSIX_ACL_XATTR_DEFAULT) + 1,
+				data, size, &context.count);
+		if (error)
+			return error;
+	}
+
+	return context.count;
+}
-- 
cgit v1.2.3-70-g09d2


From 07fe4dd48d046feeff8705a2a224a8fba050b1c6 Mon Sep 17 00:00:00 2001
From: Barry Naujok <bnaujok@sgi.com>
Date: Fri, 27 Jun 2008 13:32:11 +1000
Subject: [XFS] Fix CI lookup in leaf-form directories

Instead of comparing buffer pointers, compare buffer block numbers and
don't keep buff

SGI-PV: 983564

SGI-Modid: xfs-linux-melb:xfs-kern:31346a

Signed-off-by: Barry Naujok <bnaujok@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/xfs_dir2_leaf.c | 49 +++++++++++++++++++++++++------------------------
 1 file changed, 25 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index f110242d6df..93535992cb6 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -1321,8 +1321,8 @@ xfs_dir2_leaf_lookup_int(
 	int			*indexp,	/* out: index in leaf block */
 	xfs_dabuf_t		**dbpp)		/* out: data buffer */
 {
-	xfs_dir2_db_t		curdb;		/* current data block number */
-	xfs_dabuf_t		*dbp;		/* data buffer */
+	xfs_dir2_db_t		curdb = -1;	/* current data block number */
+	xfs_dabuf_t		*dbp = NULL;	/* data buffer */
 	xfs_dir2_data_entry_t	*dep;		/* data entry */
 	xfs_inode_t		*dp;		/* incore directory inode */
 	int			error;		/* error return code */
@@ -1333,7 +1333,7 @@ xfs_dir2_leaf_lookup_int(
 	xfs_mount_t		*mp;		/* filesystem mount point */
 	xfs_dir2_db_t		newdb;		/* new data block number */
 	xfs_trans_t		*tp;		/* transaction pointer */
-	xfs_dabuf_t		*cbp;		/* case match data buffer */
+	xfs_dir2_db_t		cidb = -1;	/* case match data block no. */
 	enum xfs_dacmp		cmp;		/* name compare result */
 
 	dp = args->dp;
@@ -1342,11 +1342,10 @@ xfs_dir2_leaf_lookup_int(
 	/*
 	 * Read the leaf block into the buffer.
 	 */
-	if ((error =
-	    xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp,
-		    XFS_DATA_FORK))) {
+	error = xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp,
+							XFS_DATA_FORK);
+	if (error)
 		return error;
-	}
 	*lbpp = lbp;
 	leaf = lbp->data;
 	xfs_dir2_leaf_check(dp, lbp);
@@ -1358,9 +1357,7 @@ xfs_dir2_leaf_lookup_int(
 	 * Loop over all the entries with the right hash value
 	 * looking to match the name.
 	 */
-	cbp = NULL;
-	for (lep = &leaf->ents[index], dbp = NULL, curdb = -1;
-				index < be16_to_cpu(leaf->hdr.count) &&
+	for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) &&
 				be32_to_cpu(lep->hashval) == args->hashval;
 				lep++, index++) {
 		/*
@@ -1377,7 +1374,7 @@ xfs_dir2_leaf_lookup_int(
 		 * need to pitch the old one and read the new one.
 		 */
 		if (newdb != curdb) {
-			if (dbp != cbp)
+			if (dbp)
 				xfs_da_brelse(tp, dbp);
 			error = xfs_da_read_buf(tp, dp,
 						xfs_dir2_db_to_da(mp, newdb),
@@ -1403,35 +1400,39 @@ xfs_dir2_leaf_lookup_int(
 		if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
 			args->cmpresult = cmp;
 			*indexp = index;
-			/*
-			 * case exact match: release the stored CI buffer if it
-			 * exists and return the current buffer.
-			 */
+			/* case exact match: return the current buffer. */
 			if (cmp == XFS_CMP_EXACT) {
-				if (cbp && cbp != dbp)
-					xfs_da_brelse(tp, cbp);
 				*dbpp = dbp;
 				return 0;
 			}
-			cbp = dbp;
+			cidb = curdb;
 		}
 	}
 	ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
 	/*
-	 * Here, we can only be doing a lookup (not a rename or replace).
-	 * If a case-insensitive match was found earlier, release the current
-	 * buffer and return the stored CI matching buffer.
+	 * Here, we can only be doing a lookup (not a rename or remove).
+	 * If a case-insensitive match was found earlier, re-read the
+	 * appropriate data block if required and return it.
 	 */
 	if (args->cmpresult == XFS_CMP_CASE) {
-		if (cbp != dbp)
+		ASSERT(cidb != -1);
+		if (cidb != curdb) {
 			xfs_da_brelse(tp, dbp);
-		*dbpp = cbp;
+			error = xfs_da_read_buf(tp, dp,
+						xfs_dir2_db_to_da(mp, cidb),
+						-1, &dbp, XFS_DATA_FORK);
+			if (error) {
+				xfs_da_brelse(tp, lbp);
+				return error;
+			}
+		}
+		*dbpp = dbp;
 		return 0;
 	}
 	/*
 	 * No match found, return ENOENT.
 	 */
-	ASSERT(cbp == NULL);
+	ASSERT(cidb == -1);
 	if (dbp)
 		xfs_da_brelse(tp, dbp);
 	xfs_da_brelse(tp, lbp);
-- 
cgit v1.2.3-70-g09d2


From 90ad58a83accbeb8de09de4a55d3e6b429767eae Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Fri, 27 Jun 2008 13:32:19 +1000
Subject: [XFS] Check for invalid flags in xfs_attrlist_by_handle.

xfs_attrlist_by_handle should only take the ATTR_ flags for the root
namespaces. The ATTR_KERN* flags may change at anytime and expect special
preconditions that can't be guaranteed for userspace-originating requests.
For example passing down ATTR_KERNNOVAL through xfs_attrlist_by_handle
will hit an assert in debug builds currently.

SGI-PV: 983677

SGI-Modid: xfs-linux-melb:xfs-kern:31351a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/linux-2.6/xfs_ioctl.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 01939ba2d8d..993f5720200 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -468,6 +468,12 @@ xfs_attrlist_by_handle(
 	if (al_hreq.buflen > XATTR_LIST_MAX)
 		return -XFS_ERROR(EINVAL);
 
+	/*
+	 * Reject flags, only allow namespaces.
+	 */
+	if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
+		return -XFS_ERROR(EINVAL);
+
 	error = xfs_vget_fsop_handlereq(mp, parinode, &al_hreq.hreq, &inode);
 	if (error)
 		goto out;
-- 
cgit v1.2.3-70-g09d2


From e182f57ac019b034b40d16f3c6d8e86826aecd56 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Fri, 27 Jun 2008 13:32:31 +1000
Subject: [XFS] attrmulti cleanup

xfs_attrmulti_by_handle currently request the size based on
sizeof(attr_multiop_t) but should be using sizeof(xfs_attr_multiop_t)
because that is what it is dealing with. Despite beeing wrong this
actually harmless in practice because both structures are the same size on
all platforms.

But this sizeof was the only user of struct attr_multiop so we can just
kill it. Also move the ATTR_OP_* defines xfs_attr.h into the struct
xfs_attr_multiop defintion in xfs_fs.h because they are only used with
that structure, and are part of the user ABI for the
XFS_IOC_ATTRMULTI_BY_HANDLE ioctl.

SGI-PV: 983508

SGI-Modid: xfs-linux-melb:xfs-kern:31352a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/linux-2.6/xfs_ioctl.c |  2 +-
 fs/xfs/xfs_attr.h            | 16 ----------------
 fs/xfs/xfs_fs.h              |  3 +++
 3 files changed, 4 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 993f5720200..8eddaff3684 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -593,7 +593,7 @@ xfs_attrmulti_by_handle(
 		goto out;
 
 	error = E2BIG;
-	size = am_hreq.opcount * sizeof(attr_multiop_t);
+	size = am_hreq.opcount * sizeof(xfs_attr_multiop_t);
 	if (!size || size > 16 * PAGE_SIZE)
 		goto out_vn_rele;
 
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index 3115dcc6723..8b2d31c19e4 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -84,22 +84,6 @@ typedef struct attrlist_ent {	/* data from attr_list() */
 	((attrlist_ent_t *)			\
 	 &((char *)buffer)[ ((attrlist_t *)(buffer))->al_offset[index] ])
 
-/*
- * Multi-attribute operation vector.
- */
-typedef struct attr_multiop {
-	int	am_opcode;	/* operation to perform (ATTR_OP_GET, etc.) */
-	int	am_error;	/* [out arg] result of this sub-op (an errno) */
-	char	*am_attrname;	/* attribute name to work with */
-	char	*am_attrvalue;	/* [in/out arg] attribute value (raw bytes) */
-	int	am_length;	/* [in/out arg] length of value */
-	int	am_flags;	/* bitwise OR of attr API flags defined above */
-} attr_multiop_t;
-
-#define ATTR_OP_GET	1	/* return the indicated attr's value */
-#define ATTR_OP_SET	2	/* set/create the indicated attr/value pair */
-#define ATTR_OP_REMOVE	3	/* remove the indicated attr */
-
 /*
  * Kernel-internal version of the attrlist cursor.
  */
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 6ca749897c5..01c0cc88d3f 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -372,6 +372,9 @@ typedef struct xfs_fsop_attrlist_handlereq {
 
 typedef struct xfs_attr_multiop {
 	__u32		am_opcode;
+#define ATTR_OP_GET	1	/* return the indicated attr's value */
+#define ATTR_OP_SET	2	/* set/create the indicated attr/value pair */
+#define ATTR_OP_REMOVE	3	/* remove the indicated attr */
 	__s32		am_error;
 	void		__user *am_attrname;
 	void		__user *am_attrvalue;
-- 
cgit v1.2.3-70-g09d2


From 4ddd8bb1d25f9cbb345e1f64a56c0f641a787ede Mon Sep 17 00:00:00 2001
From: Lachlan McIlroy <lachlan@sgi.com>
Date: Fri, 27 Jun 2008 13:32:53 +1000
Subject: [XFS] use minleft when allocating in xfs_bmbt_split()

The bmap btree split code relies on a previous data extent allocation
(from xfs_bmap_btalloc()) to find an AG that has sufficient space to
perform a full btree split, when inserting the extent. When converting
unwritten extents we don't allocate a data extent so a btree split will be
the first allocation. In this case we need to set minleft so the allocator
will pick an AG that has space to complete the split(s).

SGI-PV: 983338

SGI-Modid: xfs-linux-melb:xfs-kern:31357a

Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
Signed-off-by: David Chinner <dgc@sgi.com>
---
 fs/xfs/xfs_bmap_btree.c | 15 ++++++++++++++-
 fs/xfs/xfs_iomap.c      | 10 ++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 4aa2f11ba56..3fc09cd8d51 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -1493,12 +1493,25 @@ xfs_bmbt_split(
 	left = XFS_BUF_TO_BMBT_BLOCK(lbp);
 	args.fsbno = cur->bc_private.b.firstblock;
 	args.firstblock = args.fsbno;
+	args.minleft = 0;
 	if (args.fsbno == NULLFSBLOCK) {
 		args.fsbno = lbno;
 		args.type = XFS_ALLOCTYPE_START_BNO;
+		/*
+		 * Make sure there is sufficient room left in the AG to
+		 * complete a full tree split for an extent insert.  If
+		 * we are converting the middle part of an extent then
+		 * we may need space for two tree splits.
+		 *
+		 * We are relying on the caller to make the correct block
+		 * reservation for this operation to succeed.  If the
+		 * reservation amount is insufficient then we may fail a
+		 * block allocation here and corrupt the filesystem.
+		 */
+		args.minleft = xfs_trans_get_block_res(args.tp);
 	} else
 		args.type = XFS_ALLOCTYPE_NEAR_BNO;
-	args.mod = args.minleft = args.alignment = args.total = args.isfl =
+	args.mod = args.alignment = args.total = args.isfl =
 		args.userdata = args.minalignslop = 0;
 	args.minlen = args.maxlen = args.prod = 1;
 	args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 7edcde691d1..67f22b2b44b 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -889,6 +889,16 @@ xfs_iomap_write_unwritten(
 	count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
 	count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);
 
+	/*
+	 * Reserve enough blocks in this transaction for two complete extent
+	 * btree splits.  We may be converting the middle part of an unwritten
+	 * extent and in this case we will insert two new extents in the btree
+	 * each of which could cause a full split.
+	 *
+	 * This reservation amount will be used in the first call to
+	 * xfs_bmbt_split() to select an AG with enough space to satisfy the
+	 * rest of the operation.
+	 */
 	resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
 
 	do {
-- 
cgit v1.2.3-70-g09d2


From b877e3d37dda0154868a3c78f02f38a1ec14ce79 Mon Sep 17 00:00:00 2001
From: Lachlan McIlroy <lachlan@sgi.com>
Date: Fri, 27 Jun 2008 13:33:03 +1000
Subject: [XFS] Restore the lowspace extent allocator algorithm

When free space is running low the extent allocator may choose to allocate
an extent from an AG without leaving sufficient space for a btree split
when inserting the new extent (see where xfs_bmap_btalloc() sets minleft
to 0). In this case the allocator will enable the lowspace algorithm which
is supposed to allow further allocations (such as btree splits and
newroots) to allocate from sequential AGs. This algorithm has been broken
for a long time and this patch restores its behaviour.

SGI-PV: 983338

SGI-Modid: xfs-linux-melb:xfs-kern:31358a

Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
Signed-off-by: David Chinner <dgc@sgi.com>
---
 fs/xfs/xfs_bmap.h       | 13 ++++++++++++-
 fs/xfs/xfs_bmap_btree.c |  8 ++++++--
 2 files changed, 18 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 6ff70cda451..9f3e3a836d1 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -54,12 +54,23 @@ typedef struct xfs_bmap_free_item
 
 /*
  * Header for free extent list.
+ *
+ * xbf_low is used by the allocator to activate the lowspace algorithm -
+ * when free space is running low the extent allocator may choose to
+ * allocate an extent from an AG without leaving sufficient space for
+ * a btree split when inserting the new extent.  In this case the allocator
+ * will enable the lowspace algorithm which is supposed to allow further
+ * allocations (such as btree splits and newroots) to allocate from
+ * sequential AGs.  In order to avoid locking AGs out of order the lowspace
+ * algorithm will start searching for free space from AG 0.  If the correct
+ * transaction reservations have been made then this algorithm will eventually
+ * find all the space it needs.
  */
 typedef	struct xfs_bmap_free
 {
 	xfs_bmap_free_item_t	*xbf_first;	/* list of to-be-free extents */
 	int			xbf_count;	/* count of items on list */
-	int			xbf_low;	/* kludge: alloc in low mode */
+	int			xbf_low;	/* alloc in low mode */
 } xfs_bmap_free_t;
 
 #define	XFS_BMAP_MAX_NMAP	4
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 3fc09cd8d51..1140cef4ba9 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -1509,7 +1509,9 @@ xfs_bmbt_split(
 		 * block allocation here and corrupt the filesystem.
 		 */
 		args.minleft = xfs_trans_get_block_res(args.tp);
-	} else
+	} else if (cur->bc_private.b.flist->xbf_low)
+		args.type = XFS_ALLOCTYPE_START_BNO;
+	else
 		args.type = XFS_ALLOCTYPE_NEAR_BNO;
 	args.mod = args.alignment = args.total = args.isfl =
 		args.userdata = args.minalignslop = 0;
@@ -2237,7 +2239,9 @@ xfs_bmbt_newroot(
 #endif
 		args.fsbno = be64_to_cpu(*pp);
 		args.type = XFS_ALLOCTYPE_START_BNO;
-	} else
+	} else if (cur->bc_private.b.flist->xbf_low)
+		args.type = XFS_ALLOCTYPE_START_BNO;
+	else
 		args.type = XFS_ALLOCTYPE_NEAR_BNO;
 	if ((error = xfs_alloc_vextent(&args))) {
 		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
-- 
cgit v1.2.3-70-g09d2


From 313b5c767a044c7a0db5e773cb7aea70383b2627 Mon Sep 17 00:00:00 2001
From: Lachlan McIlroy <lachlan@sgi.com>
Date: Fri, 27 Jun 2008 13:33:11 +1000
Subject: [XFS] Allow xfs_bmbt_split() to fallback to the lowspace allocator
 algorithm

If xfs_bmbt_split() cannot find an AG with sufficient free space to
satisfy a full extent btree split then fall back to the lowspace allocator
algorithm.

SGI-PV: 983338

SGI-Modid: xfs-linux-melb:xfs-kern:31359a

Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
Signed-off-by: David Chinner <dgc@sgi.com>
---
 fs/xfs/xfs_bmap_btree.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 1140cef4ba9..23efad29a5c 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -1525,6 +1525,21 @@ xfs_bmbt_split(
 		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
 		return error;
 	}
+	if (args.fsbno == NULLFSBLOCK && args.minleft) {
+		/*
+		 * Could not find an AG with enough free space to satisfy
+		 * a full btree split.  Try again without minleft and if
+		 * successful activate the lowspace algorithm.
+		 */
+		args.fsbno = 0;
+		args.type = XFS_ALLOCTYPE_FIRST_AG;
+		args.minleft = 0;
+		if ((error = xfs_alloc_vextent(&args))) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+		cur->bc_private.b.flist->xbf_low = 1;
+	}
 	if (args.fsbno == NULLFSBLOCK) {
 		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
 		*stat = 0;
-- 
cgit v1.2.3-70-g09d2


From 8f8670bb1cfa177d35c54e4cc96152dc425a7ab3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Fri, 27 Jun 2008 13:34:26 +1000
Subject: [XFS] Don't update mtime on rename source

As reported by Michael-John Turner XFS updates the mtime on the source
inode of a rename call in case it's a directory and changes the parent.

This doesn't make any sense, is not mentioned in the standards and not
performed by any other Linux filesystems so remove it.

SGI-PV: 983684

SGI-Modid: xfs-linux-melb:xfs-kern:31364a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Barry Naujok <bnaujok@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/xfs_rename.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index d8063e1ad29..d700dacdb10 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -336,21 +336,17 @@ xfs_rename(
 		ASSERT(error != EEXIST);
 		if (error)
 			goto abort_return;
-		xfs_ichgtime(src_ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-
-	} else {
-		/*
-		 * We always want to hit the ctime on the source inode.
-		 * We do it in the if clause above for the 'new_parent &&
-		 * src_is_directory' case, and here we get all the other
-		 * cases.  This isn't strictly required by the standards
-		 * since the source inode isn't really being changed,
-		 * but old unix file systems did it and some incremental
-		 * backup programs won't work without it.
-		 */
-		xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG);
 	}
 
+	/*
+	 * We always want to hit the ctime on the source inode.
+	 *
+	 * This isn't strictly required by the standards since the source
+	 * inode isn't really being changed, but old unix file systems did
+	 * it and some incremental backup programs won't work without it.
+	 */
+	xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG);
+
 	/*
 	 * Adjust the link count on src_dp.  This is necessary when
 	 * renaming a directory, either within one parent when
-- 
cgit v1.2.3-70-g09d2


From 2edbddd5f46cc123b68c11179115041c54759fa2 Mon Sep 17 00:00:00 2001
From: Lachlan McIlroy <lachlan@sgi.com>
Date: Fri, 27 Jun 2008 13:34:34 +1000
Subject: [XFS] Don't assert if trying to mount with blocksize > pagesize

If we don't do the blocksize/PAGESIZE check before calling
xfs_sb_validate_fsb_count() we can assert if we try to mount with a
blocksize > pagesize. The assert is valid so leave it and just move the
blocksize/pagesize check earlier.

SGI-PV: 983734

SGI-Modid: xfs-linux-melb:xfs-kern:31365a

Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
Signed-off-by: David Chinner <dgc@sgi.com>
---
 fs/xfs/xfs_mount.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 1bfaa204f68..6c5d1325e7f 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -258,6 +258,19 @@ xfs_mount_validate_sb(
 		return XFS_ERROR(EFSCORRUPTED);
 	}
 
+	/*
+	 * Until this is fixed only page-sized or smaller data blocks work.
+	 */
+	if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
+		xfs_fs_mount_cmn_err(flags,
+			"file system with blocksize %d bytes",
+			sbp->sb_blocksize);
+		xfs_fs_mount_cmn_err(flags,
+			"only pagesize (%ld) or less will currently work.",
+			PAGE_SIZE);
+		return XFS_ERROR(ENOSYS);
+	}
+
 	if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
 	    xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
 		xfs_fs_mount_cmn_err(flags,
@@ -279,19 +292,6 @@ xfs_mount_validate_sb(
 		return XFS_ERROR(ENOSYS);
 	}
 
-	/*
-	 * Until this is fixed only page-sized or smaller data blocks work.
-	 */
-	if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
-		xfs_fs_mount_cmn_err(flags,
-			"file system with blocksize %d bytes",
-			sbp->sb_blocksize);
-		xfs_fs_mount_cmn_err(flags,
-			"only pagesize (%ld) or less will currently work.",
-			PAGE_SIZE);
-		return XFS_ERROR(ENOSYS);
-	}
-
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 136f8f21b6d564f553abe6130127d16fb50432d3 Mon Sep 17 00:00:00 2001
From: Tim Shimmin <tes@sgi.com>
Date: Fri, 27 Jun 2008 13:34:42 +1000
Subject: [XFS] Fix up problem when CONFIG_XFS_POSIX_ACL is not set and yet we
 still can use the _ACL_TYPE_* definitions in linux-2.6/xfs_xattr.c. The
 forthcoming generic acl code will also fix this problem.

SGI-PV: 982343

SGI-Modid: xfs-linux-melb:xfs-kern:31369a

Signed-off-by: Tim Shimmin <tes@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/xfs_acl.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 332a772461c..323ee94cf83 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -46,6 +46,8 @@ typedef struct xfs_acl {
 #define SGI_ACL_FILE_SIZE	(sizeof(SGI_ACL_FILE)-1)
 #define SGI_ACL_DEFAULT_SIZE	(sizeof(SGI_ACL_DEFAULT)-1)
 
+#define _ACL_TYPE_ACCESS	1
+#define _ACL_TYPE_DEFAULT	2
 
 #ifdef CONFIG_XFS_POSIX_ACL
 
@@ -66,8 +68,6 @@ extern int xfs_acl_vset(bhv_vnode_t *, void *, size_t, int);
 extern int xfs_acl_vget(bhv_vnode_t *, void *, size_t, int);
 extern int xfs_acl_vremove(bhv_vnode_t *, int);
 
-#define _ACL_TYPE_ACCESS	1
-#define _ACL_TYPE_DEFAULT	2
 #define _ACL_PERM_INVALID(perm)	((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE))
 
 #define _ACL_INHERIT(c,m,d)	(xfs_acl_inherit(c,m,d))
-- 
cgit v1.2.3-70-g09d2


From 9f8868ffb39c2f80ba69df4552cb530b6634f646 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Fri, 18 Jul 2008 17:11:46 +1000
Subject: [XFS] streamline init/exit path

Currently the xfs module init/exit code is a mess. It's farmed out over a
lot of function with very little error checking. This patch makes sure we
propagate all initialization failures properly and clean up after them.
Various runtime initializations are replaced with compile-time
initializations where possible to make this easier. The exit path is
similarly consolidated.

There's now split out function to create/destroy the kmem zones and
alloc/free the trace buffers. I've also changed the ktrace allocations to
KM_MAYFAIL and handled errors resulting from that.

And yes, we really should replace the XFS_*_TRACE ifdefs with a single
XFS_TRACE..

SGI-PV: 976035

SGI-Modid: xfs-linux-melb:xfs-kern:31354a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Niv Sardi <xaiki@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/linux-2.6/xfs_stats.c  |  15 +-
 fs/xfs/linux-2.6/xfs_stats.h  |  11 +-
 fs/xfs/linux-2.6/xfs_super.c  | 330 +++++++++++++++++++++++++++++++++++-------
 fs/xfs/linux-2.6/xfs_sysctl.c |   8 +-
 fs/xfs/linux-2.6/xfs_sysctl.h |   4 +-
 fs/xfs/support/uuid.c         |   8 +-
 fs/xfs/support/uuid.h         |   1 -
 fs/xfs/xfs_da_btree.c         |   2 +-
 fs/xfs/xfs_error.c            |   8 -
 fs/xfs/xfs_error.h            |   1 -
 fs/xfs/xfs_filestream.c       |   4 +-
 fs/xfs/xfs_mount.h            |   3 -
 fs/xfs/xfs_mru_cache.c        |  13 +-
 fs/xfs/xfs_vfsops.c           | 131 -----------------
 14 files changed, 318 insertions(+), 221 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
index e480b610205..3d5b67c075c 100644
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -98,12 +98,21 @@ xfs_read_xfsstats(
 	return len;
 }
 
-void
+int
 xfs_init_procfs(void)
 {
 	if (!proc_mkdir("fs/xfs", NULL))
-		return;
-	create_proc_read_entry("fs/xfs/stat", 0, NULL, xfs_read_xfsstats, NULL);
+		goto out;
+
+	if (!create_proc_read_entry("fs/xfs/stat", 0, NULL,
+			xfs_read_xfsstats, NULL))
+		goto out_remove_entry;
+	return 0;
+
+ out_remove_entry:
+	remove_proc_entry("fs/xfs", NULL);
+ out:
+	return -ENOMEM;
 }
 
 void
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
index afd0b0d5fdb..3fa753d7b70 100644
--- a/fs/xfs/linux-2.6/xfs_stats.h
+++ b/fs/xfs/linux-2.6/xfs_stats.h
@@ -134,7 +134,7 @@ DECLARE_PER_CPU(struct xfsstats, xfsstats);
 #define XFS_STATS_DEC(v)	(per_cpu(xfsstats, current_cpu()).v--)
 #define XFS_STATS_ADD(v, inc)	(per_cpu(xfsstats, current_cpu()).v += (inc))
 
-extern void xfs_init_procfs(void);
+extern int xfs_init_procfs(void);
 extern void xfs_cleanup_procfs(void);
 
 
@@ -144,8 +144,13 @@ extern void xfs_cleanup_procfs(void);
 # define XFS_STATS_DEC(count)
 # define XFS_STATS_ADD(count, inc)
 
-static inline void xfs_init_procfs(void) { };
-static inline void xfs_cleanup_procfs(void) { };
+static inline int xfs_init_procfs(void)
+{
+	return 0
+};
+static inline void xfs_cleanup_procfs(void)
+{
+};
 
 #endif	/* !CONFIG_PROC_FS */
 
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 967603c4699..7c621dfee73 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -53,6 +53,11 @@
 #include "xfs_log_priv.h"
 #include "xfs_trans_priv.h"
 #include "xfs_filestream.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2_trace.h"
+#include "xfs_extfree_item.h"
+#include "xfs_mru_cache.h"
+#include "xfs_inode_item.h"
 
 #include <linux/namei.h>
 #include <linux/init.h>
@@ -987,42 +992,6 @@ xfs_fs_inode_init_once(
 	inode_init_once(vn_to_inode((bhv_vnode_t *)vnode));
 }
 
-STATIC int __init
-xfs_init_zones(void)
-{
-	xfs_vnode_zone = kmem_zone_init_flags(sizeof(bhv_vnode_t), "xfs_vnode",
-					KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
-					KM_ZONE_SPREAD,
-					xfs_fs_inode_init_once);
-	if (!xfs_vnode_zone)
-		goto out;
-
-	xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
-	if (!xfs_ioend_zone)
-		goto out_destroy_vnode_zone;
-
-	xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
-						  xfs_ioend_zone);
-	if (!xfs_ioend_pool)
-		goto out_free_ioend_zone;
-	return 0;
-
- out_free_ioend_zone:
-	kmem_zone_destroy(xfs_ioend_zone);
- out_destroy_vnode_zone:
-	kmem_zone_destroy(xfs_vnode_zone);
- out:
-	return -ENOMEM;
-}
-
-STATIC void
-xfs_destroy_zones(void)
-{
-	mempool_destroy(xfs_ioend_pool);
-	kmem_zone_destroy(xfs_vnode_zone);
-	kmem_zone_destroy(xfs_ioend_zone);
-}
-
 /*
  * Attempt to flush the inode, this will actually fail
  * if the inode is pinned, but we dirty the inode again
@@ -1939,9 +1908,235 @@ static struct file_system_type xfs_fs_type = {
 	.fs_flags		= FS_REQUIRES_DEV,
 };
 
+STATIC int __init
+xfs_alloc_trace_bufs(void)
+{
+#ifdef XFS_ALLOC_TRACE
+	xfs_alloc_trace_buf = ktrace_alloc(XFS_ALLOC_TRACE_SIZE, KM_MAYFAIL);
+	if (!xfs_alloc_trace_buf)
+		goto out;
+#endif
+#ifdef XFS_BMAP_TRACE
+	xfs_bmap_trace_buf = ktrace_alloc(XFS_BMAP_TRACE_SIZE, KM_MAYFAIL);
+	if (!xfs_bmap_trace_buf)
+		goto out_free_alloc_trace;
+#endif
+#ifdef XFS_BMBT_TRACE
+	xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_MAYFAIL);
+	if (!xfs_bmbt_trace_buf)
+		goto out_free_bmap_trace;
+#endif
+#ifdef XFS_ATTR_TRACE
+	xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_MAYFAIL);
+	if (!xfs_attr_trace_buf)
+		goto out_free_bmbt_trace;
+#endif
+#ifdef XFS_DIR2_TRACE
+	xfs_dir2_trace_buf = ktrace_alloc(XFS_DIR2_GTRACE_SIZE, KM_MAYFAIL);
+	if (!xfs_dir2_trace_buf)
+		goto out_free_attr_trace;
+#endif
+
+	return 0;
+
+#ifdef XFS_DIR2_TRACE
+ out_free_attr_trace:
+#endif
+#ifdef XFS_ATTR_TRACE
+	ktrace_free(xfs_attr_trace_buf);
+ out_free_bmbt_trace:
+#endif
+#ifdef XFS_BMBT_TRACE
+	ktrace_free(xfs_bmbt_trace_buf);
+ out_free_bmap_trace:
+#endif
+#ifdef XFS_BMAP_TRACE
+	ktrace_free(xfs_bmap_trace_buf);
+ out_free_alloc_trace:
+#endif
+#ifdef XFS_ALLOC_TRACE
+	ktrace_free(xfs_alloc_trace_buf);
+ out:
+#endif
+	return -ENOMEM;
+}
+
+STATIC void
+xfs_free_trace_bufs(void)
+{
+#ifdef XFS_DIR2_TRACE
+	ktrace_free(xfs_dir2_trace_buf);
+#endif
+#ifdef XFS_ATTR_TRACE
+	ktrace_free(xfs_attr_trace_buf);
+#endif
+#ifdef XFS_BMBT_TRACE
+	ktrace_free(xfs_bmbt_trace_buf);
+#endif
+#ifdef XFS_BMAP_TRACE
+	ktrace_free(xfs_bmap_trace_buf);
+#endif
+#ifdef XFS_ALLOC_TRACE
+	ktrace_free(xfs_alloc_trace_buf);
+#endif
+}
+
+STATIC int __init
+xfs_init_zones(void)
+{
+	xfs_vnode_zone = kmem_zone_init_flags(sizeof(bhv_vnode_t), "xfs_vnode",
+					KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
+					KM_ZONE_SPREAD,
+					xfs_fs_inode_init_once);
+	if (!xfs_vnode_zone)
+		goto out;
+
+	xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
+	if (!xfs_ioend_zone)
+		goto out_destroy_vnode_zone;
+
+	xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
+						  xfs_ioend_zone);
+	if (!xfs_ioend_pool)
+		goto out_destroy_ioend_zone;
+
+	xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
+						"xfs_log_ticket");
+	if (!xfs_log_ticket_zone)
+		goto out_destroy_ioend_pool;
+
+	xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
+						"xfs_bmap_free_item");
+	if (!xfs_bmap_free_item_zone)
+		goto out_destroy_log_ticket_zone;
+	xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
+						"xfs_btree_cur");
+	if (!xfs_btree_cur_zone)
+		goto out_destroy_bmap_free_item_zone;
+
+	xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
+						"xfs_da_state");
+	if (!xfs_da_state_zone)
+		goto out_destroy_btree_cur_zone;
+
+	xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
+	if (!xfs_dabuf_zone)
+		goto out_destroy_da_state_zone;
+
+	xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
+	if (!xfs_ifork_zone)
+		goto out_destroy_dabuf_zone;
+
+	xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
+	if (!xfs_trans_zone)
+		goto out_destroy_ifork_zone;
+
+	/*
+	 * The size of the zone allocated buf log item is the maximum
+	 * size possible under XFS.  This wastes a little bit of memory,
+	 * but it is much faster.
+	 */
+	xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) +
+				(((XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK) /
+				  NBWORD) * sizeof(int))), "xfs_buf_item");
+	if (!xfs_buf_item_zone)
+		goto out_destroy_trans_zone;
+
+	xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
+			((XFS_EFD_MAX_FAST_EXTENTS - 1) *
+				 sizeof(xfs_extent_t))), "xfs_efd_item");
+	if (!xfs_efd_zone)
+		goto out_destroy_buf_item_zone;
+
+	xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) +
+			((XFS_EFI_MAX_FAST_EXTENTS - 1) *
+				sizeof(xfs_extent_t))), "xfs_efi_item");
+	if (!xfs_efi_zone)
+		goto out_destroy_efd_zone;
+
+	xfs_inode_zone =
+		kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
+					KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
+					KM_ZONE_SPREAD, NULL);
+	if (!xfs_inode_zone)
+		goto out_destroy_efi_zone;
+
+	xfs_ili_zone =
+		kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
+					KM_ZONE_SPREAD, NULL);
+	if (!xfs_ili_zone)
+		goto out_destroy_inode_zone;
+
+#ifdef CONFIG_XFS_POSIX_ACL
+	xfs_acl_zone = kmem_zone_init(sizeof(xfs_acl_t), "xfs_acl");
+	if (!xfs_acl_zone)
+		goto out_destroy_ili_zone;
+#endif
+
+	return 0;
+
+#ifdef CONFIG_XFS_POSIX_ACL
+ out_destroy_ili_zone:
+#endif
+	kmem_zone_destroy(xfs_ili_zone);
+ out_destroy_inode_zone:
+	kmem_zone_destroy(xfs_inode_zone);
+ out_destroy_efi_zone:
+	kmem_zone_destroy(xfs_efi_zone);
+ out_destroy_efd_zone:
+	kmem_zone_destroy(xfs_efd_zone);
+ out_destroy_buf_item_zone:
+	kmem_zone_destroy(xfs_buf_item_zone);
+ out_destroy_trans_zone:
+	kmem_zone_destroy(xfs_trans_zone);
+ out_destroy_ifork_zone:
+	kmem_zone_destroy(xfs_ifork_zone);
+ out_destroy_dabuf_zone:
+	kmem_zone_destroy(xfs_dabuf_zone);
+ out_destroy_da_state_zone:
+	kmem_zone_destroy(xfs_da_state_zone);
+ out_destroy_btree_cur_zone:
+	kmem_zone_destroy(xfs_btree_cur_zone);
+ out_destroy_bmap_free_item_zone:
+	kmem_zone_destroy(xfs_bmap_free_item_zone);
+ out_destroy_log_ticket_zone:
+	kmem_zone_destroy(xfs_log_ticket_zone);
+ out_destroy_ioend_pool:
+	mempool_destroy(xfs_ioend_pool);
+ out_destroy_ioend_zone:
+	kmem_zone_destroy(xfs_ioend_zone);
+ out_destroy_vnode_zone:
+	kmem_zone_destroy(xfs_vnode_zone);
+ out:
+	return -ENOMEM;
+}
+
+STATIC void
+xfs_destroy_zones(void)
+{
+#ifdef CONFIG_XFS_POSIX_ACL
+	kmem_zone_destroy(xfs_acl_zone);
+#endif
+	kmem_zone_destroy(xfs_ili_zone);
+	kmem_zone_destroy(xfs_inode_zone);
+	kmem_zone_destroy(xfs_efi_zone);
+	kmem_zone_destroy(xfs_efd_zone);
+	kmem_zone_destroy(xfs_buf_item_zone);
+	kmem_zone_destroy(xfs_trans_zone);
+	kmem_zone_destroy(xfs_ifork_zone);
+	kmem_zone_destroy(xfs_dabuf_zone);
+	kmem_zone_destroy(xfs_da_state_zone);
+	kmem_zone_destroy(xfs_btree_cur_zone);
+	kmem_zone_destroy(xfs_bmap_free_item_zone);
+	kmem_zone_destroy(xfs_log_ticket_zone);
+	mempool_destroy(xfs_ioend_pool);
+	kmem_zone_destroy(xfs_ioend_zone);
+	kmem_zone_destroy(xfs_vnode_zone);
+
+}
 
 STATIC int __init
-init_xfs_fs( void )
+init_xfs_fs(void)
 {
 	int			error;
 	static char		message[] __initdata = KERN_INFO \
@@ -1950,42 +2145,73 @@ init_xfs_fs( void )
 	printk(message);
 
 	ktrace_init(64);
+	vn_init();
+	xfs_dir_startup();
 
 	error = xfs_init_zones();
-	if (error < 0)
-		goto undo_zones;
+	if (error)
+		goto out;
+
+	error = xfs_alloc_trace_bufs();
+	if (error)
+		goto out_destroy_zones;
+
+	error = xfs_mru_cache_init();
+	if (error)
+		goto out_free_trace_buffers;
+
+	error = xfs_filestream_init();
+	if (error)
+		goto out_mru_cache_uninit;
 
 	error = xfs_buf_init();
-	if (error < 0)
-		goto undo_buffers;
+	if (error)
+		goto out_filestream_uninit;
+
+	error = xfs_init_procfs();
+	if (error)
+		goto out_buf_terminate;
+
+	error = xfs_sysctl_register();
+	if (error)
+		goto out_cleanup_procfs;
 
-	vn_init();
-	xfs_init();
-	uuid_init();
 	vfs_initquota();
 
 	error = register_filesystem(&xfs_fs_type);
 	if (error)
-		goto undo_register;
+		goto out_sysctl_unregister;
 	return 0;
 
-undo_register:
+ out_sysctl_unregister:
+	xfs_sysctl_unregister();
+ out_cleanup_procfs:
+	xfs_cleanup_procfs();
+ out_buf_terminate:
 	xfs_buf_terminate();
-
-undo_buffers:
+ out_filestream_uninit:
+	xfs_filestream_uninit();
+ out_mru_cache_uninit:
+	xfs_mru_cache_uninit();
+ out_free_trace_buffers:
+	xfs_free_trace_bufs();
+ out_destroy_zones:
 	xfs_destroy_zones();
-
-undo_zones:
+ out:
 	return error;
 }
 
 STATIC void __exit
-exit_xfs_fs( void )
+exit_xfs_fs(void)
 {
 	vfs_exitquota();
 	unregister_filesystem(&xfs_fs_type);
-	xfs_cleanup();
+	xfs_sysctl_unregister();
+	xfs_cleanup_procfs();
 	xfs_buf_terminate();
+	xfs_filestream_uninit();
+	xfs_mru_cache_uninit();
+	xfs_free_trace_bufs();
 	xfs_destroy_zones();
 	ktrace_uninit();
 }
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index bb997d75c05..7dacb5bbde3 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -259,15 +259,17 @@ static ctl_table xfs_root_table[] = {
 	{}
 };
 
-void
+int
 xfs_sysctl_register(void)
 {
 	xfs_table_header = register_sysctl_table(xfs_root_table);
+	if (!xfs_table_header)
+		return -ENOMEM;
+	return 0;
 }
 
 void
 xfs_sysctl_unregister(void)
 {
-	if (xfs_table_header)
-		unregister_sysctl_table(xfs_table_header);
+	unregister_sysctl_table(xfs_table_header);
 }
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h
index 98b97e399d6..4aadb8056c3 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.h
+++ b/fs/xfs/linux-2.6/xfs_sysctl.h
@@ -93,10 +93,10 @@ enum {
 extern xfs_param_t	xfs_params;
 
 #ifdef CONFIG_SYSCTL
-extern void xfs_sysctl_register(void);
+extern int xfs_sysctl_register(void);
 extern void xfs_sysctl_unregister(void);
 #else
-# define xfs_sysctl_register()		do { } while (0)
+# define xfs_sysctl_register()		(0)
 # define xfs_sysctl_unregister()	do { } while (0)
 #endif /* CONFIG_SYSCTL */
 
diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c
index 493a6ecf859..5830c040ea7 100644
--- a/fs/xfs/support/uuid.c
+++ b/fs/xfs/support/uuid.c
@@ -17,7 +17,7 @@
  */
 #include <xfs.h>
 
-static mutex_t	uuid_monitor;
+static DEFINE_MUTEX(uuid_monitor);
 static int	uuid_table_size;
 static uuid_t	*uuid_table;
 
@@ -132,9 +132,3 @@ uuid_table_remove(uuid_t *uuid)
 	ASSERT(i < uuid_table_size);
 	mutex_unlock(&uuid_monitor);
 }
-
-void __init
-uuid_init(void)
-{
-	mutex_init(&uuid_monitor);
-}
diff --git a/fs/xfs/support/uuid.h b/fs/xfs/support/uuid.h
index b6f5922199b..cff5b607d44 100644
--- a/fs/xfs/support/uuid.h
+++ b/fs/xfs/support/uuid.h
@@ -22,7 +22,6 @@ typedef struct {
 	unsigned char	__u_bits[16];
 } uuid_t;
 
-extern void uuid_init(void);
 extern void uuid_create_nil(uuid_t *uuid);
 extern int uuid_is_nil(uuid_t *uuid);
 extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index edc0aef4e51..9e561a9cefc 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -2240,7 +2240,7 @@ xfs_da_state_free(xfs_da_state_t *state)
 
 #ifdef XFS_DABUF_DEBUG
 xfs_dabuf_t	*xfs_dabuf_global_list;
-spinlock_t	xfs_dabuf_global_lock;
+static DEFINE_SPINLOCK(xfs_dabuf_global_lock);
 #endif
 
 /*
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 7380a00644c..f66756cfb5e 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -66,14 +66,6 @@ int	xfs_etest[XFS_NUM_INJECT_ERROR];
 int64_t	xfs_etest_fsid[XFS_NUM_INJECT_ERROR];
 char *	xfs_etest_fsname[XFS_NUM_INJECT_ERROR];
 
-void
-xfs_error_test_init(void)
-{
-	memset(xfs_etest, 0, sizeof(xfs_etest));
-	memset(xfs_etest_fsid, 0, sizeof(xfs_etest_fsid));
-	memset(xfs_etest_fsname, 0, sizeof(xfs_etest_fsname));
-}
-
 int
 xfs_error_test(int error_tag, int *fsidp, char *expression,
 	       int line, char *file, unsigned long randfactor)
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 6490d2a9f8e..d8559d132ef 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -127,7 +127,6 @@ extern void xfs_corruption_error(char *tag, int level, struct xfs_mount *mp,
 
 #if (defined(DEBUG) || defined(INDUCE_IO_ERROR))
 extern int xfs_error_test(int, int *, char *, int, char *, unsigned long);
-extern void xfs_error_test_init(void);
 
 #define	XFS_NUM_INJECT_ERROR				10
 
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 3f3785b1080..c38fd14fca2 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -397,10 +397,12 @@ int
 xfs_filestream_init(void)
 {
 	item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item");
+	if (!item_zone)
+		return -ENOMEM;
 #ifdef XFS_FILESTREAMS_TRACE
 	xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_SLEEP);
 #endif
-	return item_zone ? 0 : -ENOMEM;
+	return 0;
 }
 
 /*
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 2a75f1703b3..5269bd6e3df 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -546,9 +546,6 @@ extern void	xfs_qmops_put(struct xfs_mount *);
 
 extern struct xfs_dmops xfs_dmcore_xfs;
 
-extern int	xfs_init(void);
-extern void	xfs_cleanup(void);
-
 #endif	/* __KERNEL__ */
 
 #endif	/* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 26d14a1e0e1..afee7eb2432 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -307,15 +307,18 @@ xfs_mru_cache_init(void)
 	xfs_mru_elem_zone = kmem_zone_init(sizeof(xfs_mru_cache_elem_t),
 	                                 "xfs_mru_cache_elem");
 	if (!xfs_mru_elem_zone)
-		return ENOMEM;
+		goto out;
 
 	xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache");
-	if (!xfs_mru_reap_wq) {
-		kmem_zone_destroy(xfs_mru_elem_zone);
-		return ENOMEM;
-	}
+	if (!xfs_mru_reap_wq)
+		goto out_destroy_mru_elem_zone;
 
 	return 0;
+
+ out_destroy_mru_elem_zone:
+	kmem_zone_destroy(xfs_mru_elem_zone);
+ out:
+	return -ENOMEM;
 }
 
 void
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 8b5a3376c2f..4a9a43315a8 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -58,137 +58,6 @@
 #include "xfs_utils.h"
 
 
-int __init
-xfs_init(void)
-{
-#ifdef XFS_DABUF_DEBUG
-	extern spinlock_t        xfs_dabuf_global_lock;
-	spin_lock_init(&xfs_dabuf_global_lock);
-#endif
-
-	/*
-	 * Initialize all of the zone allocators we use.
-	 */
-	xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
-						"xfs_log_ticket");
-	xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
-						"xfs_bmap_free_item");
-	xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
-						"xfs_btree_cur");
-	xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
-						"xfs_da_state");
-	xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
-	xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
-	xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
-	xfs_acl_zone_init(xfs_acl_zone, "xfs_acl");
-	xfs_mru_cache_init();
-	xfs_filestream_init();
-
-	/*
-	 * The size of the zone allocated buf log item is the maximum
-	 * size possible under XFS.  This wastes a little bit of memory,
-	 * but it is much faster.
-	 */
-	xfs_buf_item_zone =
-		kmem_zone_init((sizeof(xfs_buf_log_item_t) +
-				(((XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK) /
-				  NBWORD) * sizeof(int))),
-			       "xfs_buf_item");
-	xfs_efd_zone =
-		kmem_zone_init((sizeof(xfs_efd_log_item_t) +
-			       ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
-				 sizeof(xfs_extent_t))),
-				      "xfs_efd_item");
-	xfs_efi_zone =
-		kmem_zone_init((sizeof(xfs_efi_log_item_t) +
-			       ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
-				 sizeof(xfs_extent_t))),
-				      "xfs_efi_item");
-
-	/*
-	 * These zones warrant special memory allocator hints
-	 */
-	xfs_inode_zone =
-		kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
-					KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
-					KM_ZONE_SPREAD, NULL);
-	xfs_ili_zone =
-		kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
-					KM_ZONE_SPREAD, NULL);
-
-	/*
-	 * Allocate global trace buffers.
-	 */
-#ifdef XFS_ALLOC_TRACE
-	xfs_alloc_trace_buf = ktrace_alloc(XFS_ALLOC_TRACE_SIZE, KM_SLEEP);
-#endif
-#ifdef XFS_BMAP_TRACE
-	xfs_bmap_trace_buf = ktrace_alloc(XFS_BMAP_TRACE_SIZE, KM_SLEEP);
-#endif
-#ifdef XFS_BMBT_TRACE
-	xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_SLEEP);
-#endif
-#ifdef XFS_ATTR_TRACE
-	xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_SLEEP);
-#endif
-#ifdef XFS_DIR2_TRACE
-	xfs_dir2_trace_buf = ktrace_alloc(XFS_DIR2_GTRACE_SIZE, KM_SLEEP);
-#endif
-
-	xfs_dir_startup();
-
-#if (defined(DEBUG) || defined(INDUCE_IO_ERROR))
-	xfs_error_test_init();
-#endif /* DEBUG || INDUCE_IO_ERROR */
-
-	xfs_init_procfs();
-	xfs_sysctl_register();
-	return 0;
-}
-
-void __exit
-xfs_cleanup(void)
-{
-	extern kmem_zone_t	*xfs_inode_zone;
-	extern kmem_zone_t	*xfs_efd_zone;
-	extern kmem_zone_t	*xfs_efi_zone;
-
-	xfs_cleanup_procfs();
-	xfs_sysctl_unregister();
-	xfs_filestream_uninit();
-	xfs_mru_cache_uninit();
-	xfs_acl_zone_destroy(xfs_acl_zone);
-
-#ifdef XFS_DIR2_TRACE
-	ktrace_free(xfs_dir2_trace_buf);
-#endif
-#ifdef XFS_ATTR_TRACE
-	ktrace_free(xfs_attr_trace_buf);
-#endif
-#ifdef XFS_BMBT_TRACE
-	ktrace_free(xfs_bmbt_trace_buf);
-#endif
-#ifdef XFS_BMAP_TRACE
-	ktrace_free(xfs_bmap_trace_buf);
-#endif
-#ifdef XFS_ALLOC_TRACE
-	ktrace_free(xfs_alloc_trace_buf);
-#endif
-
-	kmem_zone_destroy(xfs_bmap_free_item_zone);
-	kmem_zone_destroy(xfs_btree_cur_zone);
-	kmem_zone_destroy(xfs_inode_zone);
-	kmem_zone_destroy(xfs_trans_zone);
-	kmem_zone_destroy(xfs_da_state_zone);
-	kmem_zone_destroy(xfs_dabuf_zone);
-	kmem_zone_destroy(xfs_buf_item_zone);
-	kmem_zone_destroy(xfs_efd_zone);
-	kmem_zone_destroy(xfs_efi_zone);
-	kmem_zone_destroy(xfs_ifork_zone);
-	kmem_zone_destroy(xfs_ili_zone);
-	kmem_zone_destroy(xfs_log_ticket_zone);
-}
-
 STATIC void
 xfs_quiesce_fs(
 	xfs_mount_t		*mp)
-- 
cgit v1.2.3-70-g09d2


From deeb5912db12e8b7ccf3f4b1afaad60bc29abed9 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sandeen.net>
Date: Fri, 18 Jul 2008 17:12:18 +1000
Subject: [XFS] Disable queue flag test in barrier check.

md raid1 can pass down barriers, but does not set an ordered flag on the
queue, so xfs does not even attempt a barrier write, and will never use
barriers on these block devices.

Remove the flag check and just let the barrier write test determine
barrier support.

A possible risk here is that if something does not set an ordered flag and
also does not properly return an error on a barrier write... but if it's
any consolation jbd/ext3/reiserfs never test the flag, and don't even do a
test write, they just disable barriers the first time an actual journal
barrier write fails.

SGI-PV: 983924

SGI-Modid: xfs-linux-melb:xfs-kern:31377a

Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
Signed-off-by: Tim Shimmin <tes@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/linux-2.6/xfs_super.c | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 7c621dfee73..fcb4931902a 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -746,14 +746,6 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp)
 		return;
 	}
 
-	if (mp->m_ddev_targp->bt_bdev->bd_disk->queue->ordered ==
-					QUEUE_ORDERED_NONE) {
-		xfs_fs_cmn_err(CE_NOTE, mp,
-		  "Disabling barriers, not supported by the underlying device");
-		mp->m_flags &= ~XFS_MOUNT_BARRIER;
-		return;
-	}
-
 	if (xfs_readonly_buftarg(mp->m_ddev_targp)) {
 		xfs_fs_cmn_err(CE_NOTE, mp,
 		  "Disabling barriers, underlying device is readonly");
-- 
cgit v1.2.3-70-g09d2


From 62a877e35d5085c65936ed3194d1bbaf84f419e1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Fri, 18 Jul 2008 17:12:36 +1000
Subject: [XFS] fix mount option parsing in remount

Remount currently happily accept any option thrown at it, although the
only filesystem specific option it actually handles is barrier/nobarrier.
And it actually doesn't handle these correctly either because it only uses
the value it parsed when we're doing a ro->rw transition. In addition to
that there's also a bad bug in xfs_parseargs which doesn't touch the
actual option in the mount point except for a single one,
XFS_MOUNT_SMALL_INUMS and thus forced any filesystem that's every
remounted in some way to not support 64bit inodes with no way to recover
unless unmounted.

This patch changes xfs_fs_remount to use it's own linux/parser.h based
options parse instead of xfs_parseargs and reject all options except for
barrier/nobarrier and to the right thing in general. Eventually I'd like
to have a single big option table used for mount aswell but that can wait
for a while.

SGI-PV: 983964

SGI-Modid: xfs-linux-melb:xfs-kern:31382a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/linux-2.6/xfs_super.c | 72 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 54 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index fcb4931902a..b4008668004 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -66,6 +66,7 @@
 #include <linux/writeback.h>
 #include <linux/kthread.h>
 #include <linux/freezer.h>
+#include <linux/parser.h>
 
 static struct quotactl_ops xfs_quotactl_operations;
 static struct super_operations xfs_super_operations;
@@ -147,6 +148,23 @@ xfs_args_allocate(
 #define MNTOPT_XDSM	"xdsm"		/* DMI enabled (DMAPI / XDSM) */
 #define MNTOPT_DMI	"dmi"		/* DMI enabled (DMAPI / XDSM) */
 
+/*
+ * Table driven mount option parser.
+ *
+ * Currently only used for remount, but it will be used for mount
+ * in the future, too.
+ */
+enum {
+	Opt_barrier, Opt_nobarrier, Opt_err
+};
+
+static match_table_t tokens = {
+	{Opt_barrier, "barrier"},
+	{Opt_nobarrier, "nobarrier"},
+	{Opt_err, NULL}
+};
+
+
 STATIC unsigned long
 suffix_strtoul(char *s, char **endp, unsigned int base)
 {
@@ -1364,36 +1382,54 @@ xfs_fs_remount(
 	char			*options)
 {
 	struct xfs_mount	*mp = XFS_M(sb);
-	struct xfs_mount_args	*args;
-	int			error;
+	substring_t		args[MAX_OPT_ARGS];
+	char			*p;
 
-	args = xfs_args_allocate(sb, 0);
-	if (!args)
-		return -ENOMEM;
+	while ((p = strsep(&options, ",")) != NULL) {
+		int token;
 
-	error = xfs_parseargs(mp, options, args, 1);
-	if (error)
-		goto out_free_args;
+		if (!*p)
+			continue;
 
-	if (!(*flags & MS_RDONLY)) {			/* rw/ro -> rw */
-		if (mp->m_flags & XFS_MOUNT_RDONLY)
-		mp->m_flags &= ~XFS_MOUNT_RDONLY;
-		if (args->flags & XFSMNT_BARRIER) {
+		token = match_token(p, tokens, args);
+		switch (token) {
+		case Opt_barrier:
 			mp->m_flags |= XFS_MOUNT_BARRIER;
-			xfs_mountfs_check_barriers(mp);
-		} else {
+
+			/*
+			 * Test if barriers are actually working if we can,
+			 * else delay this check until the filesystem is
+			 * marked writeable.
+			 */
+			if (!(mp->m_flags & XFS_MOUNT_RDONLY))
+				xfs_mountfs_check_barriers(mp);
+			break;
+		case Opt_nobarrier:
 			mp->m_flags &= ~XFS_MOUNT_BARRIER;
+			break;
+		default:
+			printk(KERN_INFO
+	"XFS: mount option \"%s\" not supported for remount\n", p);
+			return -EINVAL;
 		}
-	} else if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {	/* rw -> ro */
+	}
+
+	/* rw/ro -> rw */
+	if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
+		mp->m_flags &= ~XFS_MOUNT_RDONLY;
+		if (mp->m_flags & XFS_MOUNT_BARRIER)
+			xfs_mountfs_check_barriers(mp);
+	}
+
+	/* rw -> ro */
+	if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
 		xfs_filestream_flush(mp);
 		xfs_sync(mp, SYNC_DATA_QUIESCE);
 		xfs_attr_quiesce(mp);
 		mp->m_flags |= XFS_MOUNT_RDONLY;
 	}
 
- out_free_args:
-	kfree(args);
-	return -error;
+	return 0;
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 26cc0021805e66daa6342174fb5a8c1c862f7c8e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Fri, 18 Jul 2008 17:12:43 +1000
Subject: [XFS] s/XFS_PURGE_INODE/IRELE/g s/VN_HOLD(XFS_ITOV())/IHOLD()/

SGI-PV: 981498

SGI-Modid: xfs-linux-melb:xfs-kern:31405a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/quota/xfs_qm.c          | 10 +++++-----
 fs/xfs/quota/xfs_qm_syscalls.c |  4 ++--
 fs/xfs/quota/xfs_quota_priv.h  |  3 ---
 3 files changed, 7 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 26370a3128f..021934a3d45 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -445,11 +445,11 @@ xfs_qm_unmount_quotas(
 		}
 	}
 	if (uqp) {
-		 XFS_PURGE_INODE(uqp);
+		 IRELE(uqp);
 		 mp->m_quotainfo->qi_uquotaip = NULL;
 	}
 	if (gqp) {
-		XFS_PURGE_INODE(gqp);
+		IRELE(gqp);
 		mp->m_quotainfo->qi_gquotaip = NULL;
 	}
 out:
@@ -1240,11 +1240,11 @@ xfs_qm_destroy_quotainfo(
 	xfs_qm_list_destroy(&qi->qi_dqlist);
 
 	if (qi->qi_uquotaip) {
-		XFS_PURGE_INODE(qi->qi_uquotaip);
+		IRELE(qi->qi_uquotaip);
 		qi->qi_uquotaip = NULL; /* paranoia */
 	}
 	if (qi->qi_gquotaip) {
-		XFS_PURGE_INODE(qi->qi_gquotaip);
+		IRELE(qi->qi_gquotaip);
 		qi->qi_gquotaip = NULL;
 	}
 	mutex_destroy(&qi->qi_quotaofflock);
@@ -1394,7 +1394,7 @@ xfs_qm_qino_alloc(
 	 * locked exclusively and joined to the transaction already.
 	 */
 	ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL));
-	VN_HOLD(XFS_ITOV((*ip)));
+	IHOLD(*ip);
 
 	/*
 	 * Make the changes in the superblock, and log those too.
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 413671523cb..adfb8723f65 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -362,11 +362,11 @@ xfs_qm_scall_quotaoff(
 	 * if we don't need them anymore.
 	 */
 	if ((dqtype & XFS_QMOPT_UQUOTA) && XFS_QI_UQIP(mp)) {
-		XFS_PURGE_INODE(XFS_QI_UQIP(mp));
+		IRELE(XFS_QI_UQIP(mp));
 		XFS_QI_UQIP(mp) = NULL;
 	}
 	if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && XFS_QI_GQIP(mp)) {
-		XFS_PURGE_INODE(XFS_QI_GQIP(mp));
+		IRELE(XFS_QI_GQIP(mp));
 		XFS_QI_GQIP(mp) = NULL;
 	}
 out_error:
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
index 5e4a40b1c56..c4fcea600bc 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -158,9 +158,6 @@ for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \
 #define XFS_IS_SUSER_DQUOT(dqp)		\
 	(!((dqp)->q_core.d_id))
 
-#define XFS_PURGE_INODE(ip)		\
-	IRELE(ip);
-
 #define DQFLAGTO_TYPESTR(d)	(((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
 				 (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \
 				 (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???")))
-- 
cgit v1.2.3-70-g09d2


From 766b0925c07cd363c17ff54ebf59b6d34d8042d5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Fri, 18 Jul 2008 17:12:50 +1000
Subject: [XFS] fix compilation without CONFIG_PROC_FS

SGI-PV: 984019

SGI-Modid: xfs-linux-melb:xfs-kern:31408a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/linux-2.6/xfs_stats.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
index 3fa753d7b70..e83820febc9 100644
--- a/fs/xfs/linux-2.6/xfs_stats.h
+++ b/fs/xfs/linux-2.6/xfs_stats.h
@@ -146,11 +146,12 @@ extern void xfs_cleanup_procfs(void);
 
 static inline int xfs_init_procfs(void)
 {
-	return 0
-};
+	return 0;
+}
+
 static inline void xfs_cleanup_procfs(void)
 {
-};
+}
 
 #endif	/* !CONFIG_PROC_FS */
 
-- 
cgit v1.2.3-70-g09d2


From 6a617dd22bdbf5a4c9828db98c1a8b076c9e95c8 Mon Sep 17 00:00:00 2001
From: Tim Shimmin <tes@sgi.com>
Date: Fri, 18 Jul 2008 17:13:04 +1000
Subject: [XFS] A bug was found in xfs_bmap_add_extent_unwritten_real(). In a
 particular case, the delta param which is supposed to describe the region
 where extents have changed was not updated appropriately.

SGI-PV: 984030

SGI-Modid: xfs-linux-melb:xfs-kern:31663a

Signed-off-by: Tim Shimmin <tes@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
Signed-off-by: Olaf Weber <olaf@sgi.com>
---
 fs/xfs/xfs_bmap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index cf4dee01983..3c4beb3a432 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -1740,9 +1740,9 @@ xfs_bmap_add_extent_unwritten_real(
 				r[1].br_state)))
 				goto done;
 			/* new left extent - oldext */
-			PREV.br_blockcount =
-				new->br_startoff - PREV.br_startoff;
 			cur->bc_rec.b = PREV;
+			cur->bc_rec.b.br_blockcount =
+				new->br_startoff - PREV.br_startoff;
 			if ((error = xfs_bmbt_insert(cur, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-- 
cgit v1.2.3-70-g09d2


From c032bfcf468013643e05c8274824af10dd7cbb61 Mon Sep 17 00:00:00 2001
From: Lachlan McIlroy <lachlan@sgi.com>
Date: Fri, 18 Jul 2008 17:13:12 +1000
Subject: [XFS] fix use after free with external logs or real-time devices

SGI-PV: 983806

SGI-Modid: xfs-linux-melb:xfs-kern:31666a

Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
---
 fs/xfs/linux-2.6/xfs_super.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index b4008668004..30ae96397e3 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -792,12 +792,14 @@ xfs_close_devices(
 	struct xfs_mount	*mp)
 {
 	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
+		struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
 		xfs_free_buftarg(mp->m_logdev_targp);
-		xfs_blkdev_put(mp->m_logdev_targp->bt_bdev);
+		xfs_blkdev_put(logdev);
 	}
 	if (mp->m_rtdev_targp) {
+		struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
 		xfs_free_buftarg(mp->m_rtdev_targp);
-		xfs_blkdev_put(mp->m_rtdev_targp->bt_bdev);
+		xfs_blkdev_put(rtdev);
 	}
 	xfs_free_buftarg(mp->m_ddev_targp);
 }
-- 
cgit v1.2.3-70-g09d2


From 25fe55e814a2964c7e16d16a5d08cae6e9313a3a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Fri, 18 Jul 2008 17:13:20 +1000
Subject: [XFS] xfs_setattr currently doesn't just handle the attributes set
 through ->setattr but also addition XFS-specific attributes: project id,
 inode flags and extent size hint. Having these in a single function makes it
 more complicated and forces to have us a bhv_vattr intermediate structure
 eating up stackspace.

This patch adds a new xfs_ioctl_setattr helper for the XFS ioctls that set
these attributes and remove the code to set them through xfs_setattr.

SGI-PV: 984564

SGI-Modid: xfs-linux-melb:xfs-kern:31677a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/linux-2.6/xfs_ioctl.c | 339 ++++++++++++++++++++++++++++++++++++++-----
 fs/xfs/linux-2.6/xfs_vnode.h |  15 --
 fs/xfs/xfs_vnodeops.c        | 169 +--------------------
 3 files changed, 311 insertions(+), 212 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 8eddaff3684..d2bbb0532ef 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -48,6 +48,8 @@
 #include "xfs_dfrag.h"
 #include "xfs_fsops.h"
 #include "xfs_vnodeops.h"
+#include "xfs_quota.h"
+#include "xfs_inode_item.h"
 
 #include <linux/capability.h>
 #include <linux/dcache.h>
@@ -879,6 +881,297 @@ xfs_ioc_fsgetxattr(
 	return 0;
 }
 
+STATIC void
+xfs_set_diflags(
+	struct xfs_inode	*ip,
+	unsigned int		xflags)
+{
+	unsigned int		di_flags;
+
+	/* can't set PREALLOC this way, just preserve it */
+	di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
+	if (xflags & XFS_XFLAG_IMMUTABLE)
+		di_flags |= XFS_DIFLAG_IMMUTABLE;
+	if (xflags & XFS_XFLAG_APPEND)
+		di_flags |= XFS_DIFLAG_APPEND;
+	if (xflags & XFS_XFLAG_SYNC)
+		di_flags |= XFS_DIFLAG_SYNC;
+	if (xflags & XFS_XFLAG_NOATIME)
+		di_flags |= XFS_DIFLAG_NOATIME;
+	if (xflags & XFS_XFLAG_NODUMP)
+		di_flags |= XFS_DIFLAG_NODUMP;
+	if (xflags & XFS_XFLAG_PROJINHERIT)
+		di_flags |= XFS_DIFLAG_PROJINHERIT;
+	if (xflags & XFS_XFLAG_NODEFRAG)
+		di_flags |= XFS_DIFLAG_NODEFRAG;
+	if (xflags & XFS_XFLAG_FILESTREAM)
+		di_flags |= XFS_DIFLAG_FILESTREAM;
+	if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+		if (xflags & XFS_XFLAG_RTINHERIT)
+			di_flags |= XFS_DIFLAG_RTINHERIT;
+		if (xflags & XFS_XFLAG_NOSYMLINKS)
+			di_flags |= XFS_DIFLAG_NOSYMLINKS;
+		if (xflags & XFS_XFLAG_EXTSZINHERIT)
+			di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+	} else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
+		if (xflags & XFS_XFLAG_REALTIME)
+			di_flags |= XFS_DIFLAG_REALTIME;
+		if (xflags & XFS_XFLAG_EXTSIZE)
+			di_flags |= XFS_DIFLAG_EXTSIZE;
+	}
+
+	ip->i_d.di_flags = di_flags;
+}
+
+
+#define FSX_PROJID	1
+#define FSX_EXTSIZE	2
+#define FSX_XFLAGS	4
+#define FSX_NONBLOCK	8
+
+STATIC int
+xfs_ioctl_setattr(
+	xfs_inode_t		*ip,
+	struct fsxattr		*fa,
+	int			mask)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_trans	*tp;
+	unsigned int		lock_flags = 0;
+	struct xfs_dquot	*udqp = NULL, *gdqp = NULL;
+	struct xfs_dquot	*olddquot = NULL;
+	int			code;
+
+	xfs_itrace_entry(ip);
+
+	if (mp->m_flags & XFS_MOUNT_RDONLY)
+		return XFS_ERROR(EROFS);
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return XFS_ERROR(EIO);
+
+	/*
+	 * If disk quotas is on, we make sure that the dquots do exist on disk,
+	 * before we start any other transactions. Trying to do this later
+	 * is messy. We don't care to take a readlock to look at the ids
+	 * in inode here, because we can't hold it across the trans_reserve.
+	 * If the IDs do change before we take the ilock, we're covered
+	 * because the i_*dquot fields will get updated anyway.
+	 */
+	if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) {
+		code = XFS_QM_DQVOPALLOC(mp, ip, ip->i_d.di_uid,
+					 ip->i_d.di_gid, fa->fsx_projid,
+					 XFS_QMOPT_PQUOTA, &udqp, &gdqp);
+		if (code)
+			return code;
+	}
+
+	/*
+	 * For the other attributes, we acquire the inode lock and
+	 * first do an error checking pass.
+	 */
+	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
+	code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
+	if (code)
+		goto error_return;
+
+	lock_flags = XFS_ILOCK_EXCL;
+	xfs_ilock(ip, lock_flags);
+
+	/*
+	 * CAP_FOWNER overrides the following restrictions:
+	 *
+	 * The user ID of the calling process must be equal
+	 * to the file owner ID, except in cases where the
+	 * CAP_FSETID capability is applicable.
+	 */
+	if (current->fsuid != ip->i_d.di_uid && !capable(CAP_FOWNER)) {
+		code = XFS_ERROR(EPERM);
+		goto error_return;
+	}
+
+	/*
+	 * Do a quota reservation only if projid is actually going to change.
+	 */
+	if (mask & FSX_PROJID) {
+		if (XFS_IS_PQUOTA_ON(mp) &&
+		    ip->i_d.di_projid != fa->fsx_projid) {
+			ASSERT(tp);
+			code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
+						capable(CAP_FOWNER) ?
+						XFS_QMOPT_FORCE_RES : 0);
+			if (code)	/* out of quota */
+				goto error_return;
+		}
+	}
+
+	if (mask & FSX_EXTSIZE) {
+		/*
+		 * Can't change extent size if any extents are allocated.
+		 */
+		if (ip->i_d.di_nextents &&
+		    ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
+		     fa->fsx_extsize)) {
+			code = XFS_ERROR(EINVAL);	/* EFBIG? */
+			goto error_return;
+		}
+
+		/*
+		 * Extent size must be a multiple of the appropriate block
+		 * size, if set at all.
+		 */
+		if (fa->fsx_extsize != 0) {
+			xfs_extlen_t	size;
+
+			if (XFS_IS_REALTIME_INODE(ip) ||
+			    ((mask & FSX_XFLAGS) &&
+			    (fa->fsx_xflags & XFS_XFLAG_REALTIME))) {
+				size = mp->m_sb.sb_rextsize <<
+				       mp->m_sb.sb_blocklog;
+			} else {
+				size = mp->m_sb.sb_blocksize;
+			}
+
+			if (fa->fsx_extsize % size) {
+				code = XFS_ERROR(EINVAL);
+				goto error_return;
+			}
+		}
+	}
+
+
+	if (mask & FSX_XFLAGS) {
+		/*
+		 * Can't change realtime flag if any extents are allocated.
+		 */
+		if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
+		    (XFS_IS_REALTIME_INODE(ip)) !=
+		    (fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
+			code = XFS_ERROR(EINVAL);	/* EFBIG? */
+			goto error_return;
+		}
+
+		/*
+		 * If realtime flag is set then must have realtime data.
+		 */
+		if ((fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
+			if ((mp->m_sb.sb_rblocks == 0) ||
+			    (mp->m_sb.sb_rextsize == 0) ||
+			    (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
+				code = XFS_ERROR(EINVAL);
+				goto error_return;
+			}
+		}
+
+		/*
+		 * Can't modify an immutable/append-only file unless
+		 * we have appropriate permission.
+		 */
+		if ((ip->i_d.di_flags &
+				(XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
+		     (fa->fsx_xflags &
+				(XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
+		    !capable(CAP_LINUX_IMMUTABLE)) {
+			code = XFS_ERROR(EPERM);
+			goto error_return;
+		}
+	}
+
+	xfs_trans_ijoin(tp, ip, lock_flags);
+	xfs_trans_ihold(tp, ip);
+
+	/*
+	 * Change file ownership.  Must be the owner or privileged.
+	 * If the system was configured with the "restricted_chown"
+	 * option, the owner is not permitted to give away the file,
+	 * and can change the group id only to a group of which he
+	 * or she is a member.
+	 */
+	if (mask & FSX_PROJID) {
+		/*
+		 * CAP_FSETID overrides the following restrictions:
+		 *
+		 * The set-user-ID and set-group-ID bits of a file will be
+		 * cleared upon successful return from chown()
+		 */
+		if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
+		    !capable(CAP_FSETID))
+			ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
+
+		/*
+		 * Change the ownerships and register quota modifications
+		 * in the transaction.
+		 */
+		if (ip->i_d.di_projid != fa->fsx_projid) {
+			if (XFS_IS_PQUOTA_ON(mp)) {
+				olddquot = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+							&ip->i_gdquot, gdqp);
+			}
+			ip->i_d.di_projid = fa->fsx_projid;
+
+			/*
+			 * We may have to rev the inode as well as
+			 * the superblock version number since projids didn't
+			 * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
+			 */
+			if (ip->i_d.di_version == XFS_DINODE_VERSION_1)
+				xfs_bump_ino_vers2(tp, ip);
+		}
+
+	}
+
+	if (mask & FSX_EXTSIZE)
+		ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog;
+	if (mask & FSX_XFLAGS)
+		xfs_set_diflags(ip, fa->fsx_xflags);
+
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+	xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
+
+	XFS_STATS_INC(xs_ig_attrchg);
+
+	/*
+	 * If this is a synchronous mount, make sure that the
+	 * transaction goes to disk before returning to the user.
+	 * This is slightly sub-optimal in that truncates require
+	 * two sync transactions instead of one for wsync filesystems.
+	 * One for the truncate and one for the timestamps since we
+	 * don't want to change the timestamps unless we're sure the
+	 * truncate worked.  Truncates are less than 1% of the laddis
+	 * mix so this probably isn't worth the trouble to optimize.
+	 */
+	if (mp->m_flags & XFS_MOUNT_WSYNC)
+		xfs_trans_set_sync(tp);
+	code = xfs_trans_commit(tp, 0);
+	xfs_iunlock(ip, lock_flags);
+
+	/*
+	 * Release any dquot(s) the inode had kept before chown.
+	 */
+	XFS_QM_DQRELE(mp, olddquot);
+	XFS_QM_DQRELE(mp, udqp);
+	XFS_QM_DQRELE(mp, gdqp);
+
+	if (code)
+		return code;
+
+	if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE)) {
+		XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL,
+				NULL, DM_RIGHT_NULL, NULL, NULL, 0, 0,
+				(mask & FSX_NONBLOCK) ? DM_FLAGS_NDELAY : 0);
+	}
+
+	vn_revalidate(XFS_ITOV(ip));	/* update flags */
+	return 0;
+
+ error_return:
+	XFS_QM_DQRELE(mp, udqp);
+	XFS_QM_DQRELE(mp, gdqp);
+	xfs_trans_cancel(tp, 0);
+	if (lock_flags)
+		xfs_iunlock(ip, lock_flags);
+	return code;
+}
+
 STATIC int
 xfs_ioc_fssetxattr(
 	xfs_inode_t		*ip,
@@ -886,31 +1179,16 @@ xfs_ioc_fssetxattr(
 	void			__user *arg)
 {
 	struct fsxattr		fa;
-	struct bhv_vattr	*vattr;
-	int			error;
-	int			attr_flags;
+	unsigned int		mask;
 
 	if (copy_from_user(&fa, arg, sizeof(fa)))
 		return -EFAULT;
 
-	vattr = kmalloc(sizeof(*vattr), GFP_KERNEL);
-	if (unlikely(!vattr))
-		return -ENOMEM;
-
-	attr_flags = 0;
+	mask = FSX_XFLAGS | FSX_EXTSIZE | FSX_PROJID;
 	if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
-		attr_flags |= ATTR_NONBLOCK;
+		mask |= FSX_NONBLOCK;
 
-	vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | XFS_AT_PROJID;
-	vattr->va_xflags  = fa.fsx_xflags;
-	vattr->va_extsize = fa.fsx_extsize;
-	vattr->va_projid  = fa.fsx_projid;
-
-	error = -xfs_setattr(ip, vattr, attr_flags, NULL);
-	if (!error)
-		vn_revalidate(XFS_ITOV(ip));	/* update flags */
-	kfree(vattr);
-	return 0;
+	return -xfs_ioctl_setattr(ip, &fa, mask);
 }
 
 STATIC int
@@ -932,10 +1210,9 @@ xfs_ioc_setxflags(
 	struct file		*filp,
 	void			__user *arg)
 {
-	struct bhv_vattr	*vattr;
+	struct fsxattr		fa;
 	unsigned int		flags;
-	int			attr_flags;
-	int			error;
+	unsigned int		mask;
 
 	if (copy_from_user(&flags, arg, sizeof(flags)))
 		return -EFAULT;
@@ -945,22 +1222,12 @@ xfs_ioc_setxflags(
 		      FS_SYNC_FL))
 		return -EOPNOTSUPP;
 
-	vattr = kmalloc(sizeof(*vattr), GFP_KERNEL);
-	if (unlikely(!vattr))
-		return -ENOMEM;
-
-	attr_flags = 0;
+	mask = FSX_XFLAGS;
 	if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
-		attr_flags |= ATTR_NONBLOCK;
+		mask |= FSX_NONBLOCK;
+	fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
 
-	vattr->va_mask = XFS_AT_XFLAGS;
-	vattr->va_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
-
-	error = -xfs_setattr(ip, vattr, attr_flags, NULL);
-	if (likely(!error))
-		vn_revalidate(XFS_ITOV(ip));	/* update flags */
-	kfree(vattr);
-	return error;
+	return -xfs_ioctl_setattr(ip, &fa, mask);
 }
 
 STATIC int
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 25eb2a9e8d9..7797c9cdb59 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -117,26 +117,11 @@ typedef struct bhv_vattr {
 #define XFS_AT_ACL		0x00080000
 #define XFS_AT_CAP		0x00100000
 #define XFS_AT_INF		0x00200000
-#define XFS_AT_XFLAGS		0x00400000
-#define XFS_AT_EXTSIZE		0x00800000
 #define XFS_AT_NEXTENTS		0x01000000
 #define XFS_AT_ANEXTENTS	0x02000000
-#define XFS_AT_PROJID		0x04000000
 #define XFS_AT_SIZE_NOPERM	0x08000000
 #define XFS_AT_GENCOUNT		0x10000000
 
-#define XFS_AT_ALL	(XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\
-		XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\
-		XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\
-		XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|XFS_AT_MAC|\
-		XFS_AT_ACL|XFS_AT_CAP|XFS_AT_INF|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|\
-		XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_PROJID|XFS_AT_GENCOUNT)
-
-#define XFS_AT_STAT	(XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\
-		XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\
-		XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\
-		XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_PROJID)
-
 #define XFS_AT_TIMES	(XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME)
 
 #define XFS_AT_UPDTIMES	(XFS_AT_UPDATIME|XFS_AT_UPDMTIME|XFS_AT_UPDCTIME)
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 8297a8c5af9..ed399523b78 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -94,7 +94,6 @@ xfs_setattr(
 	uid_t			uid=0, iuid=0;
 	gid_t			gid=0, igid=0;
 	int			timeflags = 0;
-	xfs_prid_t		projid=0, iprojid=0;
 	struct xfs_dquot	*udqp, *gdqp, *olddquot1, *olddquot2;
 	int			file_owner;
 	int			need_iolock = 1;
@@ -139,8 +138,7 @@ xfs_setattr(
 	 * If the IDs do change before we take the ilock, we're covered
 	 * because the i_*dquot fields will get updated anyway.
 	 */
-	if (XFS_IS_QUOTA_ON(mp) &&
-	    (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID))) {
+	if (XFS_IS_QUOTA_ON(mp) && (mask & (XFS_AT_UID|XFS_AT_GID))) {
 		uint	qflags = 0;
 
 		if ((mask & XFS_AT_UID) && XFS_IS_UQUOTA_ON(mp)) {
@@ -155,12 +153,7 @@ xfs_setattr(
 		}  else {
 			gid = ip->i_d.di_gid;
 		}
-		if ((mask & XFS_AT_PROJID) && XFS_IS_PQUOTA_ON(mp)) {
-			projid = vap->va_projid;
-			qflags |= XFS_QMOPT_PQUOTA;
-		}  else {
-			projid = ip->i_d.di_projid;
-		}
+
 		/*
 		 * We take a reference when we initialize udqp and gdqp,
 		 * so it is important that we never blindly double trip on
@@ -168,8 +161,8 @@ xfs_setattr(
 		 */
 		ASSERT(udqp == NULL);
 		ASSERT(gdqp == NULL);
-		code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, projid, qflags,
-					 &udqp, &gdqp);
+		code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, ip->i_d.di_projid,
+					 qflags, &udqp, &gdqp);
 		if (code)
 			return code;
 	}
@@ -219,9 +212,7 @@ xfs_setattr(
 	 * Only the owner or users with CAP_FOWNER
 	 * capability may do these things.
 	 */
-	if (mask &
-	    (XFS_AT_MODE|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_UID|
-	     XFS_AT_GID|XFS_AT_PROJID)) {
+	if (mask & (XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID)) {
 		/*
 		 * CAP_FOWNER overrides the following restrictions:
 		 *
@@ -270,7 +261,7 @@ xfs_setattr(
 	 * and can change the group id only to a group of which he
 	 * or she is a member.
 	 */
-	if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) {
+	if (mask & (XFS_AT_UID|XFS_AT_GID)) {
 		/*
 		 * These IDs could have changed since we last looked at them.
 		 * But, we're assured that if the ownership did change
@@ -278,12 +269,9 @@ xfs_setattr(
 		 * would have changed also.
 		 */
 		iuid = ip->i_d.di_uid;
-		iprojid = ip->i_d.di_projid;
 		igid = ip->i_d.di_gid;
 		gid = (mask & XFS_AT_GID) ? vap->va_gid : igid;
 		uid = (mask & XFS_AT_UID) ? vap->va_uid : iuid;
-		projid = (mask & XFS_AT_PROJID) ? (xfs_prid_t)vap->va_projid :
-			 iprojid;
 
 		/*
 		 * CAP_CHOWN overrides the following restrictions:
@@ -303,11 +291,10 @@ xfs_setattr(
 			goto error_return;
 		}
 		/*
-		 * Do a quota reservation only if uid/projid/gid is actually
+		 * Do a quota reservation only if uid/gid is actually
 		 * going to change.
 		 */
 		if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
-		    (XFS_IS_PQUOTA_ON(mp) && iprojid != projid) ||
 		    (XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
 			ASSERT(tp);
 			code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
@@ -360,78 +347,6 @@ xfs_setattr(
 		}
 	}
 
-	/*
-	 * Change extent size or realtime flag.
-	 */
-	if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) {
-		/*
-		 * Can't change extent size if any extents are allocated.
-		 */
-		if (ip->i_d.di_nextents && (mask & XFS_AT_EXTSIZE) &&
-		    ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
-		     vap->va_extsize) ) {
-			code = XFS_ERROR(EINVAL);	/* EFBIG? */
-			goto error_return;
-		}
-
-		/*
-		 * Can't change realtime flag if any extents are allocated.
-		 */
-		if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
-		    (mask & XFS_AT_XFLAGS) &&
-		    (XFS_IS_REALTIME_INODE(ip)) !=
-		    (vap->va_xflags & XFS_XFLAG_REALTIME)) {
-			code = XFS_ERROR(EINVAL);	/* EFBIG? */
-			goto error_return;
-		}
-		/*
-		 * Extent size must be a multiple of the appropriate block
-		 * size, if set at all.
-		 */
-		if ((mask & XFS_AT_EXTSIZE) && vap->va_extsize != 0) {
-			xfs_extlen_t	size;
-
-			if (XFS_IS_REALTIME_INODE(ip) ||
-			    ((mask & XFS_AT_XFLAGS) &&
-			    (vap->va_xflags & XFS_XFLAG_REALTIME))) {
-				size = mp->m_sb.sb_rextsize <<
-				       mp->m_sb.sb_blocklog;
-			} else {
-				size = mp->m_sb.sb_blocksize;
-			}
-			if (vap->va_extsize % size) {
-				code = XFS_ERROR(EINVAL);
-				goto error_return;
-			}
-		}
-		/*
-		 * If realtime flag is set then must have realtime data.
-		 */
-		if ((mask & XFS_AT_XFLAGS) &&
-		    (vap->va_xflags & XFS_XFLAG_REALTIME)) {
-			if ((mp->m_sb.sb_rblocks == 0) ||
-			    (mp->m_sb.sb_rextsize == 0) ||
-			    (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
-				code = XFS_ERROR(EINVAL);
-				goto error_return;
-			}
-		}
-
-		/*
-		 * Can't modify an immutable/append-only file unless
-		 * we have appropriate permission.
-		 */
-		if ((mask & XFS_AT_XFLAGS) &&
-		    (ip->i_d.di_flags &
-				(XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
-		     (vap->va_xflags &
-				(XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
-		    !capable(CAP_LINUX_IMMUTABLE)) {
-			code = XFS_ERROR(EPERM);
-			goto error_return;
-		}
-	}
-
 	/*
 	 * Now we can make the changes.  Before we join the inode
 	 * to the transaction, if XFS_AT_SIZE is set then take care of
@@ -568,7 +483,7 @@ xfs_setattr(
 	 * and can change the group id only to a group of which he
 	 * or she is a member.
 	 */
-	if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) {
+	if (mask & (XFS_AT_UID|XFS_AT_GID)) {
 		/*
 		 * CAP_FSETID overrides the following restrictions:
 		 *
@@ -603,23 +518,6 @@ xfs_setattr(
 			}
 			ip->i_d.di_gid = gid;
 		}
-		if (iprojid != projid) {
-			if (XFS_IS_PQUOTA_ON(mp)) {
-				ASSERT(!XFS_IS_GQUOTA_ON(mp));
-				ASSERT(mask & XFS_AT_PROJID);
-				ASSERT(gdqp);
-				olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
-							&ip->i_gdquot, gdqp);
-			}
-			ip->i_d.di_projid = projid;
-			/*
-			 * We may have to rev the inode as well as
-			 * the superblock version number since projids didn't
-			 * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
-			 */
-			if (ip->i_d.di_version == XFS_DINODE_VERSION_1)
-				xfs_bump_ino_vers2(tp, ip);
-		}
 
 		xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
 		timeflags |= XFS_ICHGTIME_CHG;
@@ -646,57 +544,6 @@ xfs_setattr(
 			xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
 	}
 
-	/*
-	 * Change XFS-added attributes.
-	 */
-	if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) {
-		if (mask & XFS_AT_EXTSIZE) {
-			/*
-			 * Converting bytes to fs blocks.
-			 */
-			ip->i_d.di_extsize = vap->va_extsize >>
-				mp->m_sb.sb_blocklog;
-		}
-		if (mask & XFS_AT_XFLAGS) {
-			uint	di_flags;
-
-			/* can't set PREALLOC this way, just preserve it */
-			di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
-			if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
-				di_flags |= XFS_DIFLAG_IMMUTABLE;
-			if (vap->va_xflags & XFS_XFLAG_APPEND)
-				di_flags |= XFS_DIFLAG_APPEND;
-			if (vap->va_xflags & XFS_XFLAG_SYNC)
-				di_flags |= XFS_DIFLAG_SYNC;
-			if (vap->va_xflags & XFS_XFLAG_NOATIME)
-				di_flags |= XFS_DIFLAG_NOATIME;
-			if (vap->va_xflags & XFS_XFLAG_NODUMP)
-				di_flags |= XFS_DIFLAG_NODUMP;
-			if (vap->va_xflags & XFS_XFLAG_PROJINHERIT)
-				di_flags |= XFS_DIFLAG_PROJINHERIT;
-			if (vap->va_xflags & XFS_XFLAG_NODEFRAG)
-				di_flags |= XFS_DIFLAG_NODEFRAG;
-			if (vap->va_xflags & XFS_XFLAG_FILESTREAM)
-				di_flags |= XFS_DIFLAG_FILESTREAM;
-			if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
-				if (vap->va_xflags & XFS_XFLAG_RTINHERIT)
-					di_flags |= XFS_DIFLAG_RTINHERIT;
-				if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS)
-					di_flags |= XFS_DIFLAG_NOSYMLINKS;
-				if (vap->va_xflags & XFS_XFLAG_EXTSZINHERIT)
-					di_flags |= XFS_DIFLAG_EXTSZINHERIT;
-			} else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
-				if (vap->va_xflags & XFS_XFLAG_REALTIME)
-					di_flags |= XFS_DIFLAG_REALTIME;
-				if (vap->va_xflags & XFS_XFLAG_EXTSIZE)
-					di_flags |= XFS_DIFLAG_EXTSIZE;
-			}
-			ip->i_d.di_flags = di_flags;
-		}
-		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-		timeflags |= XFS_ICHGTIME_CHG;
-	}
-
 	/*
 	 * Change file inode change time only if XFS_AT_CTIME set
 	 * AND we have been called by a DMI function.
-- 
cgit v1.2.3-70-g09d2


From 0f285c8a1c4cacfd9f2aec077b06e2b537ee57ab Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Fri, 18 Jul 2008 17:13:28 +1000
Subject: [XFS] Now that xfs_setattr is only used for attributes set from
 ->setattr it can be switched to take struct iattr directly and thus simplify
 the implementation greatly. Also rename the ATTR_ flags to XFS_ATTR_ to not
 conflict with the ATTR_ flags used by the VFS.

SGI-PV: 984565

SGI-Modid: xfs-linux-melb:xfs-kern:31678a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/linux-2.6/xfs_ioctl.c |   4 +-
 fs/xfs/linux-2.6/xfs_iops.c  |  56 +++-----------
 fs/xfs/linux-2.6/xfs_vnode.h |  73 ------------------
 fs/xfs/xfs_acl.c             |  18 ++---
 fs/xfs/xfs_dmapi.h           |   2 +-
 fs/xfs/xfs_vnodeops.c        | 172 ++++++++++++++++++-------------------------
 fs/xfs/xfs_vnodeops.h        |   8 +-
 7 files changed, 102 insertions(+), 231 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index d2bbb0532ef..d1b0da6bcdc 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -688,9 +688,9 @@ xfs_ioc_space(
 		return -XFS_ERROR(EFAULT);
 
 	if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
-		attr_flags |= ATTR_NONBLOCK;
+		attr_flags |= XFS_ATTR_NONBLOCK;
 	if (ioflags & IO_INVIS)
-		attr_flags |= ATTR_DMI;
+		attr_flags |= XFS_ATTR_DMI;
 
 	error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos,
 					      NULL, attr_flags);
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 7b42569968f..669bbdc2085 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -648,54 +648,20 @@ xfs_vn_getattr(
 STATIC int
 xfs_vn_setattr(
 	struct dentry	*dentry,
-	struct iattr	*attr)
+	struct iattr	*iattr)
 {
 	struct inode	*inode = dentry->d_inode;
-	unsigned int	ia_valid = attr->ia_valid;
-	bhv_vattr_t	vattr = { 0 };
-	int		flags = 0;
 	int		error;
 
-	if (ia_valid & ATTR_UID) {
-		vattr.va_mask |= XFS_AT_UID;
-		vattr.va_uid = attr->ia_uid;
-	}
-	if (ia_valid & ATTR_GID) {
-		vattr.va_mask |= XFS_AT_GID;
-		vattr.va_gid = attr->ia_gid;
-	}
-	if (ia_valid & ATTR_SIZE) {
-		vattr.va_mask |= XFS_AT_SIZE;
-		vattr.va_size = attr->ia_size;
-	}
-	if (ia_valid & ATTR_ATIME) {
-		vattr.va_mask |= XFS_AT_ATIME;
-		vattr.va_atime = attr->ia_atime;
-		inode->i_atime = attr->ia_atime;
-	}
-	if (ia_valid & ATTR_MTIME) {
-		vattr.va_mask |= XFS_AT_MTIME;
-		vattr.va_mtime = attr->ia_mtime;
-	}
-	if (ia_valid & ATTR_CTIME) {
-		vattr.va_mask |= XFS_AT_CTIME;
-		vattr.va_ctime = attr->ia_ctime;
-	}
-	if (ia_valid & ATTR_MODE) {
-		vattr.va_mask |= XFS_AT_MODE;
-		vattr.va_mode = attr->ia_mode;
+	if (iattr->ia_valid & ATTR_ATIME)
+		inode->i_atime = iattr->ia_atime;
+
+	if (iattr->ia_valid & ATTR_MODE) {
 		if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
 			inode->i_mode &= ~S_ISGID;
 	}
 
-	if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET))
-		flags |= ATTR_UTIME;
-#ifdef ATTR_NO_BLOCK
-	if ((ia_valid & ATTR_NO_BLOCK))
-		flags |= ATTR_NONBLOCK;
-#endif
-
-	error = xfs_setattr(XFS_I(inode), &vattr, flags, NULL);
+	error = xfs_setattr(XFS_I(inode), iattr, 0, NULL);
 	if (likely(!error))
 		vn_revalidate(vn_from_inode(inode));
 	return -error;
@@ -739,18 +705,18 @@ xfs_vn_fallocate(
 
 	xfs_ilock(ip, XFS_IOLOCK_EXCL);
 	error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf,
-						0, NULL, ATTR_NOLOCK);
+				      0, NULL, XFS_ATTR_NOLOCK);
 	if (!error && !(mode & FALLOC_FL_KEEP_SIZE) &&
 	    offset + len > i_size_read(inode))
 		new_size = offset + len;
 
 	/* Change file size if needed */
 	if (new_size) {
-		bhv_vattr_t	va;
+		struct iattr iattr;
 
-		va.va_mask = XFS_AT_SIZE;
-		va.va_size = new_size;
-		error = xfs_setattr(ip, &va, ATTR_NOLOCK, NULL);
+		iattr.ia_valid = ATTR_SIZE;
+		iattr.ia_size = new_size;
+		error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK, NULL);
 	}
 
 	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 7797c9cdb59..96e4a7b5391 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -19,7 +19,6 @@
 #define __XFS_VNODE_H__
 
 struct file;
-struct bhv_vattr;
 struct xfs_iomap;
 struct attrlist_cursor_kern;
 
@@ -66,69 +65,6 @@ static inline struct inode *vn_to_inode(bhv_vnode_t *vnode)
 					   Prevent VM access to the pages until
 					   the operation completes. */
 
-/*
- * Vnode attributes.  va_mask indicates those attributes the caller
- * wants to set or extract.
- */
-typedef struct bhv_vattr {
-	int		va_mask;	/* bit-mask of attributes present */
-	mode_t		va_mode;	/* file access mode and type */
-	xfs_nlink_t	va_nlink;	/* number of references to file */
-	uid_t		va_uid;		/* owner user id */
-	gid_t		va_gid;		/* owner group id */
-	xfs_ino_t	va_nodeid;	/* file id */
-	xfs_off_t	va_size;	/* file size in bytes */
-	u_long		va_blocksize;	/* blocksize preferred for i/o */
-	struct timespec	va_atime;	/* time of last access */
-	struct timespec	va_mtime;	/* time of last modification */
-	struct timespec	va_ctime;	/* time file changed */
-	u_int		va_gen;		/* generation number of file */
-	xfs_dev_t	va_rdev;	/* device the special file represents */
-	__int64_t	va_nblocks;	/* number of blocks allocated */
-	u_long		va_xflags;	/* random extended file flags */
-	u_long		va_extsize;	/* file extent size */
-	u_long		va_nextents;	/* number of extents in file */
-	u_long		va_anextents;	/* number of attr extents in file */
-	prid_t		va_projid;	/* project id */
-} bhv_vattr_t;
-
-/*
- * setattr or getattr attributes
- */
-#define XFS_AT_TYPE		0x00000001
-#define XFS_AT_MODE		0x00000002
-#define XFS_AT_UID		0x00000004
-#define XFS_AT_GID		0x00000008
-#define XFS_AT_FSID		0x00000010
-#define XFS_AT_NODEID		0x00000020
-#define XFS_AT_NLINK		0x00000040
-#define XFS_AT_SIZE		0x00000080
-#define XFS_AT_ATIME		0x00000100
-#define XFS_AT_MTIME		0x00000200
-#define XFS_AT_CTIME		0x00000400
-#define XFS_AT_RDEV		0x00000800
-#define XFS_AT_BLKSIZE		0x00001000
-#define XFS_AT_NBLOCKS		0x00002000
-#define XFS_AT_VCODE		0x00004000
-#define XFS_AT_MAC		0x00008000
-#define XFS_AT_UPDATIME		0x00010000
-#define XFS_AT_UPDMTIME		0x00020000
-#define XFS_AT_UPDCTIME		0x00040000
-#define XFS_AT_ACL		0x00080000
-#define XFS_AT_CAP		0x00100000
-#define XFS_AT_INF		0x00200000
-#define XFS_AT_NEXTENTS		0x01000000
-#define XFS_AT_ANEXTENTS	0x02000000
-#define XFS_AT_SIZE_NOPERM	0x08000000
-#define XFS_AT_GENCOUNT		0x10000000
-
-#define XFS_AT_TIMES	(XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME)
-
-#define XFS_AT_UPDTIMES	(XFS_AT_UPDATIME|XFS_AT_UPDMTIME|XFS_AT_UPDCTIME)
-
-#define XFS_AT_NOSET	(XFS_AT_NLINK|XFS_AT_RDEV|XFS_AT_FSID|XFS_AT_NODEID|\
-		XFS_AT_TYPE|XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|\
-		XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_GENCOUNT)
 
 extern void	vn_init(void);
 extern int	vn_revalidate(bhv_vnode_t *);
@@ -204,15 +140,6 @@ static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt)
 #define VN_DIRTY(vp)	mapping_tagged(vn_to_inode(vp)->i_mapping, \
 					PAGECACHE_TAG_DIRTY)
 
-/*
- * Flags to vop_setattr/getattr.
- */
-#define	ATTR_UTIME	0x01	/* non-default utime(2) request */
-#define	ATTR_DMI	0x08	/* invocation from a DMI function */
-#define	ATTR_LAZY	0x80	/* set/get attributes lazily */
-#define	ATTR_NONBLOCK	0x100	/* return EAGAIN if operation would block */
-#define ATTR_NOLOCK	0x200	/* Don't grab any conflicting locks */
-#define ATTR_NOSIZETOK	0x400	/* Don't get the SIZE token */
 
 /*
  * Tracking vnode activity.
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 93057af2fe3..3e4648ad9cf 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -719,7 +719,7 @@ xfs_acl_setmode(
 	xfs_acl_t	*acl,
 	int		*basicperms)
 {
-	bhv_vattr_t	va;
+	struct iattr	iattr;
 	xfs_acl_entry_t	*ap;
 	xfs_acl_entry_t	*gap = NULL;
 	int		i, nomask = 1;
@@ -733,25 +733,25 @@ xfs_acl_setmode(
 	 * Copy the u::, g::, o::, and m:: bits from the ACL into the
 	 * mode.  The m:: bits take precedence over the g:: bits.
 	 */
-	va.va_mask = XFS_AT_MODE;
-	va.va_mode = xfs_vtoi(vp)->i_d.di_mode;
-	va.va_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO);
+	iattr.ia_valid = ATTR_MODE;
+	iattr.ia_mode = xfs_vtoi(vp)->i_d.di_mode;
+	iattr.ia_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO);
 	ap = acl->acl_entry;
 	for (i = 0; i < acl->acl_cnt; ++i) {
 		switch (ap->ae_tag) {
 		case ACL_USER_OBJ:
-			va.va_mode |= ap->ae_perm << 6;
+			iattr.ia_mode |= ap->ae_perm << 6;
 			break;
 		case ACL_GROUP_OBJ:
 			gap = ap;
 			break;
 		case ACL_MASK:	/* more than just standard modes */
 			nomask = 0;
-			va.va_mode |= ap->ae_perm << 3;
+			iattr.ia_mode |= ap->ae_perm << 3;
 			*basicperms = 0;
 			break;
 		case ACL_OTHER:
-			va.va_mode |= ap->ae_perm;
+			iattr.ia_mode |= ap->ae_perm;
 			break;
 		default:	/* more than just standard modes */
 			*basicperms = 0;
@@ -762,9 +762,9 @@ xfs_acl_setmode(
 
 	/* Set the group bits from ACL_GROUP_OBJ if there's no ACL_MASK */
 	if (gap && nomask)
-		va.va_mode |= gap->ae_perm << 3;
+		iattr.ia_mode |= gap->ae_perm << 3;
 
-	return xfs_setattr(xfs_vtoi(vp), &va, 0, sys_cred);
+	return xfs_setattr(xfs_vtoi(vp), &iattr, 0, sys_cred);
 }
 
 /*
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
index f71784ab6a6..cdc2d3464a1 100644
--- a/fs/xfs/xfs_dmapi.h
+++ b/fs/xfs/xfs_dmapi.h
@@ -166,6 +166,6 @@ typedef enum {
 
 #define FILP_DELAY_FLAG(filp) ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) ? \
 			DM_FLAGS_NDELAY : 0)
-#define AT_DELAY_FLAG(f) ((f&ATTR_NONBLOCK) ? DM_FLAGS_NDELAY : 0)
+#define AT_DELAY_FLAG(f) ((f & XFS_ATTR_NONBLOCK) ? DM_FLAGS_NDELAY : 0)
 
 #endif  /* __XFS_DMAPI_H__ */
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index ed399523b78..b792a121b1a 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -75,19 +75,16 @@ xfs_open(
 	return 0;
 }
 
-/*
- * xfs_setattr
- */
 int
 xfs_setattr(
-	xfs_inode_t		*ip,
-	bhv_vattr_t		*vap,
+	struct xfs_inode	*ip,
+	struct iattr		*iattr,
 	int			flags,
 	cred_t			*credp)
 {
 	xfs_mount_t		*mp = ip->i_mount;
+	int			mask = iattr->ia_valid;
 	xfs_trans_t		*tp;
-	int			mask;
 	int			code;
 	uint			lock_flags;
 	uint			commit_flags=0;
@@ -103,30 +100,9 @@ xfs_setattr(
 	if (mp->m_flags & XFS_MOUNT_RDONLY)
 		return XFS_ERROR(EROFS);
 
-	/*
-	 * Cannot set certain attributes.
-	 */
-	mask = vap->va_mask;
-	if (mask & XFS_AT_NOSET) {
-		return XFS_ERROR(EINVAL);
-	}
-
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
 
-	/*
-	 * Timestamps do not need to be logged and hence do not
-	 * need to be done within a transaction.
-	 */
-	if (mask & XFS_AT_UPDTIMES) {
-		ASSERT((mask & ~XFS_AT_UPDTIMES) == 0);
-		timeflags = ((mask & XFS_AT_UPDATIME) ? XFS_ICHGTIME_ACC : 0) |
-			    ((mask & XFS_AT_UPDCTIME) ? XFS_ICHGTIME_CHG : 0) |
-			    ((mask & XFS_AT_UPDMTIME) ? XFS_ICHGTIME_MOD : 0);
-		xfs_ichgtime(ip, timeflags);
-		return 0;
-	}
-
 	olddquot1 = olddquot2 = NULL;
 	udqp = gdqp = NULL;
 
@@ -138,17 +114,17 @@ xfs_setattr(
 	 * If the IDs do change before we take the ilock, we're covered
 	 * because the i_*dquot fields will get updated anyway.
 	 */
-	if (XFS_IS_QUOTA_ON(mp) && (mask & (XFS_AT_UID|XFS_AT_GID))) {
+	if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
 		uint	qflags = 0;
 
-		if ((mask & XFS_AT_UID) && XFS_IS_UQUOTA_ON(mp)) {
-			uid = vap->va_uid;
+		if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
+			uid = iattr->ia_uid;
 			qflags |= XFS_QMOPT_UQUOTA;
 		} else {
 			uid = ip->i_d.di_uid;
 		}
-		if ((mask & XFS_AT_GID) && XFS_IS_GQUOTA_ON(mp)) {
-			gid = vap->va_gid;
+		if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
+			gid = iattr->ia_gid;
 			qflags |= XFS_QMOPT_GQUOTA;
 		}  else {
 			gid = ip->i_d.di_gid;
@@ -173,10 +149,10 @@ xfs_setattr(
 	 */
 	tp = NULL;
 	lock_flags = XFS_ILOCK_EXCL;
-	if (flags & ATTR_NOLOCK)
+	if (flags & XFS_ATTR_NOLOCK)
 		need_iolock = 0;
-	if (!(mask & XFS_AT_SIZE)) {
-		if ((mask != (XFS_AT_CTIME|XFS_AT_ATIME|XFS_AT_MTIME)) ||
+	if (!(mask & ATTR_SIZE)) {
+		if ((mask != (ATTR_CTIME|ATTR_ATIME|ATTR_MTIME)) ||
 		    (mp->m_flags & XFS_MOUNT_WSYNC)) {
 			tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
 			commit_flags = 0;
@@ -189,10 +165,10 @@ xfs_setattr(
 		}
 	} else {
 		if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) &&
-		    !(flags & ATTR_DMI)) {
+		    !(flags & XFS_ATTR_DMI)) {
 			int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR;
 			code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, ip,
-				vap->va_size, 0, dmflags, NULL);
+				iattr->ia_size, 0, dmflags, NULL);
 			if (code) {
 				lock_flags = 0;
 				goto error_return;
@@ -212,7 +188,7 @@ xfs_setattr(
 	 * Only the owner or users with CAP_FOWNER
 	 * capability may do these things.
 	 */
-	if (mask & (XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID)) {
+	if (mask & (ATTR_MODE|ATTR_UID|ATTR_GID)) {
 		/*
 		 * CAP_FOWNER overrides the following restrictions:
 		 *
@@ -236,21 +212,21 @@ xfs_setattr(
 		 * IDs of the calling process shall match the group owner of
 		 * the file when setting the set-group-ID bit on that file
 		 */
-		if (mask & XFS_AT_MODE) {
+		if (mask & ATTR_MODE) {
 			mode_t m = 0;
 
-			if ((vap->va_mode & S_ISUID) && !file_owner)
+			if ((iattr->ia_mode & S_ISUID) && !file_owner)
 				m |= S_ISUID;
-			if ((vap->va_mode & S_ISGID) &&
+			if ((iattr->ia_mode & S_ISGID) &&
 			    !in_group_p((gid_t)ip->i_d.di_gid))
 				m |= S_ISGID;
 #if 0
 			/* Linux allows this, Irix doesn't. */
-			if ((vap->va_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode))
+			if ((iattr->ia_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode))
 				m |= S_ISVTX;
 #endif
 			if (m && !capable(CAP_FSETID))
-				vap->va_mode &= ~m;
+				iattr->ia_mode &= ~m;
 		}
 	}
 
@@ -261,7 +237,7 @@ xfs_setattr(
 	 * and can change the group id only to a group of which he
 	 * or she is a member.
 	 */
-	if (mask & (XFS_AT_UID|XFS_AT_GID)) {
+	if (mask & (ATTR_UID|ATTR_GID)) {
 		/*
 		 * These IDs could have changed since we last looked at them.
 		 * But, we're assured that if the ownership did change
@@ -270,8 +246,8 @@ xfs_setattr(
 		 */
 		iuid = ip->i_d.di_uid;
 		igid = ip->i_d.di_gid;
-		gid = (mask & XFS_AT_GID) ? vap->va_gid : igid;
-		uid = (mask & XFS_AT_UID) ? vap->va_uid : iuid;
+		gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
+		uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
 
 		/*
 		 * CAP_CHOWN overrides the following restrictions:
@@ -308,13 +284,13 @@ xfs_setattr(
 	/*
 	 * Truncate file.  Must have write permission and not be a directory.
 	 */
-	if (mask & XFS_AT_SIZE) {
+	if (mask & ATTR_SIZE) {
 		/* Short circuit the truncate case for zero length files */
-		if ((vap->va_size == 0) &&
-		   (ip->i_size == 0) && (ip->i_d.di_nextents == 0)) {
+		if (iattr->ia_size == 0 &&
+		    ip->i_size == 0 && ip->i_d.di_nextents == 0) {
 			xfs_iunlock(ip, XFS_ILOCK_EXCL);
 			lock_flags &= ~XFS_ILOCK_EXCL;
-			if (mask & XFS_AT_CTIME)
+			if (mask & ATTR_CTIME)
 				xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
 			code = 0;
 			goto error_return;
@@ -337,9 +313,9 @@ xfs_setattr(
 	/*
 	 * Change file access or modified times.
 	 */
-	if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) {
+	if (mask & (ATTR_ATIME|ATTR_MTIME)) {
 		if (!file_owner) {
-			if ((flags & ATTR_UTIME) &&
+			if ((mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)) &&
 			    !capable(CAP_FOWNER)) {
 				code = XFS_ERROR(EPERM);
 				goto error_return;
@@ -349,23 +325,22 @@ xfs_setattr(
 
 	/*
 	 * Now we can make the changes.  Before we join the inode
-	 * to the transaction, if XFS_AT_SIZE is set then take care of
+	 * to the transaction, if ATTR_SIZE is set then take care of
 	 * the part of the truncation that must be done without the
 	 * inode lock.  This needs to be done before joining the inode
 	 * to the transaction, because the inode cannot be unlocked
 	 * once it is a part of the transaction.
 	 */
-	if (mask & XFS_AT_SIZE) {
+	if (mask & ATTR_SIZE) {
 		code = 0;
-		if ((vap->va_size > ip->i_size) &&
-		    (flags & ATTR_NOSIZETOK) == 0) {
+		if (iattr->ia_size > ip->i_size) {
 			/*
 			 * Do the first part of growing a file: zero any data
 			 * in the last block that is beyond the old EOF.  We
 			 * need to do this before the inode is joined to the
 			 * transaction to modify the i_size.
 			 */
-			code = xfs_zero_eof(ip, vap->va_size, ip->i_size);
+			code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
 		}
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 
@@ -382,10 +357,10 @@ xfs_setattr(
 		 * not within the range we care about here.
 		 */
 		if (!code &&
-		    (ip->i_size != ip->i_d.di_size) &&
-		    (vap->va_size > ip->i_d.di_size)) {
+		    ip->i_size != ip->i_d.di_size &&
+		    iattr->ia_size > ip->i_d.di_size) {
 			code = xfs_flush_pages(ip,
-					ip->i_d.di_size, vap->va_size,
+					ip->i_d.di_size, iattr->ia_size,
 					XFS_B_ASYNC, FI_NONE);
 		}
 
@@ -393,7 +368,7 @@ xfs_setattr(
 		vn_iowait(ip);
 
 		if (!code)
-			code = xfs_itruncate_data(ip, vap->va_size);
+			code = xfs_itruncate_data(ip, iattr->ia_size);
 		if (code) {
 			ASSERT(tp == NULL);
 			lock_flags &= ~XFS_ILOCK_EXCL;
@@ -422,31 +397,30 @@ xfs_setattr(
 	/*
 	 * Truncate file.  Must have write permission and not be a directory.
 	 */
-	if (mask & XFS_AT_SIZE) {
+	if (mask & ATTR_SIZE) {
 		/*
 		 * Only change the c/mtime if we are changing the size
 		 * or we are explicitly asked to change it. This handles
 		 * the semantic difference between truncate() and ftruncate()
 		 * as implemented in the VFS.
 		 */
-		if (vap->va_size != ip->i_size || (mask & XFS_AT_CTIME))
+		if (iattr->ia_size != ip->i_size || (mask & ATTR_CTIME))
 			timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
 
-		if (vap->va_size > ip->i_size) {
-			ip->i_d.di_size = vap->va_size;
-			ip->i_size = vap->va_size;
-			if (!(flags & ATTR_DMI))
+		if (iattr->ia_size > ip->i_size) {
+			ip->i_d.di_size = iattr->ia_size;
+			ip->i_size = iattr->ia_size;
+			if (!(flags & XFS_ATTR_DMI))
 				xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
 			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-		} else if ((vap->va_size <= ip->i_size) ||
-			   ((vap->va_size == 0) && ip->i_d.di_nextents)) {
+		} else if (iattr->ia_size <= ip->i_size ||
+			   (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
 			/*
 			 * signal a sync transaction unless
 			 * we're truncating an already unlinked
 			 * file on a wsync filesystem
 			 */
-			code = xfs_itruncate_finish(&tp, ip,
-					    (xfs_fsize_t)vap->va_size,
+			code = xfs_itruncate_finish(&tp, ip, iattr->ia_size,
 					    XFS_DATA_FORK,
 					    ((ip->i_d.di_nlink != 0 ||
 					      !(mp->m_flags & XFS_MOUNT_WSYNC))
@@ -468,9 +442,9 @@ xfs_setattr(
 	/*
 	 * Change file access modes.
 	 */
-	if (mask & XFS_AT_MODE) {
+	if (mask & ATTR_MODE) {
 		ip->i_d.di_mode &= S_IFMT;
-		ip->i_d.di_mode |= vap->va_mode & ~S_IFMT;
+		ip->i_d.di_mode |= iattr->ia_mode & ~S_IFMT;
 
 		xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
 		timeflags |= XFS_ICHGTIME_CHG;
@@ -483,7 +457,7 @@ xfs_setattr(
 	 * and can change the group id only to a group of which he
 	 * or she is a member.
 	 */
-	if (mask & (XFS_AT_UID|XFS_AT_GID)) {
+	if (mask & (ATTR_UID|ATTR_GID)) {
 		/*
 		 * CAP_FSETID overrides the following restrictions:
 		 *
@@ -501,7 +475,7 @@ xfs_setattr(
 		 */
 		if (iuid != uid) {
 			if (XFS_IS_UQUOTA_ON(mp)) {
-				ASSERT(mask & XFS_AT_UID);
+				ASSERT(mask & ATTR_UID);
 				ASSERT(udqp);
 				olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
 							&ip->i_udquot, udqp);
@@ -511,7 +485,7 @@ xfs_setattr(
 		if (igid != gid) {
 			if (XFS_IS_GQUOTA_ON(mp)) {
 				ASSERT(!XFS_IS_PQUOTA_ON(mp));
-				ASSERT(mask & XFS_AT_GID);
+				ASSERT(mask & ATTR_GID);
 				ASSERT(gdqp);
 				olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
 							&ip->i_gdquot, gdqp);
@@ -527,31 +501,31 @@ xfs_setattr(
 	/*
 	 * Change file access or modified times.
 	 */
-	if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) {
-		if (mask & XFS_AT_ATIME) {
-			ip->i_d.di_atime.t_sec = vap->va_atime.tv_sec;
-			ip->i_d.di_atime.t_nsec = vap->va_atime.tv_nsec;
+	if (mask & (ATTR_ATIME|ATTR_MTIME)) {
+		if (mask & ATTR_ATIME) {
+			ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
+			ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
 			ip->i_update_core = 1;
 			timeflags &= ~XFS_ICHGTIME_ACC;
 		}
-		if (mask & XFS_AT_MTIME) {
-			ip->i_d.di_mtime.t_sec = vap->va_mtime.tv_sec;
-			ip->i_d.di_mtime.t_nsec = vap->va_mtime.tv_nsec;
+		if (mask & ATTR_MTIME) {
+			ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
+			ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
 			timeflags &= ~XFS_ICHGTIME_MOD;
 			timeflags |= XFS_ICHGTIME_CHG;
 		}
-		if (tp && (flags & ATTR_UTIME))
+		if (tp && (mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)))
 			xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
 	}
 
 	/*
-	 * Change file inode change time only if XFS_AT_CTIME set
+	 * Change file inode change time only if ATTR_CTIME set
 	 * AND we have been called by a DMI function.
 	 */
 
-	if ( (flags & ATTR_DMI) && (mask & XFS_AT_CTIME) ) {
-		ip->i_d.di_ctime.t_sec = vap->va_ctime.tv_sec;
-		ip->i_d.di_ctime.t_nsec = vap->va_ctime.tv_nsec;
+	if ((flags & XFS_ATTR_DMI) && (mask & ATTR_CTIME)) {
+		ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
+		ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
 		ip->i_update_core = 1;
 		timeflags &= ~XFS_ICHGTIME_CHG;
 	}
@@ -560,7 +534,7 @@ xfs_setattr(
 	 * Send out timestamp changes that need to be set to the
 	 * current time.  Not done when called by a DMI function.
 	 */
-	if (timeflags && !(flags & ATTR_DMI))
+	if (timeflags && !(flags & XFS_ATTR_DMI))
 		xfs_ichgtime(ip, timeflags);
 
 	XFS_STATS_INC(xs_ig_attrchg);
@@ -598,7 +572,7 @@ xfs_setattr(
 	}
 
 	if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) &&
-	    !(flags & ATTR_DMI)) {
+	    !(flags & XFS_ATTR_DMI)) {
 		(void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL,
 					NULL, DM_RIGHT_NULL, NULL, NULL,
 					0, 0, AT_DELAY_FLAG(flags));
@@ -3113,7 +3087,7 @@ xfs_alloc_file_space(
 
 	/*	Generate a DMAPI event if needed.	*/
 	if (alloc_type != 0 && offset < ip->i_size &&
-			(attr_flags&ATTR_DMI) == 0  &&
+			(attr_flags & XFS_ATTR_DMI) == 0  &&
 			DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) {
 		xfs_off_t           end_dmi_offset;
 
@@ -3227,7 +3201,7 @@ retry:
 		allocatesize_fsb -= allocated_fsb;
 	}
 dmapi_enospc_check:
-	if (error == ENOSPC && (attr_flags & ATTR_DMI) == 0 &&
+	if (error == ENOSPC && (attr_flags & XFS_ATTR_DMI) == 0 &&
 	    DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) {
 		error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE,
 				ip, DM_RIGHT_NULL,
@@ -3374,7 +3348,7 @@ xfs_free_file_space(
 	end_dmi_offset = offset + len;
 	endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset);
 
-	if (offset < ip->i_size && (attr_flags & ATTR_DMI) == 0 &&
+	if (offset < ip->i_size && (attr_flags & XFS_ATTR_DMI) == 0 &&
 	    DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) {
 		if (end_dmi_offset > ip->i_size)
 			end_dmi_offset = ip->i_size;
@@ -3385,7 +3359,7 @@ xfs_free_file_space(
 			return error;
 	}
 
-	if (attr_flags & ATTR_NOLOCK)
+	if (attr_flags & XFS_ATTR_NOLOCK)
 		need_iolock = 0;
 	if (need_iolock) {
 		xfs_ilock(ip, XFS_IOLOCK_EXCL);
@@ -3562,7 +3536,7 @@ xfs_change_file_space(
 	xfs_off_t	startoffset;
 	xfs_off_t	llen;
 	xfs_trans_t	*tp;
-	bhv_vattr_t	va;
+	struct iattr	iattr;
 
 	xfs_itrace_entry(ip);
 
@@ -3636,10 +3610,10 @@ xfs_change_file_space(
 				break;
 		}
 
-		va.va_mask = XFS_AT_SIZE;
-		va.va_size = startoffset;
+		iattr.ia_valid = ATTR_SIZE;
+		iattr.ia_size = startoffset;
 
-		error = xfs_setattr(ip, &va, attr_flags, credp);
+		error = xfs_setattr(ip, &iattr, attr_flags, credp);
 
 		if (error)
 			return error;
@@ -3669,7 +3643,7 @@ xfs_change_file_space(
 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 	xfs_trans_ihold(tp, ip);
 
-	if ((attr_flags & ATTR_DMI) == 0) {
+	if ((attr_flags & XFS_ATTR_DMI) == 0) {
 		ip->i_d.di_mode &= ~S_ISUID;
 
 		/*
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 454fa9a3e52..e932a96bec5 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -2,9 +2,9 @@
 #define _XFS_VNODEOPS_H 1
 
 struct attrlist_cursor_kern;
-struct bhv_vattr;
 struct cred;
 struct file;
+struct iattr;
 struct inode;
 struct iovec;
 struct kiocb;
@@ -15,8 +15,12 @@ struct xfs_iomap;
 
 
 int xfs_open(struct xfs_inode *ip);
-int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags,
+int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags,
 		struct cred *credp);
+#define	XFS_ATTR_DMI		0x01	/* invocation from a DMI function */
+#define	XFS_ATTR_NONBLOCK	0x02	/* return EAGAIN if operation would block */
+#define XFS_ATTR_NOLOCK		0x04	/* Don't grab any conflicting locks */
+
 int xfs_readlink(struct xfs_inode *ip, char *link);
 int xfs_fsync(struct xfs_inode *ip);
 int xfs_release(struct xfs_inode *ip);
-- 
cgit v1.2.3-70-g09d2


From f13fae2d2a9372a5155d20bc9da4c14f02193277 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Mon, 21 Jul 2008 16:16:15 +1000
Subject: [XFS] Remove vn_revalidate calls in xfs.

These days most of the attributes in struct inode are properly kept in
sync by XFS. This patch removes the need for vn_revalidate completely by:

- keeping inode.i_flags uptodate after any flags are updated in

xfs_ioctl_setattr

- keeping i_mode, i_uid and i_gid uptodate in xfs_setattr

SGI-PV: 984566

SGI-Modid: xfs-linux-melb:xfs-kern:31679a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/linux-2.6/xfs_ioctl.c | 29 +++++++++++++++++++++++--
 fs/xfs/linux-2.6/xfs_iops.c  | 16 +-------------
 fs/xfs/linux-2.6/xfs_vnode.c | 50 --------------------------------------------
 fs/xfs/linux-2.6/xfs_vnode.h |  1 -
 fs/xfs/linux-2.6/xfs_xattr.c |  7 ++-----
 fs/xfs/xfs_vnodeops.c        |  9 ++++++++
 6 files changed, 39 insertions(+), 73 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index d1b0da6bcdc..acb978d9d08 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -923,6 +923,30 @@ xfs_set_diflags(
 	ip->i_d.di_flags = di_flags;
 }
 
+STATIC void
+xfs_diflags_to_linux(
+	struct xfs_inode	*ip)
+{
+	struct inode		*inode = XFS_ITOV(ip);
+	unsigned int		xflags = xfs_ip2xflags(ip);
+
+	if (xflags & XFS_XFLAG_IMMUTABLE)
+		inode->i_flags |= S_IMMUTABLE;
+	else
+		inode->i_flags &= ~S_IMMUTABLE;
+	if (xflags & XFS_XFLAG_APPEND)
+		inode->i_flags |= S_APPEND;
+	else
+		inode->i_flags &= ~S_APPEND;
+	if (xflags & XFS_XFLAG_SYNC)
+		inode->i_flags |= S_SYNC;
+	else
+		inode->i_flags &= ~S_SYNC;
+	if (xflags & XFS_XFLAG_NOATIME)
+		inode->i_flags |= S_NOATIME;
+	else
+		inode->i_flags &= ~S_NOATIME;
+}
 
 #define FSX_PROJID	1
 #define FSX_EXTSIZE	2
@@ -1121,8 +1145,10 @@ xfs_ioctl_setattr(
 
 	if (mask & FSX_EXTSIZE)
 		ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog;
-	if (mask & FSX_XFLAGS)
+	if (mask & FSX_XFLAGS) {
 		xfs_set_diflags(ip, fa->fsx_xflags);
+		xfs_diflags_to_linux(ip);
+	}
 
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 	xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
@@ -1160,7 +1186,6 @@ xfs_ioctl_setattr(
 				(mask & FSX_NONBLOCK) ? DM_FLAGS_NDELAY : 0);
 	}
 
-	vn_revalidate(XFS_ITOV(ip));	/* update flags */
 	return 0;
 
  error_return:
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 669bbdc2085..e88f5102808 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -650,21 +650,7 @@ xfs_vn_setattr(
 	struct dentry	*dentry,
 	struct iattr	*iattr)
 {
-	struct inode	*inode = dentry->d_inode;
-	int		error;
-
-	if (iattr->ia_valid & ATTR_ATIME)
-		inode->i_atime = iattr->ia_atime;
-
-	if (iattr->ia_valid & ATTR_MODE) {
-		if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
-			inode->i_mode &= ~S_ISGID;
-	}
-
-	error = xfs_setattr(XFS_I(inode), iattr, 0, NULL);
-	if (likely(!error))
-		vn_revalidate(vn_from_inode(inode));
-	return -error;
+	return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0, NULL);
 }
 
 /*
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index bc7afe00733..25488b6d988 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -82,56 +82,6 @@ vn_ioerror(
 		xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, f, l);
 }
 
-/*
- * Revalidate the Linux inode from the XFS inode.
- * Note: i_size _not_ updated; we must hold the inode
- * semaphore when doing that - callers responsibility.
- */
-int
-vn_revalidate(
-	bhv_vnode_t		*vp)
-{
-	struct inode		*inode = vn_to_inode(vp);
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
-	unsigned long		xflags;
-
-	xfs_itrace_entry(ip);
-
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return -EIO;
-
-	xfs_ilock(ip, XFS_ILOCK_SHARED);
-	inode->i_mode	    = ip->i_d.di_mode;
-	inode->i_uid	    = ip->i_d.di_uid;
-	inode->i_gid	    = ip->i_d.di_gid;
-	inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
-	inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
-	inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
-	inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
-
-	xflags = xfs_ip2xflags(ip);
-	if (xflags & XFS_XFLAG_IMMUTABLE)
-		inode->i_flags |= S_IMMUTABLE;
-	else
-		inode->i_flags &= ~S_IMMUTABLE;
-	if (xflags & XFS_XFLAG_APPEND)
-		inode->i_flags |= S_APPEND;
-	else
-		inode->i_flags &= ~S_APPEND;
-	if (xflags & XFS_XFLAG_SYNC)
-		inode->i_flags |= S_SYNC;
-	else
-		inode->i_flags &= ~S_SYNC;
-	if (xflags & XFS_XFLAG_NOATIME)
-		inode->i_flags |= S_NOATIME;
-	else
-		inode->i_flags &= ~S_NOATIME;
-	xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
-	xfs_iflags_clear(ip, XFS_IMODIFIED);
-	return 0;
-}
 
 /*
  * Add a reference to a referenced vnode.
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 96e4a7b5391..41ca2cec5d3 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -67,7 +67,6 @@ static inline struct inode *vn_to_inode(bhv_vnode_t *vnode)
 
 
 extern void	vn_init(void);
-extern int	vn_revalidate(bhv_vnode_t *);
 
 /*
  * Yeah, these don't take vnode anymore at all, all this should be
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
index b4acb68fc9f..964621fde6e 100644
--- a/fs/xfs/linux-2.6/xfs_xattr.c
+++ b/fs/xfs/linux-2.6/xfs_xattr.c
@@ -64,7 +64,7 @@ static int
 xfs_xattr_system_set(struct inode *inode, const char *name,
 		const void *value, size_t size, int flags)
 {
-	int error, acl;
+	int acl;
 
 	acl = xfs_decode_acl(name);
 	if (acl < 0)
@@ -75,10 +75,7 @@ xfs_xattr_system_set(struct inode *inode, const char *name,
 	if (!value)
 		return xfs_acl_vremove(inode, acl);
 
-	error = xfs_acl_vset(inode, (void *)value, size, acl);
-	if (!error)
-		vn_revalidate(inode);
-	return error;
+	return xfs_acl_vset(inode, (void *)value, size, acl);
 }
 
 static struct xattr_handler xfs_xattr_system_handler = {
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index b792a121b1a..76a1166af82 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -83,6 +83,7 @@ xfs_setattr(
 	cred_t			*credp)
 {
 	xfs_mount_t		*mp = ip->i_mount;
+	struct inode		*inode = XFS_ITOV(ip);
 	int			mask = iattr->ia_valid;
 	xfs_trans_t		*tp;
 	int			code;
@@ -446,6 +447,9 @@ xfs_setattr(
 		ip->i_d.di_mode &= S_IFMT;
 		ip->i_d.di_mode |= iattr->ia_mode & ~S_IFMT;
 
+		inode->i_mode &= S_IFMT;
+		inode->i_mode |= iattr->ia_mode & ~S_IFMT;
+
 		xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
 		timeflags |= XFS_ICHGTIME_CHG;
 	}
@@ -481,6 +485,7 @@ xfs_setattr(
 							&ip->i_udquot, udqp);
 			}
 			ip->i_d.di_uid = uid;
+			inode->i_uid = uid;
 		}
 		if (igid != gid) {
 			if (XFS_IS_GQUOTA_ON(mp)) {
@@ -491,6 +496,7 @@ xfs_setattr(
 							&ip->i_gdquot, gdqp);
 			}
 			ip->i_d.di_gid = gid;
+			inode->i_gid = gid;
 		}
 
 		xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
@@ -503,12 +509,14 @@ xfs_setattr(
 	 */
 	if (mask & (ATTR_ATIME|ATTR_MTIME)) {
 		if (mask & ATTR_ATIME) {
+			inode->i_atime = iattr->ia_atime;
 			ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
 			ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
 			ip->i_update_core = 1;
 			timeflags &= ~XFS_ICHGTIME_ACC;
 		}
 		if (mask & ATTR_MTIME) {
+			inode->i_mtime = iattr->ia_mtime;
 			ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
 			ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
 			timeflags &= ~XFS_ICHGTIME_MOD;
@@ -524,6 +532,7 @@ xfs_setattr(
 	 */
 
 	if ((flags & XFS_ATTR_DMI) && (mask & ATTR_CTIME)) {
+		inode->i_ctime = iattr->ia_ctime;
 		ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
 		ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
 		ip->i_update_core = 1;
-- 
cgit v1.2.3-70-g09d2


From 44051fed5763c4f55eb8a7eeae6ede52bc15f85f Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Tue, 29 Jul 2008 21:20:14 +0000
Subject: [CIFS] oid should also be checked against class in cifs asn
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The oid coming back from asn1_header_decode is a primitive object so
class should be checked to be universal.

Acked-by: Love Hörnquist Åstrand <lha@kth.se>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/asn1.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index 6bb440b257b..669d0640b6d 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -494,7 +494,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 		/*      remember to free obj->oid */
 		rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
 		if (rc) {
-			if ((tag == ASN1_OJI) && (con == ASN1_PRI)) {
+			if ((tag == ASN1_OJI) && (con == ASN1_PRI) &&
+			    (cls == ASN1_UNI)) {
 				rc = asn1_oid_decode(&ctx, end, &oid, &oidlen);
 				if (rc) {
 					rc = compare_oid(oid, oidlen,
-- 
cgit v1.2.3-70-g09d2


From 176803562b541ebf4744e44e6600fb60660255d5 Mon Sep 17 00:00:00 2001
From: Shirish Pargaonkar <shirishp@us.ibm.com>
Date: Tue, 29 Jul 2008 21:26:13 +0000
Subject: [CIFS] cifs send2 not retrying enough in some cases on full socket

There are cases in which, on a full socket which requires retry on
sending data by the app (cifs in this case), that we were not
retrying since we did not reinitialize a counter.

This fixes the retry logic to retry up to 15 seconds on stuck
sockets.

Signed-off-by: Shirish Pargaonkar <shirishp@us.ibm.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/transport.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 000ac509c98..e286db9f5ee 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -265,6 +265,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
 	cFYI(1, ("Sending smb:  total_len %d", total_len));
 	dump_smb(smb_buffer, len);
 
+	i = 0;
 	while (total_len) {
 		rc = kernel_sendmsg(ssocket, &smb_msg, &iov[first_vec],
 				    n_vec - first_vec, total_len);
-- 
cgit v1.2.3-70-g09d2


From 2f0e58ac3ad0bb2ec0924dc11f8e55d01f44ca90 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 31 Jul 2008 21:30:11 +0000
Subject: [CIFS] remove level of indentation from decode_negTokenInit

Most of this function takes place inside of an unnecessary "else"
clause. The other 2 cases both return 0, so we can remove some
indentation here.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/asn1.c   | 259 ++++++++++++++++++++++++++-----------------------------
 fs/cifs/cifsfs.h |   2 +-
 2 files changed, 125 insertions(+), 136 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index 669d0640b6d..5fabd2caf93 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -483,6 +483,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 
 	asn1_open(&ctx, security_blob, length);
 
+	/* GSSAPI header */
 	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
 		cFYI(1, ("Error decoding negTokenInit header"));
 		return 0;
@@ -490,154 +491,142 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 		   || (tag != ASN1_EOC)) {
 		cFYI(1, ("cls = %d con = %d tag = %d", cls, con, tag));
 		return 0;
-	} else {
-		/*      remember to free obj->oid */
-		rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
-		if (rc) {
-			if ((tag == ASN1_OJI) && (con == ASN1_PRI) &&
-			    (cls == ASN1_UNI)) {
-				rc = asn1_oid_decode(&ctx, end, &oid, &oidlen);
-				if (rc) {
-					rc = compare_oid(oid, oidlen,
-							 SPNEGO_OID,
-							 SPNEGO_OID_LEN);
-					kfree(oid);
-				}
-			} else
-				rc = 0;
-		}
+	}
 
-		if (!rc) {
-			cFYI(1, ("Error decoding negTokenInit header"));
-			return 0;
-		}
+	/* Check for SPNEGO OID -- remember to free obj->oid */
+	rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
+	if (rc) {
+		if ((tag == ASN1_OJI) && (con == ASN1_PRI) &&
+		    (cls == ASN1_UNI)) {
+			rc = asn1_oid_decode(&ctx, end, &oid, &oidlen);
+			if (rc) {
+				rc = compare_oid(oid, oidlen, SPNEGO_OID,
+						 SPNEGO_OID_LEN);
+				kfree(oid);
+			}
+		} else
+			rc = 0;
+	}
 
-		if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
-			cFYI(1, ("Error decoding negTokenInit"));
-			return 0;
-		} else if ((cls != ASN1_CTX) || (con != ASN1_CON)
-			   || (tag != ASN1_EOC)) {
-			cFYI(1,
-			     ("cls = %d con = %d tag = %d end = %p (%d) exit 0",
-			      cls, con, tag, end, *end));
-			return 0;
-		}
+	/* SPNEGO OID not present or garbled -- bail out */
+	if (!rc) {
+		cFYI(1, ("Error decoding negTokenInit header"));
+		return 0;
+	}
 
-		if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
-			cFYI(1, ("Error decoding negTokenInit"));
-			return 0;
-		} else if ((cls != ASN1_UNI) || (con != ASN1_CON)
-			   || (tag != ASN1_SEQ)) {
-			cFYI(1,
-			     ("cls = %d con = %d tag = %d end = %p (%d) exit 1",
-			      cls, con, tag, end, *end));
-			return 0;
-		}
+	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+		cFYI(1, ("Error decoding negTokenInit"));
+		return 0;
+	} else if ((cls != ASN1_CTX) || (con != ASN1_CON)
+		   || (tag != ASN1_EOC)) {
+		cFYI(1,
+		     ("cls = %d con = %d tag = %d end = %p (%d) exit 0",
+		      cls, con, tag, end, *end));
+		return 0;
+	}
 
-		if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
-			cFYI(1, ("Error decoding 2nd part of negTokenInit"));
-			return 0;
-		} else if ((cls != ASN1_CTX) || (con != ASN1_CON)
-			   || (tag != ASN1_EOC)) {
-			cFYI(1,
-			     ("cls = %d con = %d tag = %d end = %p (%d) exit 0",
-			      cls, con, tag, end, *end));
-			return 0;
-		}
+	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+		cFYI(1, ("Error decoding negTokenInit"));
+		return 0;
+	} else if ((cls != ASN1_UNI) || (con != ASN1_CON)
+		   || (tag != ASN1_SEQ)) {
+		cFYI(1,
+		     ("cls = %d con = %d tag = %d end = %p (%d) exit 1",
+		      cls, con, tag, end, *end));
+		return 0;
+	}
 
-		if (asn1_header_decode
-		    (&ctx, &sequence_end, &cls, &con, &tag) == 0) {
-			cFYI(1, ("Error decoding 2nd part of negTokenInit"));
-			return 0;
-		} else if ((cls != ASN1_UNI) || (con != ASN1_CON)
-			   || (tag != ASN1_SEQ)) {
-			cFYI(1,
-			     ("cls = %d con = %d tag = %d end = %p (%d) exit 1",
-			      cls, con, tag, end, *end));
-			return 0;
-		}
+	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+		cFYI(1, ("Error decoding 2nd part of negTokenInit"));
+		return 0;
+	} else if ((cls != ASN1_CTX) || (con != ASN1_CON)
+		   || (tag != ASN1_EOC)) {
+		cFYI(1,
+		     ("cls = %d con = %d tag = %d end = %p (%d) exit 0",
+		      cls, con, tag, end, *end));
+		return 0;
+	}
 
-		while (!asn1_eoc_decode(&ctx, sequence_end)) {
-			rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
-			if (!rc) {
-				cFYI(1,
-				     ("Error decoding negTokenInit hdr exit2"));
-				return 0;
-			}
-			if ((tag == ASN1_OJI) && (con == ASN1_PRI)) {
-				if (asn1_oid_decode(&ctx, end, &oid, &oidlen)) {
-
-					cFYI(1,
-					  ("OID len = %d oid = 0x%lx 0x%lx "
-					   "0x%lx 0x%lx",
-					   oidlen, *oid, *(oid + 1),
-					   *(oid + 2), *(oid + 3)));
-
-					if (compare_oid(oid, oidlen,
-							MSKRB5_OID,
-							MSKRB5_OID_LEN))
-						use_kerberos = true;
-					else if (compare_oid(oid, oidlen,
-							     KRB5_OID,
-							     KRB5_OID_LEN))
-						use_kerberos = true;
-					else if (compare_oid(oid, oidlen,
-							     NTLMSSP_OID,
-							     NTLMSSP_OID_LEN))
-						use_ntlmssp = true;
-
-					kfree(oid);
-				}
-			} else {
-				cFYI(1, ("Should be an oid what is going on?"));
-			}
-		}
+	if (asn1_header_decode
+	    (&ctx, &sequence_end, &cls, &con, &tag) == 0) {
+		cFYI(1, ("Error decoding 2nd part of negTokenInit"));
+		return 0;
+	} else if ((cls != ASN1_UNI) || (con != ASN1_CON)
+		   || (tag != ASN1_SEQ)) {
+		cFYI(1,
+		     ("cls = %d con = %d tag = %d end = %p (%d) exit 1",
+		      cls, con, tag, end, *end));
+		return 0;
+	}
 
-		if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
-			cFYI(1,
-			     ("Error decoding last part negTokenInit exit3"));
-			return 0;
-		} else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
-			/* tag = 3 indicating mechListMIC */
+	while (!asn1_eoc_decode(&ctx, sequence_end)) {
+		rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
+		if (!rc) {
 			cFYI(1,
-			     ("Exit 4 cls = %d con = %d tag = %d end = %p (%d)",
-			      cls, con, tag, end, *end));
+			     ("Error decoding negTokenInit hdr exit2"));
 			return 0;
 		}
-		if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
-			cFYI(1,
-			     ("Error decoding last part negTokenInit exit5"));
-			return 0;
-		} else if ((cls != ASN1_UNI) || (con != ASN1_CON)
-			   || (tag != ASN1_SEQ)) {
-			cFYI(1, ("cls = %d con = %d tag = %d end = %p (%d)",
-				cls, con, tag, end, *end));
+		if ((tag == ASN1_OJI) && (con == ASN1_PRI)) {
+			if (asn1_oid_decode(&ctx, end, &oid, &oidlen)) {
+
+				cFYI(1, ("OID len = %d oid = 0x%lx 0x%lx "
+					 "0x%lx 0x%lx", oidlen, *oid,
+					 *(oid + 1), *(oid + 2), *(oid + 3)));
+
+				if (compare_oid(oid, oidlen, MSKRB5_OID,
+						MSKRB5_OID_LEN))
+					use_kerberos = true;
+				else if (compare_oid(oid, oidlen, KRB5_OID,
+						     KRB5_OID_LEN))
+					use_kerberos = true;
+				else if (compare_oid(oid, oidlen, NTLMSSP_OID,
+						     NTLMSSP_OID_LEN))
+					use_ntlmssp = true;
+
+				kfree(oid);
+			}
+		} else {
+			cFYI(1, ("Should be an oid what is going on?"));
 		}
+	}
 
-		if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
-			cFYI(1,
-			     ("Error decoding last part negTokenInit exit 7"));
-			return 0;
-		} else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
-			cFYI(1,
-			     ("Exit 8 cls = %d con = %d tag = %d end = %p (%d)",
-			      cls, con, tag, end, *end));
-			return 0;
-		}
-		if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
-			cFYI(1,
-			     ("Error decoding last part negTokenInit exit9"));
-			return 0;
-		} else if ((cls != ASN1_UNI) || (con != ASN1_PRI)
-			   || (tag != ASN1_GENSTR)) {
-			cFYI(1,
-			     ("Exit10 cls = %d con = %d tag = %d end = %p (%d)",
-			      cls, con, tag, end, *end));
-			return 0;
-		}
-		cFYI(1, ("Need to call asn1_octets_decode() function for %s",
-			 ctx.pointer));	/* is this UTF-8 or ASCII? */
+	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+		cFYI(1, ("Error decoding last part negTokenInit exit3"));
+		return 0;
+	} else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
+		/* tag = 3 indicating mechListMIC */
+		cFYI(1, ("Exit 4 cls = %d con = %d tag = %d end = %p (%d)",
+			 cls, con, tag, end, *end));
+		return 0;
+	}
+	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+		cFYI(1, ("Error decoding last part negTokenInit exit5"));
+		return 0;
+	} else if ((cls != ASN1_UNI) || (con != ASN1_CON)
+		   || (tag != ASN1_SEQ)) {
+		cFYI(1, ("cls = %d con = %d tag = %d end = %p (%d)",
+			cls, con, tag, end, *end));
+	}
+
+	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+		cFYI(1, ("Error decoding last part negTokenInit exit 7"));
+		return 0;
+	} else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
+		cFYI(1, ("Exit 8 cls = %d con = %d tag = %d end = %p (%d)",
+			 cls, con, tag, end, *end));
+		return 0;
+	}
+	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+		cFYI(1, ("Error decoding last part negTokenInit exit9"));
+		return 0;
+	} else if ((cls != ASN1_UNI) || (con != ASN1_PRI)
+		   || (tag != ASN1_GENSTR)) {
+		cFYI(1, ("Exit10 cls = %d con = %d tag = %d end = %p (%d)",
+			 cls, con, tag, end, *end));
+		return 0;
 	}
+	cFYI(1, ("Need to call asn1_octets_decode() function for %s",
+		 ctx.pointer));	/* is this UTF-8 or ASCII? */
 
 	if (use_kerberos)
 		*secType = Kerberos;
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 25a6cbd1552..135c965c413 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -101,5 +101,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 extern const struct export_operations cifs_export_ops;
 #endif /* EXPERIMENTAL */
 
-#define CIFS_VERSION   "1.53"
+#define CIFS_VERSION   "1.54"
 #endif				/* _CIFSFS_H */
-- 
cgit v1.2.3-70-g09d2


From dacdd0e04768da1fd2b24a6ee274c582b40d0c5b Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 17 Jul 2008 16:54:19 -0700
Subject: [PATCH] configfs: Include linux/err.h in linux/configfs.h

We now use PTR_ERR() in the ->make_item() and ->make_group() operations.
Folks including configfs.h need err.h.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/configfs/dir.c        | 2 +-
 include/linux/configfs.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 179589be063..2495f23e33f 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1094,7 +1094,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	kfree(name);
 	if (ret) {
 		/*
-		 * If item == NULL, then link_obj() was never called.
+		 * If ret != 0, then link_obj() was never called.
 		 * There are no extra references to clean up.
 		 */
 		goto out_put;
diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index d62c19ff041..0a5491baf0b 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -40,6 +40,7 @@
 #include <linux/list.h>
 #include <linux/kref.h>
 #include <linux/mutex.h>
+#include <linux/err.h>
 
 #include <asm/atomic.h>
 
-- 
cgit v1.2.3-70-g09d2


From 4768e9b18dc63719209c68920d4ae52dc49b6161 Mon Sep 17 00:00:00 2001
From: Louis Rilling <louis.rilling@kerlabs.com>
Date: Mon, 23 Jun 2008 14:16:17 +0200
Subject: [PATCH] configfs: Fix symlink() to a removing item

The rule for configfs symlinks is that symlinks always point to valid
config_items, and prevent the target from being removed. However,
configfs_symlink() only checks that it can grab a reference on the target item,
without ensuring that it remains alive until the symlink is correctly attached.

This patch makes configfs_symlink() fail whenever the target is being removed,
using the CONFIGFS_USET_DROPPING flag set by configfs_detach_prep() and
protected by configfs_dirent_lock.

This patch introduces a similar (weird?) behavior as with mkdir failures making
rmdir fail: if symlink() races with rmdir() of the parent directory (or its
youngest user-created ancestor if parent is a default group) or rmdir() of the
target directory, and then fails in configfs_create(), this can make the racing
rmdir() fail despite the concerned directory having no user-created entry (resp.
no symlink pointing to it or one of its default groups) in the end.
This behavior is fixed in later patches.

Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/configfs/dir.c     | 14 +++++++-------
 fs/configfs/symlink.c |  6 ++++++
 2 files changed, 13 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 2495f23e33f..cb5ea44846a 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -370,6 +370,9 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex
 	struct configfs_dirent *sd;
 	int ret;
 
+	/* Mark that we're trying to drop the group */
+	parent_sd->s_type |= CONFIGFS_USET_DROPPING;
+
 	ret = -EBUSY;
 	if (!list_empty(&parent_sd->s_links))
 		goto out;
@@ -385,8 +388,6 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex
 					*wait_mutex = &sd->s_dentry->d_inode->i_mutex;
 				return -EAGAIN;
 			}
-			/* Mark that we're trying to drop the group */
-			sd->s_type |= CONFIGFS_USET_DROPPING;
 
 			/*
 			 * Yup, recursive.  If there's a problem, blame
@@ -414,12 +415,11 @@ static void configfs_detach_rollback(struct dentry *dentry)
 	struct configfs_dirent *parent_sd = dentry->d_fsdata;
 	struct configfs_dirent *sd;
 
-	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
-		if (sd->s_type & CONFIGFS_USET_DEFAULT) {
+	parent_sd->s_type &= ~CONFIGFS_USET_DROPPING;
+
+	list_for_each_entry(sd, &parent_sd->s_children, s_sibling)
+		if (sd->s_type & CONFIGFS_USET_DEFAULT)
 			configfs_detach_rollback(sd->s_dentry);
-			sd->s_type &= ~CONFIGFS_USET_DROPPING;
-		}
-	}
 }
 
 static void detach_attrs(struct config_item * item)
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 0004d18c40a..c12801a12c3 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -78,6 +78,12 @@ static int create_link(struct config_item *parent_item,
 	if (sl) {
 		sl->sl_target = config_item_get(item);
 		spin_lock(&configfs_dirent_lock);
+		if (target_sd->s_type & CONFIGFS_USET_DROPPING) {
+			spin_unlock(&configfs_dirent_lock);
+			config_item_put(item);
+			kfree(sl);
+			return -ENOENT;
+		}
 		list_add(&sl->sl_list, &target_sd->s_links);
 		spin_unlock(&configfs_dirent_lock);
 		ret = configfs_create_link(sl, parent_item->ci_dentry,
-- 
cgit v1.2.3-70-g09d2


From 9a73d78cda750f12e25eb811878f2d9dbab1bc6e Mon Sep 17 00:00:00 2001
From: Louis Rilling <louis.rilling@kerlabs.com>
Date: Fri, 20 Jun 2008 14:09:22 +0200
Subject: [PATCH] configfs: Fix failing symlink() making rmdir() fail

On a similar pattern as mkdir() vs rmdir(), a failing symlink() may make rmdir()
fail for the symlink's parent and the symlink's target as well.

failing symlink() making target's rmdir() fail:

	process 1:				process 2:
	symlink("A/S" -> "B")
	  allow_link()
	  create_link()
	    attach to "B" links list
						rmdir("B")
						  detach_prep("B")
						    error because of new link
	    configfs_create_link("A", "S")
	      error (eg -ENOMEM)

failing symlink() making parent's rmdir() fail:

	process 1:				process 2:
	symlink("A/D/S" -> "B")
	  allow_link()
	  create_link()
	    attach to "B" links list
	    configfs_create_link("A/D", "S")
	      make_dirent("A/D", "S")
						rmdir("A")
						  detach_prep("A")
						    detach_prep("A/D")
						      error because of "S"
	      create("S")
	        error (eg -ENOMEM)

We cannot use the same solution as for mkdir() vs rmdir(), since rmdir() on the
target cannot wait on the i_mutex of the new symlink's parent without risking a
deadlock (with other symlink() or sys_rename()). Instead we define a global
mutex protecting all configfs symlinks attachment, so that rmdir() can avoid the
races above.

Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/configfs/configfs_internal.h |  1 +
 fs/configfs/dir.c               | 10 ++++++++++
 fs/configfs/symlink.c           |  5 +++++
 3 files changed, 16 insertions(+)

(limited to 'fs')

diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index da015c12e3e..5f61b26eba9 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -51,6 +51,7 @@ struct configfs_dirent {
 #define CONFIGFS_USET_IN_MKDIR	0x0200
 #define CONFIGFS_NOT_PINNED	(CONFIGFS_ITEM_ATTR)
 
+extern struct mutex configfs_symlink_mutex;
 extern spinlock_t configfs_dirent_lock;
 
 extern struct vfsmount * configfs_mount;
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index cb5ea44846a..4e228c80fe9 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1207,6 +1207,11 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
 		return -EINVAL;
 	}
 
+	/*
+	 * Ensure that no racing symlink() will make detach_prep() fail while
+	 * the new link is temporarily attached
+	 */
+	mutex_lock(&configfs_symlink_mutex);
 	spin_lock(&configfs_dirent_lock);
 	do {
 		struct mutex *wait_mutex;
@@ -1215,6 +1220,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
 		if (ret) {
 			configfs_detach_rollback(dentry);
 			spin_unlock(&configfs_dirent_lock);
+			mutex_unlock(&configfs_symlink_mutex);
 			if (ret != -EAGAIN) {
 				config_item_put(parent_item);
 				return ret;
@@ -1224,10 +1230,12 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
 			mutex_lock(wait_mutex);
 			mutex_unlock(wait_mutex);
 
+			mutex_lock(&configfs_symlink_mutex);
 			spin_lock(&configfs_dirent_lock);
 		}
 	} while (ret == -EAGAIN);
 	spin_unlock(&configfs_dirent_lock);
+	mutex_unlock(&configfs_symlink_mutex);
 
 	/* Get a working ref for the duration of this function */
 	item = configfs_get_config_item(dentry);
@@ -1517,11 +1525,13 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
 	mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex,
 			  I_MUTEX_PARENT);
 	mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
+	mutex_lock(&configfs_symlink_mutex);
 	spin_lock(&configfs_dirent_lock);
 	if (configfs_detach_prep(dentry, NULL)) {
 		printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n");
 	}
 	spin_unlock(&configfs_dirent_lock);
+	mutex_unlock(&configfs_symlink_mutex);
 	configfs_detach_group(&group->cg_item);
 	dentry->d_inode->i_flags |= S_DEAD;
 	mutex_unlock(&dentry->d_inode->i_mutex);
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index c12801a12c3..61a886dbd60 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -31,6 +31,9 @@
 #include <linux/configfs.h>
 #include "configfs_internal.h"
 
+/* Protects attachments of new symlinks */
+DEFINE_MUTEX(configfs_symlink_mutex);
+
 static int item_depth(struct config_item * item)
 {
 	struct config_item * p = item;
@@ -147,7 +150,9 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
 
 	ret = type->ct_item_ops->allow_link(parent_item, target_item);
 	if (!ret) {
+		mutex_lock(&configfs_symlink_mutex);
 		ret = create_link(parent_item, target_item, dentry);
+		mutex_unlock(&configfs_symlink_mutex);
 		if (ret && type->ct_item_ops->drop_link)
 			type->ct_item_ops->drop_link(parent_item,
 						     target_item);
-- 
cgit v1.2.3-70-g09d2


From 2a109f2a4155f168047aa2f5b3a170e279bef89a Mon Sep 17 00:00:00 2001
From: Louis Rilling <louis.rilling@kerlabs.com>
Date: Fri, 4 Jul 2008 16:56:05 +0200
Subject: [PATCH] configfs: Prevent userspace from creating new entries under
 attaching directories

process 1: 					process 2:
configfs_mkdir("A")
  attach_group("A")
    attach_item("A")
      d_instantiate("A")
    populate_groups("A")
      mutex_lock("A")
      attach_group("A/B")
        attach_item("A")
          d_instantiate("A/B")
						mkdir("A/B/C")
						  do_path_lookup("A/B/C", LOOKUP_PARENT)
						    ok
						  lookup_create("A/B/C")
						    mutex_lock("A/B")
						    ok
						  configfs_mkdir("A/B/C")
						    ok
      attach_group("A/C")
        attach_item("A/C")
          d_instantiate("A/C")
        populate_groups("A/C")
          mutex_lock("A/C")
          attach_group("A/C/D")
            attach_item("A/C/D")
              failure
          mutex_unlock("A/C")
          detach_groups("A/C")
            nothing to do
						mkdir("A/C/E")
						  do_path_lookup("A/C/E", LOOKUP_PARENT)
						    ok
						  lookup_create("A/C/E")
						    mutex_lock("A/C")
						    ok
						  configfs_mkdir("A/C/E")
						    ok
        detach_item("A/C")
        d_delete("A/C")
      mutex_unlock("A")
      detach_groups("A")
        mutex_lock("A/B")
        detach_group("A/B")
	  detach_groups("A/B")
	    nothing since no _default_ group
          detach_item("A/B")
        mutex_unlock("A/B")
        d_delete("A/B")
    detach_item("A")
    d_delete("A")

Two bugs:

1/ "A/B/C" and "A/C/E" are created, but never removed while their parent are
removed in the end. The same could happen with symlink() instead of mkdir().

2/ "A" and "A/C" inodes are not locked while detach_item() is called on them,
   which may probably confuse VFS.

This commit fixes 1/, tagging new directories with CONFIGFS_USET_CREATING before
building the inode and instantiating the dentry, and validating the whole
group+default groups hierarchy in a second pass by clearing
CONFIGFS_USET_CREATING.
	mkdir(), symlink(), lookup(), and dir_open() simply return -ENOENT if
called in (or linking to) a directory tagged with CONFIGFS_USET_CREATING. This
does not prevent userspace from calling stat() successfuly on such directories,
but this prevents userspace from adding (children to | symlinking from/to |
read/write attributes of | listing the contents of) not validated items. In
other words, userspace will not interact with the subsystem on a new item until
the new item creation completes correctly.
	It was first proposed to re-use CONFIGFS_USET_IN_MKDIR instead of a new
flag CONFIGFS_USET_CREATING, but this generated conflicts when checking the
target of a new symlink: a valid target directory in the middle of attaching
a new user-created child item could be wrongly detected as being attached.

2/ is fixed by next commit.

Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/configfs/configfs_internal.h |  2 +
 fs/configfs/dir.c               | 91 ++++++++++++++++++++++++++++++++++++++---
 fs/configfs/symlink.c           | 15 +++++++
 3 files changed, 103 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index 5f61b26eba9..762d287123c 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -49,6 +49,7 @@ struct configfs_dirent {
 #define CONFIGFS_USET_DEFAULT	0x0080
 #define CONFIGFS_USET_DROPPING	0x0100
 #define CONFIGFS_USET_IN_MKDIR	0x0200
+#define CONFIGFS_USET_CREATING	0x0400
 #define CONFIGFS_NOT_PINNED	(CONFIGFS_ITEM_ATTR)
 
 extern struct mutex configfs_symlink_mutex;
@@ -67,6 +68,7 @@ extern void configfs_inode_exit(void);
 extern int configfs_create_file(struct config_item *, const struct configfs_attribute *);
 extern int configfs_make_dirent(struct configfs_dirent *,
 				struct dentry *, void *, umode_t, int);
+extern int configfs_dirent_is_ready(struct configfs_dirent *);
 
 extern int configfs_add_file(struct dentry *, const struct configfs_attribute *, int);
 extern void configfs_hash_and_remove(struct dentry * dir, const char * name);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 4e228c80fe9..647499a3479 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -185,7 +185,7 @@ static int create_dir(struct config_item * k, struct dentry * p,
 	error = configfs_dirent_exists(p->d_fsdata, d->d_name.name);
 	if (!error)
 		error = configfs_make_dirent(p->d_fsdata, d, k, mode,
-					     CONFIGFS_DIR);
+					     CONFIGFS_DIR | CONFIGFS_USET_CREATING);
 	if (!error) {
 		error = configfs_create(d, mode, init_dir);
 		if (!error) {
@@ -209,6 +209,9 @@ static int create_dir(struct config_item * k, struct dentry * p,
  *	configfs_create_dir - create a directory for an config_item.
  *	@item:		config_itemwe're creating directory for.
  *	@dentry:	config_item's dentry.
+ *
+ *	Note: user-created entries won't be allowed under this new directory
+ *	until it is validated by configfs_dir_set_ready()
  */
 
 static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
@@ -231,6 +234,44 @@ static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
 	return error;
 }
 
+/*
+ * Allow userspace to create new entries under a new directory created with
+ * configfs_create_dir(), and under all of its chidlren directories recursively.
+ * @sd		configfs_dirent of the new directory to validate
+ *
+ * Caller must hold configfs_dirent_lock.
+ */
+static void configfs_dir_set_ready(struct configfs_dirent *sd)
+{
+	struct configfs_dirent *child_sd;
+
+	sd->s_type &= ~CONFIGFS_USET_CREATING;
+	list_for_each_entry(child_sd, &sd->s_children, s_sibling)
+		if (child_sd->s_type & CONFIGFS_USET_CREATING)
+			configfs_dir_set_ready(child_sd);
+}
+
+/*
+ * Check that a directory does not belong to a directory hierarchy being
+ * attached and not validated yet.
+ * @sd		configfs_dirent of the directory to check
+ *
+ * @return	non-zero iff the directory was validated
+ *
+ * Note: takes configfs_dirent_lock, so the result may change from false to true
+ * in two consecutive calls, but never from true to false.
+ */
+int configfs_dirent_is_ready(struct configfs_dirent *sd)
+{
+	int ret;
+
+	spin_lock(&configfs_dirent_lock);
+	ret = !(sd->s_type & CONFIGFS_USET_CREATING);
+	spin_unlock(&configfs_dirent_lock);
+
+	return ret;
+}
+
 int configfs_create_link(struct configfs_symlink *sl,
 			 struct dentry *parent,
 			 struct dentry *dentry)
@@ -330,7 +371,19 @@ static struct dentry * configfs_lookup(struct inode *dir,
 	struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
 	struct configfs_dirent * sd;
 	int found = 0;
-	int err = 0;
+	int err;
+
+	/*
+	 * Fake invisibility if dir belongs to a group/default groups hierarchy
+	 * being attached
+	 *
+	 * This forbids userspace to read/write attributes of items which may
+	 * not complete their initialization, since the dentries of the
+	 * attributes won't be instantiated.
+	 */
+	err = -ENOENT;
+	if (!configfs_dirent_is_ready(parent_sd))
+		goto out;
 
 	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
 		if (sd->s_type & CONFIGFS_NOT_PINNED) {
@@ -353,6 +406,7 @@ static struct dentry * configfs_lookup(struct inode *dir,
 		return simple_lookup(dir, dentry, nd);
 	}
 
+out:
 	return ERR_PTR(err);
 }
 
@@ -1044,6 +1098,16 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	}
 
 	sd = dentry->d_parent->d_fsdata;
+
+	/*
+	 * Fake invisibility if dir belongs to a group/default groups hierarchy
+	 * being attached
+	 */
+	if (!configfs_dirent_is_ready(sd)) {
+		ret = -ENOENT;
+		goto out;
+	}
+
 	if (!(sd->s_type & CONFIGFS_USET_DIR)) {
 		ret = -EPERM;
 		goto out;
@@ -1142,6 +1206,8 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 
 	spin_lock(&configfs_dirent_lock);
 	sd->s_type &= ~CONFIGFS_USET_IN_MKDIR;
+	if (!ret)
+		configfs_dir_set_ready(dentry->d_fsdata);
 	spin_unlock(&configfs_dirent_lock);
 
 out_unlink:
@@ -1322,13 +1388,24 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
 {
 	struct dentry * dentry = file->f_path.dentry;
 	struct configfs_dirent * parent_sd = dentry->d_fsdata;
+	int err;
 
 	mutex_lock(&dentry->d_inode->i_mutex);
-	file->private_data = configfs_new_dirent(parent_sd, NULL);
+	/*
+	 * Fake invisibility if dir belongs to a group/default groups hierarchy
+	 * being attached
+	 */
+	err = -ENOENT;
+	if (configfs_dirent_is_ready(parent_sd)) {
+		file->private_data = configfs_new_dirent(parent_sd, NULL);
+		if (IS_ERR(file->private_data))
+			err = PTR_ERR(file->private_data);
+		else
+			err = 0;
+	}
 	mutex_unlock(&dentry->d_inode->i_mutex);
 
-	return IS_ERR(file->private_data) ? PTR_ERR(file->private_data) : 0;
-
+	return err;
 }
 
 static int configfs_dir_close(struct inode *inode, struct file *file)
@@ -1499,6 +1576,10 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
 		if (err) {
 			d_delete(dentry);
 			dput(dentry);
+		} else {
+			spin_lock(&configfs_dirent_lock);
+			configfs_dir_set_ready(dentry->d_fsdata);
+			spin_unlock(&configfs_dirent_lock);
 		}
 	}
 
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 61a886dbd60..bf74973b049 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -76,6 +76,9 @@ static int create_link(struct config_item *parent_item,
 	struct configfs_symlink *sl;
 	int ret;
 
+	ret = -ENOENT;
+	if (!configfs_dirent_is_ready(target_sd))
+		goto out;
 	ret = -ENOMEM;
 	sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL);
 	if (sl) {
@@ -100,6 +103,7 @@ static int create_link(struct config_item *parent_item,
 		}
 	}
 
+out:
 	return ret;
 }
 
@@ -129,6 +133,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
 {
 	int ret;
 	struct nameidata nd;
+	struct configfs_dirent *sd;
 	struct config_item *parent_item;
 	struct config_item *target_item;
 	struct config_item_type *type;
@@ -137,9 +142,19 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
 	if (dentry->d_parent == configfs_sb->s_root)
 		goto out;
 
+	sd = dentry->d_parent->d_fsdata;
+	/*
+	 * Fake invisibility if dir belongs to a group/default groups hierarchy
+	 * being attached
+	 */
+	ret = -ENOENT;
+	if (!configfs_dirent_is_ready(sd))
+		goto out;
+
 	parent_item = configfs_get_config_item(dentry->d_parent);
 	type = parent_item->ci_type;
 
+	ret = -EPERM;
 	if (!type || !type->ct_item_ops ||
 	    !type->ct_item_ops->allow_link)
 		goto out_put;
-- 
cgit v1.2.3-70-g09d2


From 2e2ce171c3ba6f2753fb1fd2706b63683394da2d Mon Sep 17 00:00:00 2001
From: Louis Rilling <louis.rilling@kerlabs.com>
Date: Fri, 4 Jul 2008 16:56:06 +0200
Subject: [PATCH] configfs: Lock new directory inodes before removing on
 cleanup after failure

Once a new configfs directory is created by configfs_attach_item() or
configfs_attach_group(), a failure in the remaining initialization steps leads
to removing a directory which inode the VFS may have already accessed.

This commit adds the necessary inode locking to safely remove configfs
directories while cleaning up after a failure. As an advantage, the locking
rules of populate_groups() and detach_groups() become the same: the caller must
have the group's inode mutex locked.

Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/configfs/dir.c | 48 +++++++++++++++++++++++++++++-------------------
 1 file changed, 29 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 647499a3479..4d11479cf2c 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -324,6 +324,8 @@ static void remove_dir(struct dentry * d)
  * The only thing special about this is that we remove any files in
  * the directory before we remove the directory, and we've inlined
  * what used to be configfs_rmdir() below, instead of calling separately.
+ *
+ * Caller holds the mutex of the item's inode
  */
 
 static void configfs_remove_dir(struct config_item * item)
@@ -612,36 +614,21 @@ static int create_default_group(struct config_group *parent_group,
 static int populate_groups(struct config_group *group)
 {
 	struct config_group *new_group;
-	struct dentry *dentry = group->cg_item.ci_dentry;
 	int ret = 0;
 	int i;
 
 	if (group->default_groups) {
-		/*
-		 * FYI, we're faking mkdir here
-		 * I'm not sure we need this semaphore, as we're called
-		 * from our parent's mkdir.  That holds our parent's
-		 * i_mutex, so afaik lookup cannot continue through our
-		 * parent to find us, let alone mess with our tree.
-		 * That said, taking our i_mutex is closer to mkdir
-		 * emulation, and shouldn't hurt.
-		 */
-		mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
-
 		for (i = 0; group->default_groups[i]; i++) {
 			new_group = group->default_groups[i];
 
 			ret = create_default_group(group, new_group);
-			if (ret)
+			if (ret) {
+				detach_groups(group);
 				break;
+			}
 		}
-
-		mutex_unlock(&dentry->d_inode->i_mutex);
 	}
 
-	if (ret)
-		detach_groups(group);
-
 	return ret;
 }
 
@@ -756,7 +743,15 @@ static int configfs_attach_item(struct config_item *parent_item,
 	if (!ret) {
 		ret = populate_attrs(item);
 		if (ret) {
+			/*
+			 * We are going to remove an inode and its dentry but
+			 * the VFS may already have hit and used them. Thus,
+			 * we must lock them as rmdir() would.
+			 */
+			mutex_lock(&dentry->d_inode->i_mutex);
 			configfs_remove_dir(item);
+			dentry->d_inode->i_flags |= S_DEAD;
+			mutex_unlock(&dentry->d_inode->i_mutex);
 			d_delete(dentry);
 		}
 	}
@@ -764,6 +759,7 @@ static int configfs_attach_item(struct config_item *parent_item,
 	return ret;
 }
 
+/* Caller holds the mutex of the item's inode */
 static void configfs_detach_item(struct config_item *item)
 {
 	detach_attrs(item);
@@ -782,16 +778,30 @@ static int configfs_attach_group(struct config_item *parent_item,
 		sd = dentry->d_fsdata;
 		sd->s_type |= CONFIGFS_USET_DIR;
 
+		/*
+		 * FYI, we're faking mkdir in populate_groups()
+		 * We must lock the group's inode to avoid races with the VFS
+		 * which can already hit the inode and try to add/remove entries
+		 * under it.
+		 *
+		 * We must also lock the inode to remove it safely in case of
+		 * error, as rmdir() would.
+		 */
+		mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
 		ret = populate_groups(to_config_group(item));
 		if (ret) {
 			configfs_detach_item(item);
-			d_delete(dentry);
+			dentry->d_inode->i_flags |= S_DEAD;
 		}
+		mutex_unlock(&dentry->d_inode->i_mutex);
+		if (ret)
+			d_delete(dentry);
 	}
 
 	return ret;
 }
 
+/* Caller holds the mutex of the group's inode */
 static void configfs_detach_group(struct config_item *item)
 {
 	detach_groups(to_config_group(item));
-- 
cgit v1.2.3-70-g09d2


From 99cefda42ac550863b5ae1df9e60322e377decf9 Mon Sep 17 00:00:00 2001
From: Louis Rilling <louis.rilling@kerlabs.com>
Date: Fri, 27 Jun 2008 13:10:25 +0200
Subject: [PATCH] configfs: Fix open directory making rmdir() fail

When checking for user-created elements under an item to be removed by rmdir(),
configfs_detach_prep() counts fake configfs_dirents created by dir_open() as
user-created and fails when finding one. It is however perfectly valid to remove
a directory that is open.

Simply make configfs_detach_prep() skip fake configfs_dirent, like it already
does for attributes, and like detach_groups() does.

Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/configfs/dir.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 4d11479cf2c..a89058b3988 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -435,7 +435,8 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex
 
 	ret = 0;
 	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
-		if (sd->s_type & CONFIGFS_NOT_PINNED)
+		if (!sd->s_element ||
+		    (sd->s_type & CONFIGFS_NOT_PINNED))
 			continue;
 		if (sd->s_type & CONFIGFS_USET_DEFAULT) {
 			/* Abort if racing with mkdir() */
-- 
cgit v1.2.3-70-g09d2


From 70526b67443a980d5029d9cf06903bef731a4e96 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Tue, 17 Jun 2008 15:34:32 -0700
Subject: [PATCH] configfs: Pin configfs subsystems separately from new
 config_items.

configfs_mkdir() creates a new item by calling its parent's
->make_item/group() functions.  Once that object is created,
configfs_mkdir() calls try_module_get() on the new item's module.  If it
succeeds, the module owning the new item cannot be unloaded, and
configfs is safe to reference the item.

If the item and the subsystem it belongs to are part of the same module,
the subsystem is also pinned.  This is the common case.

However, if the subsystem is made up of multiple modules, this may not
pin the subsystem.  Thus, it would be possible to unload the toplevel
subsystem module while there is still a child item.  Thus, we now
try_module_get() the subsystem's module.  This only really affects
children of the toplevel subsystem group.  Deeper children already have
their parents pinned.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/configfs/dir.c | 42 +++++++++++++++++++++++++++++++++---------
 1 file changed, 33 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index a89058b3988..7a8db78a91d 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1100,7 +1100,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	struct configfs_subsystem *subsys;
 	struct configfs_dirent *sd;
 	struct config_item_type *type;
-	struct module *owner = NULL;
+	struct module *subsys_owner = NULL, *new_item_owner = NULL;
 	char *name;
 
 	if (dentry->d_parent == configfs_sb->s_root) {
@@ -1137,10 +1137,25 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 		goto out_put;
 	}
 
+	/*
+	 * The subsystem may belong to a different module than the item
+	 * being created.  We don't want to safely pin the new item but
+	 * fail to pin the subsystem it sits under.
+	 */
+	if (!subsys->su_group.cg_item.ci_type) {
+		ret = -EINVAL;
+		goto out_put;
+	}
+	subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner;
+	if (!try_module_get(subsys_owner)) {
+		ret = -EINVAL;
+		goto out_put;
+	}
+
 	name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL);
 	if (!name) {
 		ret = -ENOMEM;
-		goto out_put;
+		goto out_subsys_put;
 	}
 
 	snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name);
@@ -1172,7 +1187,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 		 * If ret != 0, then link_obj() was never called.
 		 * There are no extra references to clean up.
 		 */
-		goto out_put;
+		goto out_subsys_put;
 	}
 
 	/*
@@ -1186,8 +1201,8 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 		goto out_unlink;
 	}
 
-	owner = type->ct_owner;
-	if (!try_module_get(owner)) {
+	new_item_owner = type->ct_owner;
+	if (!try_module_get(new_item_owner)) {
 		ret = -EINVAL;
 		goto out_unlink;
 	}
@@ -1236,9 +1251,13 @@ out_unlink:
 		mutex_unlock(&subsys->su_mutex);
 
 		if (module_got)
-			module_put(owner);
+			module_put(new_item_owner);
 	}
 
+out_subsys_put:
+	if (ret)
+		module_put(subsys_owner);
+
 out_put:
 	/*
 	 * link_obj()/link_group() took a reference from child->parent,
@@ -1257,7 +1276,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
 	struct config_item *item;
 	struct configfs_subsystem *subsys;
 	struct configfs_dirent *sd;
-	struct module *owner = NULL;
+	struct module *subsys_owner = NULL, *dead_item_owner = NULL;
 	int ret;
 
 	if (dentry->d_parent == configfs_sb->s_root)
@@ -1284,6 +1303,10 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
 		return -EINVAL;
 	}
 
+	/* configfs_mkdir() shouldn't have allowed this */
+	BUG_ON(!subsys->su_group.cg_item.ci_type);
+	subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner;
+
 	/*
 	 * Ensure that no racing symlink() will make detach_prep() fail while
 	 * the new link is temporarily attached
@@ -1321,7 +1344,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
 	config_item_put(parent_item);
 
 	if (item->ci_type)
-		owner = item->ci_type->ct_owner;
+		dead_item_owner = item->ci_type->ct_owner;
 
 	if (sd->s_type & CONFIGFS_USET_DIR) {
 		configfs_detach_group(item);
@@ -1343,7 +1366,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
 	/* Drop our reference from above */
 	config_item_put(item);
 
-	module_put(owner);
+	module_put(dead_item_owner);
+	module_put(subsys_owner);
 
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From c69991aac71a8beb57c11d651c7fd4b24c32aa8b Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Mon, 14 Jul 2008 17:31:09 -0700
Subject: [PATCH 1/2] ocfs2: Add counter in struct ocfs2_dinode to track
 journal replays

This patch renames the ij_pad to ij_recovery_generation in struct ocfs2_dinode.
This will be used to keep count of journal replays after an unclean shutdown.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/ocfs2/ocfs2_fs.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 3f194517762..4f619850ccf 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -660,7 +660,10 @@ struct ocfs2_dinode {
 		struct {		/* Info for journal system
 					   inodes */
 			__le32 ij_flags;	/* Mounted, version, etc. */
-			__le32 ij_pad;
+			__le32 ij_recovery_generation; /* Incremented when the
+							  journal is recovered
+							  after an unclean
+							  shutdown */
 		} journal1;
 	} id1;				/* Inode type dependant 1 */
 /*C0*/	union {
-- 
cgit v1.2.3-70-g09d2


From 539d8264093560b917ee3afe4c7f74e5da09d6a5 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Mon, 14 Jul 2008 17:31:10 -0700
Subject: [PATCH 2/2] ocfs2: Fix race between mount and recovery

As the fs recovery is asynchronous, there is a small chance that another
node can mount (and thus recover) the slot before the recovery thread
gets to it.

If this happens, the recovery thread will block indefinitely on the
journal/slot lock as that lock will be held for the duration of the mount
(by design) by the node assigned to that slot.

The solution implemented is to keep track of the journal replays using
a recovery generation in the journal inode, which will be incremented by the
thread replaying that journal. The recovery thread, before attempting the
blocking lock on the journal/slot lock, will compare the generation on disk
with what it has cached and skip recovery if it does not match.

This bug appears to have been inadvertently introduced during the mount/umount
vote removal by mainline commit 34d024f84345807bf44163fac84e921513dde323. In the
mount voting scheme, the messaging would indirectly indicate that the slot
was being recovered.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/ocfs2/journal.c | 173 ++++++++++++++++++++++++++++++++++++++++-------------
 fs/ocfs2/journal.h |   3 +-
 fs/ocfs2/ocfs2.h   |   2 +
 fs/ocfs2/super.c   |  12 +++-
 4 files changed, 148 insertions(+), 42 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index a8c19cb3cfd..7a37240f7a3 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -57,7 +57,7 @@ static int __ocfs2_recovery_thread(void *arg);
 static int ocfs2_commit_cache(struct ocfs2_super *osb);
 static int ocfs2_wait_on_mount(struct ocfs2_super *osb);
 static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
-				      int dirty);
+				      int dirty, int replayed);
 static int ocfs2_trylock_journal(struct ocfs2_super *osb,
 				 int slot_num);
 static int ocfs2_recover_orphans(struct ocfs2_super *osb,
@@ -562,8 +562,18 @@ done:
 	return status;
 }
 
+static void ocfs2_bump_recovery_generation(struct ocfs2_dinode *di)
+{
+	le32_add_cpu(&(di->id1.journal1.ij_recovery_generation), 1);
+}
+
+static u32 ocfs2_get_recovery_generation(struct ocfs2_dinode *di)
+{
+	return le32_to_cpu(di->id1.journal1.ij_recovery_generation);
+}
+
 static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
-				      int dirty)
+				      int dirty, int replayed)
 {
 	int status;
 	unsigned int flags;
@@ -593,6 +603,9 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
 		flags &= ~OCFS2_JOURNAL_DIRTY_FL;
 	fe->id1.journal1.ij_flags = cpu_to_le32(flags);
 
+	if (replayed)
+		ocfs2_bump_recovery_generation(fe);
+
 	status = ocfs2_write_block(osb, bh, journal->j_inode);
 	if (status < 0)
 		mlog_errno(status);
@@ -667,7 +680,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
 		 * Do not toggle if flush was unsuccessful otherwise
 		 * will leave dirty metadata in a "clean" journal
 		 */
-		status = ocfs2_journal_toggle_dirty(osb, 0);
+		status = ocfs2_journal_toggle_dirty(osb, 0, 0);
 		if (status < 0)
 			mlog_errno(status);
 	}
@@ -710,7 +723,7 @@ static void ocfs2_clear_journal_error(struct super_block *sb,
 	}
 }
 
-int ocfs2_journal_load(struct ocfs2_journal *journal, int local)
+int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
 {
 	int status = 0;
 	struct ocfs2_super *osb;
@@ -729,7 +742,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local)
 
 	ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num);
 
-	status = ocfs2_journal_toggle_dirty(osb, 1);
+	status = ocfs2_journal_toggle_dirty(osb, 1, replayed);
 	if (status < 0) {
 		mlog_errno(status);
 		goto done;
@@ -771,7 +784,7 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
 		goto bail;
 	}
 
-	status = ocfs2_journal_toggle_dirty(journal->j_osb, 0);
+	status = ocfs2_journal_toggle_dirty(journal->j_osb, 0, 0);
 	if (status < 0)
 		mlog_errno(status);
 
@@ -1034,6 +1047,12 @@ restart:
 	spin_unlock(&osb->osb_lock);
 	mlog(0, "All nodes recovered\n");
 
+	/* Refresh all journal recovery generations from disk */
+	status = ocfs2_check_journals_nolocks(osb);
+	status = (status == -EROFS) ? 0 : status;
+	if (status < 0)
+		mlog_errno(status);
+
 	ocfs2_super_unlock(osb, 1);
 
 	/* We always run recovery on our own orphan dir - the dead
@@ -1096,6 +1115,42 @@ out:
 	mlog_exit_void();
 }
 
+static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
+				    int slot_num,
+				    struct buffer_head **bh,
+				    struct inode **ret_inode)
+{
+	int status = -EACCES;
+	struct inode *inode = NULL;
+
+	BUG_ON(slot_num >= osb->max_slots);
+
+	inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
+					    slot_num);
+	if (!inode || is_bad_inode(inode)) {
+		mlog_errno(status);
+		goto bail;
+	}
+	SET_INODE_JOURNAL(inode);
+
+	status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, bh, 0, inode);
+	if (status < 0) {
+		mlog_errno(status);
+		goto bail;
+	}
+
+	status = 0;
+
+bail:
+	if (inode) {
+		if (status || !ret_inode)
+			iput(inode);
+		else
+			*ret_inode = inode;
+	}
+	return status;
+}
+
 /* Does the actual journal replay and marks the journal inode as
  * clean. Will only replay if the journal inode is marked dirty. */
 static int ocfs2_replay_journal(struct ocfs2_super *osb,
@@ -1109,22 +1164,36 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
 	struct ocfs2_dinode *fe;
 	journal_t *journal = NULL;
 	struct buffer_head *bh = NULL;
+	u32 slot_reco_gen;
 
-	inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
-					    slot_num);
-	if (inode == NULL) {
-		status = -EACCES;
+	status = ocfs2_read_journal_inode(osb, slot_num, &bh, &inode);
+	if (status) {
 		mlog_errno(status);
 		goto done;
 	}
-	if (is_bad_inode(inode)) {
-		status = -EACCES;
-		iput(inode);
-		inode = NULL;
-		mlog_errno(status);
+
+	fe = (struct ocfs2_dinode *)bh->b_data;
+	slot_reco_gen = ocfs2_get_recovery_generation(fe);
+	brelse(bh);
+	bh = NULL;
+
+	/*
+	 * As the fs recovery is asynchronous, there is a small chance that
+	 * another node mounted (and recovered) the slot before the recovery
+	 * thread could get the lock. To handle that, we dirty read the journal
+	 * inode for that slot to get the recovery generation. If it is
+	 * different than what we expected, the slot has been recovered.
+	 * If not, it needs recovery.
+	 */
+	if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) {
+		mlog(0, "Slot %u already recovered (old/new=%u/%u)\n", slot_num,
+		     osb->slot_recovery_generations[slot_num], slot_reco_gen);
+		osb->slot_recovery_generations[slot_num] = slot_reco_gen;
+		status = -EBUSY;
 		goto done;
 	}
-	SET_INODE_JOURNAL(inode);
+
+	/* Continue with recovery as the journal has not yet been recovered */
 
 	status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
 	if (status < 0) {
@@ -1138,9 +1207,12 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
 	fe = (struct ocfs2_dinode *) bh->b_data;
 
 	flags = le32_to_cpu(fe->id1.journal1.ij_flags);
+	slot_reco_gen = ocfs2_get_recovery_generation(fe);
 
 	if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) {
 		mlog(0, "No recovery required for node %d\n", node_num);
+		/* Refresh recovery generation for the slot */
+		osb->slot_recovery_generations[slot_num] = slot_reco_gen;
 		goto done;
 	}
 
@@ -1188,6 +1260,11 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
 	flags &= ~OCFS2_JOURNAL_DIRTY_FL;
 	fe->id1.journal1.ij_flags = cpu_to_le32(flags);
 
+	/* Increment recovery generation to indicate successful recovery */
+	ocfs2_bump_recovery_generation(fe);
+	osb->slot_recovery_generations[slot_num] =
+					ocfs2_get_recovery_generation(fe);
+
 	status = ocfs2_write_block(osb, bh, inode);
 	if (status < 0)
 		mlog_errno(status);
@@ -1252,6 +1329,13 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
 
 	status = ocfs2_replay_journal(osb, node_num, slot_num);
 	if (status < 0) {
+		if (status == -EBUSY) {
+			mlog(0, "Skipping recovery for slot %u (node %u) "
+			     "as another node has recovered it\n", slot_num,
+			     node_num);
+			status = 0;
+			goto done;
+		}
 		mlog_errno(status);
 		goto done;
 	}
@@ -1334,12 +1418,29 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
 {
 	unsigned int node_num;
 	int status, i;
+	struct buffer_head *bh = NULL;
+	struct ocfs2_dinode *di;
 
 	/* This is called with the super block cluster lock, so we
 	 * know that the slot map can't change underneath us. */
 
 	spin_lock(&osb->osb_lock);
 	for (i = 0; i < osb->max_slots; i++) {
+		/* Read journal inode to get the recovery generation */
+		status = ocfs2_read_journal_inode(osb, i, &bh, NULL);
+		if (status) {
+			mlog_errno(status);
+			goto bail;
+		}
+		di = (struct ocfs2_dinode *)bh->b_data;
+		osb->slot_recovery_generations[i] =
+					ocfs2_get_recovery_generation(di);
+		brelse(bh);
+		bh = NULL;
+
+		mlog(0, "Slot %u recovery generation is %u\n", i,
+		     osb->slot_recovery_generations[i]);
+
 		if (i == osb->slot_num)
 			continue;
 
@@ -1603,49 +1704,41 @@ static int ocfs2_commit_thread(void *arg)
 	return 0;
 }
 
-/* Look for a dirty journal without taking any cluster locks. Used for
- * hard readonly access to determine whether the file system journals
- * require recovery. */
+/* Reads all the journal inodes without taking any cluster locks. Used
+ * for hard readonly access to determine whether any journal requires
+ * recovery. Also used to refresh the recovery generation numbers after
+ * a journal has been recovered by another node.
+ */
 int ocfs2_check_journals_nolocks(struct ocfs2_super *osb)
 {
 	int ret = 0;
 	unsigned int slot;
-	struct buffer_head *di_bh;
+	struct buffer_head *di_bh = NULL;
 	struct ocfs2_dinode *di;
-	struct inode *journal = NULL;
+	int journal_dirty = 0;
 
 	for(slot = 0; slot < osb->max_slots; slot++) {
-		journal = ocfs2_get_system_file_inode(osb,
-						      JOURNAL_SYSTEM_INODE,
-						      slot);
-		if (!journal || is_bad_inode(journal)) {
-			ret = -EACCES;
-			mlog_errno(ret);
-			goto out;
-		}
-
-		di_bh = NULL;
-		ret = ocfs2_read_block(osb, OCFS2_I(journal)->ip_blkno, &di_bh,
-				       0, journal);
-		if (ret < 0) {
+		ret = ocfs2_read_journal_inode(osb, slot, &di_bh, NULL);
+		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
 
 		di = (struct ocfs2_dinode *) di_bh->b_data;
 
+		osb->slot_recovery_generations[slot] =
+					ocfs2_get_recovery_generation(di);
+
 		if (le32_to_cpu(di->id1.journal1.ij_flags) &
 		    OCFS2_JOURNAL_DIRTY_FL)
-			ret = -EROFS;
+			journal_dirty = 1;
 
 		brelse(di_bh);
-		if (ret)
-			break;
+		di_bh = NULL;
 	}
 
 out:
-	if (journal)
-		iput(journal);
-
+	if (journal_dirty)
+		ret = -EROFS;
 	return ret;
 }
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index db82be2532e..2178ebffa05 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -161,7 +161,8 @@ int    ocfs2_journal_init(struct ocfs2_journal *journal,
 void   ocfs2_journal_shutdown(struct ocfs2_super *osb);
 int    ocfs2_journal_wipe(struct ocfs2_journal *journal,
 			  int full);
-int    ocfs2_journal_load(struct ocfs2_journal *journal, int local);
+int    ocfs2_journal_load(struct ocfs2_journal *journal, int local,
+			  int replayed);
 int    ocfs2_check_journals_nolocks(struct ocfs2_super *osb);
 void   ocfs2_recovery_thread(struct ocfs2_super *osb,
 			     int node_num);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 1cb814be8ef..7f625f2b111 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -204,6 +204,8 @@ struct ocfs2_super
 
 	struct ocfs2_slot_info *slot_info;
 
+	u32 *slot_recovery_generations;
+
 	spinlock_t node_map_lock;
 
 	u64 root_blkno;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 2560b33889a..88255d3f52b 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1442,6 +1442,15 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	}
 	mlog(0, "max_slots for this device: %u\n", osb->max_slots);
 
+	osb->slot_recovery_generations =
+		kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations),
+			GFP_KERNEL);
+	if (!osb->slot_recovery_generations) {
+		status = -ENOMEM;
+		mlog_errno(status);
+		goto bail;
+	}
+
 	init_waitqueue_head(&osb->osb_wipe_event);
 	osb->osb_orphan_wipes = kcalloc(osb->max_slots,
 					sizeof(*osb->osb_orphan_wipes),
@@ -1703,7 +1712,7 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
 	local = ocfs2_mount_local(osb);
 
 	/* will play back anything left in the journal. */
-	status = ocfs2_journal_load(osb->journal, local);
+	status = ocfs2_journal_load(osb->journal, local, dirty);
 	if (status < 0) {
 		mlog(ML_ERROR, "ocfs2 journal load failed! %d\n", status);
 		goto finally;
@@ -1768,6 +1777,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
 	ocfs2_free_slot_info(osb);
 
 	kfree(osb->osb_orphan_wipes);
+	kfree(osb->slot_recovery_generations);
 	/* FIXME
 	 * This belongs in journal shutdown, but because we have to
 	 * allocate osb->journal at the start of ocfs2_initalize_osb(),
-- 
cgit v1.2.3-70-g09d2


From 961cecbee6786f4b1f1b8f695e87045b583f9f49 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Wed, 16 Jul 2008 17:22:22 -0700
Subject: [PATCH] ocfs2: Fix oops when racing files truncates with writes into
 an mmap region

This patch fixes an oops that is reproduced when one races writes to a mmap-ed
region with another process truncating the file.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/ocfs2/aops.c | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 1db080135c6..506c24fb507 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1073,12 +1073,15 @@ static void ocfs2_write_failure(struct inode *inode,
 	for(i = 0; i < wc->w_num_pages; i++) {
 		tmppage = wc->w_pages[i];
 
-		if (ocfs2_should_order_data(inode))
-			walk_page_buffers(wc->w_handle, page_buffers(tmppage),
-					  from, to, NULL,
-					  ocfs2_journal_dirty_data);
-
-		block_commit_write(tmppage, from, to);
+		if (page_has_buffers(tmppage)) {
+			if (ocfs2_should_order_data(inode))
+				walk_page_buffers(wc->w_handle,
+						  page_buffers(tmppage),
+						  from, to, NULL,
+						  ocfs2_journal_dirty_data);
+
+			block_commit_write(tmppage, from, to);
+		}
 	}
 }
 
@@ -1901,12 +1904,14 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
 			to = PAGE_CACHE_SIZE;
 		}
 
-		if (ocfs2_should_order_data(inode))
-			walk_page_buffers(wc->w_handle, page_buffers(tmppage),
-					  from, to, NULL,
-					  ocfs2_journal_dirty_data);
-
-		block_commit_write(tmppage, from, to);
+		if (page_has_buffers(tmppage)) {
+			if (ocfs2_should_order_data(inode))
+				walk_page_buffers(wc->w_handle,
+						  page_buffers(tmppage),
+						  from, to, NULL,
+						  ocfs2_journal_dirty_data);
+			block_commit_write(tmppage, from, to);
+		}
 	}
 
 out_write_size:
-- 
cgit v1.2.3-70-g09d2


From c259ae52e204d42f8b2d484c85517a4c367030e1 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Mon, 21 Jul 2008 09:59:15 +0200
Subject: [PATCH] ocfs2: Release mutex in error handling code

The mutex is released on a successful return, so it would seem that it
should be released on an error return as well.

The semantic patch finds this problem is as follows:
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@@
expression l;
@@

mutex_lock(l);
... when != mutex_unlock(l)
    when any
    when strict
(
if (...) { ... when != mutex_unlock(l)
+   mutex_unlock(l);
    return ...;
}
|
mutex_unlock(l);
)
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/ocfs2/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index be2dd95d3a1..ec2ed15c3da 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1766,8 +1766,8 @@ out_inode_unlock:
 out_rw_unlock:
 	ocfs2_rw_unlock(inode, 1);
 
-	mutex_unlock(&inode->i_mutex);
 out:
+	mutex_unlock(&inode->i_mutex);
 	return ret;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 66b8bd3c405389213de1d6ba6c2565990f62004f Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 1 Aug 2008 17:54:32 +0000
Subject: [CIFS] properly account for new user= field in SPNEGO upcall string
 allocation

...it doesn't look like it's being accounted for at the moment. Also
try to reorganize the calculation to make it a little more evident
what each piece means.

This should probably go to the stable series as well...

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifs_spnego.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 7013aaff6ae..2434ab0e879 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -66,8 +66,8 @@ struct key_type cifs_spnego_key_type = {
 	.describe	= user_describe,
 };
 
-#define MAX_VER_STR_LEN   9 /* length of longest version string e.g.
-				strlen(";ver=0xFF") */
+#define MAX_VER_STR_LEN   8 /* length of longest version string e.g.
+				strlen("ver=0xFF") */
 #define MAX_MECH_STR_LEN 13 /* length of longest security mechanism name, eg
 			       in future could have strlen(";sec=ntlmsspi") */
 #define MAX_IPV6_ADDR_LEN 42 /* eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/60 */
@@ -81,11 +81,15 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
 	struct key *spnego_key;
 	const char *hostname = server->hostname;
 
-	/* BB: come up with better scheme for determining length */
-	/* length of fields (with semicolons): ver=0xyz ipv4= ipaddress host=
-	   hostname sec=mechanism uid=0x uid */
-	desc_len = MAX_VER_STR_LEN + 5 + MAX_IPV6_ADDR_LEN + 1 + 6 +
-		  strlen(hostname) + MAX_MECH_STR_LEN + 8 + (sizeof(uid_t) * 2);
+	/* length of fields (with semicolons): ver=0xyz ip4=ipaddress
+	   host=hostname sec=mechanism uid=0xFF user=username */
+	desc_len = MAX_VER_STR_LEN +
+		   6 /* len of "host=" */ + strlen(hostname) +
+		   5 /* len of ";ipv4=" */ + MAX_IPV6_ADDR_LEN +
+		   MAX_MECH_STR_LEN +
+		   7 /* len of ";uid=0x" */ + (sizeof(uid_t) * 2) +
+		   6 /* len of ";user=" */ + strlen(sesInfo->userName) + 1;
+
 	spnego_key = ERR_PTR(-ENOMEM);
 	description = kzalloc(desc_len, GFP_KERNEL);
 	if (description == NULL)
-- 
cgit v1.2.3-70-g09d2


From 17263849c7ad2279667dd298083eceefcd1b5845 Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Sat, 2 Aug 2008 13:59:37 +0900
Subject: fat: Fix allow_utime option

FAT has to handle the newly introduced ATTR_TIMES_SET for allow_utime
option.

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fat/file.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/fat/file.c b/fs/fat/file.c
index 8707a8cfa02..ddde37025ca 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -313,6 +313,8 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
 	return 0;
 }
 
+#define TIMES_SET_FLAGS	(ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
+
 int fat_setattr(struct dentry *dentry, struct iattr *attr)
 {
 	struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
@@ -336,9 +338,9 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
 
 	/* Check for setting the inode time. */
 	ia_valid = attr->ia_valid;
-	if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) {
+	if (ia_valid & TIMES_SET_FLAGS) {
 		if (fat_allow_set_time(sbi, inode))
-			attr->ia_valid &= ~(ATTR_MTIME_SET | ATTR_ATIME_SET);
+			attr->ia_valid &= ~TIMES_SET_FLAGS;
 	}
 
 	error = inode_change_ok(inode, attr);
-- 
cgit v1.2.3-70-g09d2


From 7d55992d60caa390460bad1a974eb2b3c11538f4 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Sat, 2 Aug 2008 21:22:18 -0400
Subject: ext4: remove write-only variables from ext4_ordered_write_end

The variables 'from' and 'to' are not used anywhere.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Acked-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/inode.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0080999d2cd..59fbbe899ac 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1424,12 +1424,8 @@ static int ext4_ordered_write_end(struct file *file,
 {
 	handle_t *handle = ext4_journal_current_handle();
 	struct inode *inode = mapping->host;
-	unsigned from, to;
 	int ret = 0, ret2;
 
-	from = pos & (PAGE_CACHE_SIZE - 1);
-	to = from + len;
-
 	ret = ext4_jbd2_file_inode(handle, inode);
 
 	if (ret == 0) {
-- 
cgit v1.2.3-70-g09d2


From 93d0ec851820688ef5b21a84e7460d3cf405f5c5 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Sat, 2 Aug 2008 08:00:48 -0400
Subject: remove locking around tcpSesAllocCount atomic variable

The global tcpSesAllocCount variable is an atomic already and doesn't
really need the extra locking around it. Remove the locking and just use
the atomic_inc_return and atomic_dec_return functions to make sure we
access it correctly.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/connect.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index b51d5777cde..34a1fc9dabf 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -351,11 +351,9 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
 
 	current->flags |= PF_MEMALLOC;
 	cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current)));
-	write_lock(&GlobalSMBSeslock);
-	atomic_inc(&tcpSesAllocCount);
-	length = tcpSesAllocCount.counter;
-	write_unlock(&GlobalSMBSeslock);
-	if (length  > 1)
+
+	length = atomic_inc_return(&tcpSesAllocCount);
+	if (length > 1)
 		mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
 				GFP_KERNEL);
 
@@ -745,14 +743,11 @@ multi_t2_fnd:
 		coming home not much else we can do but free the memory */
 	}
 
-	write_lock(&GlobalSMBSeslock);
-	atomic_dec(&tcpSesAllocCount);
-	length = tcpSesAllocCount.counter;
-
 	/* last chance to mark ses pointers invalid
 	if there are any pointing to this (e.g
 	if a crazy root user tried to kill cifsd
 	kernel thread explicitly this might happen) */
+	write_lock(&GlobalSMBSeslock);
 	list_for_each(tmp, &GlobalSMBSessionList) {
 		ses = list_entry(tmp, struct cifsSesInfo,
 				cifsSessionList);
@@ -763,6 +758,8 @@ multi_t2_fnd:
 
 	kfree(server->hostname);
 	kfree(server);
+
+	length = atomic_dec_return(&tcpSesAllocCount);
 	if (length  > 0)
 		mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
 				GFP_KERNEL);
-- 
cgit v1.2.3-70-g09d2


From 1a3f7d98e5f50f21ce6fb1406a35531d9596c5c6 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 4 Aug 2008 16:50:38 -0700
Subject: Revert "UFS: add const to parser token table"

This reverts commit f9247273cb69ba101877e946d2d83044409cc8c5 (and
fb2e405fc1fc8b20d9c78eaa1c7fd5a297efde43 - "fix fs/nfs/nfsroot.c
compilation" - that fixed a missed conversion).

The changes cause problems for at least the sparc build.  Let's re-do
them when the exact issues are resolved.

Requested-by: Andrew Morton <akpm@linux-foundation.org>
Requested-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfs/nfsroot.c       | 2 +-
 fs/ufs/super.c         | 2 +-
 include/linux/parser.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 8478fc25dae..46763d1cd39 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -127,7 +127,7 @@ enum {
 	Opt_err
 };
 
-static match_table_t __initconst tokens = {
+static match_table_t __initdata tokens = {
 	{Opt_port, "port=%u"},
 	{Opt_rsize, "rsize=%u"},
 	{Opt_wsize, "wsize=%u"},
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 3e30e40aa24..3141969b456 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1233,7 +1233,7 @@ static int ufs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 {
 	struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb);
 	unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE;
-	const struct match_token *tp = tokens;
+	struct match_token *tp = tokens;
 
 	while (tp->token != Opt_onerror_panic && tp->token != mval)
 		++tp;
diff --git a/include/linux/parser.h b/include/linux/parser.h
index cc554ca8bc7..7dcd0507575 100644
--- a/include/linux/parser.h
+++ b/include/linux/parser.h
@@ -14,7 +14,7 @@ struct match_token {
 	const char *pattern;
 };
 
-typedef const struct match_token match_table_t[];
+typedef struct match_token match_table_t[];
 
 /* Maximum number of arguments that match_token will find in a pattern */
 enum {MAX_OPT_ARGS = 3};
-- 
cgit v1.2.3-70-g09d2


From 529ae9aaa08378cfe2a4350bded76f32cc8ff0ce Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Sat, 2 Aug 2008 12:01:03 +0200
Subject: mm: rename page trylock

Converting page lock to new locking bitops requires a change of page flag
operation naming, so we might as well convert it to something nicer
(!TestSetPageLocked_Lock => trylock_page, SetPageLocked => set_page_locked).

This also facilitates lockdeping of page lock.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/scsi/sg.c           |  2 +-
 fs/afs/write.c              |  2 +-
 fs/cifs/file.c              |  2 +-
 fs/jbd/commit.c             |  4 +--
 fs/jbd2/commit.c            |  2 +-
 fs/reiserfs/journal.c       |  2 +-
 fs/splice.c                 |  2 +-
 fs/xfs/linux-2.6/xfs_aops.c |  4 +--
 include/linux/page-flags.h  |  2 +-
 include/linux/pagemap.h     | 67 +++++++++++++++++++++++++++------------------
 mm/filemap.c                | 12 ++++----
 mm/memory.c                 |  2 +-
 mm/migrate.c                |  4 +--
 mm/rmap.c                   |  2 +-
 mm/shmem.c                  |  4 +--
 mm/swap.c                   |  2 +-
 mm/swap_state.c             |  8 +++---
 mm/swapfile.c               |  2 +-
 mm/truncate.c               |  4 +--
 mm/vmscan.c                 |  4 +--
 20 files changed, 74 insertions(+), 59 deletions(-)

(limited to 'fs')

diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index d3b8ebb8377..3d36270a8b4 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1747,7 +1747,7 @@ st_map_user_pages(struct scatterlist *sgl, const unsigned int max_pages,
                  */
 		flush_dcache_page(pages[i]);
 		/* ?? Is locking needed? I don't think so */
-		/* if (TestSetPageLocked(pages[i]))
+		/* if (!trylock_page(pages[i]))
 		   goto out_unlock; */
         }
 
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 9a849ad3c48..065b4e10681 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -404,7 +404,7 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb,
 			page = pages[loop];
 			if (page->index > wb->last)
 				break;
-			if (TestSetPageLocked(page))
+			if (!trylock_page(page))
 				break;
 			if (!PageDirty(page) ||
 			    page_private(page) != (unsigned long) wb) {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0aac824371a..e692c42f24b 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1280,7 +1280,7 @@ retry:
 
 			if (first < 0)
 				lock_page(page);
-			else if (TestSetPageLocked(page))
+			else if (!trylock_page(page))
 				break;
 
 			if (unlikely(page->mapping != mapping)) {
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 2eccbfaa1d4..81a9ad7177c 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -63,7 +63,7 @@ static void release_buffer_page(struct buffer_head *bh)
 		goto nope;
 
 	/* OK, it's a truncated page */
-	if (TestSetPageLocked(page))
+	if (!trylock_page(page))
 		goto nope;
 
 	page_cache_get(page);
@@ -446,7 +446,7 @@ void journal_commit_transaction(journal_t *journal)
 			spin_lock(&journal->j_list_lock);
 		}
 		if (unlikely(!buffer_uptodate(bh))) {
-			if (TestSetPageLocked(bh->b_page)) {
+			if (!trylock_page(bh->b_page)) {
 				spin_unlock(&journal->j_list_lock);
 				lock_page(bh->b_page);
 				spin_lock(&journal->j_list_lock);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index adf0395f318..f2ad061e95e 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -67,7 +67,7 @@ static void release_buffer_page(struct buffer_head *bh)
 		goto nope;
 
 	/* OK, it's a truncated page */
-	if (TestSetPageLocked(page))
+	if (!trylock_page(page))
 		goto nope;
 
 	page_cache_get(page);
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index c8f60ee183b..ce2208b2711 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -627,7 +627,7 @@ static int journal_list_still_alive(struct super_block *s,
 static void release_buffer_page(struct buffer_head *bh)
 {
 	struct page *page = bh->b_page;
-	if (!page->mapping && !TestSetPageLocked(page)) {
+	if (!page->mapping && trylock_page(page)) {
 		page_cache_get(page);
 		put_bh(bh);
 		if (!page->mapping)
diff --git a/fs/splice.c b/fs/splice.c
index b30311ba8af..1bbc6f4bb09 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -371,7 +371,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
 			 * for an in-flight io page
 			 */
 			if (flags & SPLICE_F_NONBLOCK) {
-				if (TestSetPageLocked(page)) {
+				if (!trylock_page(page)) {
 					error = -EAGAIN;
 					break;
 				}
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 0b211cba190..fa73179233a 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -675,7 +675,7 @@ xfs_probe_cluster(
 			} else
 				pg_offset = PAGE_CACHE_SIZE;
 
-			if (page->index == tindex && !TestSetPageLocked(page)) {
+			if (page->index == tindex && trylock_page(page)) {
 				pg_len = xfs_probe_page(page, pg_offset, mapped);
 				unlock_page(page);
 			}
@@ -759,7 +759,7 @@ xfs_convert_page(
 
 	if (page->index != tindex)
 		goto fail;
-	if (TestSetPageLocked(page))
+	if (!trylock_page(page))
 		goto fail;
 	if (PageWriteback(page))
 		goto fail_unlock_page;
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 25aaccdb2f2..c74d3e87531 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -163,7 +163,7 @@ static inline int Page##uname(struct page *page) 			\
 
 struct page;	/* forward declaration */
 
-PAGEFLAG(Locked, locked) TESTSCFLAG(Locked, locked)
+TESTPAGEFLAG(Locked, locked)
 PAGEFLAG(Error, error)
 PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced)
 PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 69ed3cb1197..5da31c12101 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -250,29 +250,6 @@ static inline struct page *read_mapping_page(struct address_space *mapping,
 	return read_cache_page(mapping, index, filler, data);
 }
 
-int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
-				pgoff_t index, gfp_t gfp_mask);
-int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
-				pgoff_t index, gfp_t gfp_mask);
-extern void remove_from_page_cache(struct page *page);
-extern void __remove_from_page_cache(struct page *page);
-
-/*
- * Like add_to_page_cache_locked, but used to add newly allocated pages:
- * the page is new, so we can just run SetPageLocked() against it.
- */
-static inline int add_to_page_cache(struct page *page,
-		struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask)
-{
-	int error;
-
-	SetPageLocked(page);
-	error = add_to_page_cache_locked(page, mapping, offset, gfp_mask);
-	if (unlikely(error))
-		ClearPageLocked(page);
-	return error;
-}
-
 /*
  * Return byte-offset into filesystem object for page.
  */
@@ -294,13 +271,28 @@ extern int __lock_page_killable(struct page *page);
 extern void __lock_page_nosync(struct page *page);
 extern void unlock_page(struct page *page);
 
+static inline void set_page_locked(struct page *page)
+{
+	set_bit(PG_locked, &page->flags);
+}
+
+static inline void clear_page_locked(struct page *page)
+{
+	clear_bit(PG_locked, &page->flags);
+}
+
+static inline int trylock_page(struct page *page)
+{
+	return !test_and_set_bit(PG_locked, &page->flags);
+}
+
 /*
  * lock_page may only be called if we have the page's inode pinned.
  */
 static inline void lock_page(struct page *page)
 {
 	might_sleep();
-	if (TestSetPageLocked(page))
+	if (!trylock_page(page))
 		__lock_page(page);
 }
 
@@ -312,7 +304,7 @@ static inline void lock_page(struct page *page)
 static inline int lock_page_killable(struct page *page)
 {
 	might_sleep();
-	if (TestSetPageLocked(page))
+	if (!trylock_page(page))
 		return __lock_page_killable(page);
 	return 0;
 }
@@ -324,7 +316,7 @@ static inline int lock_page_killable(struct page *page)
 static inline void lock_page_nosync(struct page *page)
 {
 	might_sleep();
-	if (TestSetPageLocked(page))
+	if (!trylock_page(page))
 		__lock_page_nosync(page);
 }
 	
@@ -409,4 +401,27 @@ static inline int fault_in_pages_readable(const char __user *uaddr, int size)
 	return ret;
 }
 
+int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
+				pgoff_t index, gfp_t gfp_mask);
+int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
+				pgoff_t index, gfp_t gfp_mask);
+extern void remove_from_page_cache(struct page *page);
+extern void __remove_from_page_cache(struct page *page);
+
+/*
+ * Like add_to_page_cache_locked, but used to add newly allocated pages:
+ * the page is new, so we can just run set_page_locked() against it.
+ */
+static inline int add_to_page_cache(struct page *page,
+		struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask)
+{
+	int error;
+
+	set_page_locked(page);
+	error = add_to_page_cache_locked(page, mapping, offset, gfp_mask);
+	if (unlikely(error))
+		clear_page_locked(page);
+	return error;
+}
+
 #endif /* _LINUX_PAGEMAP_H */
diff --git a/mm/filemap.c b/mm/filemap.c
index d97d1ad5547..54e96865085 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -558,14 +558,14 @@ EXPORT_SYMBOL(wait_on_page_bit);
  * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
  *
  * The first mb is necessary to safely close the critical section opened by the
- * TestSetPageLocked(), the second mb is necessary to enforce ordering between
- * the clear_bit and the read of the waitqueue (to avoid SMP races with a
- * parallel wait_on_page_locked()).
+ * test_and_set_bit() to lock the page; the second mb is necessary to enforce
+ * ordering between the clear_bit and the read of the waitqueue (to avoid SMP
+ * races with a parallel wait_on_page_locked()).
  */
 void unlock_page(struct page *page)
 {
 	smp_mb__before_clear_bit();
-	if (!TestClearPageLocked(page))
+	if (!test_and_clear_bit(PG_locked, &page->flags))
 		BUG();
 	smp_mb__after_clear_bit(); 
 	wake_up_page(page, PG_locked);
@@ -931,7 +931,7 @@ grab_cache_page_nowait(struct address_space *mapping, pgoff_t index)
 	struct page *page = find_get_page(mapping, index);
 
 	if (page) {
-		if (!TestSetPageLocked(page))
+		if (trylock_page(page))
 			return page;
 		page_cache_release(page);
 		return NULL;
@@ -1027,7 +1027,7 @@ find_page:
 			if (inode->i_blkbits == PAGE_CACHE_SHIFT ||
 					!mapping->a_ops->is_partially_uptodate)
 				goto page_not_up_to_date;
-			if (TestSetPageLocked(page))
+			if (!trylock_page(page))
 				goto page_not_up_to_date;
 			if (!mapping->a_ops->is_partially_uptodate(page,
 								desc, offset))
diff --git a/mm/memory.c b/mm/memory.c
index a472bcd4b06..1002f473f49 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1789,7 +1789,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * not dirty accountable.
 	 */
 	if (PageAnon(old_page)) {
-		if (!TestSetPageLocked(old_page)) {
+		if (trylock_page(old_page)) {
 			reuse = can_share_swap_page(old_page);
 			unlock_page(old_page);
 		}
diff --git a/mm/migrate.c b/mm/migrate.c
index 153572fb60b..2a80136b23b 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -605,7 +605,7 @@ static int move_to_new_page(struct page *newpage, struct page *page)
 	 * establishing additional references. We are the only one
 	 * holding a reference to the new page at this point.
 	 */
-	if (TestSetPageLocked(newpage))
+	if (!trylock_page(newpage))
 		BUG();
 
 	/* Prepare mapping for the new page.*/
@@ -667,7 +667,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	BUG_ON(charge);
 
 	rc = -EAGAIN;
-	if (TestSetPageLocked(page)) {
+	if (!trylock_page(page)) {
 		if (!force)
 			goto move_newpage;
 		lock_page(page);
diff --git a/mm/rmap.c b/mm/rmap.c
index 94a5246a3f9..1ea4e6fcee7 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -422,7 +422,7 @@ int page_referenced(struct page *page, int is_locked,
 			referenced += page_referenced_anon(page, mem_cont);
 		else if (is_locked)
 			referenced += page_referenced_file(page, mem_cont);
-		else if (TestSetPageLocked(page))
+		else if (!trylock_page(page))
 			referenced++;
 		else {
 			if (page->mapping)
diff --git a/mm/shmem.c b/mm/shmem.c
index c1e5a3b4f75..04fb4f1ab88 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1265,7 +1265,7 @@ repeat:
 		}
 
 		/* We have to do this with page locked to prevent races */
-		if (TestSetPageLocked(swappage)) {
+		if (!trylock_page(swappage)) {
 			shmem_swp_unmap(entry);
 			spin_unlock(&info->lock);
 			wait_on_page_locked(swappage);
@@ -1329,7 +1329,7 @@ repeat:
 		shmem_swp_unmap(entry);
 		filepage = find_get_page(mapping, idx);
 		if (filepage &&
-		    (!PageUptodate(filepage) || TestSetPageLocked(filepage))) {
+		    (!PageUptodate(filepage) || !trylock_page(filepage))) {
 			spin_unlock(&info->lock);
 			wait_on_page_locked(filepage);
 			page_cache_release(filepage);
diff --git a/mm/swap.c b/mm/swap.c
index 7417a2adbe5..9e0cb311807 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -444,7 +444,7 @@ void pagevec_strip(struct pagevec *pvec)
 	for (i = 0; i < pagevec_count(pvec); i++) {
 		struct page *page = pvec->pages[i];
 
-		if (PagePrivate(page) && !TestSetPageLocked(page)) {
+		if (PagePrivate(page) && trylock_page(page)) {
 			if (PagePrivate(page))
 				try_to_release_page(page, 0);
 			unlock_page(page);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index b8035b05512..167cf2dc8a0 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -201,7 +201,7 @@ void delete_from_swap_cache(struct page *page)
  */
 static inline void free_swap_cache(struct page *page)
 {
-	if (PageSwapCache(page) && !TestSetPageLocked(page)) {
+	if (PageSwapCache(page) && trylock_page(page)) {
 		remove_exclusive_swap_page(page);
 		unlock_page(page);
 	}
@@ -302,9 +302,9 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		 * re-using the just freed swap entry for an existing page.
 		 * May fail (-ENOMEM) if radix-tree node allocation failed.
 		 */
-		SetPageLocked(new_page);
+		set_page_locked(new_page);
 		err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL);
-		if (!err) {
+		if (likely(!err)) {
 			/*
 			 * Initiate read into locked page and return.
 			 */
@@ -312,7 +312,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 			swap_readpage(NULL, new_page);
 			return new_page;
 		}
-		ClearPageLocked(new_page);
+		clear_page_locked(new_page);
 		swap_free(entry);
 	} while (err != -ENOMEM);
 
diff --git a/mm/swapfile.c b/mm/swapfile.c
index bb7f79641f9..1e330f2998f 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -403,7 +403,7 @@ void free_swap_and_cache(swp_entry_t entry)
 	if (p) {
 		if (swap_entry_free(p, swp_offset(entry)) == 1) {
 			page = find_get_page(&swapper_space, entry.val);
-			if (page && unlikely(TestSetPageLocked(page))) {
+			if (page && unlikely(!trylock_page(page))) {
 				page_cache_release(page);
 				page = NULL;
 			}
diff --git a/mm/truncate.c b/mm/truncate.c
index 894e9a70699..250505091d3 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -187,7 +187,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			if (page_index > next)
 				next = page_index;
 			next++;
-			if (TestSetPageLocked(page))
+			if (!trylock_page(page))
 				continue;
 			if (PageWriteback(page)) {
 				unlock_page(page);
@@ -280,7 +280,7 @@ unsigned long __invalidate_mapping_pages(struct address_space *mapping,
 			pgoff_t index;
 			int lock_failed;
 
-			lock_failed = TestSetPageLocked(page);
+			lock_failed = !trylock_page(page);
 
 			/*
 			 * We really shouldn't be looking at the ->index of an
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 75be453628b..1ff1a58e7c1 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -496,7 +496,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		page = lru_to_page(page_list);
 		list_del(&page->lru);
 
-		if (TestSetPageLocked(page))
+		if (!trylock_page(page))
 			goto keep;
 
 		VM_BUG_ON(PageActive(page));
@@ -582,7 +582,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 				 * A synchronous write - probably a ramdisk.  Go
 				 * ahead and try to reclaim the page.
 				 */
-				if (TestSetPageLocked(page))
+				if (!trylock_page(page))
 					goto keep;
 				if (PageDirty(page) || PageWriteback(page))
 					goto keep_locked;
-- 
cgit v1.2.3-70-g09d2


From ca5de404ff036a29b25e9a83f6919c9f606c5841 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Sat, 2 Aug 2008 12:02:13 +0200
Subject: fs: rename buffer trylock

Like the page lock change, this also requires name change, so convert the
raw test_and_set bitop to a trylock.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/buffer.c                 | 4 ++--
 fs/jbd/commit.c             | 2 +-
 fs/ntfs/aops.c              | 2 +-
 fs/ntfs/compress.c          | 2 +-
 fs/ntfs/mft.c               | 4 ++--
 fs/reiserfs/inode.c         | 2 +-
 fs/reiserfs/journal.c       | 4 ++--
 fs/xfs/linux-2.6/xfs_aops.c | 2 +-
 include/linux/buffer_head.h | 8 ++++++--
 9 files changed, 17 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 4dbe52948e8..38653e36e22 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1720,7 +1720,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 		 */
 		if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
 			lock_buffer(bh);
-		} else if (test_set_buffer_locked(bh)) {
+		} else if (!trylock_buffer(bh)) {
 			redirty_page_for_writepage(wbc, page);
 			continue;
 		}
@@ -3000,7 +3000,7 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
 
 		if (rw == SWRITE || rw == SWRITE_SYNC)
 			lock_buffer(bh);
-		else if (test_set_buffer_locked(bh))
+		else if (!trylock_buffer(bh))
 			continue;
 
 		if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) {
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 81a9ad7177c..ae08c057e75 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -221,7 +221,7 @@ write_out_data:
 		 * blocking lock_buffer().
 		 */
 		if (buffer_dirty(bh)) {
-			if (test_set_buffer_locked(bh)) {
+			if (!trylock_buffer(bh)) {
 				BUFFER_TRACE(bh, "needs blocking lock");
 				spin_unlock(&journal->j_list_lock);
 				/* Write out all data to prevent deadlocks */
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 00e9ccde8e4..b38f944f066 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1194,7 +1194,7 @@ lock_retry_remap:
 		tbh = bhs[i];
 		if (!tbh)
 			continue;
-		if (unlikely(test_set_buffer_locked(tbh)))
+		if (!trylock_buffer(tbh))
 			BUG();
 		/* The buffer dirty state is now irrelevant, just clean it. */
 		clear_buffer_dirty(tbh);
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index 33ff314cc50..9669541d011 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -665,7 +665,7 @@ lock_retry_remap:
 	for (i = 0; i < nr_bhs; i++) {
 		struct buffer_head *tbh = bhs[i];
 
-		if (unlikely(test_set_buffer_locked(tbh)))
+		if (!trylock_buffer(tbh))
 			continue;
 		if (unlikely(buffer_uptodate(tbh))) {
 			unlock_buffer(tbh);
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 790defb847e..17d32ca6bc3 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -586,7 +586,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
 		for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
 			struct buffer_head *tbh = bhs[i_bhs];
 
-			if (unlikely(test_set_buffer_locked(tbh)))
+			if (!trylock_buffer(tbh))
 				BUG();
 			BUG_ON(!buffer_uptodate(tbh));
 			clear_buffer_dirty(tbh);
@@ -779,7 +779,7 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
 	for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
 		struct buffer_head *tbh = bhs[i_bhs];
 
-		if (unlikely(test_set_buffer_locked(tbh)))
+		if (!trylock_buffer(tbh))
 			BUG();
 		BUG_ON(!buffer_uptodate(tbh));
 		clear_buffer_dirty(tbh);
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 192269698a8..5699171212a 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2435,7 +2435,7 @@ static int reiserfs_write_full_page(struct page *page,
 		if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
 			lock_buffer(bh);
 		} else {
-			if (test_set_buffer_locked(bh)) {
+			if (!trylock_buffer(bh)) {
 				redirty_page_for_writepage(wbc, page);
 				continue;
 			}
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index ce2208b2711..c21df71943a 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -855,7 +855,7 @@ static int write_ordered_buffers(spinlock_t * lock,
 		jh = JH_ENTRY(list->next);
 		bh = jh->bh;
 		get_bh(bh);
-		if (test_set_buffer_locked(bh)) {
+		if (!trylock_buffer(bh)) {
 			if (!buffer_dirty(bh)) {
 				list_move(&jh->list, &tmp);
 				goto loop_next;
@@ -3871,7 +3871,7 @@ int reiserfs_prepare_for_journal(struct super_block *p_s_sb,
 {
 	PROC_INFO_INC(p_s_sb, journal.prepare);
 
-	if (test_set_buffer_locked(bh)) {
+	if (!trylock_buffer(bh)) {
 		if (!wait)
 			return 0;
 		lock_buffer(bh);
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index fa73179233a..fa47e43b8b4 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1104,7 +1104,7 @@ xfs_page_state_convert(
 			 * that we are writing into for the first time.
 			 */
 			type = IOMAP_NEW;
-			if (!test_and_set_bit(BH_Lock, &bh->b_state)) {
+			if (trylock_buffer(bh)) {
 				ASSERT(buffer_mapped(bh));
 				if (iomap_valid)
 					all_bh = 1;
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 50cfe8ceb47..eadaab44015 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -115,7 +115,6 @@ BUFFER_FNS(Uptodate, uptodate)
 BUFFER_FNS(Dirty, dirty)
 TAS_BUFFER_FNS(Dirty, dirty)
 BUFFER_FNS(Lock, locked)
-TAS_BUFFER_FNS(Lock, locked)
 BUFFER_FNS(Req, req)
 TAS_BUFFER_FNS(Req, req)
 BUFFER_FNS(Mapped, mapped)
@@ -321,10 +320,15 @@ static inline void wait_on_buffer(struct buffer_head *bh)
 		__wait_on_buffer(bh);
 }
 
+static inline int trylock_buffer(struct buffer_head *bh)
+{
+	return likely(!test_and_set_bit(BH_Lock, &bh->b_state));
+}
+
 static inline void lock_buffer(struct buffer_head *bh)
 {
 	might_sleep();
-	if (test_set_buffer_locked(bh))
+	if (!trylock_buffer(bh))
 		__lock_buffer(bh);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 9e96af8525264973d8d1f800b0ddce0289fc0bdd Mon Sep 17 00:00:00 2001
From: Suresh Jayaraman <sjayaraman@suse.de>
Date: Tue, 5 Aug 2008 14:38:40 +0530
Subject: Fix missing braces in cifs_revalidate()

Fix missing braces introduced during commit
cea218054ad277d6c126890213afde07b4eb1602.  Though setting wbrc to 0
keeps this from causing real bug, this should have been there.

Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Suresh Jayaraman <sjayaraman@suse.de>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/inode.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 46e54d39461..0e5dccc2f79 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1310,10 +1310,11 @@ int cifs_revalidate(struct dentry *direntry)
 /*		if (S_ISDIR(direntry->d_inode->i_mode))
 			shrink_dcache_parent(direntry); */
 		if (S_ISREG(direntry->d_inode->i_mode)) {
-			if (direntry->d_inode->i_mapping)
+			if (direntry->d_inode->i_mapping) {
 				wbrc = filemap_fdatawait(direntry->d_inode->i_mapping);
 				if (wbrc)
 					CIFS_I(direntry->d_inode)->write_behind_rc = wbrc;
+			}
 			/* may eventually have to do this for open files too */
 			if (list_empty(&(cifsInode->openFileList))) {
 				/* changed on server - flush read ahead pages */
-- 
cgit v1.2.3-70-g09d2


From dc60bf1d8328076c27fe07c1849a5c8399bafc39 Mon Sep 17 00:00:00 2001
From: Alexander Beregalov <a.beregalov@gmail.com>
Date: Tue, 5 Aug 2008 13:01:33 -0700
Subject: omfs: fix warning

fs/omfs/inode.c:495: warning: format '%llx' expects type 'long long
	unsigned int', but argument 2 has type 'u64'
fs/omfs/inode.c:495: warning: format '%llx' expects type 'long
	long unsigned int', but argument 3 has type '__be64'

Signed-off-by: Alexander Beregalov <a.beregalov@gmail.com>
Acked-by: Bob Copeland <me@bobcopeland.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/omfs/inode.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index d865f553543..a95fe5984f4 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -492,7 +492,8 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent)
 	if (sbi->s_num_blocks != be64_to_cpu(omfs_rb->r_num_blocks)) {
 		printk(KERN_ERR "omfs: block count discrepancy between "
 			"super and root blocks (%llx, %llx)\n",
-			sbi->s_num_blocks, be64_to_cpu(omfs_rb->r_num_blocks));
+			(unsigned long long)sbi->s_num_blocks,
+			(unsigned long long)be64_to_cpu(omfs_rb->r_num_blocks));
 		goto out_brelse_bh2;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 7c44319dc6deb0028ef7811670bf1e4bc6644672 Mon Sep 17 00:00:00 2001
From: Alexander Beregalov <a.beregalov@gmail.com>
Date: Tue, 5 Aug 2008 13:01:34 -0700
Subject: proc: fix warnings

proc: fix warnings

 fs/proc/base.c:2429: warning: format '%llu' expects type 'long long unsigned int', but argument 3 has type 'u64'
 fs/proc/base.c:2429: warning: format '%llu' expects type 'long long unsigned int', but argument 4 has type 'u64'
 fs/proc/base.c:2429: warning: format '%llu' expects type 'long long unsigned int', but argument 5 has type 'u64'
 fs/proc/base.c:2429: warning: format '%llu' expects type 'long long unsigned int', but argument 6 has type 'u64'
 fs/proc/base.c:2429: warning: format '%llu' expects type 'long long unsigned int', but argument 7 has type 'u64'
 fs/proc/base.c:2429: warning: format '%llu' expects type 'long long unsigned int', but argument 8 has type 'u64'
 fs/proc/base.c:2429: warning: format '%llu' expects type 'long long unsigned int', but argument 9 has type 'u64'

Signed-off-by: Alexander Beregalov <a.beregalov@gmail.com>
Acked-by: Andrea Righi <righi.andrea@gmail.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 01ed610f9b8..a28840b11b8 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2423,10 +2423,13 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
 			"read_bytes: %llu\n"
 			"write_bytes: %llu\n"
 			"cancelled_write_bytes: %llu\n",
-			acct.rchar, acct.wchar,
-			acct.syscr, acct.syscw,
-			acct.read_bytes, acct.write_bytes,
-			acct.cancelled_write_bytes);
+			(unsigned long long)acct.rchar,
+			(unsigned long long)acct.wchar,
+			(unsigned long long)acct.syscr,
+			(unsigned long long)acct.syscw,
+			(unsigned long long)acct.read_bytes,
+			(unsigned long long)acct.write_bytes,
+			(unsigned long long)acct.cancelled_write_bytes);
 }
 
 static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
-- 
cgit v1.2.3-70-g09d2


From 4e1e7fb9e879d48011a887715d7966484d9644ea Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Sat, 2 Aug 2008 07:26:12 -0400
Subject: bundle up Unix SET_PATH_INFO args into a struct and change name

We'd like to be able to use the unix SET_PATH_INFO_BASIC args to set
file times as well, but that makes the argument list rather long. Bundle
up the args for unix SET_PATH_INFO call into a struct. For now, we don't
actually use the times fields anywhere. That will be done in a follow-on
patch.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifspdu.h   |  2 +-
 fs/cifs/cifsproto.h | 17 +++++++++++++---
 fs/cifs/cifssmb.c   | 26 ++++++++++++------------
 fs/cifs/dir.c       | 58 +++++++++++++++++++++++++++++------------------------
 fs/cifs/file.c      | 19 +++++++++---------
 fs/cifs/inode.c     | 54 +++++++++++++++++++++++++++++--------------------
 6 files changed, 102 insertions(+), 74 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 409abce1273..d2a073edd1b 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -262,7 +262,7 @@
  */
 #define CIFS_NO_HANDLE        0xFFFF
 
-#define NO_CHANGE_64          cpu_to_le64(0xFFFFFFFFFFFFFFFFULL)
+#define NO_CHANGE_64          0xFFFFFFFFFFFFFFFFULL
 #define NO_CHANGE_32          0xFFFFFFFFUL
 
 /* IPC$ in ASCII */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index b9f5e935f82..e65ff984909 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -191,9 +191,20 @@ extern int CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon,
 extern int CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon,
 			 __u64 size, __u16 fileHandle, __u32 opener_pid,
 			bool AllocSizeFlag);
-extern int CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *pTcon,
-			char *full_path, __u64 mode, __u64 uid,
-			__u64 gid, dev_t dev,
+
+struct cifs_unix_set_info_args {
+	__u64	ctime;
+	__u64	atime;
+	__u64	mtime;
+	__u64	mode;
+	__u64	uid;
+	__u64	gid;
+	dev_t	device;
+};
+
+extern int CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *pTcon,
+			char *fileName,
+			const struct cifs_unix_set_info_args *args,
 			const struct nls_table *nls_codepage,
 			int remap_special_chars);
 
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index c621ffa2ca9..ced8eaa9d37 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -5013,10 +5013,9 @@ SetAttrLgcyRetry:
 #endif /* temporarily unneeded SetAttr legacy function */
 
 int
-CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *tcon,
-		    char *fileName, __u64 mode, __u64 uid, __u64 gid,
-		    dev_t device, const struct nls_table *nls_codepage,
-		    int remap)
+CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *tcon, char *fileName,
+		   const struct cifs_unix_set_info_args *args, 
+		   const struct nls_table *nls_codepage, int remap)
 {
 	TRANSACTION2_SPI_REQ *pSMB = NULL;
 	TRANSACTION2_SPI_RSP *pSMBr = NULL;
@@ -5025,6 +5024,7 @@ CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *tcon,
 	int bytes_returned = 0;
 	FILE_UNIX_BASIC_INFO *data_offset;
 	__u16 params, param_offset, offset, count, byte_count;
+	__u64 mode = args->mode;
 
 	cFYI(1, ("In SetUID/GID/Mode"));
 setPermsRetry:
@@ -5080,16 +5080,16 @@ setPermsRetry:
 	set file size and do not want to truncate file size to zero
 	accidently as happened on one Samba server beta by putting
 	zero instead of -1 here */
-	data_offset->EndOfFile = NO_CHANGE_64;
-	data_offset->NumOfBytes = NO_CHANGE_64;
-	data_offset->LastStatusChange = NO_CHANGE_64;
-	data_offset->LastAccessTime = NO_CHANGE_64;
-	data_offset->LastModificationTime = NO_CHANGE_64;
-	data_offset->Uid = cpu_to_le64(uid);
-	data_offset->Gid = cpu_to_le64(gid);
+	data_offset->EndOfFile = cpu_to_le64(NO_CHANGE_64);
+	data_offset->NumOfBytes = cpu_to_le64(NO_CHANGE_64);
+	data_offset->LastStatusChange = cpu_to_le64(args->ctime);
+	data_offset->LastAccessTime = cpu_to_le64(args->atime);
+	data_offset->LastModificationTime = cpu_to_le64(args->mtime);
+	data_offset->Uid = cpu_to_le64(args->uid);
+	data_offset->Gid = cpu_to_le64(args->gid);
 	/* better to leave device as zero when it is  */
-	data_offset->DevMajor = cpu_to_le64(MAJOR(device));
-	data_offset->DevMinor = cpu_to_le64(MINOR(device));
+	data_offset->DevMajor = cpu_to_le64(MAJOR(args->device));
+	data_offset->DevMinor = cpu_to_le64(MINOR(args->device));
 	data_offset->Permissions = cpu_to_le64(mode);
 
 	if (S_ISREG(mode))
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index fb69c1fa85c..634cf330fe0 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -226,23 +226,26 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 		/* If Open reported that we actually created a file
 		then we now have to set the mode if possible */
 		if ((pTcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) {
+			struct cifs_unix_set_info_args args = {
+				.mode	= mode,
+				.ctime	= NO_CHANGE_64,
+				.atime	= NO_CHANGE_64,
+				.mtime	= NO_CHANGE_64,
+				.device	= 0,
+			};
+
 			if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
-				CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode,
-					(__u64)current->fsuid,
-					(__u64)current->fsgid,
-					0 /* dev */,
-					cifs_sb->local_nls,
-					cifs_sb->mnt_cifs_flags &
-						CIFS_MOUNT_MAP_SPECIAL_CHR);
+				args.uid = (__u64) current->fsuid;
+				args.gid = (__u64) current->fsgid;
 			} else {
-				CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode,
-					(__u64)-1,
-					(__u64)-1,
-					0 /* dev */,
-					cifs_sb->local_nls,
-					cifs_sb->mnt_cifs_flags &
-						CIFS_MOUNT_MAP_SPECIAL_CHR);
+				args.uid = NO_CHANGE_64;
+				args.gid = NO_CHANGE_64;
 			}
+
+			CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
+				cifs_sb->local_nls,
+				cifs_sb->mnt_cifs_flags &
+					CIFS_MOUNT_MAP_SPECIAL_CHR);
 		} else {
 			/* BB implement mode setting via Windows security
 			   descriptors e.g. */
@@ -357,21 +360,24 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
 	if (full_path == NULL)
 		rc = -ENOMEM;
 	else if (pTcon->unix_ext) {
-		mode &= ~current->fs->umask;
+		struct cifs_unix_set_info_args args = {
+			.mode	= mode & ~current->fs->umask,
+			.ctime	= NO_CHANGE_64,
+			.atime	= NO_CHANGE_64,
+			.mtime	= NO_CHANGE_64,
+			.device	= device_number,
+		};
 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
-			rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path,
-				mode, (__u64)current->fsuid,
-				(__u64)current->fsgid,
-				device_number, cifs_sb->local_nls,
-				cifs_sb->mnt_cifs_flags &
-					CIFS_MOUNT_MAP_SPECIAL_CHR);
+			args.uid = (__u64) current->fsuid;
+			args.gid = (__u64) current->fsgid;
 		} else {
-			rc = CIFSSMBUnixSetPerms(xid, pTcon,
-				full_path, mode, (__u64)-1, (__u64)-1,
-				device_number, cifs_sb->local_nls,
-				cifs_sb->mnt_cifs_flags &
-					CIFS_MOUNT_MAP_SPECIAL_CHR);
+			args.uid = NO_CHANGE_64;
+			args.gid = NO_CHANGE_64;
 		}
+		rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path,
+			&args, cifs_sb->local_nls,
+			cifs_sb->mnt_cifs_flags &
+				CIFS_MOUNT_MAP_SPECIAL_CHR);
 
 		if (!rc) {
 			rc = cifs_get_inode_info_unix(&newinode, full_path,
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0aac824371a..d40738d2ab5 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -310,18 +310,19 @@ int cifs_open(struct inode *inode, struct file *file)
 		/* time to set mode which we can not set earlier due to
 		   problems creating new read-only files */
 		if (pTcon->unix_ext) {
-			CIFSSMBUnixSetPerms(xid, pTcon, full_path,
-					    inode->i_mode,
-					    (__u64)-1, (__u64)-1, 0 /* dev */,
+			struct cifs_unix_set_info_args args = {
+				.mode	= inode->i_mode,
+				.uid	= NO_CHANGE_64,
+				.gid	= NO_CHANGE_64,
+				.ctime	= NO_CHANGE_64,
+				.atime	= NO_CHANGE_64,
+				.mtime	= NO_CHANGE_64,
+				.device	= 0,
+			};
+			CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
 					    cifs_sb->local_nls,
 					    cifs_sb->mnt_cifs_flags &
 						CIFS_MOUNT_MAP_SPECIAL_CHR);
-		} else {
-			/* BB implement via Windows security descriptors eg
-			   CIFSSMBWinSetPerms(xid, pTcon, full_path, mode,
-					      -1, -1, local_nls);
-			   in the meantime could set r/o dos attribute when
-			   perms are eg: mode & 0222 == 0 */
 		}
 	}
 
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 0e5dccc2f79..024846719f1 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -986,23 +986,24 @@ mkdir_get_info:
 				direntry->d_inode->i_nlink = 2;
 		mode &= ~current->fs->umask;
 		if (pTcon->unix_ext) {
+			struct cifs_unix_set_info_args args = {
+				.mode	= mode,
+				.ctime	= NO_CHANGE_64,
+				.atime	= NO_CHANGE_64,
+				.mtime	= NO_CHANGE_64,
+				.device	= 0,
+			};
 			if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
-				CIFSSMBUnixSetPerms(xid, pTcon, full_path,
-						    mode,
-						    (__u64)current->fsuid,
-						    (__u64)current->fsgid,
-						    0 /* dev_t */,
-						    cifs_sb->local_nls,
-						    cifs_sb->mnt_cifs_flags &
-						    CIFS_MOUNT_MAP_SPECIAL_CHR);
+				args.uid = (__u64)current->fsuid;
+				args.gid = (__u64)current->fsgid;
 			} else {
-				CIFSSMBUnixSetPerms(xid, pTcon, full_path,
-						    mode, (__u64)-1,
-						    (__u64)-1, 0 /* dev_t */,
-						    cifs_sb->local_nls,
-						    cifs_sb->mnt_cifs_flags &
-						    CIFS_MOUNT_MAP_SPECIAL_CHR);
+				args.uid = NO_CHANGE_64;
+				args.gid = NO_CHANGE_64;
 			}
+			CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
+					    cifs_sb->local_nls,
+					    cifs_sb->mnt_cifs_flags &
+					    CIFS_MOUNT_MAP_SPECIAL_CHR);
 		} else {
 			if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) &&
 			    (mode & S_IWUGO) == 0) {
@@ -1500,9 +1501,9 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 	FILE_BASIC_INFO time_buf;
 	bool set_time = false;
 	bool set_dosattr = false;
-	__u64 mode = 0xFFFFFFFFFFFFFFFFULL;
-	__u64 uid = 0xFFFFFFFFFFFFFFFFULL;
-	__u64 gid = 0xFFFFFFFFFFFFFFFFULL;
+	__u64 mode = NO_CHANGE_64;
+	__u64 uid = NO_CHANGE_64;
+	__u64 gid = NO_CHANGE_64;
 	struct cifsInodeInfo *cifsInode;
 	struct inode *inode = direntry->d_inode;
 
@@ -1586,12 +1587,21 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 	}
 
 	if ((pTcon->unix_ext)
-	    && (attrs->ia_valid & (ATTR_MODE | ATTR_GID | ATTR_UID)))
-		rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, uid, gid,
-					 0 /* dev_t */, cifs_sb->local_nls,
-					 cifs_sb->mnt_cifs_flags &
+	    && (attrs->ia_valid & (ATTR_MODE | ATTR_GID | ATTR_UID))) {
+		struct cifs_unix_set_info_args args = {
+			.mode	= mode,
+			.uid	= uid,
+			.gid	= gid,
+			.ctime	= NO_CHANGE_64,
+			.atime	= NO_CHANGE_64,
+			.mtime	= NO_CHANGE_64,
+			.device	= 0,
+		};
+		rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
+					cifs_sb->local_nls,
+					cifs_sb->mnt_cifs_flags &
 						CIFS_MOUNT_MAP_SPECIAL_CHR);
-	else if (attrs->ia_valid & ATTR_MODE) {
+	} else if (attrs->ia_valid & ATTR_MODE) {
 		rc = 0;
 #ifdef CONFIG_CIFS_EXPERIMENTAL
 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
-- 
cgit v1.2.3-70-g09d2


From 063ea27925d70b1d9dd4343d685f722f0274bfd1 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Wed, 6 Aug 2008 04:23:13 +0000
Subject: [CIFS] fix trailing whitespace

Jeff left trailing whitespace in previous patch

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifssmb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index ced8eaa9d37..f6e83a78618 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -5014,7 +5014,7 @@ SetAttrLgcyRetry:
 
 int
 CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *tcon, char *fileName,
-		   const struct cifs_unix_set_info_args *args, 
+		   const struct cifs_unix_set_info_args *args,
 		   const struct nls_table *nls_codepage, int remap)
 {
 	TRANSACTION2_SPI_REQ *pSMB = NULL;
-- 
cgit v1.2.3-70-g09d2


From 6fc000e5190234c7e5b244d1e2095d50b630d63f Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Sat, 2 Aug 2008 07:26:12 -0400
Subject: change CIFSSMBSetTimes to CIFSSMBSetPathInfo

CIFSSMBSetTimes is a deceptive name. This function does more that just
set file times. Change it to CIFSSMBSetPathInfo, which is closer to its
real purpose.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsproto.h | 2 +-
 fs/cifs/cifssmb.c   | 6 +++---
 fs/cifs/inode.c     | 8 ++++----
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index e65ff984909..eb2be992608 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -172,7 +172,7 @@ extern int CIFSSMBQFSUnixInfo(const int xid, struct cifsTconInfo *tcon);
 extern int CIFSSMBQFSPosixInfo(const int xid, struct cifsTconInfo *tcon,
 			struct kstatfs *FSData);
 
-extern int CIFSSMBSetTimes(const int xid, struct cifsTconInfo *tcon,
+extern int CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon,
 			const char *fileName, const FILE_BASIC_INFO *data,
 			const struct nls_table *nls_codepage,
 			int remap_special_chars);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index f6e83a78618..daf010a2ba8 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -4882,9 +4882,9 @@ CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
 
 
 int
-CIFSSMBSetTimes(const int xid, struct cifsTconInfo *tcon, const char *fileName,
-		const FILE_BASIC_INFO *data,
-		const struct nls_table *nls_codepage, int remap)
+CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon,
+		   const char *fileName, const FILE_BASIC_INFO *data,
+		   const struct nls_table *nls_codepage, int remap)
 {
 	TRANSACTION2_SPI_REQ *pSMB = NULL;
 	TRANSACTION2_SPI_RSP *pSMBr = NULL;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 024846719f1..9d94afe9b60 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -737,7 +737,7 @@ psx_del_no_retry:
 			/* ATTRS set to normal clears r/o bit */
 			pinfo_buf->Attributes = cpu_to_le32(ATTR_NORMAL);
 			if (!(pTcon->ses->flags & CIFS_SES_NT4))
-				rc = CIFSSMBSetTimes(xid, pTcon, full_path,
+				rc = CIFSSMBSetPathInfo(xid, pTcon, full_path,
 						     pinfo_buf,
 						     cifs_sb->local_nls,
 						     cifs_sb->mnt_cifs_flags &
@@ -1010,7 +1010,7 @@ mkdir_get_info:
 				FILE_BASIC_INFO pInfo;
 				memset(&pInfo, 0, sizeof(pInfo));
 				pInfo.Attributes = cpu_to_le32(ATTR_READONLY);
-				CIFSSMBSetTimes(xid, pTcon, full_path,
+				CIFSSMBSetPathInfo(xid, pTcon, full_path,
 						&pInfo, cifs_sb->local_nls,
 						cifs_sb->mnt_cifs_flags &
 						CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -1680,8 +1680,8 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 		/* In the future we should experiment - try setting timestamps
 		   via Handle (SetFileInfo) instead of by path */
 		if (!(pTcon->ses->flags & CIFS_SES_NT4))
-			rc = CIFSSMBSetTimes(xid, pTcon, full_path, &time_buf,
-					     cifs_sb->local_nls,
+			rc = CIFSSMBSetPathInfo(xid, pTcon, full_path,
+					     &time_buf, cifs_sb->local_nls,
 					     cifs_sb->mnt_cifs_flags &
 						CIFS_MOUNT_MAP_SPECIAL_CHR);
 		else
-- 
cgit v1.2.3-70-g09d2


From 2dd2dfa060650118661422d4e666ac804c388751 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Sat, 2 Aug 2008 07:26:12 -0400
Subject: Rename CIFSSMBSetFileTimes to CIFSSMBSetFileInfo and add PID arg

The new name is more clear since this is also used to set file
attributes. We'll need the pid_of_opener arg so that we can
pass in filehandles of other pids and spare ourselves an open
call.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsproto.h |  5 +++--
 fs/cifs/cifssmb.c   | 11 ++++-------
 fs/cifs/inode.c     | 11 ++++++-----
 3 files changed, 13 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index eb2be992608..a729d083e6f 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -176,8 +176,9 @@ extern int CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon,
 			const char *fileName, const FILE_BASIC_INFO *data,
 			const struct nls_table *nls_codepage,
 			int remap_special_chars);
-extern int CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
-			const FILE_BASIC_INFO *data, __u16 fid);
+extern int CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
+			const FILE_BASIC_INFO *data, __u16 fid,
+			__u32 pid_of_opener);
 #if 0
 extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon,
 			char *fileName, __u16 dos_attributes,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index daf010a2ba8..6e8e8fc04c0 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -4816,8 +4816,8 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
    time and resort to the original setpathinfo level which takes the ancient
    DOS time format with 2 second granularity */
 int
-CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
-		    const FILE_BASIC_INFO *data, __u16 fid)
+CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
+		    const FILE_BASIC_INFO *data, __u16 fid, __u32 pid_of_opener)
 {
 	struct smb_com_transaction2_sfi_req *pSMB  = NULL;
 	char *data_offset;
@@ -4830,11 +4830,8 @@ CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
 	if (rc)
 		return rc;
 
-	/* At this point there is no need to override the current pid
-	with the pid of the opener, but that could change if we someday
-	use an existing handle (rather than opening one on the fly) */
-	/* pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener);
-	pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16));*/
+	pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener);
+	pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16));
 
 	params = 6;
 	pSMB->MaxSetupCount = 0;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 9d94afe9b60..d952914dfc4 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -767,9 +767,10 @@ psx_del_no_retry:
 						 cifs_sb->mnt_cifs_flags &
 						    CIFS_MOUNT_MAP_SPECIAL_CHR);
 				if (rc == 0) {
-					rc = CIFSSMBSetFileTimes(xid, pTcon,
-								 pinfo_buf,
-								 netfid);
+					rc = CIFSSMBSetFileInfo(xid, pTcon,
+								pinfo_buf,
+								netfid,
+								current->tgid);
 					CIFSSMBClose(xid, pTcon, netfid);
 				}
 			}
@@ -1702,8 +1703,8 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 					 cifs_sb->mnt_cifs_flags &
 						CIFS_MOUNT_MAP_SPECIAL_CHR);
 			if (rc == 0) {
-				rc = CIFSSMBSetFileTimes(xid, pTcon, &time_buf,
-							 netfid);
+				rc = CIFSSMBSetFileInfo(xid, pTcon, &time_buf,
+							 netfid, current->tgid);
 				CIFSSMBClose(xid, pTcon, netfid);
 			} else {
 			/* BB For even older servers we could convert time_buf
-- 
cgit v1.2.3-70-g09d2


From 95089910933e10768cfef1ab0bab0c55b962aacb Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 6 Aug 2008 04:39:02 +0000
Subject: [CIFS] cifs_mkdir and cifs_create should respect the setgid bit on
 parent dir

If a server supports unix extensions but does not support POSIX create
routines, then the client will create a new inode with a standard SMB
mkdir or create/open call and then will set the mode. When it does this,
it does not take the setgid bit on the parent directory into account.

This patch has CIFS flip on the setgid bit when the parent directory has
it. If the share is mounted with "setuids" then also change the group
owner to the gid of the parent.

This patch should apply cleanly on top of the setattr cleanup patches
that I sent a few weeks ago.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/dir.c   | 13 ++++++++++---
 fs/cifs/inode.c | 18 +++++++++++++++---
 2 files changed, 25 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 634cf330fe0..e962e75e6f7 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -236,12 +236,14 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 
 			if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
 				args.uid = (__u64) current->fsuid;
-				args.gid = (__u64) current->fsgid;
+				if (inode->i_mode & S_ISGID)
+					args.gid = (__u64) inode->i_gid;
+				else
+					args.gid = (__u64) current->fsgid;
 			} else {
 				args.uid = NO_CHANGE_64;
 				args.gid = NO_CHANGE_64;
 			}
-
 			CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
 				cifs_sb->local_nls,
 				cifs_sb->mnt_cifs_flags &
@@ -270,7 +272,12 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 				    (cifs_sb->mnt_cifs_flags &
 				     CIFS_MOUNT_SET_UID)) {
 					newinode->i_uid = current->fsuid;
-					newinode->i_gid = current->fsgid;
+					if (inode->i_mode & S_ISGID)
+						newinode->i_gid =
+							inode->i_gid;
+					else
+						newinode->i_gid =
+							current->fsgid;
 				}
 			}
 		}
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index d952914dfc4..6d911896d74 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -985,7 +985,12 @@ mkdir_get_info:
 		  * failed to get it from the server or was set bogus */
 		if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2))
 				direntry->d_inode->i_nlink = 2;
+
 		mode &= ~current->fs->umask;
+		/* must turn on setgid bit if parent dir has it */
+		if (inode->i_mode & S_ISGID)
+			mode |= S_ISGID;
+
 		if (pTcon->unix_ext) {
 			struct cifs_unix_set_info_args args = {
 				.mode	= mode,
@@ -996,7 +1001,10 @@ mkdir_get_info:
 			};
 			if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
 				args.uid = (__u64)current->fsuid;
-				args.gid = (__u64)current->fsgid;
+				if (inode->i_mode & S_ISGID)
+					args.gid = (__u64)inode->i_gid;
+				else
+					args.gid = (__u64)current->fsgid;
 			} else {
 				args.uid = NO_CHANGE_64;
 				args.gid = NO_CHANGE_64;
@@ -1026,8 +1034,12 @@ mkdir_get_info:
 				     CIFS_MOUNT_SET_UID) {
 					direntry->d_inode->i_uid =
 						current->fsuid;
-					direntry->d_inode->i_gid =
-						current->fsgid;
+					if (inode->i_mode & S_ISGID)
+						direntry->d_inode->i_gid =
+							inode->i_gid;
+					else
+						direntry->d_inode->i_gid =
+							current->fsgid;
 				}
 			}
 		}
-- 
cgit v1.2.3-70-g09d2


From 26b994fad6a062697846a861ecc008447409dfb6 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Wed, 6 Aug 2008 05:11:33 +0000
Subject: [CIFS] Code cleanup in old sessionsetup code

Remove some long lines

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/CHANGES   |   8 +++
 fs/cifs/connect.c | 150 ++++++++++++++++++++++++++----------------------------
 2 files changed, 80 insertions(+), 78 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 1f3465201fd..f5d0083e09f 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,11 @@
+Version 1.54
+------------
+Fix premature write failure on congested networks (we would give up
+on EAGAIN from the socket too quickly on large writes).
+Cifs_mkdir and cifs_create now respect the setgid bit on parent dir.
+Fix endian problems in acl (mode from/to cifs acl) on bigendian
+architectures.
+
 Version 1.53
 ------------
 DFS support added (Microsoft Distributed File System client support needed
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 34a1fc9dabf..ff4345db720 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3620,97 +3620,91 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
 		}
 		first_time = 1;
 	}
-	if (!rc) {
-		pSesInfo->flags = 0;
-		pSesInfo->capabilities = pSesInfo->server->capabilities;
-		if (linuxExtEnabled == 0)
-			pSesInfo->capabilities &= (~CAP_UNIX);
+
+	if (rc)
+		goto ss_err_exit;
+
+	pSesInfo->flags = 0;
+	pSesInfo->capabilities = pSesInfo->server->capabilities;
+	if (linuxExtEnabled == 0)
+		pSesInfo->capabilities &= (~CAP_UNIX);
 	/*	pSesInfo->sequence_number = 0;*/
-		cFYI(1,
-		      ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d",
-			pSesInfo->server->secMode,
-			pSesInfo->server->capabilities,
-			pSesInfo->server->timeAdj));
-		if (experimEnabled < 2)
-			rc = CIFS_SessSetup(xid, pSesInfo,
-					    first_time, nls_info);
-		else if (extended_security
-				&& (pSesInfo->capabilities
-					& CAP_EXTENDED_SECURITY)
-				&& (pSesInfo->server->secType == NTLMSSP)) {
-			rc = -EOPNOTSUPP;
-		} else if (extended_security
-			   && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
-			   && (pSesInfo->server->secType == RawNTLMSSP)) {
-			cFYI(1, ("NTLMSSP sesssetup"));
-			rc = CIFSNTLMSSPNegotiateSessSetup(xid,
-						pSesInfo,
-						&ntlmv2_flag,
-						nls_info);
-			if (!rc) {
-				if (ntlmv2_flag) {
-					char *v2_response;
-					cFYI(1, ("more secure NTLM ver2 hash"));
-					if (CalcNTLMv2_partial_mac_key(pSesInfo,
-						nls_info)) {
-						rc = -ENOMEM;
-						goto ss_err_exit;
-					} else
-						v2_response = kmalloc(16 + 64 /* blob */, GFP_KERNEL);
-					if (v2_response) {
-						CalcNTLMv2_response(pSesInfo,
-								   v2_response);
-				/*		if (first_time)
-						  cifs_calculate_ntlmv2_mac_key(
-						   pSesInfo->server->mac_signing_key,
-						   response, ntlm_session_key,*/
-						kfree(v2_response);
+	cFYI(1, ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d",
+		 pSesInfo->server->secMode,
+		 pSesInfo->server->capabilities,
+		 pSesInfo->server->timeAdj));
+	if (experimEnabled < 2)
+		rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info);
+	else if (extended_security
+			&& (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
+			&& (pSesInfo->server->secType == NTLMSSP)) {
+		rc = -EOPNOTSUPP;
+	} else if (extended_security
+			&& (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
+			&& (pSesInfo->server->secType == RawNTLMSSP)) {
+		cFYI(1, ("NTLMSSP sesssetup"));
+		rc = CIFSNTLMSSPNegotiateSessSetup(xid, pSesInfo, &ntlmv2_flag,
+						   nls_info);
+		if (!rc) {
+			if (ntlmv2_flag) {
+				char *v2_response;
+				cFYI(1, ("more secure NTLM ver2 hash"));
+				if (CalcNTLMv2_partial_mac_key(pSesInfo,
+								nls_info)) {
+					rc = -ENOMEM;
+					goto ss_err_exit;
+				} else
+					v2_response = kmalloc(16 + 64 /* blob*/,
+								GFP_KERNEL);
+				if (v2_response) {
+					CalcNTLMv2_response(pSesInfo,
+								v2_response);
+				/*	if (first_time)
+						cifs_calculate_ntlmv2_mac_key */
+					kfree(v2_response);
 					/* BB Put dummy sig in SessSetup PDU? */
-					} else {
-						rc = -ENOMEM;
-						goto ss_err_exit;
-					}
-
 				} else {
-					SMBNTencrypt(pSesInfo->password,
-						pSesInfo->server->cryptKey,
-						ntlm_session_key);
-
-					if (first_time)
-						cifs_calculate_mac_key(
-							&pSesInfo->server->mac_signing_key,
-							ntlm_session_key,
-							pSesInfo->password);
+					rc = -ENOMEM;
+					goto ss_err_exit;
 				}
+
+			} else {
+				SMBNTencrypt(pSesInfo->password,
+					     pSesInfo->server->cryptKey,
+					     ntlm_session_key);
+
+				if (first_time)
+					cifs_calculate_mac_key(
+					     &pSesInfo->server->mac_signing_key,
+					     ntlm_session_key,
+					     pSesInfo->password);
+			}
 			/* for better security the weaker lanman hash not sent
 			   in AuthSessSetup so we no longer calculate it */
 
-				rc = CIFSNTLMSSPAuthSessSetup(xid,
-					pSesInfo,
-					ntlm_session_key,
-					ntlmv2_flag,
-					nls_info);
-			}
-		} else { /* old style NTLM 0.12 session setup */
-			SMBNTencrypt(pSesInfo->password,
-				pSesInfo->server->cryptKey,
-				ntlm_session_key);
+			rc = CIFSNTLMSSPAuthSessSetup(xid, pSesInfo,
+						      ntlm_session_key,
+						      ntlmv2_flag,
+						      nls_info);
+		}
+	} else { /* old style NTLM 0.12 session setup */
+		SMBNTencrypt(pSesInfo->password, pSesInfo->server->cryptKey,
+			     ntlm_session_key);
 
-			if (first_time)
-				cifs_calculate_mac_key(
+		if (first_time)
+			cifs_calculate_mac_key(
 					&pSesInfo->server->mac_signing_key,
 					ntlm_session_key, pSesInfo->password);
 
-			rc = CIFSSessSetup(xid, pSesInfo,
-				ntlm_session_key, nls_info);
-		}
-		if (rc) {
-			cERROR(1, ("Send error in SessSetup = %d", rc));
-		} else {
-			cFYI(1, ("CIFS Session Established successfully"));
+		rc = CIFSSessSetup(xid, pSesInfo, ntlm_session_key, nls_info);
+	}
+	if (rc) {
+		cERROR(1, ("Send error in SessSetup = %d", rc));
+	} else {
+		cFYI(1, ("CIFS Session Established successfully"));
 			pSesInfo->status = CifsGood;
-		}
 	}
+
 ss_err_exit:
 	return rc;
 }
-- 
cgit v1.2.3-70-g09d2


From 1ac0ae062cecd37587f5b951089f90e1d9d91769 Mon Sep 17 00:00:00 2001
From: Denis ChengRq <crquan@gmail.com>
Date: Mon, 4 Aug 2008 11:56:30 +0200
Subject: bio: make use of bvec_nr_vecs

Since introduced in 7ba1ba12eee, it should be made use of.

Signed-off-by: Denis ChengRq <crquan@gmail.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/bio.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/bio.c b/fs/bio.c
index 25f1af0d81e..8000e2fa16c 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -77,11 +77,8 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct
 	 */
 
 	bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask);
-	if (bvl) {
-		struct biovec_slab *bp = bvec_slabs + *idx;
-
-		memset(bvl, 0, bp->nr_vecs * sizeof(struct bio_vec));
-	}
+	if (bvl)
+		memset(bvl, 0, bvec_nr_vecs(*idx) * sizeof(struct bio_vec));
 
 	return bvl;
 }
@@ -149,7 +146,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 				goto out;
 			}
 			bio->bi_flags |= idx << BIO_POOL_OFFSET;
-			bio->bi_max_vecs = bvec_slabs[idx].nr_vecs;
+			bio->bi_max_vecs = bvec_nr_vecs(idx);
 		}
 		bio->bi_io_vec = bvl;
 	}
-- 
cgit v1.2.3-70-g09d2


From 3fe5c1dd0a8bf3756c447a28a578593176949d1d Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Sat, 2 Aug 2008 07:26:12 -0400
Subject: spin off cifs_setattr with unix extensions to its own function

Create a new cifs_setattr_unix function to handle a setattr when unix
extensions are enabled and have cifs_setattr call it. Also, clean up
variable declarations in cifs_setattr.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/inode.c | 157 ++++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 119 insertions(+), 38 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 6d911896d74..f68d1abe13e 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1504,30 +1504,138 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
 	return rc;
 }
 
+static int
+cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
+{
+	int rc;
+	int xid;
+	char *full_path = NULL;
+	struct inode *inode = direntry->d_inode;
+	struct cifsInodeInfo *cifsInode = CIFS_I(inode);
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+	struct cifsTconInfo *pTcon = cifs_sb->tcon;
+	struct cifs_unix_set_info_args *args = NULL;
+
+	cFYI(1, ("setattr_unix on file %s attrs->ia_valid=0x%x",
+		 direntry->d_name.name, attrs->ia_valid));
+
+	xid = GetXid();
+
+	if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) {
+		/* check if we have permission to change attrs */
+		rc = inode_change_ok(inode, attrs);
+		if (rc < 0)
+			goto out;
+		else
+			rc = 0;
+	}
+
+	full_path = build_path_from_dentry(direntry);
+	if (full_path == NULL) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) {
+		/*
+		   Flush data before changing file size or changing the last
+		   write time of the file on the server. If the
+		   flush returns error, store it to report later and continue.
+		   BB: This should be smarter. Why bother flushing pages that
+		   will be truncated anyway? Also, should we error out here if
+		   the flush returns error?
+		 */
+		rc = filemap_write_and_wait(inode->i_mapping);
+		if (rc != 0) {
+			cifsInode->write_behind_rc = rc;
+			rc = 0;
+		}
+	}
+
+	if (attrs->ia_valid & ATTR_SIZE) {
+		rc = cifs_set_file_size(inode, attrs, xid, full_path);
+		if (rc != 0)
+			goto out;
+	}
+
+	/* skip mode change if it's just for clearing setuid/setgid */
+	if (attrs->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
+		attrs->ia_valid &= ~ATTR_MODE;
+
+	args = kmalloc(sizeof(*args), GFP_KERNEL);
+	if (args == NULL) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* set up the struct */
+	if (attrs->ia_valid & ATTR_MODE)
+		args->mode = attrs->ia_mode;
+	else
+		args->mode = NO_CHANGE_64;
+
+	if (attrs->ia_valid & ATTR_UID)
+		args->uid = attrs->ia_uid;
+	else
+		args->uid = NO_CHANGE_64;
+
+	if (attrs->ia_valid & ATTR_GID)
+		args->gid = attrs->ia_gid;
+	else
+		args->gid = NO_CHANGE_64;
+
+	if (attrs->ia_valid & ATTR_ATIME)
+		args->atime = cifs_UnixTimeToNT(attrs->ia_atime);
+	else
+		args->atime = NO_CHANGE_64;
+
+	if (attrs->ia_valid & ATTR_MTIME)
+		args->mtime = cifs_UnixTimeToNT(attrs->ia_mtime);
+	else
+		args->mtime = NO_CHANGE_64;
+
+	if (attrs->ia_valid & ATTR_CTIME)
+		args->ctime = cifs_UnixTimeToNT(attrs->ia_ctime);
+	else
+		args->ctime = NO_CHANGE_64;
+
+	args->device = 0;
+	rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path, args,
+				cifs_sb->local_nls,
+				cifs_sb->mnt_cifs_flags &
+				CIFS_MOUNT_MAP_SPECIAL_CHR);
+
+	if (!rc)
+		rc = inode_setattr(inode, attrs);
+out:
+	kfree(args);
+	kfree(full_path);
+	FreeXid(xid);
+	return rc;
+}
+
 int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 {
 	int xid;
-	struct cifs_sb_info *cifs_sb;
-	struct cifsTconInfo *pTcon;
+	struct inode *inode = direntry->d_inode;
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+	struct cifsTconInfo *pTcon = cifs_sb->tcon;
+	struct cifsInodeInfo *cifsInode = CIFS_I(inode);
 	char *full_path = NULL;
 	int rc = -EACCES;
 	FILE_BASIC_INFO time_buf;
 	bool set_time = false;
 	bool set_dosattr = false;
 	__u64 mode = NO_CHANGE_64;
-	__u64 uid = NO_CHANGE_64;
-	__u64 gid = NO_CHANGE_64;
-	struct cifsInodeInfo *cifsInode;
-	struct inode *inode = direntry->d_inode;
+
+	if (pTcon->unix_ext)
+		return cifs_setattr_unix(direntry, attrs);
 
 	xid = GetXid();
 
 	cFYI(1, ("setattr on file %s attrs->iavalid 0x%x",
 		 direntry->d_name.name, attrs->ia_valid));
 
-	cifs_sb = CIFS_SB(inode->i_sb);
-	pTcon = cifs_sb->tcon;
-
 	if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) {
 		/* check if we have permission to change attrs */
 		rc = inode_change_ok(inode, attrs);
@@ -1543,7 +1651,6 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 		FreeXid(xid);
 		return -ENOMEM;
 	}
-	cifsInode = CIFS_I(inode);
 
 	if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) {
 		/*
@@ -1574,19 +1681,8 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 	 * CIFSACL support + proper Windows to Unix idmapping, we may be
 	 * able to support this in the future.
 	 */
-	if (!pTcon->unix_ext &&
-	    !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) {
+	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID))
 		attrs->ia_valid &= ~(ATTR_UID | ATTR_GID);
-	} else {
-		if (attrs->ia_valid & ATTR_UID) {
-			cFYI(1, ("UID changed to %d", attrs->ia_uid));
-			uid = attrs->ia_uid;
-		}
-		if (attrs->ia_valid & ATTR_GID) {
-			cFYI(1, ("GID changed to %d", attrs->ia_gid));
-			gid = attrs->ia_gid;
-		}
-	}
 
 	time_buf.Attributes = 0;
 
@@ -1599,22 +1695,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 		mode = attrs->ia_mode;
 	}
 
-	if ((pTcon->unix_ext)
-	    && (attrs->ia_valid & (ATTR_MODE | ATTR_GID | ATTR_UID))) {
-		struct cifs_unix_set_info_args args = {
-			.mode	= mode,
-			.uid	= uid,
-			.gid	= gid,
-			.ctime	= NO_CHANGE_64,
-			.atime	= NO_CHANGE_64,
-			.mtime	= NO_CHANGE_64,
-			.device	= 0,
-		};
-		rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
-					cifs_sb->local_nls,
-					cifs_sb->mnt_cifs_flags &
-						CIFS_MOUNT_MAP_SPECIAL_CHR);
-	} else if (attrs->ia_valid & ATTR_MODE) {
+	if (attrs->ia_valid & ATTR_MODE) {
 		rc = 0;
 #ifdef CONFIG_CIFS_EXPERIMENTAL
 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
-- 
cgit v1.2.3-70-g09d2


From feb3e20cee25729447e1abdcb40c040b691d457a Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Sat, 2 Aug 2008 07:26:12 -0400
Subject: move file time and dos attribute setting logic into new function

Break up cifs_setattr further by moving the logic that sets file times
and dos attributes into a separate function. This patch also refactors
the logic a bit so that when the file is already open then we go ahead
and do a SetFileInfo call. SetPathInfo seems to be unreliable when
setting times on open files.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/inode.c | 196 +++++++++++++++++++++++++++++++-------------------------
 1 file changed, 109 insertions(+), 87 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index f68d1abe13e..5c722ea2113 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1504,6 +1504,101 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
 	return rc;
 }
 
+static int
+cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid,
+		    char *full_path, __u32 dosattr)
+{
+	int rc;
+	int oplock = 0;
+	__u16 netfid;
+	__u32 netpid;
+	bool set_time = false;
+	struct cifsFileInfo *open_file;
+	struct cifsInodeInfo *cifsInode = CIFS_I(inode);
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+	struct cifsTconInfo *pTcon = cifs_sb->tcon;
+	FILE_BASIC_INFO	info_buf;
+
+	if (attrs->ia_valid & ATTR_ATIME) {
+		set_time = true;
+		info_buf.LastAccessTime =
+			cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime));
+	} else
+		info_buf.LastAccessTime = 0;
+
+	if (attrs->ia_valid & ATTR_MTIME) {
+		set_time = true;
+		info_buf.LastWriteTime =
+		    cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime));
+	} else
+		info_buf.LastWriteTime = 0;
+
+	/*
+	 * Samba throws this field away, but windows may actually use it.
+	 * Do not set ctime unless other time stamps are changed explicitly
+	 * (i.e. by utimes()) since we would then have a mix of client and
+	 * server times.
+	 */
+	if (set_time && (attrs->ia_valid & ATTR_CTIME)) {
+		cFYI(1, ("CIFS - CTIME changed"));
+		info_buf.ChangeTime =
+		    cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime));
+	} else
+		info_buf.ChangeTime = 0;
+
+	info_buf.CreationTime = 0;	/* don't change */
+	info_buf.Attributes = cpu_to_le32(dosattr);
+
+	/*
+	 * If the file is already open for write, just use that fileid
+	 */
+	open_file = find_writable_file(cifsInode);
+	if (open_file) {
+		netfid = open_file->netfid;
+		netpid = open_file->pid;
+		goto set_via_filehandle;
+	}
+
+	/*
+	 * NT4 apparently returns success on this call, but it doesn't
+	 * really work.
+	 */
+	if (!(pTcon->ses->flags & CIFS_SES_NT4)) {
+		rc = CIFSSMBSetPathInfo(xid, pTcon, full_path,
+				     &info_buf, cifs_sb->local_nls,
+				     cifs_sb->mnt_cifs_flags &
+					CIFS_MOUNT_MAP_SPECIAL_CHR);
+		if (rc != -EOPNOTSUPP && rc != -EINVAL)
+			goto out;
+	}
+
+	cFYI(1, ("calling SetFileInfo since SetPathInfo for "
+		 "times not supported by this server"));
+	rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
+			 SYNCHRONIZE | FILE_WRITE_ATTRIBUTES,
+			 CREATE_NOT_DIR, &netfid, &oplock,
+			 NULL, cifs_sb->local_nls,
+			 cifs_sb->mnt_cifs_flags &
+				CIFS_MOUNT_MAP_SPECIAL_CHR);
+
+	if (rc != 0) {
+		if (rc == -EIO)
+			rc = -EINVAL;
+		goto out;
+	}
+
+	netpid = current->tgid;
+
+set_via_filehandle:
+	rc = CIFSSMBSetFileInfo(xid, pTcon, &info_buf, netfid, netpid);
+	if (open_file == NULL)
+		CIFSSMBClose(xid, pTcon, netfid);
+	else
+		atomic_dec(&open_file->wrtPending);
+out:
+	return rc;
+}
+
 static int
 cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
 {
@@ -1623,9 +1718,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 	struct cifsInodeInfo *cifsInode = CIFS_I(inode);
 	char *full_path = NULL;
 	int rc = -EACCES;
-	FILE_BASIC_INFO time_buf;
-	bool set_time = false;
-	bool set_dosattr = false;
+	__u32 dosattr = 0;
 	__u64 mode = NO_CHANGE_64;
 
 	if (pTcon->unix_ext)
@@ -1684,8 +1777,6 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID))
 		attrs->ia_valid &= ~(ATTR_UID | ATTR_GID);
 
-	time_buf.Attributes = 0;
-
 	/* skip mode change if it's just for clearing setuid/setgid */
 	if (attrs->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
 		attrs->ia_valid &= ~ATTR_MODE;
@@ -1704,24 +1795,19 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 #endif
 		if (((mode & S_IWUGO) == 0) &&
 		    (cifsInode->cifsAttrs & ATTR_READONLY) == 0) {
-			set_dosattr = true;
-			time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs |
-							  ATTR_READONLY);
+
+			dosattr = cifsInode->cifsAttrs | ATTR_READONLY;
+
 			/* fix up mode if we're not using dynperm */
 			if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0)
 				attrs->ia_mode = inode->i_mode & ~S_IWUGO;
 		} else if ((mode & S_IWUGO) &&
 			   (cifsInode->cifsAttrs & ATTR_READONLY)) {
-			/* If file is readonly on server, we would
-			not be able to write to it - so if any write
-			bit is enabled for user or group or other we
-			need to at least try to remove r/o dos attr */
-			set_dosattr = true;
-			time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs &
-					    (~ATTR_READONLY));
-			/* Windows ignores set to zero */
-			if (time_buf.Attributes == 0)
-				time_buf.Attributes |= cpu_to_le32(ATTR_NORMAL);
+
+			dosattr = cifsInode->cifsAttrs & ~ATTR_READONLY;
+			/* Attributes of 0 are ignored */
+			if (dosattr == 0)
+				dosattr |= ATTR_NORMAL;
 
 			/* reset local inode permissions to normal */
 			if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) {
@@ -1739,82 +1825,18 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 		}
 	}
 
-	if (attrs->ia_valid & ATTR_ATIME) {
-		set_time = true;
-		time_buf.LastAccessTime =
-		    cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime));
-	} else
-		time_buf.LastAccessTime = 0;
-
-	if (attrs->ia_valid & ATTR_MTIME) {
-		set_time = true;
-		time_buf.LastWriteTime =
-		    cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime));
-	} else
-		time_buf.LastWriteTime = 0;
-	/* Do not set ctime explicitly unless other time
-	   stamps are changed explicitly (i.e. by utime()
-	   since we would then have a mix of client and
-	   server times */
-
-	if (set_time && (attrs->ia_valid & ATTR_CTIME)) {
-		set_time = true;
-		/* Although Samba throws this field away
-		it may be useful to Windows - but we do
-		not want to set ctime unless some other
-		timestamp is changing */
-		cFYI(1, ("CIFS - CTIME changed"));
-		time_buf.ChangeTime =
-		    cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime));
-	} else
-		time_buf.ChangeTime = 0;
-
-	if (set_time || set_dosattr) {
-		time_buf.CreationTime = 0;	/* do not change */
-		/* In the future we should experiment - try setting timestamps
-		   via Handle (SetFileInfo) instead of by path */
-		if (!(pTcon->ses->flags & CIFS_SES_NT4))
-			rc = CIFSSMBSetPathInfo(xid, pTcon, full_path,
-					     &time_buf, cifs_sb->local_nls,
-					     cifs_sb->mnt_cifs_flags &
-						CIFS_MOUNT_MAP_SPECIAL_CHR);
-		else
-			rc = -EOPNOTSUPP;
-
-		if (rc == -EOPNOTSUPP) {
-			int oplock = 0;
-			__u16 netfid;
+	if (attrs->ia_valid & (ATTR_MTIME|ATTR_ATIME|ATTR_CTIME) ||
+	    ((attrs->ia_valid & ATTR_MODE) && dosattr)) {
+		rc = cifs_set_file_info(inode, attrs, xid, full_path, dosattr);
+		/* BB: check for rc = -EOPNOTSUPP and switch to legacy mode */
 
-			cFYI(1, ("calling SetFileInfo since SetPathInfo for "
-				 "times not supported by this server"));
-			/* BB we could scan to see if we already have it open
-			   and pass in pid of opener to function */
-			rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
-					 SYNCHRONIZE | FILE_WRITE_ATTRIBUTES,
-					 CREATE_NOT_DIR, &netfid, &oplock,
-					 NULL, cifs_sb->local_nls,
-					 cifs_sb->mnt_cifs_flags &
-						CIFS_MOUNT_MAP_SPECIAL_CHR);
-			if (rc == 0) {
-				rc = CIFSSMBSetFileInfo(xid, pTcon, &time_buf,
-							 netfid, current->tgid);
-				CIFSSMBClose(xid, pTcon, netfid);
-			} else {
-			/* BB For even older servers we could convert time_buf
-			   into old DOS style which uses two second
-			   granularity */
-
-			/* rc = CIFSSMBSetTimesLegacy(xid, pTcon, full_path,
-					&time_buf, cifs_sb->local_nls); */
-			}
-		}
 		/* Even if error on time set, no sense failing the call if
 		the server would set the time to a reasonable value anyway,
 		and this check ensures that we are not being called from
 		sys_utimes in which case we ought to fail the call back to
 		the user when the server rejects the call */
 		if ((rc) && (attrs->ia_valid &
-			 (ATTR_MODE | ATTR_GID | ATTR_UID | ATTR_SIZE)))
+				(ATTR_MODE | ATTR_GID | ATTR_UID | ATTR_SIZE)))
 			rc = 0;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 0510eeb7367aca017c6320d04cfd9cbc3b5dd992 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Sat, 2 Aug 2008 07:26:12 -0400
Subject: turn cifs_setattr into a multiplexor that calls the correct function

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/inode.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 5c722ea2113..28a22092d45 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1709,21 +1709,18 @@ out:
 	return rc;
 }
 
-int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
+static int
+cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
 {
 	int xid;
 	struct inode *inode = direntry->d_inode;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
-	struct cifsTconInfo *pTcon = cifs_sb->tcon;
 	struct cifsInodeInfo *cifsInode = CIFS_I(inode);
 	char *full_path = NULL;
 	int rc = -EACCES;
 	__u32 dosattr = 0;
 	__u64 mode = NO_CHANGE_64;
 
-	if (pTcon->unix_ext)
-		return cifs_setattr_unix(direntry, attrs);
-
 	xid = GetXid();
 
 	cFYI(1, ("setattr on file %s attrs->iavalid 0x%x",
@@ -1850,6 +1847,21 @@ cifs_setattr_exit:
 	return rc;
 }
 
+int
+cifs_setattr(struct dentry *direntry, struct iattr *attrs)
+{
+	struct inode *inode = direntry->d_inode;
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+	struct cifsTconInfo *pTcon = cifs_sb->tcon;
+
+	if (pTcon->unix_ext)
+		return cifs_setattr_unix(direntry, attrs);
+
+	return cifs_setattr_nounix(direntry, attrs);
+
+	/* BB: add cifs_setattr_legacy for really old servers */
+}
+
 #if 0
 void cifs_delete_inode(struct inode *inode)
 {
-- 
cgit v1.2.3-70-g09d2


From f1c7f79b6ab4f7ada002a0fae47f462ede6b6857 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 8 Aug 2008 19:26:42 +0300
Subject: [NFSD] uninline nfsd4_op_name()

There doesn't seem to be a compelling reason why nfsd4_op_name() is
marked as "inline":

It's only used in a dprintk(), and as long as it has only one caller
non-ancient gcc versions anyway inline it automatically.

This patch fixes the following compile error with gcc 3.4:

  ...
    CC      fs/nfsd/nfs4proc.o
  nfs4proc.c: In function `nfsd4_proc_compound':
  nfs4proc.c:854: sorry, unimplemented: inlining failed in call to
  nfs4proc.c:897: sorry, unimplemented: called from here
  make[3]: *** [fs/nfsd/nfs4proc.o] Error 1

Reported-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Adrian Bunk <bunk@kernel.org>
[ Also made it "const char *"  - Linus]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfsd/nfs4proc.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index eef1629806f..2e51adac65d 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -851,7 +851,7 @@ struct nfsd4_operation {
 
 static struct nfsd4_operation nfsd4_ops[];
 
-static inline char *nfsd4_op_name(unsigned opnum);
+static const char *nfsd4_op_name(unsigned opnum);
 
 /*
  * COMPOUND call.
@@ -1116,8 +1116,7 @@ static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = {
 	},
 };
 
-static inline char *
-nfsd4_op_name(unsigned opnum)
+static const char *nfsd4_op_name(unsigned opnum)
 {
 	if (opnum < ARRAY_SIZE(nfsd4_ops))
 		return nfsd4_ops[opnum].op_name;
-- 
cgit v1.2.3-70-g09d2


From ad8b15f0ffc297cdd6c65ef7552b3b8abd11a401 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Fri, 8 Aug 2008 21:10:16 +0000
Subject: [CIFS] list entry can not return null

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifs_debug.c | 53 ++++++++++++++++++---------------------
 fs/cifs/cifsfs.c     | 71 ++++++++++++++++++++++++----------------------------
 fs/cifs/cifssmb.c    |  3 +--
 fs/cifs/connect.c    |  8 +++---
 4 files changed, 61 insertions(+), 74 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 688a2d42153..69a12aae91d 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -79,27 +79,25 @@ void cifs_dump_mids(struct TCP_Server_Info *server)
 	spin_lock(&GlobalMid_Lock);
 	list_for_each(tmp, &server->pending_mid_q) {
 		mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
-		if (mid_entry) {
-			cERROR(1, ("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d",
-				mid_entry->midState,
-				(int)mid_entry->command,
-				mid_entry->pid,
-				mid_entry->tsk,
-				mid_entry->mid));
+		cERROR(1, ("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d",
+			mid_entry->midState,
+			(int)mid_entry->command,
+			mid_entry->pid,
+			mid_entry->tsk,
+			mid_entry->mid));
 #ifdef CONFIG_CIFS_STATS2
-			cERROR(1, ("IsLarge: %d buf: %p time rcv: %ld now: %ld",
-				mid_entry->largeBuf,
-				mid_entry->resp_buf,
-				mid_entry->when_received,
-				jiffies));
+		cERROR(1, ("IsLarge: %d buf: %p time rcv: %ld now: %ld",
+			mid_entry->largeBuf,
+			mid_entry->resp_buf,
+			mid_entry->when_received,
+			jiffies));
 #endif /* STATS2 */
-			cERROR(1, ("IsMult: %d IsEnd: %d", mid_entry->multiRsp,
-				  mid_entry->multiEnd));
-			if (mid_entry->resp_buf) {
-				cifs_dump_detail(mid_entry->resp_buf);
-				cifs_dump_mem("existing buf: ",
-					mid_entry->resp_buf, 62);
-			}
+		cERROR(1, ("IsMult: %d IsEnd: %d", mid_entry->multiRsp,
+			  mid_entry->multiEnd));
+		if (mid_entry->resp_buf) {
+			cifs_dump_detail(mid_entry->resp_buf);
+			cifs_dump_mem("existing buf: ",
+				mid_entry->resp_buf, 62);
 		}
 	}
 	spin_unlock(&GlobalMid_Lock);
@@ -163,16 +161,13 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
 				mid_entry = list_entry(tmp1, struct
 					mid_q_entry,
 					qhead);
-				if (mid_entry) {
-					seq_printf(m,
-							"State: %d com: %d pid:"
-							" %d tsk: %p mid %d\n",
-							mid_entry->midState,
-							(int)mid_entry->command,
-							mid_entry->pid,
-							mid_entry->tsk,
-							mid_entry->mid);
-				}
+				seq_printf(m, "State: %d com: %d pid:"
+						" %d tsk: %p mid %d\n",
+						mid_entry->midState,
+						(int)mid_entry->command,
+						mid_entry->pid,
+						mid_entry->tsk,
+						mid_entry->mid);
 			}
 			spin_unlock(&GlobalMid_Lock);
 		}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 1ec7076f7b2..e8da4ee761b 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -930,36 +930,34 @@ static int cifs_oplock_thread(void *dummyarg)
 			schedule_timeout(39*HZ);
 		} else {
 			oplock_item = list_entry(GlobalOplock_Q.next,
-				struct oplock_q_entry, qhead);
-			if (oplock_item) {
-				cFYI(1, ("found oplock item to write out"));
-				pTcon = oplock_item->tcon;
-				inode = oplock_item->pinode;
-				netfid = oplock_item->netfid;
-				spin_unlock(&GlobalMid_Lock);
-				DeleteOplockQEntry(oplock_item);
-				/* can not grab inode sem here since it would
+						struct oplock_q_entry, qhead);
+			cFYI(1, ("found oplock item to write out"));
+			pTcon = oplock_item->tcon;
+			inode = oplock_item->pinode;
+			netfid = oplock_item->netfid;
+			spin_unlock(&GlobalMid_Lock);
+			DeleteOplockQEntry(oplock_item);
+			/* can not grab inode sem here since it would
 				deadlock when oplock received on delete
 				since vfs_unlink holds the i_mutex across
 				the call */
-				/* mutex_lock(&inode->i_mutex);*/
-				if (S_ISREG(inode->i_mode)) {
-					rc =
-					   filemap_fdatawrite(inode->i_mapping);
-					if (CIFS_I(inode)->clientCanCacheRead
-									 == 0) {
-						waitrc = filemap_fdatawait(inode->i_mapping);
-						invalidate_remote_inode(inode);
-					}
-					if (rc == 0)
-						rc = waitrc;
-				} else
-					rc = 0;
-				/* mutex_unlock(&inode->i_mutex);*/
-				if (rc)
-					CIFS_I(inode)->write_behind_rc = rc;
-				cFYI(1, ("Oplock flush inode %p rc %d",
-					inode, rc));
+			/* mutex_lock(&inode->i_mutex);*/
+			if (S_ISREG(inode->i_mode)) {
+				rc = filemap_fdatawrite(inode->i_mapping);
+				if (CIFS_I(inode)->clientCanCacheRead == 0) {
+					waitrc = filemap_fdatawait(
+							      inode->i_mapping);
+					invalidate_remote_inode(inode);
+				}
+				if (rc == 0)
+					rc = waitrc;
+			} else
+				rc = 0;
+			/* mutex_unlock(&inode->i_mutex);*/
+			if (rc)
+				CIFS_I(inode)->write_behind_rc = rc;
+			cFYI(1, ("Oplock flush inode %p rc %d",
+				inode, rc));
 
 				/* releasing stale oplock after recent reconnect
 				of smb session using a now incorrect file
@@ -967,15 +965,13 @@ static int cifs_oplock_thread(void *dummyarg)
 				not bother sending an oplock release if session
 				to server still is disconnected since oplock
 				already released by the server in that case */
-				if (pTcon->tidStatus != CifsNeedReconnect) {
-				    rc = CIFSSMBLock(0, pTcon, netfid,
-					    0 /* len */ , 0 /* offset */, 0,
-					    0, LOCKING_ANDX_OPLOCK_RELEASE,
-					    false /* wait flag */);
-					cFYI(1, ("Oplock release rc = %d", rc));
-				}
-			} else
-				spin_unlock(&GlobalMid_Lock);
+			if (pTcon->tidStatus != CifsNeedReconnect) {
+				rc = CIFSSMBLock(0, pTcon, netfid,
+						0 /* len */ , 0 /* offset */, 0,
+						0, LOCKING_ANDX_OPLOCK_RELEASE,
+						false /* wait flag */);
+				cFYI(1, ("Oplock release rc = %d", rc));
+			}
 			set_current_state(TASK_INTERRUPTIBLE);
 			schedule_timeout(1);  /* yield in case q were corrupt */
 		}
@@ -1001,8 +997,7 @@ static int cifs_dnotify_thread(void *dummyarg)
 		list_for_each(tmp, &GlobalSMBSessionList) {
 			ses = list_entry(tmp, struct cifsSesInfo,
 				cifsSessionList);
-			if (ses && ses->server &&
-			     atomic_read(&ses->server->inFlight))
+			if (ses->server && atomic_read(&ses->server->inFlight))
 				wake_up_all(&ses->server->response_q);
 		}
 		read_unlock(&GlobalSMBSeslock);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 6e8e8fc04c0..994de7c9047 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -128,8 +128,7 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon)
 	write_lock(&GlobalSMBSeslock);
 	list_for_each_safe(tmp, tmp1, &pTcon->openFileList) {
 		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
-		if (open_file)
-			open_file->invalidHandle = true;
+		open_file->invalidHandle = true;
 	}
 	write_unlock(&GlobalSMBSeslock);
 	/* BB Add call to invalidate_inodes(sb) for all superblocks mounted
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index ff4345db720..0711db65afe 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -151,7 +151,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
 	}
 	list_for_each(tmp, &GlobalTreeConnectionList) {
 		tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
-		if ((tcon) && (tcon->ses) && (tcon->ses->server == server))
+		if ((tcon->ses) && (tcon->ses->server == server))
 			tcon->tidStatus = CifsNeedReconnect;
 	}
 	read_unlock(&GlobalSMBSeslock);
@@ -173,14 +173,12 @@ cifs_reconnect(struct TCP_Server_Info *server)
 		mid_entry = list_entry(tmp, struct
 					mid_q_entry,
 					qhead);
-		if (mid_entry) {
-			if (mid_entry->midState == MID_REQUEST_SUBMITTED) {
+		if (mid_entry->midState == MID_REQUEST_SUBMITTED) {
 				/* Mark other intransit requests as needing
 				   retry so we do not immediately mark the
 				   session bad again (ie after we reconnect
 				   below) as they timeout too */
-				mid_entry->midState = MID_RETRY_NEEDED;
-			}
+			mid_entry->midState = MID_RETRY_NEEDED;
 		}
 	}
 	spin_unlock(&GlobalMid_Lock);
-- 
cgit v1.2.3-70-g09d2