From 115ff50bade0f93a288677745a5884def6cbf9b1 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Wed, 26 Jul 2006 14:52:13 -0500
Subject: JFS: Quota support broken, no quota_read and quota_write

jfs_quota_read/write are very near duplicates of ext2_quota_read/write.

Cleaned up jfs_get_block as long as I had to change it to be non-static.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/inode.c     |  16 ++------
 fs/jfs/jfs_inode.h |   1 +
 fs/jfs/super.c     | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 119 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 43e3f566aad..a223cf4faa9 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -168,16 +168,15 @@ void jfs_dirty_inode(struct inode *inode)
 	set_cflag(COMMIT_Dirty, inode);
 }
 
-static int
-jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks,
-			struct buffer_head *bh_result, int create)
+int jfs_get_block(struct inode *ip, sector_t lblock,
+		  struct buffer_head *bh_result, int create)
 {
 	s64 lblock64 = lblock;
 	int rc = 0;
 	xad_t xad;
 	s64 xaddr;
 	int xflag;
-	s32 xlen = max_blocks;
+	s32 xlen = bh_result->b_size >> ip->i_blkbits;
 
 	/*
 	 * Take appropriate lock on inode
@@ -188,7 +187,7 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks,
 		IREAD_LOCK(ip);
 
 	if (((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size) &&
-	    (!xtLookup(ip, lblock64, max_blocks, &xflag, &xaddr, &xlen, 0)) &&
+	    (!xtLookup(ip, lblock64, xlen, &xflag, &xaddr, &xlen, 0)) &&
 	    xaddr) {
 		if (xflag & XAD_NOTRECORDED) {
 			if (!create)
@@ -255,13 +254,6 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks,
 	return rc;
 }
 
-static int jfs_get_block(struct inode *ip, sector_t lblock,
-			 struct buffer_head *bh_result, int create)
-{
-	return jfs_get_blocks(ip, lblock, bh_result->b_size >> ip->i_blkbits,
-			bh_result, create);
-}
-
 static int jfs_writepage(struct page *page, struct writeback_control *wbc)
 {
 	return nobh_writepage(page, jfs_get_block, wbc);
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index b5c7da6190d..1fc48df670c 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -32,6 +32,7 @@ extern void jfs_truncate_nolock(struct inode *, loff_t);
 extern void jfs_free_zero_link(struct inode *);
 extern struct dentry *jfs_get_parent(struct dentry *dentry);
 extern void jfs_set_inode_flags(struct inode *);
+extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
 
 extern const struct address_space_operations jfs_aops;
 extern struct inode_operations jfs_dir_inode_operations;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 4f6cfebc82d..90ee0de829c 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -26,6 +26,7 @@
 #include <linux/moduleparam.h>
 #include <linux/kthread.h>
 #include <linux/posix_acl.h>
+#include <linux/buffer_head.h>
 #include <asm/uaccess.h>
 #include <linux/seq_file.h>
 
@@ -298,7 +299,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
 			break;
 		}
 
-#if defined(CONFIG_QUOTA)
+#ifdef CONFIG_QUOTA
 		case Opt_quota:
 		case Opt_usrquota:
 			*flag |= JFS_USRQUOTA;
@@ -597,7 +598,7 @@ static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	if (sbi->flag & JFS_NOINTEGRITY)
 		seq_puts(seq, ",nointegrity");
 
-#if defined(CONFIG_QUOTA)
+#ifdef CONFIG_QUOTA
 	if (sbi->flag & JFS_USRQUOTA)
 		seq_puts(seq, ",usrquota");
 
@@ -608,6 +609,112 @@ static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	return 0;
 }
 
+#ifdef CONFIG_QUOTA
+
+/* Read data from quotafile - avoid pagecache and such because we cannot afford
+ * acquiring the locks... As quota files are never truncated and quota code
+ * itself serializes the operations (and noone else should touch the files)
+ * we don't have to be afraid of races */
+static ssize_t jfs_quota_read(struct super_block *sb, int type, char *data,
+			      size_t len, loff_t off)
+{
+	struct inode *inode = sb_dqopt(sb)->files[type];
+	sector_t blk = off >> sb->s_blocksize_bits;
+	int err = 0;
+	int offset = off & (sb->s_blocksize - 1);
+	int tocopy;
+	size_t toread;
+	struct buffer_head tmp_bh;
+	struct buffer_head *bh;
+	loff_t i_size = i_size_read(inode);
+
+	if (off > i_size)
+		return 0;
+	if (off+len > i_size)
+		len = i_size-off;
+	toread = len;
+	while (toread > 0) {
+		tocopy = sb->s_blocksize - offset < toread ?
+				sb->s_blocksize - offset : toread;
+
+		tmp_bh.b_state = 0;
+		tmp_bh.b_size = 1 << inode->i_blkbits;
+		err = jfs_get_block(inode, blk, &tmp_bh, 0);
+		if (err)
+			return err;
+		if (!buffer_mapped(&tmp_bh))	/* A hole? */
+			memset(data, 0, tocopy);
+		else {
+			bh = sb_bread(sb, tmp_bh.b_blocknr);
+			if (!bh)
+				return -EIO;
+			memcpy(data, bh->b_data+offset, tocopy);
+			brelse(bh);
+		}
+		offset = 0;
+		toread -= tocopy;
+		data += tocopy;
+		blk++;
+	}
+	return len;
+}
+
+/* Write to quotafile */
+static ssize_t jfs_quota_write(struct super_block *sb, int type,
+			       const char *data, size_t len, loff_t off)
+{
+	struct inode *inode = sb_dqopt(sb)->files[type];
+	sector_t blk = off >> sb->s_blocksize_bits;
+	int err = 0;
+	int offset = off & (sb->s_blocksize - 1);
+	int tocopy;
+	size_t towrite = len;
+	struct buffer_head tmp_bh;
+	struct buffer_head *bh;
+
+	mutex_lock(&inode->i_mutex);
+	while (towrite > 0) {
+		tocopy = sb->s_blocksize - offset < towrite ?
+				sb->s_blocksize - offset : towrite;
+
+		tmp_bh.b_state = 0;
+		err = jfs_get_block(inode, blk, &tmp_bh, 1);
+		if (err)
+			goto out;
+		if (offset || tocopy != sb->s_blocksize)
+			bh = sb_bread(sb, tmp_bh.b_blocknr);
+		else
+			bh = sb_getblk(sb, tmp_bh.b_blocknr);
+		if (!bh) {
+			err = -EIO;
+			goto out;
+		}
+		lock_buffer(bh);
+		memcpy(bh->b_data+offset, data, tocopy);
+		flush_dcache_page(bh->b_page);
+		set_buffer_uptodate(bh);
+		mark_buffer_dirty(bh);
+		unlock_buffer(bh);
+		brelse(bh);
+		offset = 0;
+		towrite -= tocopy;
+		data += tocopy;
+		blk++;
+	}
+out:
+	if (len == towrite)
+		return err;
+	if (inode->i_size < off+len-towrite)
+		i_size_write(inode, off+len-towrite);
+	inode->i_version++;
+	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	mark_inode_dirty(inode);
+	mutex_unlock(&inode->i_mutex);
+	return len - towrite;
+}
+
+#endif
+
 static struct super_operations jfs_super_operations = {
 	.alloc_inode	= jfs_alloc_inode,
 	.destroy_inode	= jfs_destroy_inode,
@@ -621,7 +728,11 @@ static struct super_operations jfs_super_operations = {
 	.unlockfs       = jfs_unlockfs,
 	.statfs		= jfs_statfs,
 	.remount_fs	= jfs_remount,
-	.show_options	= jfs_show_options
+	.show_options	= jfs_show_options,
+#ifdef CONFIG_QUOTA
+	.quota_read	= jfs_quota_read,
+	.quota_write	= jfs_quota_write,
+#endif
 };
 
 static struct export_operations jfs_export_operations = {
-- 
cgit v1.2.3-70-g09d2


From 8bcb2839b74d605f5549962a6e69dc07768e95b6 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Fri, 28 Jul 2006 08:46:05 -0500
Subject: JFS: Fix bug in quota code.  tmp_bh.b_size must be initialized

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/super.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 90ee0de829c..143bcd1d5ea 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -678,6 +678,7 @@ static ssize_t jfs_quota_write(struct super_block *sb, int type,
 				sb->s_blocksize - offset : towrite;
 
 		tmp_bh.b_state = 0;
+		tmp_bh.b_size = 1 << inode->i_blkbits;
 		err = jfs_get_block(inode, blk, &tmp_bh, 1);
 		if (err)
 			goto out;
-- 
cgit v1.2.3-70-g09d2


From 4b1af774451bbc8440719e3fe441934a337c3b63 Mon Sep 17 00:00:00 2001
From: Kurt Hackel <kurt.hackel@oracle.com>
Date: Mon, 26 Jun 2006 15:17:47 -0700
Subject: ocfs2: Fix lvb corruption

Properly ignore LVB flags during a PR downconvert. This avoids an illegal
lvb update.

Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlm/dlmunlock.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index b0c3134f4f7..4ce3f82fb73 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -483,6 +483,10 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data)
 
 	/* lock was found on queue */
 	lksb = lock->lksb;
+	if (flags & (LKM_VALBLK|LKM_PUT_LVB) &&
+	    lock->ml.type != LKM_EXMODE)
+		flags &= ~(LKM_VALBLK|LKM_PUT_LVB);
+
 	/* unlockast only called on originating node */
 	if (flags & LKM_PUT_LVB) {
 		lksb->flags |= DLM_LKSB_PUT_LVB;
@@ -632,6 +636,8 @@ retry:
 
 	spin_lock(&res->spinlock);
 	is_master = (res->owner == dlm->node_num);
+	if (flags & LKM_VALBLK && lock->ml.type != LKM_EXMODE)
+		flags &= ~LKM_VALBLK;
 	spin_unlock(&res->spinlock);
 
 	if (is_master) {
-- 
cgit v1.2.3-70-g09d2


From a23eac99d4392b8b779305498d7614e41a0e16e9 Mon Sep 17 00:00:00 2001
From: Kurt Hackel <kurt.hackel@oracle.com>
Date: Sun, 18 Jun 2006 21:28:01 -0700
Subject: ocfs2: do not modify lksb->status in the unlock ast

This can race with other ast notification, which can cause bad status values
to propagate into the unlock ast.

Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlm/dlmunlock.c | 37 ++++++++++++-------------------------
 1 file changed, 12 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 4ce3f82fb73..59866e471d9 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -155,7 +155,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
 	else
 		status = dlm_get_unlock_actions(dlm, res, lock, lksb, &actions);
 
-	if (status != DLM_NORMAL)
+	if (status != DLM_NORMAL && status != DLM_CANCELGRANT)
 		goto leave;
 
 	/* By now this has been masked out of cancel requests. */
@@ -183,8 +183,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
 		spin_lock(&lock->spinlock);
 		/* if the master told us the lock was already granted,
 		 * let the ast handle all of these actions */
-		if (status == DLM_NORMAL &&
-		    lksb->status == DLM_CANCELGRANT) {
+		if (status == DLM_CANCELGRANT) {
 			actions &= ~(DLM_UNLOCK_REMOVE_LOCK|
 				     DLM_UNLOCK_REGRANT_LOCK|
 				     DLM_UNLOCK_CLEAR_CONVERT_TYPE);
@@ -349,14 +348,9 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
 					vec, veclen, owner, &status);
 	if (tmpret >= 0) {
 		// successfully sent and received
-		if (status == DLM_CANCELGRANT)
-			ret = DLM_NORMAL;
-		else if (status == DLM_FORWARD) {
+		if (status == DLM_FORWARD)
 			mlog(0, "master was in-progress.  retry\n");
-			ret = DLM_FORWARD;
-		} else
-			ret = status;
-		lksb->status = status;
+		ret = status;
 	} else {
 		mlog_errno(tmpret);
 		if (dlm_is_host_down(tmpret)) {
@@ -372,7 +366,6 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
 			/* something bad.  this will BUG in ocfs2 */
 			ret = dlm_err_to_dlm_status(tmpret);
 		}
-		lksb->status = ret;
 	}
 
 	return ret;
@@ -511,11 +504,8 @@ not_found:
 			       "cookie=%u:%llu\n",
 			       dlm_get_lock_cookie_node(unlock->cookie),
 			       dlm_get_lock_cookie_seq(unlock->cookie));
-	else {
-		/* send the lksb->status back to the other node */
-		status = lksb->status;
+	else
 		dlm_lock_put(lock);
-	}
 
 leave:
 	if (res)
@@ -537,26 +527,22 @@ static enum dlm_status dlm_get_cancel_actions(struct dlm_ctxt *dlm,
 
 	if (dlm_lock_on_list(&res->blocked, lock)) {
 		/* cancel this outright */
-		lksb->status = DLM_NORMAL;
 		status = DLM_NORMAL;
 		*actions = (DLM_UNLOCK_CALL_AST |
 			    DLM_UNLOCK_REMOVE_LOCK);
 	} else if (dlm_lock_on_list(&res->converting, lock)) {
 		/* cancel the request, put back on granted */
-		lksb->status = DLM_NORMAL;
 		status = DLM_NORMAL;
 		*actions = (DLM_UNLOCK_CALL_AST |
 			    DLM_UNLOCK_REMOVE_LOCK |
 			    DLM_UNLOCK_REGRANT_LOCK |
 			    DLM_UNLOCK_CLEAR_CONVERT_TYPE);
 	} else if (dlm_lock_on_list(&res->granted, lock)) {
-		/* too late, already granted.  DLM_CANCELGRANT */
-		lksb->status = DLM_CANCELGRANT;
-		status = DLM_NORMAL;
+		/* too late, already granted. */
+		status = DLM_CANCELGRANT;
 		*actions = DLM_UNLOCK_CALL_AST;
 	} else {
 		mlog(ML_ERROR, "lock to cancel is not on any list!\n");
-		lksb->status = DLM_IVLOCKID;
 		status = DLM_IVLOCKID;
 		*actions = 0;
 	}
@@ -573,13 +559,11 @@ static enum dlm_status dlm_get_unlock_actions(struct dlm_ctxt *dlm,
 
 	/* unlock request */
 	if (!dlm_lock_on_list(&res->granted, lock)) {
-		lksb->status = DLM_DENIED;
 		status = DLM_DENIED;
 		dlm_error(status);
 		*actions = 0;
 	} else {
 		/* unlock granted lock */
-		lksb->status = DLM_NORMAL;
 		status = DLM_NORMAL;
 		*actions = (DLM_UNLOCK_FREE_LOCK |
 			    DLM_UNLOCK_CALL_AST |
@@ -671,7 +655,7 @@ retry:
 	}
 
 	if (call_ast) {
-		mlog(0, "calling unlockast(%p, %d)\n", data, lksb->status);
+		mlog(0, "calling unlockast(%p, %d)\n", data, status);
 		if (is_master) {
 			/* it is possible that there is one last bast 
 			 * pending.  make sure it is flushed, then
@@ -683,9 +667,12 @@ retry:
 			wait_event(dlm->ast_wq, 
 				   dlm_lock_basts_flushed(dlm, lock));
 		}
-		(*unlockast)(data, lksb->status);
+		(*unlockast)(data, status);
 	}
 
+	if (status == DLM_CANCELGRANT)
+		status = DLM_NORMAL;
+
 	if (status == DLM_NORMAL) {
 		mlog(0, "kicking the thread\n");
 		dlm_kick_thread(dlm, res);
-- 
cgit v1.2.3-70-g09d2


From 34e3d180370c44ad3ecd3a1f9099e150d3bb103f Mon Sep 17 00:00:00 2001
From: Kurt Hackel <kurt.hackel@oracle.com>
Date: Sat, 15 Jul 2006 10:22:39 -0700
Subject: ocfs2: fix check for locally granted state during dlmunlock()

If a process requests a lock cancel but the lock has been remotely granted
already then there is no need to send the cancel message.

Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlm/dlmunlock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 59866e471d9..37be4b2e0d4 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -155,7 +155,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
 	else
 		status = dlm_get_unlock_actions(dlm, res, lock, lksb, &actions);
 
-	if (status != DLM_NORMAL && status != DLM_CANCELGRANT)
+	if (status != DLM_NORMAL && (status != DLM_CANCELGRANT || !master_node))
 		goto leave;
 
 	/* By now this has been masked out of cancel requests. */
-- 
cgit v1.2.3-70-g09d2


From 9acd72f4240429dfd762c9a2c7eb5c18b5d32529 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Sat, 15 Jul 2006 02:36:01 +0200
Subject: [PATCH] fs/ocfs2/dlm/dlmmaster.c: unexport dlm_migrate_lockres

This patch removes the unused EXPORT_SYMBOL_GPL(dlm_migrate_lockres).

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlm/dlmmaster.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 1b8346dd057..9503240ef0e 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2375,7 +2375,6 @@ leave:
 	mlog(0, "returning %d\n", ret);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(dlm_migrate_lockres);
 
 int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock)
 {
-- 
cgit v1.2.3-70-g09d2


From 101ebf256de54e78e6d3277adacf656e125a2c5a Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Tue, 2 May 2006 17:54:45 -0700
Subject: ocfs2: limit cluster bitmap information saved at mount

We were storing cluster count on the ocfs2_super structure, but never
actually using it so remove that. Also, we don't want to populate the
uptodate cache with the unlocked block read - it is technically safe as is,
but we should change it for correctness.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/ocfs2.h | 1 -
 fs/ocfs2/super.c | 8 ++++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index cd4a6f253d1..d52100d49b6 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -197,7 +197,6 @@ struct ocfs2_super
 	struct ocfs2_node_map recovery_map;
 	struct ocfs2_node_map umount_map;
 
-	u32 num_clusters;
 	u64 root_blkno;
 	u64 system_dir_blkno;
 	u64 bitmap_blkno;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 382706a67ff..d17e33e66a1 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1442,8 +1442,13 @@ static int ocfs2_initialize_super(struct super_block *sb,
 
 	osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno;
 
+	/* We don't have a cluster lock on the bitmap here because
+	 * we're only interested in static information and the extra
+	 * complexity at mount time isn't worht it. Don't pass the
+	 * inode in to the read function though as we don't want it to
+	 * be put in the cache. */
 	status = ocfs2_read_block(osb, osb->bitmap_blkno, &bitmap_bh, 0,
-				  inode);
+				  NULL);
 	iput(inode);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1452,7 +1457,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
 
 	di = (struct ocfs2_dinode *) bitmap_bh->b_data;
 	osb->bitmap_cpg = le16_to_cpu(di->id2.i_chain.cl_cpg);
-	osb->num_clusters = le32_to_cpu(di->id1.bitmap1.i_total);
 	brelse(bitmap_bh);
 	mlog(0, "cluster bitmap inode: %llu, clusters per group: %u\n",
 	     (unsigned long long)osb->bitmap_blkno, osb->bitmap_cpg);
-- 
cgit v1.2.3-70-g09d2


From 7bf72edee614e10b8d470c40a326f47bfdd69992 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Wed, 3 May 2006 17:46:50 -0700
Subject: ocfs2: better group descriptor consistency checks

Try to catch corrupted group descriptors with some stronger checks placed in
a couple of strategic locations. Detect a failed resizefs and refuse to
allocate past what bitmap i_clusters allows.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/suballoc.c | 114 +++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 94 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 195523090c8..7ac68cac041 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -85,11 +85,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
 				     u64 *bg_blkno);
 static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
 					 int nr);
-static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
-					     struct buffer_head *bg_bh,
-					     unsigned int bits_wanted,
-					     u16 *bit_off,
-					     u16 *bits_found);
 static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle,
 					     struct inode *alloc_inode,
 					     struct ocfs2_group_desc *bg,
@@ -143,6 +138,64 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
 	return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc);
 }
 
+/* somewhat more expensive than our other checks, so use sparingly. */
+static int ocfs2_check_group_descriptor(struct super_block *sb,
+					struct ocfs2_dinode *di,
+					struct ocfs2_group_desc *gd)
+{
+	unsigned int max_bits;
+
+	if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
+		OCFS2_RO_ON_INVALID_GROUP_DESC(sb, gd);
+		return -EIO;
+	}
+
+	if (di->i_blkno != gd->bg_parent_dinode) {
+		ocfs2_error(sb, "Group descriptor # %llu has bad parent "
+			    "pointer (%llu, expected %llu)",
+			    (unsigned long long)le64_to_cpu(gd->bg_blkno),
+			    (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
+			    (unsigned long long)le64_to_cpu(di->i_blkno));
+		return -EIO;
+	}
+
+	max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc);
+	if (le16_to_cpu(gd->bg_bits) > max_bits) {
+		ocfs2_error(sb, "Group descriptor # %llu has bit count of %u",
+			    (unsigned long long)le64_to_cpu(gd->bg_blkno),
+			    le16_to_cpu(gd->bg_bits));
+		return -EIO;
+	}
+
+	if (le16_to_cpu(gd->bg_chain) >=
+	    le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) {
+		ocfs2_error(sb, "Group descriptor # %llu has bad chain %u",
+			    (unsigned long long)le64_to_cpu(gd->bg_blkno),
+			    le16_to_cpu(gd->bg_chain));
+		return -EIO;
+	}
+
+	if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
+		ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
+			    "claims that %u are free",
+			    (unsigned long long)le64_to_cpu(gd->bg_blkno),
+			    le16_to_cpu(gd->bg_bits),
+			    le16_to_cpu(gd->bg_free_bits_count));
+		return -EIO;
+	}
+
+	if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
+		ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
+			    "max bitmap bits of %u",
+			    (unsigned long long)le64_to_cpu(gd->bg_blkno),
+			    le16_to_cpu(gd->bg_bits),
+			    8 * le16_to_cpu(gd->bg_size));
+		return -EIO;
+	}
+
+	return 0;
+}
+
 static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle,
 				  struct inode *alloc_inode,
 				  struct buffer_head *bg_bh,
@@ -663,6 +716,7 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
 static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
 					     struct buffer_head *bg_bh,
 					     unsigned int bits_wanted,
+					     unsigned int total_bits,
 					     u16 *bit_off,
 					     u16 *bits_found)
 {
@@ -679,10 +733,8 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
 	found = start = best_offset = best_size = 0;
 	bitmap = bg->bg_bitmap;
 
-	while((offset = ocfs2_find_next_zero_bit(bitmap,
-						 le16_to_cpu(bg->bg_bits),
-						 start)) != -1) {
-		if (offset == le16_to_cpu(bg->bg_bits))
+	while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) {
+		if (offset == total_bits)
 			break;
 
 		if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) {
@@ -911,14 +963,35 @@ static int ocfs2_cluster_group_search(struct inode *inode,
 {
 	int search = -ENOSPC;
 	int ret;
-	struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data;
+	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
 	u16 tmp_off, tmp_found;
+	unsigned int max_bits, gd_cluster_off;
 
 	BUG_ON(!ocfs2_is_cluster_bitmap(inode));
 
-	if (bg->bg_free_bits_count) {
+	if (gd->bg_free_bits_count) {
+		max_bits = le16_to_cpu(gd->bg_bits);
+
+		/* Tail groups in cluster bitmaps which aren't cpg
+		 * aligned are prone to partial extention by a failed
+		 * fs resize. If the file system resize never got to
+		 * update the dinode cluster count, then we don't want
+		 * to trust any clusters past it, regardless of what
+		 * the group descriptor says. */
+		gd_cluster_off = ocfs2_blocks_to_clusters(inode->i_sb,
+							  le64_to_cpu(gd->bg_blkno));
+		if ((gd_cluster_off + max_bits) >
+		    OCFS2_I(inode)->ip_clusters) {
+			max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off;
+			mlog(0, "Desc %llu, bg_bits %u, clusters %u, use %u\n",
+			     (unsigned long long)le64_to_cpu(gd->bg_blkno),
+			     le16_to_cpu(gd->bg_bits),
+			     OCFS2_I(inode)->ip_clusters, max_bits);
+		}
+
 		ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
 							group_bh, bits_wanted,
+							max_bits,
 							&tmp_off, &tmp_found);
 		if (ret)
 			return ret;
@@ -951,6 +1024,7 @@ static int ocfs2_block_group_search(struct inode *inode,
 	if (bg->bg_free_bits_count)
 		ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
 							group_bh, bits_wanted,
+							le16_to_cpu(bg->bg_bits),
 							bit_off, bits_found);
 
 	return ret;
@@ -988,9 +1062,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
 		goto bail;
 	}
 	bg = (struct ocfs2_group_desc *) group_bh->b_data;
-	if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
-		OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
-		status = -EIO;
+	status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
+	if (status) {
+		mlog_errno(status);
 		goto bail;
 	}
 
@@ -1018,9 +1092,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
 			goto bail;
 		}
 		bg = (struct ocfs2_group_desc *) group_bh->b_data;
-		if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
-			OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
-			status = -EIO;
+		status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
+		if (status) {
+			mlog_errno(status);
 			goto bail;
 		}
 	}
@@ -1494,9 +1568,9 @@ static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle,
 	}
 
 	group = (struct ocfs2_group_desc *) group_bh->b_data;
-	if (!OCFS2_IS_VALID_GROUP_DESC(group)) {
-		OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, group);
-		status = -EIO;
+	status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group);
+	if (status) {
+		mlog_errno(status);
 		goto bail;
 	}
 	BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits));
-- 
cgit v1.2.3-70-g09d2


From 883d4cae4a2b01a05193cf2665c77b7489a8b6a0 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Mon, 5 Jun 2006 16:41:00 -0400
Subject: ocfs2: allocation hints

Record the most recently used allocation group on the allocation context, so
that subsequent allocations can attempt to optimize for contiguousness.
Local alloc especially should benefit from this as the current chain search
tends to let it spew across the disk.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/localalloc.c |   8 +++
 fs/ocfs2/ocfs2.h      |   1 +
 fs/ocfs2/suballoc.c   | 147 +++++++++++++++++++++++++++++++++++++++++++++-----
 fs/ocfs2/suballoc.h   |   2 +
 4 files changed, 145 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 0d1973ea32b..1f17a4d0828 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -840,6 +840,12 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
 
 	mlog(0, "Allocating %u clusters for a new window.\n",
 	     ocfs2_local_alloc_window_bits(osb));
+
+	/* Instruct the allocation code to try the most recently used
+	 * cluster group. We'll re-record the group used this pass
+	 * below. */
+	ac->ac_last_group = osb->la_last_gd;
+
 	/* we used the generic suballoc reserve function, but we set
 	 * everything up nicely, so there's no reason why we can't use
 	 * the more specific cluster api to claim bits. */
@@ -852,6 +858,8 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
 		goto bail;
 	}
 
+	osb->la_last_gd = ac->ac_last_group;
+
 	la->la_bm_off = cpu_to_le32(cluster_off);
 	alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count);
 	/* just in case... In the future when we find space ourselves,
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index d52100d49b6..0462a7f4e21 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -236,6 +236,7 @@ struct ocfs2_super
 
 	enum ocfs2_local_alloc_state local_alloc_state;
 	struct buffer_head *local_alloc_bh;
+	u64 la_last_gd;
 
 	/* Next two fields are for local node slot recovery during
 	 * mount. */
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 7ac68cac041..9d91e66f51a 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -70,12 +70,6 @@ static int ocfs2_block_group_search(struct inode *inode,
 				    struct buffer_head *group_bh,
 				    u32 bits_wanted, u32 min_bits,
 				    u16 *bit_off, u16 *bits_found);
-static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
-			      u32 bits_wanted,
-			      u32 min_bits,
-			      u16 *bit_off,
-			      unsigned int *num_bits,
-			      u64 *bg_blkno);
 static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
 				     struct ocfs2_alloc_context *ac,
 				     u32 bits_wanted,
@@ -1030,12 +1024,103 @@ static int ocfs2_block_group_search(struct inode *inode,
 	return ret;
 }
 
+static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
+				       struct ocfs2_journal_handle *handle,
+				       struct buffer_head *di_bh,
+				       u32 num_bits,
+				       u16 chain)
+{
+	int ret;
+	u32 tmp_used;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
+	struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain;
+
+	ret = ocfs2_journal_access(handle, inode, di_bh,
+				   OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
+	di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used);
+	le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits);
+
+	ret = ocfs2_journal_dirty(handle, di_bh);
+	if (ret < 0)
+		mlog_errno(ret);
+
+out:
+	return ret;
+}
+
+static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
+				  u32 bits_wanted,
+				  u32 min_bits,
+				  u16 *bit_off,
+				  unsigned int *num_bits,
+				  u64 gd_blkno,
+				  u16 *bits_left)
+{
+	int ret;
+	u16 found;
+	struct buffer_head *group_bh = NULL;
+	struct ocfs2_group_desc *gd;
+	struct inode *alloc_inode = ac->ac_inode;
+	struct ocfs2_journal_handle *handle = ac->ac_handle;
+
+	ret = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), gd_blkno,
+			       &group_bh, OCFS2_BH_CACHED, alloc_inode);
+	if (ret < 0) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	gd = (struct ocfs2_group_desc *) group_bh->b_data;
+	if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
+		OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, gd);
+		ret = -EIO;
+		goto out;
+	}
+
+	ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
+				  bit_off, &found);
+	if (ret < 0) {
+		if (ret != -ENOSPC)
+			mlog_errno(ret);
+		goto out;
+	}
+
+	*num_bits = found;
+
+	ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh,
+					       *num_bits,
+					       le16_to_cpu(gd->bg_chain));
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh,
+					 *bit_off, *num_bits);
+	if (ret < 0)
+		mlog_errno(ret);
+
+	*bits_left = le16_to_cpu(gd->bg_free_bits_count);
+
+out:
+	brelse(group_bh);
+
+	return ret;
+}
+
 static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
 			      u32 bits_wanted,
 			      u32 min_bits,
 			      u16 *bit_off,
 			      unsigned int *num_bits,
-			      u64 *bg_blkno)
+			      u64 *bg_blkno,
+			      u16 *bits_left)
 {
 	int status;
 	u16 chain, tmp_bits;
@@ -1173,6 +1258,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
 	     (unsigned long long)fe->i_blkno);
 
 	*bg_blkno = le64_to_cpu(bg->bg_blkno);
+	*bits_left = le16_to_cpu(bg->bg_free_bits_count);
 bail:
 	if (group_bh)
 		brelse(group_bh);
@@ -1194,6 +1280,8 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
 {
 	int status;
 	u16 victim, i;
+	u16 bits_left = 0;
+	u64 hint_blkno = ac->ac_last_group;
 	struct ocfs2_chain_list *cl;
 	struct ocfs2_dinode *fe;
 
@@ -1220,6 +1308,28 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
 		goto bail;
 	}
 
+	if (hint_blkno) {
+		/* Attempt to short-circuit the usual search mechanism
+		 * by jumping straight to the most recently used
+		 * allocation group. This helps us mantain some
+		 * contiguousness across allocations. */
+		status = ocfs2_search_one_group(ac, bits_wanted, min_bits,
+						bit_off, num_bits,
+						hint_blkno, &bits_left);
+		if (!status) {
+			/* Be careful to update *bg_blkno here as the
+			 * caller is expecting it to be filled in, and
+			 * ocfs2_search_one_group() won't do that for
+			 * us. */
+			*bg_blkno = hint_blkno;
+			goto set_hint;
+		}
+		if (status < 0 && status != -ENOSPC) {
+			mlog_errno(status);
+			goto bail;
+		}
+	}
+
 	cl = (struct ocfs2_chain_list *) &fe->id2.i_chain;
 
 	victim = ocfs2_find_victim_chain(cl);
@@ -1227,9 +1337,9 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
 	ac->ac_allow_chain_relink = 1;
 
 	status = ocfs2_search_chain(ac, bits_wanted, min_bits, bit_off,
-				    num_bits, bg_blkno);
+				    num_bits, bg_blkno, &bits_left);
 	if (!status)
-		goto bail;
+		goto set_hint;
 	if (status < 0 && status != -ENOSPC) {
 		mlog_errno(status);
 		goto bail;
@@ -1251,8 +1361,8 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
 
 		ac->ac_chain = i;
 		status = ocfs2_search_chain(ac, bits_wanted, min_bits,
-					    bit_off, num_bits,
-					    bg_blkno);
+					    bit_off, num_bits, bg_blkno,
+					    &bits_left);
 		if (!status)
 			break;
 		if (status < 0 && status != -ENOSPC) {
@@ -1260,8 +1370,19 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
 			goto bail;
 		}
 	}
-bail:
 
+set_hint:
+	if (status != -ENOSPC) {
+		/* If the next search of this group is not likely to
+		 * yield a suitable extent, then we reset the last
+		 * group hint so as to not waste a disk read */
+		if (bits_left < min_bits)
+			ac->ac_last_group = 0;
+		else
+			ac->ac_last_group = *bg_blkno;
+	}
+
+bail:
 	mlog_exit(status);
 	return status;
 }
@@ -1415,7 +1536,7 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
 {
 	int status;
 	unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
-	u64 bg_blkno;
+	u64 bg_blkno = 0;
 	u16 bg_bit_off;
 
 	mlog_entry_void();
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index a76c82a7cea..c787838d105 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -49,6 +49,8 @@ struct ocfs2_alloc_context {
 	u16    ac_chain;
 	int    ac_allow_chain_relink;
 	group_search_t *ac_group_search;
+
+	u64    ac_last_group;
 };
 
 void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac);
-- 
cgit v1.2.3-70-g09d2


From 0e1edbd99994270023cea5afe593f972eb09a778 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Thu, 10 Aug 2006 14:40:41 +1000
Subject: [XFS] Fix xfs_free_extent related NULL pointer dereference.

We recently fixed an out-of-space deadlock in XFS, and part of that fix
involved the addition of the XFS_ALLOC_FLAG_FREEING flag to some of the
space allocator calls to indicate they're freeing space, not allocating
it. There was a missed xfs_alloc_fix_freelist condition test that did not
correctly test "flags". The same test would also test an uninitialised
structure field (args->userdata) and depending on its value either would
or would not return early with a critical buffer pointer set to NULL.

This fixes that up, adds asserts to several places to catch future botches
of this nature, and skips sections of xfs_alloc_fix_freelist that are
irrelevent for the space-freeing case.

SGI-PV: 955303
SGI-Modid: xfs-linux-melb:xfs-kern:26743a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_alloc.c | 103 ++++++++++++++++++++++++++++-------------------------
 1 file changed, 54 insertions(+), 49 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index eef6763f3a6..d2bbcd882a6 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1835,40 +1835,47 @@ xfs_alloc_fix_freelist(
 				&agbp)))
 			return error;
 		if (!pag->pagf_init) {
+			ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
+			ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
 			args->agbp = NULL;
 			return 0;
 		}
 	} else
 		agbp = NULL;
 
-	/* If this is a metadata preferred pag and we are user data
+	/*
+	 * If this is a metadata preferred pag and we are user data
 	 * then try somewhere else if we are not being asked to
 	 * try harder at this point
 	 */
-	if (pag->pagf_metadata && args->userdata && flags) {
+	if (pag->pagf_metadata && args->userdata &&
+	    (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
+		ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
 		args->agbp = NULL;
 		return 0;
 	}
 
-	need = XFS_MIN_FREELIST_PAG(pag, mp);
-	delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0;
-	/*
-	 * If it looks like there isn't a long enough extent, or enough
-	 * total blocks, reject it.
-	 */
-	longest = (pag->pagf_longest > delta) ?
-		(pag->pagf_longest - delta) :
-		(pag->pagf_flcount > 0 || pag->pagf_longest > 0);
-	if (args->minlen + args->alignment + args->minalignslop - 1 > longest ||
-	    (!(flags & XFS_ALLOC_FLAG_FREEING) &&
-	     (int)(pag->pagf_freeblks + pag->pagf_flcount -
-		   need - args->total) <
-	     (int)args->minleft)) {
-		if (agbp)
-			xfs_trans_brelse(tp, agbp);
-		args->agbp = NULL;
-		return 0;
+	if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
+		need = XFS_MIN_FREELIST_PAG(pag, mp);
+		delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0;
+		/*
+		 * If it looks like there isn't a long enough extent, or enough
+		 * total blocks, reject it.
+		 */
+		longest = (pag->pagf_longest > delta) ?
+			(pag->pagf_longest - delta) :
+			(pag->pagf_flcount > 0 || pag->pagf_longest > 0);
+		if ((args->minlen + args->alignment + args->minalignslop - 1) >
+				longest ||
+		    ((int)(pag->pagf_freeblks + pag->pagf_flcount -
+			   need - args->total) < (int)args->minleft)) {
+			if (agbp)
+				xfs_trans_brelse(tp, agbp);
+			args->agbp = NULL;
+			return 0;
+		}
 	}
+
 	/*
 	 * Get the a.g. freespace buffer.
 	 * Can fail if we're not blocking on locks, and it's held.
@@ -1878,6 +1885,8 @@ xfs_alloc_fix_freelist(
 				&agbp)))
 			return error;
 		if (agbp == NULL) {
+			ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
+			ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
 			args->agbp = NULL;
 			return 0;
 		}
@@ -1887,22 +1896,24 @@ xfs_alloc_fix_freelist(
 	 */
 	agf = XFS_BUF_TO_AGF(agbp);
 	need = XFS_MIN_FREELIST(agf, mp);
-	delta = need > be32_to_cpu(agf->agf_flcount) ?
-		(need - be32_to_cpu(agf->agf_flcount)) : 0;
 	/*
 	 * If there isn't enough total or single-extent, reject it.
 	 */
-	longest = be32_to_cpu(agf->agf_longest);
-	longest = (longest > delta) ? (longest - delta) :
-		(be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
-	if (args->minlen + args->alignment + args->minalignslop - 1 > longest ||
-	     (!(flags & XFS_ALLOC_FLAG_FREEING) &&
-		(int)(be32_to_cpu(agf->agf_freeblks) +
-		   be32_to_cpu(agf->agf_flcount) - need - args->total) <
-	     (int)args->minleft)) {
-		xfs_trans_brelse(tp, agbp);
-		args->agbp = NULL;
-		return 0;
+	if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
+		delta = need > be32_to_cpu(agf->agf_flcount) ?
+			(need - be32_to_cpu(agf->agf_flcount)) : 0;
+		longest = be32_to_cpu(agf->agf_longest);
+		longest = (longest > delta) ? (longest - delta) :
+			(be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
+		if ((args->minlen + args->alignment + args->minalignslop - 1) >
+				longest ||
+		    ((int)(be32_to_cpu(agf->agf_freeblks) +
+		     be32_to_cpu(agf->agf_flcount) - need - args->total) <
+				(int)args->minleft)) {
+			xfs_trans_brelse(tp, agbp);
+			args->agbp = NULL;
+			return 0;
+		}
 	}
 	/*
 	 * Make the freelist shorter if it's too long.
@@ -1950,12 +1961,11 @@ xfs_alloc_fix_freelist(
 		 * on a completely full ag.
 		 */
 		if (targs.agbno == NULLAGBLOCK) {
-			if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
-				xfs_trans_brelse(tp, agflbp);
-				args->agbp = NULL;
-				return 0;
-			}
-			break;
+			if (flags & XFS_ALLOC_FLAG_FREEING)
+				break;
+			xfs_trans_brelse(tp, agflbp);
+			args->agbp = NULL;
+			return 0;
 		}
 		/*
 		 * Put each allocated block on the list.
@@ -2442,31 +2452,26 @@ xfs_free_extent(
 	xfs_fsblock_t	bno,	/* starting block number of extent */
 	xfs_extlen_t	len)	/* length of extent */
 {
-#ifdef DEBUG
-	xfs_agf_t	*agf;	/* a.g. freespace header */
-#endif
-	xfs_alloc_arg_t	args;	/* allocation argument structure */
+	xfs_alloc_arg_t	args;
 	int		error;
 
 	ASSERT(len != 0);
+	memset(&args, 0, sizeof(xfs_alloc_arg_t));
 	args.tp = tp;
 	args.mp = tp->t_mountp;
 	args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
 	ASSERT(args.agno < args.mp->m_sb.sb_agcount);
 	args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
-	args.alignment = 1;
-	args.minlen = args.minleft = args.minalignslop = 0;
 	down_read(&args.mp->m_peraglock);
 	args.pag = &args.mp->m_perag[args.agno];
 	if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
 		goto error0;
 #ifdef DEBUG
 	ASSERT(args.agbp != NULL);
-	agf = XFS_BUF_TO_AGF(args.agbp);
-	ASSERT(args.agbno + len <= be32_to_cpu(agf->agf_length));
+	ASSERT((args.agbno + len) <=
+		be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length));
 #endif
-	error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno,
-		len, 0);
+	error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
 error0:
 	up_read(&args.mp->m_peraglock);
 	return error;
-- 
cgit v1.2.3-70-g09d2


From 1725cd0ae07bb31f68803edcc5bdc99952c7d2f4 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sun, 13 Aug 2006 23:24:17 -0700
Subject: [PATCH] adfs error message fix

Don't use NULL as a printf control string.  Fixes bug #6889.

Cc: Ralph Corderoy <ralph@inputplus.co.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/adfs/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index ba1c88af49f..82011019494 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -308,7 +308,7 @@ static struct adfs_discmap *adfs_read_map(struct super_block *sb, struct adfs_di
 	if (adfs_checkmap(sb, dm))
 		return dm;
 
-	adfs_error(sb, NULL, "map corrupted");
+	adfs_error(sb, "map corrupted");
 
 error_free:
 	while (--zone >= 0)
-- 
cgit v1.2.3-70-g09d2


From 95f8797f42b058333d1e6f0d1dcd8edf5dc6c244 Mon Sep 17 00:00:00 2001
From: Dan Bastone <dan@pwienterprises.com>
Date: Sun, 13 Aug 2006 23:24:18 -0700
Subject: [PATCH] initialize parts of udf inode earlier in create

Eric says:

> I saw an oops down this path when trying to create a new file on a UDF
> filesystem which was internally marked as readonly, but mounted rw:
>
> udf_create
>         udf_new_inode
>                 new_inode
>                         alloc_inode
>                         	udf_alloc_inode
>                 udf_new_block
>                         returns EIO due to readonlyness
>                 iput (on error)

I ran into the same issue today, but when listing a directory with
invalid/corrupt entries:

udf_lookup
        udf_iget
                get_new_inode_fast
                        alloc_inode
                                udf_alloc_inode
                __udf_read_inode
                        fails for any reason
                iput (on error)
                        ...

The following patch to udf_alloc_inode() should take care of both (and
other similar) cases, but I've only tested it with udf_lookup().

Signed-off-by: Dan Bastone <dan@pwienterprises.com>
Cc: Eric Sandeen <sandeen@sandeen.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/udf/super.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs')

diff --git a/fs/udf/super.c b/fs/udf/super.c
index 4df822c881b..7de172efa08 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -115,6 +115,13 @@ static struct inode *udf_alloc_inode(struct super_block *sb)
 	ei = (struct udf_inode_info *)kmem_cache_alloc(udf_inode_cachep, SLAB_KERNEL);
 	if (!ei)
 		return NULL;
+
+	ei->i_unique = 0;
+	ei->i_lenExtents = 0;
+	ei->i_next_alloc_block = 0;
+	ei->i_next_alloc_goal = 0;
+	ei->i_strat4096 = 0;
+
 	return &ei->vfs_inode;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 1d7ea7324ae7a59f8e17e4ba76a2707c1e6f24d2 Mon Sep 17 00:00:00 2001
From: Alexander Zarochentsev <zam@namesys.com>
Date: Sun, 13 Aug 2006 23:24:27 -0700
Subject: [PATCH] fuse: fix error case in fuse_readpages

Don't let fuse_readpages leave the @pages list not empty when exiting
on error.

[akpm@osdl.org: kernel-doc fixes]
Signed-off-by: Alexander Zarochentsev <zam@namesys.com>
Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/fuse/file.c     | 10 ++++++++--
 include/linux/mm.h |  1 +
 mm/swap.c          | 20 ++++++++++++++++++++
 3 files changed, 29 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 63614ed1633..5c4fcd1dbf5 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -395,14 +395,16 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
 	struct fuse_readpages_data data;
 	int err;
 
+	err = -EIO;
 	if (is_bad_inode(inode))
-		return -EIO;
+		goto clean_pages_up;
 
 	data.file = file;
 	data.inode = inode;
 	data.req = fuse_get_req(fc);
+	err = PTR_ERR(data.req);
 	if (IS_ERR(data.req))
-		return PTR_ERR(data.req);
+		goto clean_pages_up;
 
 	err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
 	if (!err) {
@@ -412,6 +414,10 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
 			fuse_put_request(fc, data.req);
 	}
 	return err;
+
+clean_pages_up:
+	put_pages_list(pages);
+	return err;
 }
 
 static size_t fuse_send_write(struct fuse_req *req, struct file *file,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 990957e0929..f0b135cd86d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -336,6 +336,7 @@ static inline void init_page_count(struct page *page)
 }
 
 void put_page(struct page *page);
+void put_pages_list(struct list_head *pages);
 
 void split_page(struct page *page, unsigned int order);
 
diff --git a/mm/swap.c b/mm/swap.c
index 8fd095c4ae5..687686a61f7 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -54,6 +54,26 @@ void put_page(struct page *page)
 }
 EXPORT_SYMBOL(put_page);
 
+/**
+ * put_pages_list(): release a list of pages
+ *
+ * Release a list of pages which are strung together on page.lru.  Currently
+ * used by read_cache_pages() and related error recovery code.
+ *
+ * @pages: list of pages threaded on page->lru
+ */
+void put_pages_list(struct list_head *pages)
+{
+	while (!list_empty(pages)) {
+		struct page *victim;
+
+		victim = list_entry(pages->prev, struct page, lru);
+		list_del(&victim->lru);
+		page_cache_release(victim);
+	}
+}
+EXPORT_SYMBOL(put_pages_list);
+
 /*
  * Writeback is about to end against a page which has been marked for immediate
  * reclaim.  If it still appears to be reclaimable, move it to the tail of the
-- 
cgit v1.2.3-70-g09d2


From 74361cb6828398a96167b3234e186fbd731e5f30 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@fys.uio.no>
Date: Mon, 14 Aug 2006 08:54:48 -0700
Subject: [PATCH] fcntl(F_SETSIG) fix

fcntl(F_SETSIG) no longer works on leases because
lease_release_private_callback() gets called as the lease is copied in
order to initialise it.

The problem is that lease_alloc() performs an unnecessary initialisation,
which sets the lease_manager_ops.  Avoid the problem by allocating the
target lease structure using locks_alloc_lock().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/locks.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/locks.c b/fs/locks.c
index b0b41a64e10..d7c53392cac 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1421,8 +1421,9 @@ static int __setlease(struct file *filp, long arg, struct file_lock **flp)
 	if (!leases_enable)
 		goto out;
 
-	error = lease_alloc(filp, arg, &fl);
-	if (error)
+	error = -ENOMEM;
+	fl = locks_alloc_lock();
+	if (fl == NULL)
 		goto out;
 
 	locks_copy_lock(fl, lease);
@@ -1430,6 +1431,7 @@ static int __setlease(struct file *filp, long arg, struct file_lock **flp)
 	locks_insert_lock(before, fl);
 
 	*flp = fl;
+	error = 0;
 out:
 	return error;
 }
-- 
cgit v1.2.3-70-g09d2