[PATCH] Fix reiserfs latencies caused by data=ordered

ReiserFS does periodic cleanup of old transactions in order to limit the length of time a journal replay may take after a crash. Sometimes, writing metadata from an old (already committed) transaction may require committing a newer transaction, which also requires writing all data=ordered buffers. This can cause very long stalls on journal_begin. This patch makes sure new transactions will not need to be committed before trying a periodic reclaim of an old transaction. It is low risk because if a bad decision is made, it just means a slightly longer journal replay after a crash. Signed-off-by: Chris Mason <mason@suse.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
author: Chris Mason <mason@suse.com> 2006-09-29 01:59:56 -0700
committer: Linus Torvalds <torvalds@g5.osdl.org> 2006-09-29 09:18:11 -0700
commit: a3172027148120b8f8797cbecc7d0a0b215736a1 (patch)
tree: 9da7b5eafe136c8c5b9e76e9a9cccd70899df252
parent: 25736b1c692d436508585d1d710912e6f76be2d8 (diff)
1 files changed, 43 insertions, 11 deletions
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 9b3672d6936..e6b5ccf23f1 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1186,6 +1186,21 @@ static struct reiserfs_journal_list *find_newer_jl_for_cn(struct
 	return NULL;
 }
 
+static int newer_jl_done(struct reiserfs_journal_cnode *cn)
+{
+	struct super_block *sb = cn->sb;
+	b_blocknr_t blocknr = cn->blocknr;
+
+	cn = cn->hprev;
+	while (cn) {
+		if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist &&
+		    atomic_read(&cn->jlist->j_commit_left) != 0)
+				    return 0;
+		cn = cn->hprev;
+	}
+	return 1;
+}
+
 static void remove_journal_hash(struct super_block *,
 				struct reiserfs_journal_cnode **,
 				struct reiserfs_journal_list *, unsigned long,
@@ -1604,6 +1619,31 @@ static int flush_journal_list(struct super_block *s,
 	return err;
 }
 
+static int test_transaction(struct super_block *s,
+                            struct reiserfs_journal_list *jl)
+{
+	struct reiserfs_journal_cnode *cn;
+
+	if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0)
+		return 1;
+
+	cn = jl->j_realblock;
+	while (cn) {
+		/* if the blocknr == 0, this has been cleared from the hash,
+		 ** skip it
+		 */
+		if (cn->blocknr == 0) {
+			goto next;
+		}
+		if (cn->bh && !newer_jl_done(cn))
+			return 0;
+	      next:
+		cn = cn->next;
+		cond_resched();
+	}
+	return 0;
+}
+
 static int write_one_transaction(struct super_block *s,
 				 struct reiserfs_journal_list *jl,
 				 struct buffer_chunk *chunk)
@@ -3433,16 +3473,6 @@ static void flush_async_commits(void *p)
 		flush_commit_list(p_s_sb, jl, 1);
 	}
 	unlock_kernel();
-	/*
-	 * this is a little racey, but there's no harm in missing
-	 * the filemap_fdata_write
-	 */
-	if (!atomic_read(&journal->j_async_throttle)
-	    && !reiserfs_is_journal_aborted(journal)) {
-		atomic_inc(&journal->j_async_throttle);
-		filemap_fdatawrite(p_s_sb->s_bdev->bd_inode->i_mapping);
-		atomic_dec(&journal->j_async_throttle);
-	}
 }
 
 /*
@@ -3844,7 +3874,9 @@ static void flush_old_journal_lists(struct super_block *s)
 		entry = journal->j_journal_list.next;
 		jl = JOURNAL_LIST_ENTRY(entry);
 		/* this check should always be run, to send old lists to disk */
-		if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4))) {
+		if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4)) &&
+		    atomic_read(&jl->j_commit_left) == 0 &&
+		    test_transaction(s, jl)) {
 			flush_used_journal_lists(s, jl);
 		} else {
 			break;
author	Chris Mason <mason@suse.com>	2006-09-29 01:59:56 -0700
committer	Linus Torvalds <torvalds@g5.osdl.org>	2006-09-29 09:18:11 -0700
commit	a3172027148120b8f8797cbecc7d0a0b215736a1 (patch)
tree	9da7b5eafe136c8c5b9e76e9a9cccd70899df252
parent	25736b1c692d436508585d1d710912e6f76be2d8 (diff)