summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- fs/jbd/checkpoint.c 23
-rw-r--r-- fs/jbd/commit.c 9
-rw-r--r-- fs/jbd/journal.c 60
-rw-r--r-- include/linux/jbd.h 3
-rw-r--r-- include/trace/events/jbd.h 9
5 files changed, 65 insertions, 39 deletions
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 80c85f3e087..08c03044abd 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -508,20 +508,19 @@ int cleanup_journal_tail(journal_t *journal)
/*
* We need to make sure that any blocks that were recently written out
* --- perhaps by log_do_checkpoint() --- are flushed out before we
- * drop the transactions from the journal. It's unlikely this will be
- * necessary, especially with an appropriately sized journal, but we
- * need this to guarantee correctness. Fortunately
- * cleanup_journal_tail() doesn't get called all that often.
+ * drop the transactions from the journal. Similarly we need to be sure
+ * superblock makes it to disk before next transaction starts reusing
+ * freed space (otherwise we could replay some blocks of the new
+ * transaction thinking they belong to the old one). So we use
+ * WRITE_FLUSH_FUA. It's unlikely this will be necessary, especially
+ * with an appropriately sized journal, but we need this to guarantee
+ * correctness. Fortunately cleanup_journal_tail() doesn't get called
+ * all that often.
*/
- if (journal->j_flags & JFS_BARRIER)
- blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
+ journal_update_sb_log_tail(journal, first_tid, blocknr,
+ WRITE_FLUSH_FUA);
spin_lock(&journal->j_state_lock);
- if (!tid_gt(first_tid, journal->j_tail_sequence)) {
- spin_unlock(&journal->j_state_lock);
- /* Someone else cleaned up journal so return 0 */
- return 0;
- }
/* OK, update the superblock to recover the freed space.
* Physical blocks come first: have we wrapped beyond the end of
* the log? */
@@ -539,8 +538,6 @@ int cleanup_journal_tail(journal_t *journal)
journal->j_tail_sequence = first_tid;
journal->j_tail = blocknr;
spin_unlock(&journal->j_state_lock);
- if (!(journal->j_flags & JFS_ABORT))
- journal_update_sb_log_tail(journal);
return 0;
}
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 1b27f46e610..52c15c77602 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -309,7 +309,14 @@ void journal_commit_transaction(journal_t *journal)
if (journal->j_flags & JFS_FLUSHED) {
jbd_debug(3, "super block updated\n");
mutex_lock(&journal->j_checkpoint_mutex);
- journal_update_sb_log_tail(journal);
+ /*
+ * We hold j_checkpoint_mutex so tail cannot change under us.
+ * We don't need any special data guarantees for writing sb
+ * since journal is empty and it is ok for write to be
+ * flushed only with transaction commit.
+ */
+ journal_update_sb_log_tail(journal, journal->j_tail_sequence,
+ journal->j_tail, WRITE_SYNC);
mutex_unlock(&journal->j_checkpoint_mutex);
} else {
jbd_debug(3, "superblock not updated\n");
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index b29c7678525..425c2f2cf17 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -938,8 +938,16 @@ static int journal_reset(journal_t *journal)
} else {
/* Lock here to make assertions happy... */
mutex_lock(&journal->j_checkpoint_mutex);
- /* Add the dynamic fields and write it to disk. */
- journal_update_sb_log_tail(journal);
+ /*
+ * Update log tail information. We use WRITE_FUA since new
+ * transaction will start reusing journal space and so we
+ * must make sure information about current log tail is on
+ * disk before that.
+ */
+ journal_update_sb_log_tail(journal,
+ journal->j_tail_sequence,
+ journal->j_tail,
+ WRITE_FUA);
mutex_unlock(&journal->j_checkpoint_mutex);
}
return journal_start_thread(journal);
@@ -1018,11 +1026,15 @@ int journal_create(journal_t *journal)
return journal_reset(journal);
}
-static void journal_write_superblock(journal_t *journal)
+static void journal_write_superblock(journal_t *journal, int write_op)
{
struct buffer_head *bh = journal->j_sb_buffer;
+ int ret;
- trace_journal_write_superblock(journal);
+ trace_journal_write_superblock(journal, write_op);
+ if (!(journal->j_flags & JFS_BARRIER))
+ write_op &= ~(REQ_FUA | REQ_FLUSH);
+ lock_buffer(bh);
if (buffer_write_io_error(bh)) {
char b[BDEVNAME_SIZE];
/*
@@ -1040,40 +1052,46 @@ static void journal_write_superblock(journal_t *journal)
set_buffer_uptodate(bh);
}
- BUFFER_TRACE(bh, "marking dirty");
- mark_buffer_dirty(bh);
- sync_dirty_buffer(bh);
+ get_bh(bh);
+ bh->b_end_io = end_buffer_write_sync;
+ ret = submit_bh(write_op, bh);
+ wait_on_buffer(bh);
if (buffer_write_io_error(bh)) {
- char b[BDEVNAME_SIZE];
- printk(KERN_ERR "JBD: I/O error detected "
- "when updating journal superblock for %s.\n",
- journal_dev_name(journal, b));
clear_buffer_write_io_error(bh);
set_buffer_uptodate(bh);
+ ret = -EIO;
+ }
+ if (ret) {
+ char b[BDEVNAME_SIZE];
+ printk(KERN_ERR "JBD: Error %d detected "
+ "when updating journal superblock for %s.\n",
+ ret, journal_dev_name(journal, b));
}
}
/**
* journal_update_sb_log_tail() - Update log tail in journal sb on disk.
* @journal: The journal to update.
+ * @tail_tid: TID of the new transaction at the tail of the log
+ * @tail_block: The first block of the transaction at the tail of the log
+ * @write_op: With which operation should we write the journal sb
*
* Update a journal's superblock information about log tail and write it to
* disk, waiting for the IO to complete.
*/
-void journal_update_sb_log_tail(journal_t *journal)
+void journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
+ unsigned int tail_block, int write_op)
{
journal_superblock_t *sb = journal->j_superblock;
BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
- spin_lock(&journal->j_state_lock);
- jbd_debug(1,"JBD: updating superblock (start %u, seq %d, errno %d)\n",
- journal->j_tail, journal->j_tail_sequence, journal->j_errno);
+ jbd_debug(1,"JBD: updating superblock (start %u, seq %u)\n",
+ tail_block, tail_tid);
- sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
- sb->s_start = cpu_to_be32(journal->j_tail);
- spin_unlock(&journal->j_state_lock);
+ sb->s_sequence = cpu_to_be32(tail_tid);
+ sb->s_start = cpu_to_be32(tail_block);
- journal_write_superblock(journal);
+ journal_write_superblock(journal, write_op);
/* Log is no longer empty */
spin_lock(&journal->j_state_lock);
@@ -1102,7 +1120,7 @@ static void mark_journal_empty(journal_t *journal)
sb->s_start = cpu_to_be32(0);
spin_unlock(&journal->j_state_lock);
- journal_write_superblock(journal);
+ journal_write_superblock(journal, WRITE_FUA);
spin_lock(&journal->j_state_lock);
/* Log is empty */
@@ -1127,7 +1145,7 @@ static void journal_update_sb_errno(journal_t *journal)
sb->s_errno = cpu_to_be32(journal->j_errno);
spin_unlock(&journal->j_state_lock);
- journal_write_superblock(journal);
+ journal_write_superblock(journal, WRITE_SYNC);
}
/*
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 9716d370c50..c8f32975f0e 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -864,7 +864,8 @@ extern int journal_destroy (journal_t *);
extern int journal_recover (journal_t *journal);
extern int journal_wipe (journal_t *, int);
extern int journal_skip_recovery (journal_t *);
-extern void journal_update_sb_log_tail (journal_t *);
+extern void journal_update_sb_log_tail (journal_t *, tid_t, unsigned int,
+ int);
extern void journal_abort (journal_t *, int);
extern int journal_errno (journal_t *);
extern void journal_ack_err (journal_t *);
diff --git a/include/trace/events/jbd.h b/include/trace/events/jbd.h
index d9658a940a3..da6f2591c25 100644
--- a/include/trace/events/jbd.h
+++ b/include/trace/events/jbd.h
@@ -170,19 +170,22 @@ TRACE_EVENT(jbd_cleanup_journal_tail,
);
TRACE_EVENT(journal_write_superblock,
- TP_PROTO(journal_t *journal),
+ TP_PROTO(journal_t *journal, int write_op),
- TP_ARGS(journal),
+ TP_ARGS(journal, write_op),
TP_STRUCT__entry(
__field( dev_t, dev )
+ __field( int, write_op )
),
TP_fast_assign(
__entry->dev = journal->j_fs_dev->bd_dev;
+ __entry->write_op = write_op;
),
- TP_printk("dev %d,%d", MAJOR(__entry->dev), MINOR(__entry->dev))
+ TP_printk("dev %d,%d write_op %x", MAJOR(__entry->dev),
+ MINOR(__entry->dev), __entry->write_op)
);
#endif /* _TRACE_JBD_H */