summaryrefslogtreecommitdiffstats
path: root/fs/jbd2
diff options
context:
space:
mode:
Diffstat (limited to 'fs/jbd2')
-rw-r--r--fs/jbd2/checkpoint.c5
-rw-r--r--fs/jbd2/commit.c13
-rw-r--r--fs/jbd2/journal.c90
-rw-r--r--fs/jbd2/transaction.c117
4 files changed, 125 insertions, 100 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 17159cacbd9..5d70b3e6d49 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -20,9 +20,9 @@
#include <linux/time.h>
#include <linux/fs.h>
#include <linux/jbd2.h>
-#include <linux/marker.h>
#include <linux/errno.h>
#include <linux/slab.h>
+#include <trace/events/jbd2.h>
/*
* Unlink a buffer from a transaction checkpoint list.
@@ -358,8 +358,7 @@ int jbd2_log_do_checkpoint(journal_t *journal)
* journal straight away.
*/
result = jbd2_cleanup_journal_tail(journal);
- trace_mark(jbd2_checkpoint, "dev %s need_checkpoint %d",
- journal->j_devname, result);
+ trace_jbd2_checkpoint(journal, result);
jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
if (result <= 0)
return result;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 0b7d3b8226f..7b4088b2364 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -16,7 +16,6 @@
#include <linux/time.h>
#include <linux/fs.h>
#include <linux/jbd2.h>
-#include <linux/marker.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mm.h>
@@ -26,6 +25,7 @@
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/bio.h>
+#include <trace/events/jbd2.h>
/*
* Default IO end handler for temporary BJ_IO buffer_heads.
@@ -253,6 +253,7 @@ static int journal_submit_data_buffers(journal_t *journal,
* block allocation with delalloc. We need to write
* only allocated blocks here.
*/
+ trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
err = journal_submit_inode_data_buffers(mapping);
if (!ret)
ret = err;
@@ -394,8 +395,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
commit_transaction = journal->j_running_transaction;
J_ASSERT(commit_transaction->t_state == T_RUNNING);
- trace_mark(jbd2_start_commit, "dev %s transaction %d",
- journal->j_devname, commit_transaction->t_tid);
+ trace_jbd2_start_commit(journal, commit_transaction);
jbd_debug(1, "JBD: starting commit of transaction %d\n",
commit_transaction->t_tid);
@@ -409,6 +409,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
*/
if (commit_transaction->t_synchronous_commit)
write_op = WRITE_SYNC_PLUG;
+ trace_jbd2_commit_locking(journal, commit_transaction);
stats.u.run.rs_wait = commit_transaction->t_max_wait;
stats.u.run.rs_locked = jiffies;
stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
@@ -484,6 +485,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
*/
jbd2_journal_switch_revoke_table(journal);
+ trace_jbd2_commit_flushing(journal, commit_transaction);
stats.u.run.rs_flushing = jiffies;
stats.u.run.rs_locked = jbd2_time_diff(stats.u.run.rs_locked,
stats.u.run.rs_flushing);
@@ -520,6 +522,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
commit_transaction->t_state = T_COMMIT;
spin_unlock(&journal->j_state_lock);
+ trace_jbd2_commit_logging(journal, commit_transaction);
stats.u.run.rs_logging = jiffies;
stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing,
stats.u.run.rs_logging);
@@ -1054,9 +1057,7 @@ restart_loop:
if (journal->j_commit_callback)
journal->j_commit_callback(journal, commit_transaction);
- trace_mark(jbd2_end_commit, "dev %s transaction %d head %d",
- journal->j_devname, commit_transaction->t_tid,
- journal->j_tail_sequence);
+ trace_jbd2_end_commit(journal, commit_transaction);
jbd_debug(1, "JBD: commit %d complete, head %d\n",
journal->j_commit_sequence, journal->j_tail_sequence);
if (to_free)
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 62be7d294ec..e378cb38397 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -38,6 +38,10 @@
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/math64.h>
+#include <linux/hash.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/jbd2.h>
#include <asm/uaccess.h>
#include <asm/page.h>
@@ -293,6 +297,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
unsigned int new_offset;
struct buffer_head *bh_in = jh2bh(jh_in);
struct jbd2_buffer_trigger_type *triggers;
+ journal_t *journal = transaction->t_journal;
/*
* The buffer really shouldn't be locked: only the current committing
@@ -306,6 +311,11 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
+ /* keep subsequent assertions sane */
+ new_bh->b_state = 0;
+ init_buffer(new_bh, NULL, NULL);
+ atomic_set(&new_bh->b_count, 1);
+ new_jh = jbd2_journal_add_journal_head(new_bh); /* This sleeps */
/*
* If a new transaction has already done a buffer copy-out, then
@@ -384,14 +394,6 @@ repeat:
kunmap_atomic(mapped_data, KM_USER0);
}
- /* keep subsequent assertions sane */
- new_bh->b_state = 0;
- init_buffer(new_bh, NULL, NULL);
- atomic_set(&new_bh->b_count, 1);
- jbd_unlock_bh_state(bh_in);
-
- new_jh = jbd2_journal_add_journal_head(new_bh); /* This sleeps */
-
set_bh_page(new_bh, new_page, new_offset);
new_jh->b_transaction = NULL;
new_bh->b_size = jh2bh(jh_in)->b_size;
@@ -408,7 +410,11 @@ repeat:
* copying is moved to the transaction's shadow queue.
*/
JBUFFER_TRACE(jh_in, "file as BJ_Shadow");
- jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
+ spin_lock(&journal->j_list_lock);
+ __jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
+ spin_unlock(&journal->j_list_lock);
+ jbd_unlock_bh_state(bh_in);
+
JBUFFER_TRACE(new_jh, "file as BJ_IO");
jbd2_journal_file_buffer(new_jh, transaction, BJ_IO);
@@ -2377,6 +2383,72 @@ static void __exit journal_exit(void)
jbd2_journal_destroy_caches();
}
+/*
+ * jbd2_dev_to_name is a utility function used by the jbd2 and ext4
+ * tracing infrastructure to map a dev_t to a device name.
+ *
+ * The caller should use rcu_read_lock() in order to make sure the
+ * device name stays valid until its done with it. We use
+ * rcu_read_lock() as well to make sure we're safe in case the caller
+ * gets sloppy, and because rcu_read_lock() is cheap and can be safely
+ * nested.
+ */
+struct devname_cache {
+ struct rcu_head rcu;
+ dev_t device;
+ char devname[BDEVNAME_SIZE];
+};
+#define CACHE_SIZE_BITS 6
+static struct devname_cache *devcache[1 << CACHE_SIZE_BITS];
+static DEFINE_SPINLOCK(devname_cache_lock);
+
+static void free_devcache(struct rcu_head *rcu)
+{
+ kfree(rcu);
+}
+
+const char *jbd2_dev_to_name(dev_t device)
+{
+ int i = hash_32(device, CACHE_SIZE_BITS);
+ char *ret;
+ struct block_device *bd;
+ static struct devname_cache *new_dev;
+
+ rcu_read_lock();
+ if (devcache[i] && devcache[i]->device == device) {
+ ret = devcache[i]->devname;
+ rcu_read_unlock();
+ return ret;
+ }
+ rcu_read_unlock();
+
+ new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
+ if (!new_dev)
+ return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
+ spin_lock(&devname_cache_lock);
+ if (devcache[i]) {
+ if (devcache[i]->device == device) {
+ kfree(new_dev);
+ ret = devcache[i]->devname;
+ spin_unlock(&devname_cache_lock);
+ return ret;
+ }
+ call_rcu(&devcache[i]->rcu, free_devcache);
+ }
+ devcache[i] = new_dev;
+ devcache[i]->device = device;
+ bd = bdget(device);
+ if (bd) {
+ bdevname(bd, devcache[i]->devname);
+ bdput(bd);
+ } else
+ __bdevname(device, devcache[i]->devname);
+ ret = devcache[i]->devname;
+ spin_unlock(&devname_cache_lock);
+ return ret;
+}
+EXPORT_SYMBOL(jbd2_dev_to_name);
+
MODULE_LICENSE("GPL");
module_init(journal_init);
module_exit(journal_exit);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 996ffda06bf..6213ac728f3 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -499,34 +499,15 @@ void jbd2_journal_unlock_updates (journal_t *journal)
wake_up(&journal->j_wait_transaction_locked);
}
-/*
- * Report any unexpected dirty buffers which turn up. Normally those
- * indicate an error, but they can occur if the user is running (say)
- * tune2fs to modify the live filesystem, so we need the option of
- * continuing as gracefully as possible. #
- *
- * The caller should already hold the journal lock and
- * j_list_lock spinlock: most callers will need those anyway
- * in order to probe the buffer's journaling state safely.
- */
-static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
+static void warn_dirty_buffer(struct buffer_head *bh)
{
- int jlist;
-
- /* If this buffer is one which might reasonably be dirty
- * --- ie. data, or not part of this journal --- then
- * we're OK to leave it alone, but otherwise we need to
- * move the dirty bit to the journal's own internal
- * JBDDirty bit. */
- jlist = jh->b_jlist;
-
- if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
- jlist == BJ_Shadow || jlist == BJ_Forget) {
- struct buffer_head *bh = jh2bh(jh);
+ char b[BDEVNAME_SIZE];
- if (test_clear_buffer_dirty(bh))
- set_buffer_jbddirty(bh);
- }
+ printk(KERN_WARNING
+ "JBD: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). "
+ "There's a risk of filesystem corruption in case of system "
+ "crash.\n",
+ bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
}
/*
@@ -593,14 +574,16 @@ repeat:
if (jh->b_next_transaction)
J_ASSERT_JH(jh, jh->b_next_transaction ==
transaction);
+ warn_dirty_buffer(bh);
}
/*
* In any case we need to clean the dirty flag and we must
* do it under the buffer lock to be sure we don't race
* with running write-out.
*/
- JBUFFER_TRACE(jh, "Unexpected dirty buffer");
- jbd_unexpected_dirty_buffer(jh);
+ JBUFFER_TRACE(jh, "Journalling dirty buffer");
+ clear_buffer_dirty(bh);
+ set_buffer_jbddirty(bh);
}
unlock_buffer(bh);
@@ -843,6 +826,15 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
if (jh->b_transaction == NULL) {
+ /*
+ * Previous jbd2_journal_forget() could have left the buffer
+ * with jbddirty bit set because it was being committed. When
+ * the commit finished, we've filed the buffer for
+ * checkpointing and marked it dirty. Now we are reallocating
+ * the buffer so the transaction freeing it must have
+ * committed and so it's safe to clear the dirty bit.
+ */
+ clear_buffer_dirty(jh2bh(jh));
jh->b_transaction = transaction;
/* first access by this transaction */
@@ -1547,36 +1539,6 @@ out:
return;
}
-/*
- * jbd2_journal_try_to_free_buffers() could race with
- * jbd2_journal_commit_transaction(). The later might still hold the
- * reference count to the buffers when inspecting them on
- * t_syncdata_list or t_locked_list.
- *
- * jbd2_journal_try_to_free_buffers() will call this function to
- * wait for the current transaction to finish syncing data buffers, before
- * try to free that buffer.
- *
- * Called with journal->j_state_lock hold.
- */
-static void jbd2_journal_wait_for_transaction_sync_data(journal_t *journal)
-{
- transaction_t *transaction;
- tid_t tid;
-
- spin_lock(&journal->j_state_lock);
- transaction = journal->j_committing_transaction;
-
- if (!transaction) {
- spin_unlock(&journal->j_state_lock);
- return;
- }
-
- tid = transaction->t_tid;
- spin_unlock(&journal->j_state_lock);
- jbd2_log_wait_commit(journal, tid);
-}
-
/**
* int jbd2_journal_try_to_free_buffers() - try to free page buffers.
* @journal: journal for operation
@@ -1649,25 +1611,6 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
ret = try_to_free_buffers(page);
- /*
- * There are a number of places where jbd2_journal_try_to_free_buffers()
- * could race with jbd2_journal_commit_transaction(), the later still
- * holds the reference to the buffers to free while processing them.
- * try_to_free_buffers() failed to free those buffers. Some of the
- * caller of releasepage() request page buffers to be dropped, otherwise
- * treat the fail-to-free as errors (such as generic_file_direct_IO())
- *
- * So, if the caller of try_to_release_page() wants the synchronous
- * behaviour(i.e make sure buffers are dropped upon return),
- * let's wait for the current transaction to finish flush of
- * dirty data buffers, then try to free those buffers again,
- * with the journal locked.
- */
- if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
- jbd2_journal_wait_for_transaction_sync_data(journal);
- ret = try_to_free_buffers(page);
- }
-
busy:
return ret;
}
@@ -1693,8 +1636,13 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
if (jh->b_cp_transaction) {
JBUFFER_TRACE(jh, "on running+cp transaction");
+ /*
+ * We don't want to write the buffer anymore, clear the
+ * bit so that we don't confuse checks in
+ * __journal_file_buffer
+ */
+ clear_buffer_dirty(bh);
__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
- clear_buffer_jbddirty(bh);
may_free = 0;
} else {
JBUFFER_TRACE(jh, "on running transaction");
@@ -1945,12 +1893,17 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
if (jh->b_transaction && jh->b_jlist == jlist)
return;
- /* The following list of buffer states needs to be consistent
- * with __jbd_unexpected_dirty_buffer()'s handling of dirty
- * state. */
-
if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
jlist == BJ_Shadow || jlist == BJ_Forget) {
+ /*
+ * For metadata buffers, we track dirty bit in buffer_jbddirty
+ * instead of buffer_dirty. We should not see a dirty bit set
+ * here because we clear it in do_get_write_access but e.g.
+ * tune2fs can modify the sb and set the dirty bit at any time
+ * so we try to gracefully handle that.
+ */
+ if (buffer_dirty(bh))
+ warn_dirty_buffer(bh);
if (test_clear_buffer_dirty(bh) ||
test_clear_buffer_jbddirty(bh))
was_dirty = 1;