diff options
Diffstat (limited to 'fs/jbd2')
-rw-r--r-- | fs/jbd2/checkpoint.c | 5 | ||||
-rw-r--r-- | fs/jbd2/commit.c | 13 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 90 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 117 |
4 files changed, 125 insertions, 100 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 17159cacbd9..5d70b3e6d49 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -20,9 +20,9 @@ #include <linux/time.h> #include <linux/fs.h> #include <linux/jbd2.h> -#include <linux/marker.h> #include <linux/errno.h> #include <linux/slab.h> +#include <trace/events/jbd2.h> /* * Unlink a buffer from a transaction checkpoint list. @@ -358,8 +358,7 @@ int jbd2_log_do_checkpoint(journal_t *journal) * journal straight away. */ result = jbd2_cleanup_journal_tail(journal); - trace_mark(jbd2_checkpoint, "dev %s need_checkpoint %d", - journal->j_devname, result); + trace_jbd2_checkpoint(journal, result); jbd_debug(1, "cleanup_journal_tail returned %d\n", result); if (result <= 0) return result; diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 0b7d3b8226f..7b4088b2364 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -16,7 +16,6 @@ #include <linux/time.h> #include <linux/fs.h> #include <linux/jbd2.h> -#include <linux/marker.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/mm.h> @@ -26,6 +25,7 @@ #include <linux/writeback.h> #include <linux/backing-dev.h> #include <linux/bio.h> +#include <trace/events/jbd2.h> /* * Default IO end handler for temporary BJ_IO buffer_heads. @@ -253,6 +253,7 @@ static int journal_submit_data_buffers(journal_t *journal, * block allocation with delalloc. We need to write * only allocated blocks here. */ + trace_jbd2_submit_inode_data(jinode->i_vfs_inode); err = journal_submit_inode_data_buffers(mapping); if (!ret) ret = err; @@ -394,8 +395,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) commit_transaction = journal->j_running_transaction; J_ASSERT(commit_transaction->t_state == T_RUNNING); - trace_mark(jbd2_start_commit, "dev %s transaction %d", - journal->j_devname, commit_transaction->t_tid); + trace_jbd2_start_commit(journal, commit_transaction); jbd_debug(1, "JBD: starting commit of transaction %d\n", commit_transaction->t_tid); @@ -409,6 +409,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) */ if (commit_transaction->t_synchronous_commit) write_op = WRITE_SYNC_PLUG; + trace_jbd2_commit_locking(journal, commit_transaction); stats.u.run.rs_wait = commit_transaction->t_max_wait; stats.u.run.rs_locked = jiffies; stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start, @@ -484,6 +485,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) */ jbd2_journal_switch_revoke_table(journal); + trace_jbd2_commit_flushing(journal, commit_transaction); stats.u.run.rs_flushing = jiffies; stats.u.run.rs_locked = jbd2_time_diff(stats.u.run.rs_locked, stats.u.run.rs_flushing); @@ -520,6 +522,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) commit_transaction->t_state = T_COMMIT; spin_unlock(&journal->j_state_lock); + trace_jbd2_commit_logging(journal, commit_transaction); stats.u.run.rs_logging = jiffies; stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing, stats.u.run.rs_logging); @@ -1054,9 +1057,7 @@ restart_loop: if (journal->j_commit_callback) journal->j_commit_callback(journal, commit_transaction); - trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", - journal->j_devname, commit_transaction->t_tid, - journal->j_tail_sequence); + trace_jbd2_end_commit(journal, commit_transaction); jbd_debug(1, "JBD: commit %d complete, head %d\n", journal->j_commit_sequence, journal->j_tail_sequence); if (to_free) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 62be7d294ec..e378cb38397 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -38,6 +38,10 @@ #include <linux/debugfs.h> #include <linux/seq_file.h> #include <linux/math64.h> +#include <linux/hash.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/jbd2.h> #include <asm/uaccess.h> #include <asm/page.h> @@ -293,6 +297,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction, unsigned int new_offset; struct buffer_head *bh_in = jh2bh(jh_in); struct jbd2_buffer_trigger_type *triggers; + journal_t *journal = transaction->t_journal; /* * The buffer really shouldn't be locked: only the current committing @@ -306,6 +311,11 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction, J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in)); new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL); + /* keep subsequent assertions sane */ + new_bh->b_state = 0; + init_buffer(new_bh, NULL, NULL); + atomic_set(&new_bh->b_count, 1); + new_jh = jbd2_journal_add_journal_head(new_bh); /* This sleeps */ /* * If a new transaction has already done a buffer copy-out, then @@ -384,14 +394,6 @@ repeat: kunmap_atomic(mapped_data, KM_USER0); } - /* keep subsequent assertions sane */ - new_bh->b_state = 0; - init_buffer(new_bh, NULL, NULL); - atomic_set(&new_bh->b_count, 1); - jbd_unlock_bh_state(bh_in); - - new_jh = jbd2_journal_add_journal_head(new_bh); /* This sleeps */ - set_bh_page(new_bh, new_page, new_offset); new_jh->b_transaction = NULL; new_bh->b_size = jh2bh(jh_in)->b_size; @@ -408,7 +410,11 @@ repeat: * copying is moved to the transaction's shadow queue. */ JBUFFER_TRACE(jh_in, "file as BJ_Shadow"); - jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow); + spin_lock(&journal->j_list_lock); + __jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh_in); + JBUFFER_TRACE(new_jh, "file as BJ_IO"); jbd2_journal_file_buffer(new_jh, transaction, BJ_IO); @@ -2377,6 +2383,72 @@ static void __exit journal_exit(void) jbd2_journal_destroy_caches(); } +/* + * jbd2_dev_to_name is a utility function used by the jbd2 and ext4 + * tracing infrastructure to map a dev_t to a device name. + * + * The caller should use rcu_read_lock() in order to make sure the + * device name stays valid until its done with it. We use + * rcu_read_lock() as well to make sure we're safe in case the caller + * gets sloppy, and because rcu_read_lock() is cheap and can be safely + * nested. + */ +struct devname_cache { + struct rcu_head rcu; + dev_t device; + char devname[BDEVNAME_SIZE]; +}; +#define CACHE_SIZE_BITS 6 +static struct devname_cache *devcache[1 << CACHE_SIZE_BITS]; +static DEFINE_SPINLOCK(devname_cache_lock); + +static void free_devcache(struct rcu_head *rcu) +{ + kfree(rcu); +} + +const char *jbd2_dev_to_name(dev_t device) +{ + int i = hash_32(device, CACHE_SIZE_BITS); + char *ret; + struct block_device *bd; + static struct devname_cache *new_dev; + + rcu_read_lock(); + if (devcache[i] && devcache[i]->device == device) { + ret = devcache[i]->devname; + rcu_read_unlock(); + return ret; + } + rcu_read_unlock(); + + new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL); + if (!new_dev) + return "NODEV-ALLOCFAILURE"; /* Something non-NULL */ + spin_lock(&devname_cache_lock); + if (devcache[i]) { + if (devcache[i]->device == device) { + kfree(new_dev); + ret = devcache[i]->devname; + spin_unlock(&devname_cache_lock); + return ret; + } + call_rcu(&devcache[i]->rcu, free_devcache); + } + devcache[i] = new_dev; + devcache[i]->device = device; + bd = bdget(device); + if (bd) { + bdevname(bd, devcache[i]->devname); + bdput(bd); + } else + __bdevname(device, devcache[i]->devname); + ret = devcache[i]->devname; + spin_unlock(&devname_cache_lock); + return ret; +} +EXPORT_SYMBOL(jbd2_dev_to_name); + MODULE_LICENSE("GPL"); module_init(journal_init); module_exit(journal_exit); diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 996ffda06bf..6213ac728f3 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -499,34 +499,15 @@ void jbd2_journal_unlock_updates (journal_t *journal) wake_up(&journal->j_wait_transaction_locked); } -/* - * Report any unexpected dirty buffers which turn up. Normally those - * indicate an error, but they can occur if the user is running (say) - * tune2fs to modify the live filesystem, so we need the option of - * continuing as gracefully as possible. # - * - * The caller should already hold the journal lock and - * j_list_lock spinlock: most callers will need those anyway - * in order to probe the buffer's journaling state safely. - */ -static void jbd_unexpected_dirty_buffer(struct journal_head *jh) +static void warn_dirty_buffer(struct buffer_head *bh) { - int jlist; - - /* If this buffer is one which might reasonably be dirty - * --- ie. data, or not part of this journal --- then - * we're OK to leave it alone, but otherwise we need to - * move the dirty bit to the journal's own internal - * JBDDirty bit. */ - jlist = jh->b_jlist; - - if (jlist == BJ_Metadata || jlist == BJ_Reserved || - jlist == BJ_Shadow || jlist == BJ_Forget) { - struct buffer_head *bh = jh2bh(jh); + char b[BDEVNAME_SIZE]; - if (test_clear_buffer_dirty(bh)) - set_buffer_jbddirty(bh); - } + printk(KERN_WARNING + "JBD: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). " + "There's a risk of filesystem corruption in case of system " + "crash.\n", + bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr); } /* @@ -593,14 +574,16 @@ repeat: if (jh->b_next_transaction) J_ASSERT_JH(jh, jh->b_next_transaction == transaction); + warn_dirty_buffer(bh); } /* * In any case we need to clean the dirty flag and we must * do it under the buffer lock to be sure we don't race * with running write-out. */ - JBUFFER_TRACE(jh, "Unexpected dirty buffer"); - jbd_unexpected_dirty_buffer(jh); + JBUFFER_TRACE(jh, "Journalling dirty buffer"); + clear_buffer_dirty(bh); + set_buffer_jbddirty(bh); } unlock_buffer(bh); @@ -843,6 +826,15 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) J_ASSERT_JH(jh, buffer_locked(jh2bh(jh))); if (jh->b_transaction == NULL) { + /* + * Previous jbd2_journal_forget() could have left the buffer + * with jbddirty bit set because it was being committed. When + * the commit finished, we've filed the buffer for + * checkpointing and marked it dirty. Now we are reallocating + * the buffer so the transaction freeing it must have + * committed and so it's safe to clear the dirty bit. + */ + clear_buffer_dirty(jh2bh(jh)); jh->b_transaction = transaction; /* first access by this transaction */ @@ -1547,36 +1539,6 @@ out: return; } -/* - * jbd2_journal_try_to_free_buffers() could race with - * jbd2_journal_commit_transaction(). The later might still hold the - * reference count to the buffers when inspecting them on - * t_syncdata_list or t_locked_list. - * - * jbd2_journal_try_to_free_buffers() will call this function to - * wait for the current transaction to finish syncing data buffers, before - * try to free that buffer. - * - * Called with journal->j_state_lock hold. - */ -static void jbd2_journal_wait_for_transaction_sync_data(journal_t *journal) -{ - transaction_t *transaction; - tid_t tid; - - spin_lock(&journal->j_state_lock); - transaction = journal->j_committing_transaction; - - if (!transaction) { - spin_unlock(&journal->j_state_lock); - return; - } - - tid = transaction->t_tid; - spin_unlock(&journal->j_state_lock); - jbd2_log_wait_commit(journal, tid); -} - /** * int jbd2_journal_try_to_free_buffers() - try to free page buffers. * @journal: journal for operation @@ -1649,25 +1611,6 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal, ret = try_to_free_buffers(page); - /* - * There are a number of places where jbd2_journal_try_to_free_buffers() - * could race with jbd2_journal_commit_transaction(), the later still - * holds the reference to the buffers to free while processing them. - * try_to_free_buffers() failed to free those buffers. Some of the - * caller of releasepage() request page buffers to be dropped, otherwise - * treat the fail-to-free as errors (such as generic_file_direct_IO()) - * - * So, if the caller of try_to_release_page() wants the synchronous - * behaviour(i.e make sure buffers are dropped upon return), - * let's wait for the current transaction to finish flush of - * dirty data buffers, then try to free those buffers again, - * with the journal locked. - */ - if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) { - jbd2_journal_wait_for_transaction_sync_data(journal); - ret = try_to_free_buffers(page); - } - busy: return ret; } @@ -1693,8 +1636,13 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) if (jh->b_cp_transaction) { JBUFFER_TRACE(jh, "on running+cp transaction"); + /* + * We don't want to write the buffer anymore, clear the + * bit so that we don't confuse checks in + * __journal_file_buffer + */ + clear_buffer_dirty(bh); __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); - clear_buffer_jbddirty(bh); may_free = 0; } else { JBUFFER_TRACE(jh, "on running transaction"); @@ -1945,12 +1893,17 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, if (jh->b_transaction && jh->b_jlist == jlist) return; - /* The following list of buffer states needs to be consistent - * with __jbd_unexpected_dirty_buffer()'s handling of dirty - * state. */ - if (jlist == BJ_Metadata || jlist == BJ_Reserved || jlist == BJ_Shadow || jlist == BJ_Forget) { + /* + * For metadata buffers, we track dirty bit in buffer_jbddirty + * instead of buffer_dirty. We should not see a dirty bit set + * here because we clear it in do_get_write_access but e.g. + * tune2fs can modify the sb and set the dirty bit at any time + * so we try to gracefully handle that. + */ + if (buffer_dirty(bh)) + warn_dirty_buffer(bh); if (test_clear_buffer_dirty(bh) || test_clear_buffer_jbddirty(bh)) was_dirty = 1; |