diff options
Diffstat (limited to 'fs/jbd2/journal.c')
-rw-r--r-- | fs/jbd2/journal.c | 280 |
1 files changed, 206 insertions, 74 deletions
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index ac0d027595d..538417c1fdb 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -39,15 +39,18 @@ #include <linux/seq_file.h> #include <linux/math64.h> #include <linux/hash.h> +#include <linux/log2.h> +#include <linux/vmalloc.h> +#include <linux/backing-dev.h> +#include <linux/bitops.h> #define CREATE_TRACE_POINTS #include <trace/events/jbd2.h> #include <asm/uaccess.h> #include <asm/page.h> +#include <asm/system.h> -EXPORT_SYMBOL(jbd2_journal_start); -EXPORT_SYMBOL(jbd2_journal_restart); EXPORT_SYMBOL(jbd2_journal_extend); EXPORT_SYMBOL(jbd2_journal_stop); EXPORT_SYMBOL(jbd2_journal_lock_updates); @@ -93,6 +96,7 @@ EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); static void __journal_abort_soft (journal_t *journal, int errno); +static int jbd2_journal_create_slab(size_t slab_size); /* * Helper function used to manage commit timeouts @@ -140,7 +144,7 @@ static int kjournald2(void *arg) /* * And now, wait forever for commit wakeup events. */ - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); loop: if (journal->j_flags & JBD2_UNMOUNT) @@ -151,10 +155,10 @@ loop: if (journal->j_commit_sequence != journal->j_commit_request) { jbd_debug(1, "OK, requests differ\n"); - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); del_timer_sync(&journal->j_commit_timer); jbd2_journal_commit_transaction(journal); - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); goto loop; } @@ -166,9 +170,9 @@ loop: * be already stopped. */ jbd_debug(1, "Now suspending kjournald2\n"); - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); refrigerator(); - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); } else { /* * We assume on resume that commits are already there, @@ -188,9 +192,9 @@ loop: if (journal->j_flags & JBD2_UNMOUNT) should_sleep = 0; if (should_sleep) { - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); schedule(); - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); } finish_wait(&journal->j_wait_commit, &wait); } @@ -208,7 +212,7 @@ loop: goto loop; end_loop: - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); del_timer_sync(&journal->j_commit_timer); journal->j_task = NULL; wake_up(&journal->j_wait_done_commit); @@ -231,16 +235,16 @@ static int jbd2_journal_start_thread(journal_t *journal) static void journal_kill_thread(journal_t *journal) { - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); journal->j_flags |= JBD2_UNMOUNT; while (journal->j_task) { wake_up(&journal->j_wait_commit); - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); wait_event(journal->j_wait_done_commit, journal->j_task == NULL); - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); } - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); } /* @@ -294,7 +298,6 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction, struct page *new_page; unsigned int new_offset; struct buffer_head *bh_in = jh2bh(jh_in); - struct jbd2_buffer_trigger_type *triggers; journal_t *journal = transaction->t_journal; /* @@ -308,7 +311,17 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction, */ J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in)); - new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL); +retry_alloc: + new_bh = alloc_buffer_head(GFP_NOFS); + if (!new_bh) { + /* + * Failure is not an option, but __GFP_NOFAIL is going + * away; so we retry ourselves here. + */ + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry_alloc; + } + /* keep subsequent assertions sane */ new_bh->b_state = 0; init_buffer(new_bh, NULL, NULL); @@ -325,21 +338,21 @@ repeat: done_copy_out = 1; new_page = virt_to_page(jh_in->b_frozen_data); new_offset = offset_in_page(jh_in->b_frozen_data); - triggers = jh_in->b_frozen_triggers; } else { new_page = jh2bh(jh_in)->b_page; new_offset = offset_in_page(jh2bh(jh_in)->b_data); - triggers = jh_in->b_triggers; } mapped_data = kmap_atomic(new_page, KM_USER0); /* - * Fire any commit trigger. Do this before checking for escaping, - * as the trigger may modify the magic offset. If a copy-out - * happens afterwards, it will have the correct data in the buffer. + * Fire data frozen trigger if data already wasn't frozen. Do this + * before checking for escaping, as the trigger may modify the magic + * offset. If a copy-out happens afterwards, it will have the correct + * data in the buffer. */ - jbd2_buffer_commit_trigger(jh_in, mapped_data + new_offset, - triggers); + if (!done_copy_out) + jbd2_buffer_frozen_trigger(jh_in, mapped_data + new_offset, + jh_in->b_triggers); /* * Check for escaping @@ -440,7 +453,7 @@ int __jbd2_log_space_left(journal_t *journal) { int left = journal->j_free; - assert_spin_locked(&journal->j_state_lock); + /* assert_spin_locked(&journal->j_state_lock); */ /* * Be pessimistic here about the number of those free blocks which @@ -467,7 +480,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target) */ if (!tid_geq(journal->j_commit_request, target)) { /* - * We want a new commit: OK, mark the request and wakup the + * We want a new commit: OK, mark the request and wakeup the * commit thread. We do _not_ do the commit ourselves. */ @@ -485,9 +498,9 @@ int jbd2_log_start_commit(journal_t *journal, tid_t tid) { int ret; - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); ret = __jbd2_log_start_commit(journal, tid); - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); return ret; } @@ -506,7 +519,7 @@ int jbd2_journal_force_commit_nested(journal_t *journal) transaction_t *transaction = NULL; tid_t tid; - spin_lock(&journal->j_state_lock); + read_lock(&journal->j_state_lock); if (journal->j_running_transaction && !current->journal_info) { transaction = journal->j_running_transaction; __jbd2_log_start_commit(journal, transaction->t_tid); @@ -514,12 +527,12 @@ int jbd2_journal_force_commit_nested(journal_t *journal) transaction = journal->j_committing_transaction; if (!transaction) { - spin_unlock(&journal->j_state_lock); + read_unlock(&journal->j_state_lock); return 0; /* Nothing to retry */ } tid = transaction->t_tid; - spin_unlock(&journal->j_state_lock); + read_unlock(&journal->j_state_lock); jbd2_log_wait_commit(journal, tid); return 1; } @@ -533,7 +546,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) { int ret = 0; - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); if (journal->j_running_transaction) { tid_t tid = journal->j_running_transaction->t_tid; @@ -552,7 +565,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) *ptid = journal->j_committing_transaction->t_tid; ret = 1; } - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); return ret; } @@ -564,26 +577,24 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) { int err = 0; + read_lock(&journal->j_state_lock); #ifdef CONFIG_JBD2_DEBUG - spin_lock(&journal->j_state_lock); if (!tid_geq(journal->j_commit_request, tid)) { printk(KERN_EMERG "%s: error: j_commit_request=%d, tid=%d\n", __func__, journal->j_commit_request, tid); } - spin_unlock(&journal->j_state_lock); #endif - spin_lock(&journal->j_state_lock); while (tid_gt(tid, journal->j_commit_sequence)) { jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n", tid, journal->j_commit_sequence); wake_up(&journal->j_wait_commit); - spin_unlock(&journal->j_state_lock); + read_unlock(&journal->j_state_lock); wait_event(journal->j_wait_done_commit, !tid_gt(tid, journal->j_commit_sequence)); - spin_lock(&journal->j_state_lock); + read_lock(&journal->j_state_lock); } - spin_unlock(&journal->j_state_lock); + read_unlock(&journal->j_state_lock); if (unlikely(is_journal_aborted(journal))) { printk(KERN_EMERG "journal commit I/O error\n"); @@ -600,7 +611,7 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp) { unsigned long blocknr; - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); J_ASSERT(journal->j_free > 1); blocknr = journal->j_head; @@ -608,7 +619,7 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp) journal->j_free--; if (journal->j_head == journal->j_last) journal->j_head = journal->j_first; - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); return jbd2_journal_bmap(journal, blocknr, retp); } @@ -828,7 +839,7 @@ static journal_t * journal_init_common (void) mutex_init(&journal->j_checkpoint_mutex); spin_lock_init(&journal->j_revoke_lock); spin_lock_init(&journal->j_list_lock); - spin_lock_init(&journal->j_state_lock); + rwlock_init(&journal->j_state_lock); journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); journal->j_min_batch_time = 0; @@ -1094,14 +1105,14 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) set_buffer_uptodate(bh); } - spin_lock(&journal->j_state_lock); + read_lock(&journal->j_state_lock); jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", journal->j_tail, journal->j_tail_sequence, journal->j_errno); sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); sb->s_start = cpu_to_be32(journal->j_tail); sb->s_errno = cpu_to_be32(journal->j_errno); - spin_unlock(&journal->j_state_lock); + read_unlock(&journal->j_state_lock); BUFFER_TRACE(bh, "marking dirty"); mark_buffer_dirty(bh); @@ -1115,19 +1126,19 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) set_buffer_uptodate(bh); } } else - ll_rw_block(SWRITE, 1, &bh); + write_dirty_buffer(bh, WRITE); out: /* If we have just flushed the log (by marking s_start==0), then * any future commit will have to be careful to update the * superblock again to re-record the true start of the log. */ - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); if (sb->s_start) journal->j_flags &= ~JBD2_FLUSHED; else journal->j_flags |= JBD2_FLUSHED; - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); } /* @@ -1248,6 +1259,13 @@ int jbd2_journal_load(journal_t *journal) } } + /* + * Create a slab for this blocksize + */ + err = jbd2_journal_create_slab(be32_to_cpu(sb->s_blocksize)); + if (err) + return err; + /* Let the recovery code check whether it needs to recover any * data from the journal. */ if (jbd2_journal_recover(journal)) @@ -1355,6 +1373,10 @@ int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat, if (!compat && !ro && !incompat) return 1; + /* Load journal superblock if it is not loaded yet. */ + if (journal->j_format_version == 0 && + journal_get_superblock(journal) != 0) + return 0; if (journal->j_format_version == 1) return 0; @@ -1382,13 +1404,9 @@ int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat, int jbd2_journal_check_available_features (journal_t *journal, unsigned long compat, unsigned long ro, unsigned long incompat) { - journal_superblock_t *sb; - if (!compat && !ro && !incompat) return 1; - sb = journal->j_superblock; - /* We can support any known requested features iff the * superblock is in version 2. Otherwise we fail to support any * extended sb features. */ @@ -1536,7 +1554,7 @@ int jbd2_journal_flush(journal_t *journal) transaction_t *transaction = NULL; unsigned long old_tail; - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); /* Force everything buffered to the log... */ if (journal->j_running_transaction) { @@ -1549,10 +1567,10 @@ int jbd2_journal_flush(journal_t *journal) if (transaction) { tid_t tid = transaction->t_tid; - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); jbd2_log_wait_commit(journal, tid); } else { - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); } /* ...and flush everything in the log out to disk. */ @@ -1576,12 +1594,12 @@ int jbd2_journal_flush(journal_t *journal) * the magic code for a fully-recovered superblock. Any future * commits of data to the journal will restore the current * s_start value. */ - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); old_tail = journal->j_tail; journal->j_tail = 0; - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); jbd2_journal_update_superblock(journal, 1); - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); journal->j_tail = old_tail; J_ASSERT(!journal->j_running_transaction); @@ -1589,7 +1607,7 @@ int jbd2_journal_flush(journal_t *journal) J_ASSERT(!journal->j_checkpoint_transactions); J_ASSERT(journal->j_head == journal->j_tail); J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); return 0; } @@ -1608,7 +1626,6 @@ int jbd2_journal_flush(journal_t *journal) int jbd2_journal_wipe(journal_t *journal, int write) { - journal_superblock_t *sb; int err = 0; J_ASSERT (!(journal->j_flags & JBD2_LOADED)); @@ -1617,8 +1634,6 @@ int jbd2_journal_wipe(journal_t *journal, int write) if (err) return err; - sb = journal->j_superblock; - if (!journal->j_tail) goto no_recovery; @@ -1656,12 +1671,12 @@ void __jbd2_journal_abort_hard(journal_t *journal) printk(KERN_ERR "Aborting journal on device %s.\n", journal->j_devname); - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); journal->j_flags |= JBD2_ABORT; transaction = journal->j_running_transaction; if (transaction) __jbd2_log_start_commit(journal, transaction->t_tid); - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); } /* Soft abort: record the abort error status in the journal superblock, @@ -1746,12 +1761,12 @@ int jbd2_journal_errno(journal_t *journal) { int err; - spin_lock(&journal->j_state_lock); + read_lock(&journal->j_state_lock); if (journal->j_flags & JBD2_ABORT) err = -EROFS; else err = journal->j_errno; - spin_unlock(&journal->j_state_lock); + read_unlock(&journal->j_state_lock); return err; } @@ -1766,12 +1781,12 @@ int jbd2_journal_clear_err(journal_t *journal) { int err = 0; - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); if (journal->j_flags & JBD2_ABORT) err = -EROFS; else journal->j_errno = 0; - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); return err; } @@ -1784,10 +1799,10 @@ int jbd2_journal_clear_err(journal_t *journal) */ void jbd2_journal_ack_err(journal_t *journal) { - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); if (journal->j_errno) journal->j_flags |= JBD2_ACK_ERR; - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); } int jbd2_journal_blocks_per_page(struct inode *inode) @@ -1807,6 +1822,127 @@ size_t journal_tag_bytes(journal_t *journal) } /* + * JBD memory management + * + * These functions are used to allocate block-sized chunks of memory + * used for making copies of buffer_head data. Very often it will be + * page-sized chunks of data, but sometimes it will be in + * sub-page-size chunks. (For example, 16k pages on Power systems + * with a 4k block file system.) For blocks smaller than a page, we + * use a SLAB allocator. There are slab caches for each block size, + * which are allocated at mount time, if necessary, and we only free + * (all of) the slab caches when/if the jbd2 module is unloaded. For + * this reason we don't need to a mutex to protect access to + * jbd2_slab[] allocating or releasing memory; only in + * jbd2_journal_create_slab(). + */ +#define JBD2_MAX_SLABS 8 +static struct kmem_cache *jbd2_slab[JBD2_MAX_SLABS]; +static DECLARE_MUTEX(jbd2_slab_create_sem); + +static const char *jbd2_slab_names[JBD2_MAX_SLABS] = { + "jbd2_1k", "jbd2_2k", "jbd2_4k", "jbd2_8k", + "jbd2_16k", "jbd2_32k", "jbd2_64k", "jbd2_128k" +}; + + +static void jbd2_journal_destroy_slabs(void) +{ + int i; + + for (i = 0; i < JBD2_MAX_SLABS; i++) { + if (jbd2_slab[i]) + kmem_cache_destroy(jbd2_slab[i]); + jbd2_slab[i] = NULL; + } +} + +static int jbd2_journal_create_slab(size_t size) +{ + int i = order_base_2(size) - 10; + size_t slab_size; + + if (size == PAGE_SIZE) + return 0; + + if (i >= JBD2_MAX_SLABS) + return -EINVAL; + + if (unlikely(i < 0)) + i = 0; + down(&jbd2_slab_create_sem); + if (jbd2_slab[i]) { + up(&jbd2_slab_create_sem); + return 0; /* Already created */ + } + + slab_size = 1 << (i+10); + jbd2_slab[i] = kmem_cache_create(jbd2_slab_names[i], slab_size, + slab_size, 0, NULL); + up(&jbd2_slab_create_sem); + if (!jbd2_slab[i]) { + printk(KERN_EMERG "JBD2: no memory for jbd2_slab cache\n"); + return -ENOMEM; + } + return 0; +} + +static struct kmem_cache *get_slab(size_t size) +{ + int i = order_base_2(size) - 10; + + BUG_ON(i >= JBD2_MAX_SLABS); + if (unlikely(i < 0)) + i = 0; + BUG_ON(jbd2_slab[i] == NULL); + return jbd2_slab[i]; +} + +void *jbd2_alloc(size_t size, gfp_t flags) +{ + void *ptr; + + BUG_ON(size & (size-1)); /* Must be a power of 2 */ + + flags |= __GFP_REPEAT; + if (size == PAGE_SIZE) + ptr = (void *)__get_free_pages(flags, 0); + else if (size > PAGE_SIZE) { + int order = get_order(size); + + if (order < 3) + ptr = (void *)__get_free_pages(flags, order); + else + ptr = vmalloc(size); + } else + ptr = kmem_cache_alloc(get_slab(size), flags); + + /* Check alignment; SLUB has gotten this wrong in the past, + * and this can lead to user data corruption! */ + BUG_ON(((unsigned long) ptr) & (size-1)); + + return ptr; +} + +void jbd2_free(void *ptr, size_t size) +{ + if (size == PAGE_SIZE) { + free_pages((unsigned long)ptr, 0); + return; + } + if (size > PAGE_SIZE) { + int order = get_order(size); + + if (order < 3) + free_pages((unsigned long)ptr, order); + else + vfree(ptr); + return; + } + kmem_cache_free(get_slab(size), ptr); +}; + +/* * Journal_head storage management */ static struct kmem_cache *jbd2_journal_head_cache; @@ -2071,14 +2207,12 @@ void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode) void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode) { - int writeout = 0; - if (!journal) return; restart: spin_lock(&journal->j_list_lock); /* Is commit writing out inode - we have to wait */ - if (jinode->i_flags & JI_COMMIT_RUNNING) { + if (test_bit(__JI_COMMIT_RUNNING, &jinode->i_flags)) { wait_queue_head_t *wq; DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING); wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING); @@ -2089,9 +2223,6 @@ restart: goto restart; } - /* Do we need to wait for data writeback? */ - if (journal->j_committing_transaction == jinode->i_transaction) - writeout = 1; if (jinode->i_transaction) { list_del(&jinode->i_list); jinode->i_transaction = NULL; @@ -2204,6 +2335,7 @@ static void jbd2_journal_destroy_caches(void) jbd2_journal_destroy_revoke_caches(); jbd2_journal_destroy_jbd2_journal_head_cache(); jbd2_journal_destroy_handle_cache(); + jbd2_journal_destroy_slabs(); } static int __init journal_init(void) |