From 77e841de8abac4755cc83ca224fdf71418d65380 Mon Sep 17 00:00:00 2001 From: Hidehiro Kawai Date: Sun, 12 Oct 2008 16:39:16 -0400 Subject: jbd2: abort when failed to log metadata buffers If we failed to write metadata buffers to the journal space and succeeded to write the commit record, stale data can be written back to the filesystem as metadata in the recovery phase. To avoid this, when we failed to write out metadata buffers, abort the journal before writing the commit record. We can also avoid this kind of corruption by using the journal checksum feature because it can detect invalid metadata blocks in the journal and avoid them from being replayed. So we don't need to care about asynchronous commit record writeout with a checksum. Signed-off-by: Hidehiro Kawai Signed-off-by: Theodore Ts'o --- fs/jbd2/commit.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/jbd2/commit.c') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 0d3814a35ed..78e4da93412 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -783,6 +783,9 @@ wait_for_iobuf: /* AKPM: bforget here */ } + if (err) + jbd2_journal_abort(journal, err); + jbd_debug(3, "JBD: commit phase 5\n"); if (!JBD2_HAS_INCOMPAT_FEATURE(journal, -- cgit v1.2.3-70-g09d2 From 7ad7445f60fe4d46c4c9d2a9463db180d2a3b270 Mon Sep 17 00:00:00 2001 From: Hidehiro Kawai Date: Fri, 10 Oct 2008 20:29:31 -0400 Subject: jbd2: don't dirty original metadata buffer on abort Currently, original metadata buffers are dirtied when they are unfiled whether the journal has aborted or not. Eventually these buffers will be written-back to the filesystem by pdflush. This means some metadata buffers are written to the filesystem without journaling if the journal aborts. So if both journal abort and system crash happen at the same time, the filesystem would become inconsistent state. Additionally, replaying journaled metadata can overwrite the latest metadata on the filesystem partly. Because, if the journal gets aborted, journaled metadata are preserved and replayed during the next mount not to lose uncheckpointed metadata. This would also break the consistency of the filesystem. This patch prevents original metadata buffers from being dirtied on abort by clearing BH_JBDDirty flag from those buffers. Thus, no metadata buffers are written to the filesystem without journaling. Signed-off-by: Hidehiro Kawai Signed-off-by: Theodore Ts'o --- fs/jbd2/commit.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/jbd2/commit.c') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 78e4da93412..849f10496ce 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -504,9 +504,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) jh = commit_transaction->t_buffers; /* If we're in abort mode, we just un-journal the buffer and - release it for background writing. */ + release it. */ if (is_journal_aborted(journal)) { + clear_buffer_jbddirty(jh2bh(jh)); JBUFFER_TRACE(jh, "journal is aborting: refile"); jbd2_journal_refile_buffer(journal, jh); /* If that was the last one, we need to clean up @@ -884,6 +885,8 @@ restart_loop: if (buffer_jbddirty(bh)) { JBUFFER_TRACE(jh, "add to new checkpointing trans"); __jbd2_journal_insert_checkpoint(jh, commit_transaction); + if (is_journal_aborted(journal)) + clear_buffer_jbddirty(bh); JBUFFER_TRACE(jh, "refile for checkpoint writeback"); __jbd2_journal_refile_buffer(jh); jbd_unlock_bh_state(bh); -- cgit v1.2.3-70-g09d2 From 5bf5683a33f3584da6eced480967c4f7e11515a8 Mon Sep 17 00:00:00 2001 From: Hidehiro Kawai Date: Fri, 10 Oct 2008 22:12:43 -0400 Subject: ext4: add an option to control error handling on file data If the journal doesn't abort when it gets an IO error in file data blocks, the file data corruption will spread silently. Because most of applications and commands do buffered writes without fsync(), they don't notice the IO error. It's scary for mission critical systems. On the other hand, if the journal aborts whenever it gets an IO error in file data blocks, the system will easily become inoperable. So this patch introduces a filesystem option to determine whether it aborts the journal or just call printk() when it gets an IO error in file data. If you mount an ext4 fs with data_err=abort option, it aborts on file data write error. If you mount it with data_err=ignore, it doesn't abort, just call printk(). data_err=ignore is the default. Here is the corresponding patch of the ext3 version: http://kerneltrap.org/mailarchive/linux-kernel/2008/9/9/3239374 Signed-off-by: Hidehiro Kawai Signed-off-by: Theodore Ts'o --- Documentation/filesystems/ext4.txt | 5 +++++ fs/ext4/ext4.h | 2 ++ fs/ext4/super.c | 16 ++++++++++++++++ fs/jbd2/commit.c | 2 ++ include/linux/jbd2.h | 3 +++ 5 files changed, 28 insertions(+) (limited to 'fs/jbd2/commit.c') diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index 74484e69640..eb154ef36c2 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -223,6 +223,11 @@ errors=remount-ro(*) Remount the filesystem read-only on an error. errors=continue Keep going on a filesystem error. errors=panic Panic and halt the machine if an error occurs. +data_err=ignore(*) Just print an error message if an error occurs + in a file data buffer in ordered mode. +data_err=abort Abort the journal if an error occurs in a file + data buffer in ordered mode. + grpid Give objects the same group ID as their creator. bsdgroups diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index f46a513a515..6690a41cdd9 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -540,6 +540,8 @@ do { \ #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ +#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ + /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 79bd3989e84..014677b8e22 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -778,6 +778,9 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_printf(seq, ",inode_readahead_blks=%u", sbi->s_inode_readahead_blks); + if (test_opt(sb, DATA_ERR_ABORT)) + seq_puts(seq, ",data_err=abort"); + ext4_show_quota_options(seq, sb); return 0; } @@ -907,6 +910,7 @@ enum { Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, + Opt_data_err_abort, Opt_data_err_ignore, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, @@ -953,6 +957,8 @@ static match_table_t tokens = { {Opt_data_journal, "data=journal"}, {Opt_data_ordered, "data=ordered"}, {Opt_data_writeback, "data=writeback"}, + {Opt_data_err_abort, "data_err=abort"}, + {Opt_data_err_ignore, "data_err=ignore"}, {Opt_offusrjquota, "usrjquota="}, {Opt_usrjquota, "usrjquota=%s"}, {Opt_offgrpjquota, "grpjquota="}, @@ -1187,6 +1193,12 @@ static int parse_options(char *options, struct super_block *sb, sbi->s_mount_opt |= data_opt; } break; + case Opt_data_err_abort: + set_opt(sbi->s_mount_opt, DATA_ERR_ABORT); + break; + case Opt_data_err_ignore: + clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); + break; #ifdef CONFIG_QUOTA case Opt_usrjquota: qtype = USRQUOTA; @@ -2535,6 +2547,10 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) journal->j_flags |= JBD2_BARRIER; else journal->j_flags &= ~JBD2_BARRIER; + if (test_opt(sb, DATA_ERR_ABORT)) + journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; + else + journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; spin_unlock(&journal->j_state_lock); } diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 849f10496ce..0abe02c4242 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -684,6 +684,8 @@ start_journal_io: printk(KERN_WARNING "JBD2: Detected IO errors while flushing file data " "on %s\n", journal->j_devname); + if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR) + jbd2_journal_abort(journal, err); err = 0; } diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index c9e7d781db3..d2e91ea998f 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -967,6 +967,9 @@ struct journal_s #define JBD2_FLUSHED 0x008 /* The journal superblock has been flushed */ #define JBD2_LOADED 0x010 /* The journal superblock has been loaded */ #define JBD2_BARRIER 0x020 /* Use IDE barriers */ +#define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file + * data write error in ordered + * mode */ /* * Function declarations for the journaling transaction and buffer -- cgit v1.2.3-70-g09d2