From d1645e526a1e5842c9ac433d73419ba886676cf3 Mon Sep 17 00:00:00 2001 From: Hidehiro Kawai Date: Sat, 18 Oct 2008 20:27:53 -0700 Subject: jbd: abort when failed to log metadata buffers If we failed to write metadata buffers to the journal space and succeeded to write the commit record, stale data can be written back to the filesystem as metadata in the recovery phase. To avoid this, when we failed to write out metadata buffers, abort the journal before writing the commit record. Signed-off-by: Hidehiro Kawai Acked-by: Jan Kara Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd/commit.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/jbd/commit.c') diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index ae08c057e75..f1ea861b992 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -762,6 +762,9 @@ wait_for_iobuf: /* AKPM: bforget here */ } + if (err) + journal_abort(journal, err); + jbd_debug(3, "JBD: commit phase 6\n"); if (journal_write_commit_record(journal, commit_transaction)) -- cgit v1.2.3-70-g09d2 From 885e353c7427db7b60692789741b34e605b0b69b Mon Sep 17 00:00:00 2001 From: Hidehiro Kawai Date: Sat, 18 Oct 2008 20:27:54 -0700 Subject: jbd: don't dirty original metadata buffer on abort Currently, original metadata buffers are dirtied when they are unfiled whether the journal has aborted or not. Eventually these buffers will be written-back to the filesystem by pdflush. This means some metadata buffers are written to the filesystem without journaling if the journal aborts. So if both journal abort and system crash happen at the same time, the filesystem would become inconsistent state. Additionally, replaying journaled metadata can overwrite the latest metadata on the filesystem partly. Because, if the journal aborts, journaled metadata are preserved and replayed during the next mount not to lose uncheckpointed metadata. This would also break the consistency of the filesystem. This patch prevents original metadata buffers from being dirtied on abort by clearing BH_JBDDirty flag from those buffers. Thus, no metadata buffers are written to the filesystem without journaling. Signed-off-by: Hidehiro Kawai Acked-by: Jan Kara Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd/commit.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/jbd/commit.c') diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index f1ea861b992..d6a6659f3e4 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -518,9 +518,10 @@ void journal_commit_transaction(journal_t *journal) jh = commit_transaction->t_buffers; /* If we're in abort mode, we just un-journal the buffer and - release it for background writing. */ + release it. */ if (is_journal_aborted(journal)) { + clear_buffer_jbddirty(jh2bh(jh)); JBUFFER_TRACE(jh, "journal is aborting: refile"); journal_refile_buffer(journal, jh); /* If that was the last one, we need to clean up @@ -855,6 +856,8 @@ restart_loop: if (buffer_jbddirty(bh)) { JBUFFER_TRACE(jh, "add to new checkpointing trans"); __journal_insert_checkpoint(jh, commit_transaction); + if (is_journal_aborted(journal)) + clear_buffer_jbddirty(bh); JBUFFER_TRACE(jh, "refile for checkpoint writeback"); __journal_refile_buffer(jh); jbd_unlock_bh_state(bh); -- cgit v1.2.3-70-g09d2 From 0e4fb5e283870757024294bc4567a7c59d936f0b Mon Sep 17 00:00:00 2001 From: Hidehiro Kawai Date: Sat, 18 Oct 2008 20:27:57 -0700 Subject: ext3: add an option to control error handling on file data If the journal doesn't abort when it gets an IO error in file data blocks, the file data corruption will spread silently. Because most of applications and commands do buffered writes without fsync(), they don't notice the IO error. It's scary for mission critical systems. On the other hand, if the journal aborts whenever it gets an IO error in file data blocks, the system will easily become inoperable. So this patch introduces a filesystem option to determine whether it aborts the journal or just call printk() when it gets an IO error in file data. If you mount a ext3 fs with data_err=abort option, it aborts on file data write error. If you mount it with data_err=ignore, it doesn't abort, just call printk(). data_err=ignore is the default. Signed-off-by: Hidehiro Kawai Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/ext3.txt | 5 +++++ fs/ext3/super.c | 16 ++++++++++++++++ fs/jbd/commit.c | 2 ++ include/linux/ext3_fs.h | 2 ++ include/linux/jbd.h | 3 +++ 5 files changed, 28 insertions(+) (limited to 'fs/jbd/commit.c') diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt index 295f26cd895..9dd2a3bb2ac 100644 --- a/Documentation/filesystems/ext3.txt +++ b/Documentation/filesystems/ext3.txt @@ -96,6 +96,11 @@ errors=remount-ro(*) Remount the filesystem read-only on an error. errors=continue Keep going on a filesystem error. errors=panic Panic and halt the machine if an error occurs. +data_err=ignore(*) Just print an error message if an error occurs + in a file data buffer in ordered mode. +data_err=abort Abort the journal if an error occurs in a file + data buffer in ordered mode. + grpid Give objects the same group ID as their creator. bsdgroups diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 399a96a6c55..3a260af5544 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -625,6 +625,9 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA) seq_puts(seq, ",data=writeback"); + if (test_opt(sb, DATA_ERR_ABORT)) + seq_puts(seq, ",data_err=abort"); + ext3_show_quota_options(seq, sb); return 0; @@ -754,6 +757,7 @@ enum { Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, + Opt_data_err_abort, Opt_data_err_ignore, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, @@ -796,6 +800,8 @@ static const match_table_t tokens = { {Opt_data_journal, "data=journal"}, {Opt_data_ordered, "data=ordered"}, {Opt_data_writeback, "data=writeback"}, + {Opt_data_err_abort, "data_err=abort"}, + {Opt_data_err_ignore, "data_err=ignore"}, {Opt_offusrjquota, "usrjquota="}, {Opt_usrjquota, "usrjquota=%s"}, {Opt_offgrpjquota, "grpjquota="}, @@ -1011,6 +1017,12 @@ static int parse_options (char *options, struct super_block *sb, sbi->s_mount_opt |= data_opt; } break; + case Opt_data_err_abort: + set_opt(sbi->s_mount_opt, DATA_ERR_ABORT); + break; + case Opt_data_err_ignore: + clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); + break; #ifdef CONFIG_QUOTA case Opt_usrjquota: qtype = USRQUOTA; @@ -1986,6 +1998,10 @@ static void ext3_init_journal_params(struct super_block *sb, journal_t *journal) journal->j_flags |= JFS_BARRIER; else journal->j_flags &= ~JFS_BARRIER; + if (test_opt(sb, DATA_ERR_ABORT)) + journal->j_flags |= JFS_ABORT_ON_SYNCDATA_ERR; + else + journal->j_flags &= ~JFS_ABORT_ON_SYNCDATA_ERR; spin_unlock(&journal->j_state_lock); } diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index d6a6659f3e4..25719d902c5 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -482,6 +482,8 @@ void journal_commit_transaction(journal_t *journal) printk(KERN_WARNING "JBD: Detected IO errors while flushing file data " "on %s\n", bdevname(journal->j_fs_dev, b)); + if (journal->j_flags & JFS_ABORT_ON_SYNCDATA_ERR) + journal_abort(journal, err); err = 0; } diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 159d9b476cd..d14f0291848 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -380,6 +380,8 @@ struct ext3_inode { #define EXT3_MOUNT_QUOTA 0x80000 /* Some quota option set */ #define EXT3_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ #define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ +#define EXT3_MOUNT_DATA_ERR_ABORT 0x400000 /* Abort on file data write + * error in ordered mode */ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 7ebbcb1c9ba..35d4f6342fa 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -816,6 +816,9 @@ struct journal_s #define JFS_FLUSHED 0x008 /* The journal superblock has been flushed */ #define JFS_LOADED 0x010 /* The journal superblock has been loaded */ #define JFS_BARRIER 0x020 /* Use IDE barriers */ +#define JFS_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file + * data write error in ordered + * mode */ /* * Function declarations for the journaling transaction and buffer -- cgit v1.2.3-70-g09d2