From ea0174a7137c8ca9f130ca681f3a99c872da6778 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 12 Oct 2009 21:34:27 -0500 Subject: ext3: retry failed direct IO allocations On a 256M 4k block filesystem, doing this in a loop: dd if=/dev/zero of=test oflag=direct bs=1M count=64 rm -f test eventually leads to spurious ENOSPC: dd: writing `test': No space left on device As with other block allocation callers, it looks like we need to potentially retry the allocations on the initial ENOSPC. A similar patch went into ext4 (commit fbbf69456619de5d251cb9f1df609069178c62d5) Signed-off-by: Eric Sandeen Signed-off-by: Jan Kara --- fs/ext3/inode.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index acf1b142332..069a163393b 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1735,6 +1735,7 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, ssize_t ret; int orphan = 0; size_t count = iov_length(iov, nr_segs); + int retries = 0; if (rw == WRITE) { loff_t final_size = offset + count; @@ -1757,9 +1758,12 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, } } +retry: ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, ext3_get_block, NULL); + if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) + goto retry; if (orphan) { int err; -- cgit v1.2.3-70-g09d2 From fe8bc91c4c30122b357d197117705cfd4fabaf28 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 16 Oct 2009 19:26:15 +0200 Subject: ext3: Wait for proper transaction commit on fsync We cannot rely on buffer dirty bits during fsync because pdflush can come before fsync is called and clear dirty bits without forcing a transaction commit. What we do is that we track which transaction has last changed the inode and which transaction last changed allocation and force it to disk on fsync. Signed-off-by: Jan Kara Reviewed-by: Aneesh Kumar K.V --- fs/ext3/fsync.c | 36 ++++++++++++++++-------------------- fs/ext3/inode.c | 32 +++++++++++++++++++++++++++++++- fs/ext3/super.c | 2 ++ include/linux/ext3_fs_i.h | 8 ++++++++ 4 files changed, 57 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c index 451d166bbe9..8209f266e9a 100644 --- a/fs/ext3/fsync.c +++ b/fs/ext3/fsync.c @@ -46,19 +46,21 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; + struct ext3_inode_info *ei = EXT3_I(inode); + journal_t *journal = EXT3_SB(inode->i_sb)->s_journal; int ret = 0; + tid_t commit_tid; + + if (inode->i_sb->s_flags & MS_RDONLY) + return 0; J_ASSERT(ext3_journal_current_handle() == NULL); /* - * data=writeback: + * data=writeback,ordered: * The caller's filemap_fdatawrite()/wait will sync the data. - * sync_inode() will sync the metadata - * - * data=ordered: - * The caller's filemap_fdatawrite() will write the data and - * sync_inode() will write the inode if it is dirty. Then the caller's - * filemap_fdatawait() will wait on the pages. + * Metadata is in the journal, we wait for a proper transaction + * to commit here. * * data=journal: * filemap_fdatawrite won't do anything (the buffers are clean). @@ -73,22 +75,16 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) goto out; } - if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) - goto flush; + if (datasync) + commit_tid = atomic_read(&ei->i_datasync_tid); + else + commit_tid = atomic_read(&ei->i_sync_tid); - /* - * The VFS has written the file data. If the inode is unaltered - * then we need not start a commit. - */ - if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) { - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = 0, /* sys_fsync did this */ - }; - ret = sync_inode(inode, &wbc); + if (log_start_commit(journal, commit_tid)) { + log_wait_commit(journal, commit_tid); goto out; } -flush: + /* * In case we didn't commit a transaction, we have to flush * disk caches manually so that data really is on persistent diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 069a163393b..354ed3b47b3 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -699,8 +699,9 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, int err = 0; struct ext3_block_alloc_info *block_i; ext3_fsblk_t current_block; + struct ext3_inode_info *ei = EXT3_I(inode); - block_i = EXT3_I(inode)->i_block_alloc_info; + block_i = ei->i_block_alloc_info; /* * If we're splicing into a [td]indirect block (as opposed to the * inode) then we need to get write access to the [td]indirect block @@ -741,6 +742,8 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, inode->i_ctime = CURRENT_TIME_SEC; ext3_mark_inode_dirty(handle, inode); + /* ext3_mark_inode_dirty already updated i_sync_tid */ + atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid); /* had we spliced it onto indirect block? */ if (where->bh) { @@ -2754,6 +2757,8 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino) struct ext3_inode_info *ei; struct buffer_head *bh; struct inode *inode; + journal_t *journal = EXT3_SB(sb)->s_journal; + transaction_t *transaction; long ret; int block; @@ -2831,6 +2836,30 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino) ei->i_data[block] = raw_inode->i_block[block]; INIT_LIST_HEAD(&ei->i_orphan); + /* + * Set transaction id's of transactions that have to be committed + * to finish f[data]sync. We set them to currently running transaction + * as we cannot be sure that the inode or some of its metadata isn't + * part of the transaction - the inode could have been reclaimed and + * now it is reread from disk. + */ + if (journal) { + tid_t tid; + + spin_lock(&journal->j_state_lock); + if (journal->j_running_transaction) + transaction = journal->j_running_transaction; + else + transaction = journal->j_committing_transaction; + if (transaction) + tid = transaction->t_tid; + else + tid = journal->j_commit_sequence; + spin_unlock(&journal->j_state_lock); + atomic_set(&ei->i_sync_tid, tid); + atomic_set(&ei->i_datasync_tid, tid); + } + if (inode->i_ino >= EXT3_FIRST_INO(inode->i_sb) + 1 && EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) { /* @@ -3015,6 +3044,7 @@ again: err = rc; ei->i_state &= ~EXT3_STATE_NEW; + atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid); out_brelse: brelse (bh); ext3_std_error(inode->i_sb, err); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 7a520a862f4..427496c4767 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -466,6 +466,8 @@ static struct inode *ext3_alloc_inode(struct super_block *sb) return NULL; ei->i_block_alloc_info = NULL; ei->vfs_inode.i_version = 1; + atomic_set(&ei->i_datasync_tid, 0); + atomic_set(&ei->i_sync_tid, 0); return &ei->vfs_inode; } diff --git a/include/linux/ext3_fs_i.h b/include/linux/ext3_fs_i.h index ca1bfe90004..93e7428156b 100644 --- a/include/linux/ext3_fs_i.h +++ b/include/linux/ext3_fs_i.h @@ -137,6 +137,14 @@ struct ext3_inode_info { * by other means, so we have truncate_mutex. */ struct mutex truncate_mutex; + + /* + * Transactions that contain inode's metadata needed to complete + * fsync and fdatasync, respectively. + */ + atomic_t i_sync_tid; + atomic_t i_datasync_tid; + struct inode vfs_inode; }; -- cgit v1.2.3-70-g09d2 From 7b02bec07efe1d6c7d48c786e0c1a38d28fe7245 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Tue, 10 Nov 2009 17:13:22 +0800 Subject: JBD/JBD2: free j_wbuf if journal init fails. If journal init fails, we need to free j_wbuf. Cc: Andrew Morton Cc: Jan Kara Signed-off-by: Tao Ma Signed-off-by: Jan Kara --- fs/jbd/journal.c | 2 ++ fs/jbd2/journal.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index bd3c073b485..49d5cd6053c 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -756,6 +756,7 @@ journal_t * journal_init_dev(struct block_device *bdev, return journal; out_err: + kfree(journal->j_wbuf); kfree(journal); return NULL; } @@ -820,6 +821,7 @@ journal_t * journal_init_inode (struct inode *inode) return journal; out_err: + kfree(journal->j_wbuf); kfree(journal); return NULL; } diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index b0ab5219bec..fed85388ee8 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -913,6 +913,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, return journal; out_err: + kfree(journal->j_wbuf); jbd2_stats_proc_exit(journal); kfree(journal); return NULL; @@ -986,6 +987,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) return journal; out_err: + kfree(journal->j_wbuf); jbd2_stats_proc_exit(journal); kfree(journal); return NULL; -- cgit v1.2.3-70-g09d2