From a8d770d987ee20b59fba6c37d7f0f2a351913c4b Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 6 Apr 2009 18:44:54 +0200 Subject: xfs: use xfs_sync_inodes() for device flushing Currently xfs_device_flush calls sync_blockdev() which is a no-op for XFS as all it's metadata is held in a different address to the one sync_blockdev() works on. Call xfs_sync_inodes() instead to flush all the delayed allocation blocks out. To do this as efficiently as possible, do it via two passes - one to do an async flush of all the dirty blocks and a second to wait for all the IO to complete. This requires some modification to the xfs-sync_inodes_ag() flush code to do efficiently. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_iomap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/xfs/xfs_iomap.c') diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 08ce72316bf..8b97d82d7a8 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -361,7 +361,7 @@ xfs_flush_space( return 0; case 2: xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_flush_device(ip); + xfs_flush_inodes(ip); xfs_ilock(ip, XFS_ILOCK_EXCL); *fsynced = 3; return 0; -- cgit v1.2.3-70-g09d2 From 5825294edd3364cbba6514f70d88debec4f6cec7 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 6 Apr 2009 18:45:44 +0200 Subject: xfs: make inode flush at ENOSPC synchronous When we are writing to a single file and hit ENOSPC, we trigger a background flush of the inode and try again. Because we hold page locks and the iolock, the flush won't proceed until after we release these locks. This occurs once we've given up and ENOSPC has been reported. Hence if this one is the only dirty inode in the system, we'll get an ENOSPC prematurely. To fix this, remove the async flush from the allocation routines and move it to the top of the write path where we can do a synchronous flush and retry the write again. Only retry once as a second ENOSPC indicates that we really are ENOSPC. This avoids a page cache deadlock when trying to do this flush synchronously in the allocation layer that was identified by Mikulas Patocka. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_lrw.c | 18 +++++++++++++++++- fs/xfs/linux-2.6/xfs_sync.c | 25 ------------------------- fs/xfs/linux-2.6/xfs_sync.h | 1 - fs/xfs/xfs_iomap.c | 2 +- 4 files changed, 18 insertions(+), 28 deletions(-) (limited to 'fs/xfs/xfs_iomap.c') diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 7e90daa0d1d..9142192ccbe 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -751,10 +751,26 @@ start: goto relock; } } else { + int enospc = 0; + ssize_t ret2 = 0; + +write_retry: xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs, *offset, ioflags); - ret = generic_file_buffered_write(iocb, iovp, segs, + ret2 = generic_file_buffered_write(iocb, iovp, segs, pos, offset, count, ret); + /* + * if we just got an ENOSPC, flush the inode now we + * aren't holding any page locks and retry *once* + */ + if (ret2 == -ENOSPC && !enospc) { + error = xfs_flush_pages(xip, 0, -1, 0, FI_NONE); + if (error) + goto out_unlock_internal; + enospc = 1; + goto write_retry; + } + ret = ret2; } current->backing_dev_info = NULL; diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 88caafc8ef1..73cf8dc1973 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -426,31 +426,6 @@ xfs_syncd_queue_work( * heads, looking about for more room... */ STATIC void -xfs_flush_inode_work( - struct xfs_mount *mp, - void *arg) -{ - struct inode *inode = arg; - filemap_flush(inode->i_mapping); - iput(inode); -} - -void -xfs_flush_inode( - xfs_inode_t *ip) -{ - struct inode *inode = VFS_I(ip); - - igrab(inode); - xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work); - delay(msecs_to_jiffies(500)); -} - -/* - * This is the "bigger hammer" version of xfs_flush_inode_work... - * (IOW, "If at first you don't succeed, use a Bigger Hammer"). - */ -STATIC void xfs_flush_inodes_work( struct xfs_mount *mp, void *arg) diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index ec95e264805..6e83a35626e 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -44,7 +44,6 @@ int xfs_sync_fsdata(struct xfs_mount *mp, int flags); int xfs_quiesce_data(struct xfs_mount *mp); void xfs_quiesce_attr(struct xfs_mount *mp); -void xfs_flush_inode(struct xfs_inode *ip); void xfs_flush_inodes(struct xfs_inode *ip); int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 8b97d82d7a8..7b8b1707103 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -347,7 +347,7 @@ xfs_flush_space( case 0: if (ip->i_delayed_blks) { xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_flush_inode(ip); + delay(1); xfs_ilock(ip, XFS_ILOCK_EXCL); *fsynced = 1; } else { -- cgit v1.2.3-70-g09d2 From 8de2bf937a6bea8f0f775fd5399ba20c1a0c3d77 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 6 Apr 2009 18:49:12 +0200 Subject: xfs: remove xfs_flush_space The only thing we need to do now when we get an ENOSPC condition during delayed allocation reservation is flush all the other inodes with delalloc blocks on them and retry without EOF preallocation. Remove the unneeded mess that is xfs_flush_space() and just call xfs_flush_inodes() directly from xfs_iomap_write_delay(). Also, change the location of the retry label to avoid trying to do EOF preallocation because we don't want to do that at ENOSPC. This enables us to remove the BMAPI_SYNC flag as it is no longer used. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_iomap.c | 61 ++++++++++++++---------------------------------------- fs/xfs/xfs_iomap.h | 3 +-- 2 files changed, 16 insertions(+), 48 deletions(-) (limited to 'fs/xfs/xfs_iomap.c') diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 7b8b1707103..5aaa2d7ec15 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -337,38 +337,6 @@ xfs_iomap_eof_align_last_fsb( return 0; } -STATIC int -xfs_flush_space( - xfs_inode_t *ip, - int *fsynced, - int *ioflags) -{ - switch (*fsynced) { - case 0: - if (ip->i_delayed_blks) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - delay(1); - xfs_ilock(ip, XFS_ILOCK_EXCL); - *fsynced = 1; - } else { - *ioflags |= BMAPI_SYNC; - *fsynced = 2; - } - return 0; - case 1: - *fsynced = 2; - *ioflags |= BMAPI_SYNC; - return 0; - case 2: - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_flush_inodes(ip); - xfs_ilock(ip, XFS_ILOCK_EXCL); - *fsynced = 3; - return 0; - } - return 1; -} - STATIC int xfs_cmn_err_fsblock_zero( xfs_inode_t *ip, @@ -538,15 +506,9 @@ error_out: } /* - * If the caller is doing a write at the end of the file, - * then extend the allocation out to the file system's write - * iosize. We clean up any extra space left over when the - * file is closed in xfs_inactive(). - * - * For sync writes, we are flushing delayed allocate space to - * try to make additional space available for allocation near - * the filesystem full boundary - preallocation hurts in that - * situation, of course. + * If the caller is doing a write at the end of the file, then extend the + * allocation out to the file system's write iosize. We clean up any extra + * space left over when the file is closed in xfs_inactive(). */ STATIC int xfs_iomap_eof_want_preallocate( @@ -565,7 +527,7 @@ xfs_iomap_eof_want_preallocate( int n, error, imaps; *prealloc = 0; - if ((ioflag & BMAPI_SYNC) || (offset + count) <= ip->i_size) + if ((offset + count) <= ip->i_size) return 0; /* @@ -611,7 +573,7 @@ xfs_iomap_write_delay( xfs_extlen_t extsz; int nimaps; xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS]; - int prealloc, fsynced = 0; + int prealloc, flushed = 0; int error; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); @@ -627,12 +589,12 @@ xfs_iomap_write_delay( extsz = xfs_get_extsz_hint(ip); offset_fsb = XFS_B_TO_FSBT(mp, offset); -retry: error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, ioflag, imap, XFS_WRITE_IMAPS, &prealloc); if (error) return error; +retry: if (prealloc) { aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); ioalign = XFS_B_TO_FSBT(mp, aligned_offset); @@ -659,15 +621,22 @@ retry: /* * If bmapi returned us nothing, and if we didn't get back EDQUOT, - * then we must have run out of space - flush delalloc, and retry.. + * then we must have run out of space - flush all other inodes with + * delalloc blocks and retry without EOF preallocation. */ if (nimaps == 0) { xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE, ip, offset, count); - if (xfs_flush_space(ip, &fsynced, &ioflag)) + if (flushed) return XFS_ERROR(ENOSPC); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_flush_inodes(ip); + xfs_ilock(ip, XFS_ILOCK_EXCL); + + flushed = 1; error = 0; + prealloc = 0; goto retry; } diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index a1cc1322fc0..fdcf7b82747 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -40,8 +40,7 @@ typedef enum { BMAPI_IGNSTATE = (1 << 4), /* ignore unwritten state on read */ BMAPI_DIRECT = (1 << 5), /* direct instead of buffered write */ BMAPI_MMAP = (1 << 6), /* allocate for mmap write */ - BMAPI_SYNC = (1 << 7), /* sync write to flush delalloc space */ - BMAPI_TRYLOCK = (1 << 8), /* non-blocking request */ + BMAPI_TRYLOCK = (1 << 7), /* non-blocking request */ } bmapi_flags_t; -- cgit v1.2.3-70-g09d2