From c859cdd1da008b3825555be3242908088a3de366 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 23 Aug 2011 08:28:10 +0000 Subject: xfs: defer AIO/DIO completions We really shouldn't complete AIO or DIO requests until we have finished the unwritten extent conversion and size update. This means fsync never has to pick up any ioends as all work has been completed when signalling I/O completion. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/xfs_aops.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/xfs/xfs_aops.h') diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index 71f721e1a71..ce3dcb50762 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h @@ -47,6 +47,7 @@ typedef struct xfs_ioend { unsigned int io_type; /* delalloc / unwritten */ int io_error; /* I/O error code */ atomic_t io_remaining; /* hold count */ + unsigned int io_isasync : 1; /* needs aio_complete */ struct inode *io_inode; /* file being written to */ struct buffer_head *io_buffer_head;/* buffer linked list head */ struct buffer_head *io_buffer_tail;/* buffer linked list tail */ -- cgit v1.2.3-70-g09d2 From 4a06fd262dbeb70a2c315f7259e063efa493fe3d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 23 Aug 2011 08:28:13 +0000 Subject: xfs: remove i_iocount We now have an i_dio_count filed and surrounding infrastructure to wait for direct I/O completion instead of i_icount, and we have never needed to iocount waits for buffered I/O given that we only set the page uptodate after finishing all required work. Thus remove i_iocount, and replace the actually needed waits with calls to inode_dio_wait. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/xfs_aops.c | 39 +-------------------------------------- fs/xfs/xfs_aops.h | 3 --- fs/xfs/xfs_file.c | 8 ++------ fs/xfs/xfs_iget.c | 2 -- fs/xfs/xfs_inode.h | 1 - fs/xfs/xfs_iops.c | 4 ++-- fs/xfs/xfs_super.c | 7 +------ fs/xfs/xfs_sync.c | 8 ++------ fs/xfs/xfs_vnodeops.c | 4 +--- 9 files changed, 9 insertions(+), 67 deletions(-) (limited to 'fs/xfs/xfs_aops.h') diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index e1ff0770784..46bba3e0af4 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -38,40 +38,6 @@ #include #include - -/* - * Prime number of hash buckets since address is used as the key. - */ -#define NVSYNC 37 -#define to_ioend_wq(v) (&xfs_ioend_wq[((unsigned long)v) % NVSYNC]) -static wait_queue_head_t xfs_ioend_wq[NVSYNC]; - -void __init -xfs_ioend_init(void) -{ - int i; - - for (i = 0; i < NVSYNC; i++) - init_waitqueue_head(&xfs_ioend_wq[i]); -} - -void -xfs_ioend_wait( - xfs_inode_t *ip) -{ - wait_queue_head_t *wq = to_ioend_wq(ip); - - wait_event(*wq, (atomic_read(&ip->i_iocount) == 0)); -} - -STATIC void -xfs_ioend_wake( - xfs_inode_t *ip) -{ - if (atomic_dec_and_test(&ip->i_iocount)) - wake_up(to_ioend_wq(ip)); -} - void xfs_count_page_state( struct page *page, @@ -115,7 +81,6 @@ xfs_destroy_ioend( xfs_ioend_t *ioend) { struct buffer_head *bh, *next; - struct xfs_inode *ip = XFS_I(ioend->io_inode); for (bh = ioend->io_buffer_head; bh; bh = next) { next = bh->b_private; @@ -127,7 +92,7 @@ xfs_destroy_ioend( aio_complete(ioend->io_iocb, ioend->io_result, 0); inode_dio_done(ioend->io_inode); } - xfs_ioend_wake(ip); + mempool_free(ioend, xfs_ioend_pool); } @@ -298,7 +263,6 @@ xfs_alloc_ioend( ioend->io_inode = inode; ioend->io_buffer_head = NULL; ioend->io_buffer_tail = NULL; - atomic_inc(&XFS_I(ioend->io_inode)->i_iocount); ioend->io_offset = 0; ioend->io_size = 0; ioend->io_iocb = NULL; @@ -558,7 +522,6 @@ xfs_cancel_ioend( unlock_buffer(bh); } while ((bh = next_bh) != NULL); - xfs_ioend_wake(XFS_I(ioend->io_inode)); mempool_free(ioend, xfs_ioend_pool); } while ((ioend = next) != NULL); } diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index ce3dcb50762..116dd5c3703 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h @@ -61,9 +61,6 @@ typedef struct xfs_ioend { extern const struct address_space_operations xfs_address_space_operations; extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int); -extern void xfs_ioend_init(void); -extern void xfs_ioend_wait(struct xfs_inode *); - extern void xfs_count_page_state(struct page *, int *, int *); #endif /* __XFS_AOPS_H__ */ diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index cbbac5cc9c2..ee63c4fb363 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -149,10 +149,6 @@ xfs_file_fsync( xfs_iflags_clear(ip, XFS_ITRUNCATED); - xfs_ilock(ip, XFS_IOLOCK_SHARED); - xfs_ioend_wait(ip); - xfs_iunlock(ip, XFS_IOLOCK_SHARED); - if (mp->m_flags & XFS_MOUNT_BARRIER) { /* * If we have an RT and/or log subvolume we need to make sure @@ -758,7 +754,7 @@ restart: * the dio layer. To avoid the problem with aio, we also need to wait for * outstanding IOs to complete so that unwritten extent conversion is completed * before we try to map the overlapping block. This is currently implemented by - * hitting it with a big hammer (i.e. xfs_ioend_wait()). + * hitting it with a big hammer (i.e. inode_dio_wait()). * * Returns with locks held indicated by @iolock and errors indicated by * negative return values. @@ -821,7 +817,7 @@ xfs_file_dio_aio_write( * otherwise demote the lock if we had to flush cached pages */ if (unaligned_io) - xfs_ioend_wait(ip); + inode_dio_wait(inode); else if (*iolock == XFS_IOLOCK_EXCL) { xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); *iolock = XFS_IOLOCK_SHARED; diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 7759812c1bb..0fa98b1c70e 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -75,7 +75,6 @@ xfs_inode_alloc( return NULL; } - ASSERT(atomic_read(&ip->i_iocount) == 0); ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(completion_done(&ip->i_flush)); @@ -150,7 +149,6 @@ xfs_inode_free( } /* asserts to verify all state is correct here */ - ASSERT(atomic_read(&ip->i_iocount) == 0); ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(completion_done(&ip->i_flush)); diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 2380a4bcbec..760140d1dd6 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -257,7 +257,6 @@ typedef struct xfs_inode { xfs_fsize_t i_size; /* in-memory size */ xfs_fsize_t i_new_size; /* size when write completes */ - atomic_t i_iocount; /* outstanding I/O count */ /* VFS inode */ struct inode i_vnode; /* embedded VFS inode */ diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 32aca87bde5..e041e917c1d 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -840,9 +840,9 @@ xfs_setattr_size( } /* - * Wait for all I/O to complete. + * Wait for all direct I/O to complete. */ - xfs_ioend_wait(ip); + inode_dio_wait(inode); error = -block_truncate_page(inode->i_mapping, iattr->ia_size, xfs_get_blocks); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 2366c54cc4f..54d5e102ffe 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -796,8 +796,6 @@ xfs_fs_destroy_inode( if (is_bad_inode(inode)) goto out_reclaim; - xfs_ioend_wait(ip); - ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); /* @@ -837,7 +835,6 @@ xfs_fs_inode_init_once( inode_init_once(VFS_I(ip)); /* xfs inode */ - atomic_set(&ip->i_iocount, 0); atomic_set(&ip->i_pincount, 0); spin_lock_init(&ip->i_flags_lock); init_waitqueue_head(&ip->i_ipin_wait); @@ -914,9 +911,8 @@ xfs_fs_write_inode( * of forcing it all the way to stable storage using a * synchronous transaction we let the log force inside the * ->sync_fs call do that for thus, which reduces the number - * of synchronous log foces dramatically. + * of synchronous log forces dramatically. */ - xfs_ioend_wait(ip); error = xfs_log_inode(ip); if (error) goto out; @@ -1681,7 +1677,6 @@ init_xfs_fs(void) printk(KERN_INFO XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled\n"); - xfs_ioend_init(); xfs_dir_startup(); error = xfs_init_zones(); diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c index 90cc197e043..bf2b38c21ca 100644 --- a/fs/xfs/xfs_sync.c +++ b/fs/xfs/xfs_sync.c @@ -227,21 +227,17 @@ xfs_sync_inode_data( int error = 0; if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) - goto out_wait; + return 0; if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) { if (flags & SYNC_TRYLOCK) - goto out_wait; + return 0; xfs_ilock(ip, XFS_IOLOCK_SHARED); } error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ? 0 : XBF_ASYNC, FI_NONE); xfs_iunlock(ip, XFS_IOLOCK_SHARED); - - out_wait: - if (flags & SYNC_WAIT) - xfs_ioend_wait(ip); return error; } diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 51fc429527b..c2ff0fc8656 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -647,8 +647,6 @@ xfs_inactive( if (truncate) { xfs_ilock(ip, XFS_IOLOCK_EXCL); - xfs_ioend_wait(ip); - error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, @@ -2076,7 +2074,7 @@ xfs_free_file_space( if (need_iolock) { xfs_ilock(ip, XFS_IOLOCK_EXCL); /* wait for the completion of any pending DIOs */ - xfs_ioend_wait(ip); + inode_dio_wait(VFS_I(ip)); } rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); -- cgit v1.2.3-70-g09d2