From 33b8f7c2479dfcbc5c27174e44b5f659d9f33c70 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 8 Jul 2011 14:34:39 +0200 Subject: xfs: improve sync behaviour in the face of aggressive dirtying The following script from Wu Fengguang shows very bad behaviour in XFS when aggressively dirtying data during a sync on XFS, with sync times up to almost 10 times as long as ext4. A large part of the issue is that XFS writes data out itself two times in the ->sync_fs method, overriding the livelock protection in the core writeback code, and another issue is the lock-less xfs_ioend_wait call, which doesn't prevent new ioend from being queue up while waiting for the count to reach zero. This patch removes the XFS-internal sync calls and relies on the VFS to do it's work just like all other filesystems do. Note that the i_iocount wait which is rather suboptimal is simply removed here. We already do it in ->write_inode, which keeps the current supoptimal behaviour. We'll eventually need to remove that as well, but that's material for a separate commit. ------------------------------ snip ------------------------------ #!/bin/sh umount /dev/sda7 mkfs.xfs -f /dev/sda7 # mkfs.ext4 /dev/sda7 # mkfs.btrfs /dev/sda7 mount /dev/sda7 /fs echo $((50<<20)) > /proc/sys/vm/dirty_bytes pid= for i in `seq 10` do dd if=/dev/zero of=/fs/zero-$i bs=1M count=1000 & pid="$pid $!" done sleep 1 tic=$(date +'%s') sync tac=$(date +'%s') echo echo sync time: $((tac-tic)) egrep '(Dirty|Writeback|NFS_Unstable)' /proc/meminfo pidof dd > /dev/null && { kill -9 $pid; echo sync NOT livelocked; } ------------------------------ snip ------------------------------ Signed-off-by: Christoph Hellwig Reported-by: Wu Fengguang Reviewed-by: Alex Elder Reviewed-by: Dave Chinner --- fs/xfs/linux-2.6/xfs_sync.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_sync.c') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 8ecad5ff9f9..f54e8ee1ed6 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -359,14 +359,12 @@ xfs_quiesce_data( { int error, error2 = 0; - /* push non-blocking */ - xfs_sync_data(mp, 0); xfs_qm_sync(mp, SYNC_TRYLOCK); - - /* push and block till complete */ - xfs_sync_data(mp, SYNC_WAIT); xfs_qm_sync(mp, SYNC_WAIT); + /* force out the newly dirtied log buffers */ + xfs_log_force(mp, XFS_LOG_SYNC); + /* write superblock and hoover up shutdown errors */ error = xfs_sync_fsdata(mp); -- cgit v1.2.3-70-g09d2 From adab0f67d1cdaf468bbc311bce4d61f17626a536 Mon Sep 17 00:00:00 2001 From: Chandra Seetharaman Date: Wed, 29 Jun 2011 22:10:14 +0000 Subject: xfs: Remove the second parameter to xfs_sb_count() Remove the second parameter to xfs_sb_count() since all callers of the function set them. Also, fix the header comment regarding it being called periodically. Signed-off-by: Chandra Seetharaman Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_sync.c | 2 +- fs/xfs/xfs_mount.c | 15 +++++---------- fs/xfs/xfs_mount.h | 2 +- 3 files changed, 7 insertions(+), 12 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_sync.c') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index f54e8ee1ed6..5cc158e52d4 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -434,7 +434,7 @@ xfs_quiesce_attr( WARN_ON(atomic_read(&mp->m_active_trans) != 0); /* Push the superblock and write an unmount record */ - error = xfs_log_sbcount(mp, 1); + error = xfs_log_sbcount(mp); if (error) xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. " "Frozen image may not be consistent."); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 1fe643aa0ef..7f25245da28 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1521,7 +1521,7 @@ xfs_unmountfs( xfs_warn(mp, "Unable to free reserved block pool. " "Freespace may not be correct on next mount."); - error = xfs_log_sbcount(mp, 1); + error = xfs_log_sbcount(mp); if (error) xfs_warn(mp, "Unable to update superblock counters. " "Freespace may not be correct on next mount."); @@ -1557,18 +1557,14 @@ xfs_fs_writable(xfs_mount_t *mp) /* * xfs_log_sbcount * - * Called either periodically to keep the on disk superblock values - * roughly up to date or from unmount to make sure the values are - * correct on a clean unmount. + * Sync the superblock counters to disk. * * Note this code can be called during the process of freezing, so - * we may need to use the transaction allocator which does not not + * we may need to use the transaction allocator which does not * block when the transaction subsystem is in its frozen state. */ int -xfs_log_sbcount( - xfs_mount_t *mp, - uint sync) +xfs_log_sbcount(xfs_mount_t *mp) { xfs_trans_t *tp; int error; @@ -1594,8 +1590,7 @@ xfs_log_sbcount( } xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS); - if (sync) - xfs_trans_set_sync(tp); + xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0); return error; } diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 3d68bb267c5..bb24dac42a2 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -371,7 +371,7 @@ typedef struct xfs_mod_sb { int64_t msb_delta; /* Change to make to specified field */ } xfs_mod_sb_t; -extern int xfs_log_sbcount(xfs_mount_t *, uint); +extern int xfs_log_sbcount(xfs_mount_t *); extern __uint64_t xfs_default_resblks(xfs_mount_t *mp); extern int xfs_mountfs(xfs_mount_t *mp); -- cgit v1.2.3-70-g09d2 From 8daaa83145ef1f0a146680618328dbbd0fa76939 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 8 Jul 2011 14:14:46 +1000 Subject: xfs: make use of new shrinker callout for the inode cache Convert the inode reclaim shrinker to use the new per-sb shrinker operations. This allows much bigger reclaim batches to be used, and allows the XFS inode cache to be shrunk in proportion with the VFS dentry and inode caches. This avoids the problem of the VFS caches being shrunk significantly before the XFS inode cache is shrunk resulting in imbalances in the caches during reclaim. Signed-off-by: Dave Chinner Signed-off-by: Al Viro --- fs/xfs/linux-2.6/xfs_super.c | 26 ++++++++++------ fs/xfs/linux-2.6/xfs_sync.c | 71 +++++++++++++++++--------------------------- fs/xfs/linux-2.6/xfs_sync.h | 5 ++-- 3 files changed, 46 insertions(+), 56 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_sync.c') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index a1a881e68a9..a9c6ccff7b4 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1025,11 +1025,6 @@ xfs_fs_put_super( { struct xfs_mount *mp = XFS_M(sb); - /* - * Unregister the memory shrinker before we tear down the mount - * structure so we don't have memory reclaim racing with us here. - */ - xfs_inode_shrinker_unregister(mp); xfs_syncd_stop(mp); /* @@ -1416,8 +1411,6 @@ xfs_fs_fill_super( if (error) goto out_filestream_unmount; - xfs_inode_shrinker_register(mp); - error = xfs_mountfs(mp); if (error) goto out_syncd_stop; @@ -1440,7 +1433,6 @@ xfs_fs_fill_super( return 0; out_syncd_stop: - xfs_inode_shrinker_unregister(mp); xfs_syncd_stop(mp); out_filestream_unmount: xfs_filestream_unmount(mp); @@ -1465,7 +1457,6 @@ xfs_fs_fill_super( } fail_unmount: - xfs_inode_shrinker_unregister(mp); xfs_syncd_stop(mp); /* @@ -1491,6 +1482,21 @@ xfs_fs_mount( return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); } +static int +xfs_fs_nr_cached_objects( + struct super_block *sb) +{ + return xfs_reclaim_inodes_count(XFS_M(sb)); +} + +static void +xfs_fs_free_cached_objects( + struct super_block *sb, + int nr_to_scan) +{ + xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan); +} + static const struct super_operations xfs_super_operations = { .alloc_inode = xfs_fs_alloc_inode, .destroy_inode = xfs_fs_destroy_inode, @@ -1504,6 +1510,8 @@ static const struct super_operations xfs_super_operations = { .statfs = xfs_fs_statfs, .remount_fs = xfs_fs_remount, .show_options = xfs_fs_show_options, + .nr_cached_objects = xfs_fs_nr_cached_objects, + .free_cached_objects = xfs_fs_free_cached_objects, }; static struct file_system_type xfs_fs_type = { diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 8ecad5ff9f9..9bd7e895a4e 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -179,6 +179,8 @@ restart: if (error == EFSCORRUPTED) break; + cond_resched(); + } while (nr_found && !done); if (skipped) { @@ -986,6 +988,8 @@ restart: *nr_to_scan -= XFS_LOOKUP_BATCH; + cond_resched(); + } while (nr_found && !done && *nr_to_scan > 0); if (trylock && !done) @@ -1003,7 +1007,7 @@ restart: * ensure that when we get more reclaimers than AGs we block rather * than spin trying to execute reclaim. */ - if (trylock && skipped && *nr_to_scan > 0) { + if (skipped && (flags & SYNC_WAIT) && *nr_to_scan > 0) { trylock = 0; goto restart; } @@ -1021,44 +1025,38 @@ xfs_reclaim_inodes( } /* - * Inode cache shrinker. + * Scan a certain number of inodes for reclaim. * * When called we make sure that there is a background (fast) inode reclaim in - * progress, while we will throttle the speed of reclaim via doiing synchronous + * progress, while we will throttle the speed of reclaim via doing synchronous * reclaim of inodes. That means if we come across dirty inodes, we wait for * them to be cleaned, which we hope will not be very long due to the * background walker having already kicked the IO off on those dirty inodes. */ -static int -xfs_reclaim_inode_shrink( - struct shrinker *shrink, - struct shrink_control *sc) +void +xfs_reclaim_inodes_nr( + struct xfs_mount *mp, + int nr_to_scan) { - struct xfs_mount *mp; - struct xfs_perag *pag; - xfs_agnumber_t ag; - int reclaimable; - int nr_to_scan = sc->nr_to_scan; - gfp_t gfp_mask = sc->gfp_mask; - - mp = container_of(shrink, struct xfs_mount, m_inode_shrink); - if (nr_to_scan) { - /* kick background reclaimer and push the AIL */ - xfs_syncd_queue_reclaim(mp); - xfs_ail_push_all(mp->m_ail); + /* kick background reclaimer and push the AIL */ + xfs_syncd_queue_reclaim(mp); + xfs_ail_push_all(mp->m_ail); - if (!(gfp_mask & __GFP_FS)) - return -1; + xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan); +} - xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, - &nr_to_scan); - /* terminate if we don't exhaust the scan */ - if (nr_to_scan > 0) - return -1; - } +/* + * Return the number of reclaimable inodes in the filesystem for + * the shrinker to determine how much to reclaim. + */ +int +xfs_reclaim_inodes_count( + struct xfs_mount *mp) +{ + struct xfs_perag *pag; + xfs_agnumber_t ag = 0; + int reclaimable = 0; - reclaimable = 0; - ag = 0; while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { ag = pag->pag_agno + 1; reclaimable += pag->pag_ici_reclaimable; @@ -1067,18 +1065,3 @@ xfs_reclaim_inode_shrink( return reclaimable; } -void -xfs_inode_shrinker_register( - struct xfs_mount *mp) -{ - mp->m_inode_shrink.shrink = xfs_reclaim_inode_shrink; - mp->m_inode_shrink.seeks = DEFAULT_SEEKS; - register_shrinker(&mp->m_inode_shrink); -} - -void -xfs_inode_shrinker_unregister( - struct xfs_mount *mp) -{ - unregister_shrinker(&mp->m_inode_shrink); -} diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index e3a6ad27415..2e156859776 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -43,6 +43,8 @@ void xfs_quiesce_attr(struct xfs_mount *mp); void xfs_flush_inodes(struct xfs_inode *ip); int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); +int xfs_reclaim_inodes_count(struct xfs_mount *mp); +void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip); @@ -54,7 +56,4 @@ int xfs_inode_ag_iterator(struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), int flags); -void xfs_inode_shrinker_register(struct xfs_mount *mp); -void xfs_inode_shrinker_unregister(struct xfs_mount *mp); - #endif -- cgit v1.2.3-70-g09d2