From 1a387d3be2b30c90f20d49a3497a8fc0693a9d18 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 24 Aug 2010 11:46:31 +1000 Subject: xfs: dummy transactions should not dirty VFS state When we need to cover the log, we issue dummy transactions to ensure the current log tail is on disk. Unfortunately we currently use the root inode in the dummy transaction, and the act of committing the transaction dirties the inode at the VFS level. As a result, the VFS writeback of the dirty inode will prevent the filesystem from idling long enough for the log covering state machine to complete. The state machine gets stuck in a loop issuing new dummy transactions to cover the log and never makes progress. To avoid this problem, the dummy transactions should not cause externally visible state changes. To ensure this occurs, make sure that dummy transactions log an unchanging field in the superblock as it's state is never propagated outside the filesystem. This allows the log covering state machine to complete successfully and the filesystem now correctly enters a fully idle state about 90s after the last modification was made. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_sync.c | 42 ++++++------------------------------------ 1 file changed, 6 insertions(+), 36 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_sync.c') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index dfcbd98d159..d59c4a65d49 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -34,6 +34,7 @@ #include "xfs_inode_item.h" #include "xfs_quota.h" #include "xfs_trace.h" +#include "xfs_fsops.h" #include #include @@ -340,38 +341,6 @@ xfs_sync_attr( XFS_ICI_NO_TAG, 0, NULL); } -STATIC int -xfs_commit_dummy_trans( - struct xfs_mount *mp, - uint flags) -{ - struct xfs_inode *ip = mp->m_rootip; - struct xfs_trans *tp; - int error; - - /* - * Put a dummy transaction in the log to tell recovery - * that all others are OK. - */ - tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); - error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); - if (error) { - xfs_trans_cancel(tp, 0); - return error; - } - - xfs_ilock(ip, XFS_ILOCK_EXCL); - - xfs_trans_ijoin(tp, ip); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - error = xfs_trans_commit(tp, 0); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - - /* the log force ensures this transaction is pushed to disk */ - xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0); - return error; -} - STATIC int xfs_sync_fsdata( struct xfs_mount *mp) @@ -432,7 +401,7 @@ xfs_quiesce_data( /* mark the log as covered if needed */ if (xfs_log_need_covered(mp)) - error2 = xfs_commit_dummy_trans(mp, SYNC_WAIT); + error2 = xfs_fs_log_dummy(mp, SYNC_WAIT); /* flush data-only devices */ if (mp->m_rtdev_targp) @@ -563,7 +532,7 @@ xfs_flush_inodes( /* * Every sync period we need to unpin all items, reclaim inodes and sync * disk quotas. We might need to cover the log to indicate that the - * filesystem is idle. + * filesystem is idle and not frozen. */ STATIC void xfs_sync_worker( @@ -577,8 +546,9 @@ xfs_sync_worker( xfs_reclaim_inodes(mp, 0); /* dgc: errors ignored here */ error = xfs_qm_sync(mp, SYNC_TRYLOCK); - if (xfs_log_need_covered(mp)) - error = xfs_commit_dummy_trans(mp, 0); + if (mp->m_super->s_frozen == SB_UNFROZEN && + xfs_log_need_covered(mp)) + error = xfs_fs_log_dummy(mp, 0); } mp->m_sync_seq++; wake_up(&mp->m_wait_single_sync_task); -- cgit v1.2.3-70-g09d2 From 081003fff467ea0e727f66d5d435b4f473a789b3 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 1 Oct 2010 07:43:54 +0000 Subject: xfs: properly account for reclaimed inodes When marking an inode reclaimable, a per-AG counter is increased, the inode is tagged reclaimable in its per-AG tree, and, when this is the first reclaimable inode in the AG, the AG entry in the per-mount tree is also tagged. When an inode is finally reclaimed, however, it is only deleted from the per-AG tree. Neither the counter is decreased, nor is the parent tree's AG entry untagged properly. Since the tags in the per-mount tree are not cleared, the inode shrinker iterates over all AGs that have had reclaimable inodes at one point in time. The counters on the other hand signal an increasing amount of slab objects to reclaim. Since "70e60ce xfs: convert inode shrinker to per-filesystem context" this is not a real issue anymore because the shrinker bails out after one iteration. But the problem was observable on a machine running v2.6.34, where the reclaimable work increased and each process going into direct reclaim eventually got stuck on the xfs inode shrinking path, trying to scan several million objects. Fix this by properly unwinding the reclaimable-state tracking of an inode when it is reclaimed. Signed-off-by: Johannes Weiner Cc: stable@kernel.org Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_sync.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_sync.c') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index d59c4a65d49..81976ffed7d 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -668,14 +668,11 @@ xfs_inode_set_reclaim_tag( xfs_perag_put(pag); } -void -__xfs_inode_clear_reclaim_tag( - xfs_mount_t *mp, +STATIC void +__xfs_inode_clear_reclaim( xfs_perag_t *pag, xfs_inode_t *ip) { - radix_tree_tag_clear(&pag->pag_ici_root, - XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); pag->pag_ici_reclaimable--; if (!pag->pag_ici_reclaimable) { /* clear the reclaim tag from the perag radix tree */ @@ -689,6 +686,17 @@ __xfs_inode_clear_reclaim_tag( } } +void +__xfs_inode_clear_reclaim_tag( + xfs_mount_t *mp, + xfs_perag_t *pag, + xfs_inode_t *ip) +{ + radix_tree_tag_clear(&pag->pag_ici_root, + XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); + __xfs_inode_clear_reclaim(pag, ip); +} + /* * Inodes in different states need to be treated differently, and the return * value of xfs_iflush is not sufficient to get this right. The following table @@ -838,6 +846,7 @@ reclaim: if (!radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) ASSERT(0); + __xfs_inode_clear_reclaim(pag, ip); write_unlock(&pag->pag_ici_lock); /* -- cgit v1.2.3-70-g09d2