diff options
Diffstat (limited to 'fs/xfs/linux-2.6/xfs_sync.c')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.c | 69 |
1 files changed, 66 insertions, 3 deletions
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index af3275965c7..debe2822c93 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -461,7 +461,6 @@ xfs_sync_worker( error = xfs_fs_log_dummy(mp); else xfs_log_force(mp, 0); - xfs_reclaim_inodes(mp, 0); error = xfs_qm_sync(mp, SYNC_TRYLOCK); } @@ -470,6 +469,52 @@ xfs_sync_worker( } /* + * Queue a new inode reclaim pass if there are reclaimable inodes and there + * isn't a reclaim pass already in progress. By default it runs every 5s based + * on the xfs syncd work default of 30s. Perhaps this should have it's own + * tunable, but that can be done if this method proves to be ineffective or too + * aggressive. + */ +static void +xfs_syncd_queue_reclaim( + struct xfs_mount *mp) +{ + + /* + * We can have inodes enter reclaim after we've shut down the syncd + * workqueue during unmount, so don't allow reclaim work to be queued + * during unmount. + */ + if (!(mp->m_super->s_flags & MS_ACTIVE)) + return; + + rcu_read_lock(); + if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { + queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work, + msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10)); + } + rcu_read_unlock(); +} + +/* + * This is a fast pass over the inode cache to try to get reclaim moving on as + * many inodes as possible in a short period of time. It kicks itself every few + * seconds, as well as being kicked by the inode cache shrinker when memory + * goes low. It scans as quickly as possible avoiding locked inodes or those + * already being flushed, and once done schedules a future pass. + */ +STATIC void +xfs_reclaim_worker( + struct work_struct *work) +{ + struct xfs_mount *mp = container_of(to_delayed_work(work), + struct xfs_mount, m_reclaim_work); + + xfs_reclaim_inodes(mp, SYNC_TRYLOCK); + xfs_syncd_queue_reclaim(mp); +} + +/* * Flush delayed allocate data, attempting to free up reserved space * from existing allocations. At this point a new allocation attempt * has failed with ENOSPC and we are in the process of scratching our @@ -508,7 +553,10 @@ xfs_syncd_init( { INIT_WORK(&mp->m_flush_work, xfs_flush_worker); INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker); + INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); + xfs_syncd_queue_sync(mp); + xfs_syncd_queue_reclaim(mp); return 0; } @@ -518,6 +566,7 @@ xfs_syncd_stop( struct xfs_mount *mp) { cancel_delayed_work_sync(&mp->m_sync_work); + cancel_delayed_work_sync(&mp->m_reclaim_work); cancel_work_sync(&mp->m_flush_work); } @@ -537,6 +586,10 @@ __xfs_inode_set_reclaim_tag( XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), XFS_ICI_RECLAIM_TAG); spin_unlock(&ip->i_mount->m_perag_lock); + + /* schedule periodic background inode reclaim */ + xfs_syncd_queue_reclaim(ip->i_mount); + trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno, -1, _RET_IP_); } @@ -953,7 +1006,13 @@ xfs_reclaim_inodes( } /* - * Shrinker infrastructure. + * Inode cache shrinker. + * + * When called we make sure that there is a background (fast) inode reclaim in + * progress, while we will throttle the speed of reclaim via doiing synchronous + * reclaim of inodes. That means if we come across dirty inodes, we wait for + * them to be cleaned, which we hope will not be very long due to the + * background walker having already kicked the IO off on those dirty inodes. */ static int xfs_reclaim_inode_shrink( @@ -968,10 +1027,14 @@ xfs_reclaim_inode_shrink( mp = container_of(shrink, struct xfs_mount, m_inode_shrink); if (nr_to_scan) { + /* kick background reclaimer */ + xfs_syncd_queue_reclaim(mp); + if (!(gfp_mask & __GFP_FS)) return -1; - xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan); + xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, + &nr_to_scan); /* terminate if we don't exhaust the scan */ if (nr_to_scan > 0) return -1; |