summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2010-09-24 18:40:15 +1000
committerAlex Elder <aelder@sgi.com>2010-10-18 15:07:52 -0500
commit65d0f20533c503b50bd5e7e86434512af7761eea (patch)
tree0e04d6898fc0c729886067a3daf79b10a7a19992
parent69d6cc76cff3573ceefda178b75e20878866fdc3 (diff)
xfs: split inode AG walking into separate code for reclaim
The reclaim walk requires different locking and has a slightly different walk algorithm, so separate it out so that it can be optimised separately. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Alex Elder <aelder@sgi.com>
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c202
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h2
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c3
-rw-r--r--fs/xfs/xfs_mount.c26
-rw-r--r--fs/xfs/xfs_mount.h2
6 files changed, 122 insertions, 115 deletions
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 3a1d229b478..b5cdf0ef39e 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -40,78 +40,46 @@
#include <linux/freezer.h>
-STATIC xfs_inode_t *
-xfs_inode_ag_lookup(
- struct xfs_mount *mp,
- struct xfs_perag *pag,
- uint32_t *first_index,
- int tag)
-{
- int nr_found;
- struct xfs_inode *ip;
-
- /*
- * use a gang lookup to find the next inode in the tree
- * as the tree is sparse and a gang lookup walks to find
- * the number of objects requested.
- */
- if (tag == XFS_ICI_NO_TAG) {
- nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
- (void **)&ip, *first_index, 1);
- } else {
- nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
- (void **)&ip, *first_index, 1, tag);
- }
- if (!nr_found)
- return NULL;
-
- /*
- * Update the index for the next lookup. Catch overflows
- * into the next AG range which can occur if we have inodes
- * in the last block of the AG and we are currently
- * pointing to the last inode.
- */
- *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
- if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
- return NULL;
- return ip;
-}
-
STATIC int
xfs_inode_ag_walk(
struct xfs_mount *mp,
struct xfs_perag *pag,
int (*execute)(struct xfs_inode *ip,
struct xfs_perag *pag, int flags),
- int flags,
- int tag,
- int exclusive,
- int *nr_to_scan)
+ int flags)
{
uint32_t first_index;
int last_error = 0;
int skipped;
+ int done;
restart:
+ done = 0;
skipped = 0;
first_index = 0;
do {
int error = 0;
+ int nr_found;
xfs_inode_t *ip;
- if (exclusive)
- write_lock(&pag->pag_ici_lock);
- else
- read_lock(&pag->pag_ici_lock);
- ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
- if (!ip) {
- if (exclusive)
- write_unlock(&pag->pag_ici_lock);
- else
- read_unlock(&pag->pag_ici_lock);
+ read_lock(&pag->pag_ici_lock);
+ nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
+ (void **)&ip, first_index, 1);
+ if (!nr_found) {
+ read_unlock(&pag->pag_ici_lock);
break;
}
+ /*
+ * Update the index for the next lookup. Catch overflows
+ * into the next AG range which can occur if we have inodes
+ * in the last block of the AG and we are currently
+ * pointing to the last inode.
+ */
+ first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+ if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+ done = 1;
+
/* execute releases pag->pag_ici_lock */
error = execute(ip, pag, flags);
if (error == EAGAIN) {
@@ -125,7 +93,7 @@ restart:
if (error == EFSCORRUPTED)
break;
- } while ((*nr_to_scan)--);
+ } while (!done);
if (skipped) {
delay(1);
@@ -134,73 +102,29 @@ restart:
return last_error;
}
-/*
- * Select the next per-ag structure to iterate during the walk. The reclaim
- * walk is optimised only to walk AGs with reclaimable inodes in them.
- */
-static struct xfs_perag *
-xfs_inode_ag_iter_next_pag(
- struct xfs_mount *mp,
- xfs_agnumber_t *first,
- int tag)
-{
- struct xfs_perag *pag = NULL;
-
- if (tag == XFS_ICI_RECLAIM_TAG) {
- int found;
- int ref;
-
- rcu_read_lock();
- found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
- (void **)&pag, *first, 1, tag);
- if (found <= 0) {
- rcu_read_unlock();
- return NULL;
- }
- *first = pag->pag_agno + 1;
- /* open coded pag reference increment */
- ref = atomic_inc_return(&pag->pag_ref);
- rcu_read_unlock();
- trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_);
- } else {
- pag = xfs_perag_get(mp, *first);
- (*first)++;
- }
- return pag;
-}
-
int
xfs_inode_ag_iterator(
struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip,
struct xfs_perag *pag, int flags),
- int flags,
- int tag,
- int exclusive,
- int *nr_to_scan)
+ int flags)
{
struct xfs_perag *pag;
int error = 0;
int last_error = 0;
xfs_agnumber_t ag;
- int nr;
- nr = nr_to_scan ? *nr_to_scan : INT_MAX;
ag = 0;
- while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) {
- error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
- exclusive, &nr);
+ while ((pag = xfs_perag_get(mp, ag))) {
+ ag = pag->pag_agno + 1;
+ error = xfs_inode_ag_walk(mp, pag, execute, flags);
xfs_perag_put(pag);
if (error) {
last_error = error;
if (error == EFSCORRUPTED)
break;
}
- if (nr <= 0)
- break;
}
- if (nr_to_scan)
- *nr_to_scan = nr;
return XFS_ERROR(last_error);
}
@@ -318,8 +242,7 @@ xfs_sync_data(
ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
- error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
- XFS_ICI_NO_TAG, 0, NULL);
+ error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags);
if (error)
return XFS_ERROR(error);
@@ -337,8 +260,7 @@ xfs_sync_attr(
{
ASSERT((flags & ~SYNC_WAIT) == 0);
- return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
- XFS_ICI_NO_TAG, 0, NULL);
+ return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags);
}
STATIC int
@@ -868,13 +790,72 @@ reclaim:
}
+/*
+ * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
+ * corrupted, we still want to try to reclaim all the inodes. If we don't,
+ * then a shut down during the filesystem unmount reclaim walk will leak all
+ * the unreclaimed inodes.
+ *
+ * Only AGs tagged XFS_ICI_RECLAIM_TAG are visited, and within each AG only
+ * inodes carrying that tag are looked up. @nr_to_scan is the caller's scan
+ * budget: it is decremented as inodes are processed and, once it has been
+ * used up, the walk stops rather than moving on to further AGs.
+ *
+ * Returns the last error reported by xfs_reclaim_inode(), or 0. Once
+ * EFSCORRUPTED has been recorded it is not overwritten by later errors.
+ */
+int
+xfs_reclaim_inodes_ag(
+	struct xfs_mount	*mp,
+	int			flags,
+	int			*nr_to_scan)
+{
+	struct xfs_perag	*pag;
+	int			error = 0;
+	int			last_error = 0;
+	xfs_agnumber_t		ag;
+
+	ag = 0;
+	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
+		unsigned long	first_index = 0;
+		int		done = 0;
+
+		/* resume the next AG search just past the AG we were given */
+		ag = pag->pag_agno + 1;
+
+		do {
+			struct xfs_inode *ip;
+			int	nr_found;
+
+			write_lock(&pag->pag_ici_lock);
+			nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
+						(void **)&ip, first_index, 1,
+						XFS_ICI_RECLAIM_TAG);
+			if (!nr_found) {
+				write_unlock(&pag->pag_ici_lock);
+				break;
+			}
+
+			/*
+			 * Update the index for the next lookup. Catch overflows
+			 * into the next AG range which can occur if we have inodes
+			 * in the last block of the AG and we are currently
+			 * pointing to the last inode.
+			 */
+			first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+			if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+				done = 1;
+
+			/*
+			 * NOTE(review): pag_ici_lock is still write-held here and
+			 * is not dropped in this function on this path, so
+			 * xfs_reclaim_inode() is presumably responsible for
+			 * releasing it - confirm against its definition.
+			 */
+			error = xfs_reclaim_inode(ip, pag, flags);
+			if (error && last_error != EFSCORRUPTED)
+				last_error = error;
+
+		} while (!done && (*nr_to_scan)--);
+
+		xfs_perag_put(pag);
+
+		/*
+		 * The scan budget is shared across all AGs. Without this check
+		 * the post-decrement above leaves the counter negative (and
+		 * therefore "true") and every remaining AG would be walked
+		 * with no limit at all.
+		 */
+		if (*nr_to_scan <= 0)
+			break;
+	}
+	return XFS_ERROR(last_error);
+}
+
int
xfs_reclaim_inodes(
xfs_mount_t *mp,
int mode)
{
- return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
- XFS_ICI_RECLAIM_TAG, 1, NULL);
+ int nr_to_scan = INT_MAX;
+
+ return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
}
/*
@@ -896,17 +877,16 @@ xfs_reclaim_inode_shrink(
if (!(gfp_mask & __GFP_FS))
return -1;
- xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
- XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
- /* if we don't exhaust the scan, don't bother coming back */
+ xfs_reclaim_inodes_ag(mp, 0, &nr_to_scan);
+ /* terminate if we don't exhaust the scan */
if (nr_to_scan > 0)
return -1;
}
reclaimable = 0;
ag = 0;
- while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag,
- XFS_ICI_RECLAIM_TAG))) {
+ while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
+ ag = pag->pag_agno + 1;
reclaimable += pag->pag_ici_reclaimable;
xfs_perag_put(pag);
}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index fe78726196f..e8a352896d2 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -50,7 +50,7 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
int xfs_inode_ag_iterator(struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
- int flags, int tag, int write_lock, int *nr_to_scan);
+ int flags);
void xfs_inode_shrinker_register(struct xfs_mount *mp);
void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index 2a1d4fbd9ed..286dc201c5b 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -124,7 +124,7 @@ DEFINE_EVENT(xfs_perag_class, name, \
unsigned long caller_ip), \
TP_ARGS(mp, agno, refcount, caller_ip))
DEFINE_PERAG_REF_EVENT(xfs_perag_get);
-DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim);
+DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
DEFINE_PERAG_REF_EVENT(xfs_perag_put);
DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 7a71336f792..ac11fbef37f 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -918,8 +918,7 @@ xfs_qm_dqrele_all_inodes(
uint flags)
{
ASSERT(mp->m_quotainfo);
- xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags,
- XFS_ICI_NO_TAG, 0, NULL);
+ xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags);
}
/*------------------------------------------------------------------------*/
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 912101d280b..d66e87c7c3a 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -219,6 +219,32 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
return pag;
}
+/*
+ * Look up the first perag at or after AG index @first that has @tag set in
+ * the mount's perag radix tree. When one is found its reference count is
+ * bumped and the perag is returned; the callers in this change drop that
+ * reference with xfs_perag_put(). Returns NULL when no tagged perag exists
+ * from @first onwards.
+ */
+struct xfs_perag *
+xfs_perag_get_tag(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		first,
+	int			tag)
+{
+	struct xfs_perag	*pag;
+	int			nr_found;
+	int			refcount;
+
+	rcu_read_lock();
+	nr_found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
+					(void **)&pag, first, 1, tag);
+	if (nr_found > 0) {
+		/* take the reference before leaving the RCU read section */
+		refcount = atomic_inc_return(&pag->pag_ref);
+		rcu_read_unlock();
+		trace_xfs_perag_get_tag(mp, pag->pag_agno, refcount, _RET_IP_);
+		return pag;
+	}
+
+	rcu_read_unlock();
+	return NULL;
+}
+}
+
void
xfs_perag_put(struct xfs_perag *pag)
{
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 622da2179a5..7ab240930ba 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -327,6 +327,8 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
* perag get/put wrappers for ref counting
*/
struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno);
+struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno,
+ int tag);
void xfs_perag_put(struct xfs_perag *pag);
/*