diff options
Diffstat (limited to 'fs/xfs/linux-2.6/xfs_sync.c')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.c | 103 |
1 files changed, 76 insertions, 27 deletions
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index afb0d7cfad1..e22f0057d21 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -53,14 +53,30 @@ xfs_inode_ag_walk_grab( { struct inode *inode = VFS_I(ip); + ASSERT(rcu_read_lock_held()); + + /* + * check for stale RCU freed inode + * + * If the inode has been reallocated, it doesn't matter if it's not in + * the AG we are walking - we are walking for writeback, so if it + * passes all the "valid inode" checks and is dirty, then we'll write + * it back anyway. If it has been reallocated and still being + * initialised, the XFS_INEW check below will catch it. + */ + spin_lock(&ip->i_flags_lock); + if (!ip->i_ino) + goto out_unlock_noent; + + /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ + if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) + goto out_unlock_noent; + spin_unlock(&ip->i_flags_lock); + /* nothing to sync during shutdown */ if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return EFSCORRUPTED; - /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ - if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) - return ENOENT; - /* If we can't grab the inode, it must on it's way to reclaim. */ if (!igrab(inode)) return ENOENT; @@ -72,6 +88,10 @@ xfs_inode_ag_walk_grab( /* inode is valid */ return 0; + +out_unlock_noent: + spin_unlock(&ip->i_flags_lock); + return ENOENT; } STATIC int @@ -98,12 +118,12 @@ restart: int error = 0; int i; - read_lock(&pag->pag_ici_lock); + rcu_read_lock(); nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void **)batch, first_index, XFS_LOOKUP_BATCH); if (!nr_found) { - read_unlock(&pag->pag_ici_lock); + rcu_read_unlock(); break; } @@ -118,18 +138,26 @@ restart: batch[i] = NULL; /* - * Update the index for the next lookup. Catch overflows - * into the next AG range which can occur if we have inodes - * in the last block of the AG and we are currently - * pointing to the last inode. + * Update the index for the next lookup. Catch + * overflows into the next AG range which can occur if + * we have inodes in the last block of the AG and we + * are currently pointing to the last inode. + * + * Because we may see inodes that are from the wrong AG + * due to RCU freeing and reallocation, only update the + * index if it lies in this AG. It was a race that lead + * us to see this inode, so another lookup from the + * same index will not find it again. */ + if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno) + continue; first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) done = 1; } /* unlock now we've grabbed the inodes. */ - read_unlock(&pag->pag_ici_lock); + rcu_read_unlock(); for (i = 0; i < nr_found; i++) { if (!batch[i]) @@ -334,7 +362,7 @@ xfs_quiesce_data( /* mark the log as covered if needed */ if (xfs_log_need_covered(mp)) - error2 = xfs_fs_log_dummy(mp, SYNC_WAIT); + error2 = xfs_fs_log_dummy(mp); /* flush data-only devices */ if (mp->m_rtdev_targp) @@ -475,13 +503,14 @@ xfs_sync_worker( int error; if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { - xfs_log_force(mp, 0); - xfs_reclaim_inodes(mp, 0); /* dgc: errors ignored here */ - error = xfs_qm_sync(mp, SYNC_TRYLOCK); if (mp->m_super->s_frozen == SB_UNFROZEN && xfs_log_need_covered(mp)) - error = xfs_fs_log_dummy(mp, 0); + error = xfs_fs_log_dummy(mp); + else + xfs_log_force(mp, 0); + xfs_reclaim_inodes(mp, 0); + error = xfs_qm_sync(mp, SYNC_TRYLOCK); } mp->m_sync_seq++; wake_up(&mp->m_wait_single_sync_task); @@ -592,12 +621,12 @@ xfs_inode_set_reclaim_tag( struct xfs_perag *pag; pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); - write_lock(&pag->pag_ici_lock); + spin_lock(&pag->pag_ici_lock); spin_lock(&ip->i_flags_lock); __xfs_inode_set_reclaim_tag(pag, ip); __xfs_iflags_set(ip, XFS_IRECLAIMABLE); spin_unlock(&ip->i_flags_lock); - write_unlock(&pag->pag_ici_lock); + spin_unlock(&pag->pag_ici_lock); xfs_perag_put(pag); } @@ -639,9 +668,14 @@ xfs_reclaim_inode_grab( struct xfs_inode *ip, int flags) { + ASSERT(rcu_read_lock_held()); + + /* quick check for stale RCU freed inode */ + if (!ip->i_ino) + return 1; /* - * do some unlocked checks first to avoid unnecceary lock traffic. + * do some unlocked checks first to avoid unnecessary lock traffic. * The first is a flush lock check, the second is a already in reclaim * check. Only do these checks if we are not going to block on locks. */ @@ -654,11 +688,16 @@ xfs_reclaim_inode_grab( * The radix tree lock here protects a thread in xfs_iget from racing * with us starting reclaim on the inode. Once we have the * XFS_IRECLAIM flag set it will not touch us. + * + * Due to RCU lookup, we may find inodes that have been freed and only + * have XFS_IRECLAIM set. Indeed, we may see reallocated inodes that + * aren't candidates for reclaim at all, so we must check the + * XFS_IRECLAIMABLE is set first before proceeding to reclaim. */ spin_lock(&ip->i_flags_lock); - ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE)); - if (__xfs_iflags_test(ip, XFS_IRECLAIM)) { - /* ignore as it is already under reclaim */ + if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) || + __xfs_iflags_test(ip, XFS_IRECLAIM)) { + /* not a reclaim candidate. */ spin_unlock(&ip->i_flags_lock); return 1; } @@ -795,12 +834,12 @@ reclaim: * added to the tree assert that it's been there before to catch * problems with the inode life time early on. */ - write_lock(&pag->pag_ici_lock); + spin_lock(&pag->pag_ici_lock); if (!radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) ASSERT(0); __xfs_inode_clear_reclaim(pag, ip); - write_unlock(&pag->pag_ici_lock); + spin_unlock(&pag->pag_ici_lock); /* * Here we do an (almost) spurious inode lock in order to coordinate @@ -864,14 +903,14 @@ restart: struct xfs_inode *batch[XFS_LOOKUP_BATCH]; int i; - write_lock(&pag->pag_ici_lock); + rcu_read_lock(); nr_found = radix_tree_gang_lookup_tag( &pag->pag_ici_root, (void **)batch, first_index, XFS_LOOKUP_BATCH, XFS_ICI_RECLAIM_TAG); if (!nr_found) { - write_unlock(&pag->pag_ici_lock); + rcu_read_unlock(); break; } @@ -891,14 +930,24 @@ restart: * occur if we have inodes in the last block of * the AG and we are currently pointing to the * last inode. + * + * Because we may see inodes that are from the + * wrong AG due to RCU freeing and + * reallocation, only update the index if it + * lies in this AG. It was a race that lead us + * to see this inode, so another lookup from + * the same index will not find it again. */ + if (XFS_INO_TO_AGNO(mp, ip->i_ino) != + pag->pag_agno) + continue; first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) done = 1; } /* unlock now we've grabbed the inodes. */ - write_unlock(&pag->pag_ici_lock); + rcu_read_unlock(); for (i = 0; i < nr_found; i++) { if (!batch[i]) |