summaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_filestream.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_filestream.c')
-rw-r--r--fs/xfs/xfs_filestream.c126
1 files changed, 91 insertions, 35 deletions
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index a631e1451ab..9b715dce569 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -18,13 +18,9 @@
#include "xfs.h"
#include "xfs_bmap_btree.h"
#include "xfs_inum.h"
-#include "xfs_dir2.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_ag.h"
-#include "xfs_dmapi.h"
#include "xfs_log.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
@@ -127,6 +123,82 @@ typedef struct fstrm_item
xfs_inode_t *pip; /* Parent directory inode pointer. */
} fstrm_item_t;
+/*
+ * Allocation group filestream associations are tracked with per-ag atomic
+ * counters. These counters allow _xfs_filestream_pick_ag() to tell whether a
+ * particular AG already has active filestreams associated with it. The mount
+ * point's m_peraglock is used to protect these counters from per-ag array
+ * re-allocation during a growfs operation. When xfs_growfs_data_private() is
+ * about to reallocate the array, it calls xfs_filestream_flush() with the
+ * m_peraglock held in write mode.
+ *
+ * Since xfs_mru_cache_flush() guarantees that all the free functions for all
+ * the cache elements have finished executing before it returns, it's safe for
+ * the free functions to use the atomic counters without m_peraglock protection.
+ * This allows the implementation of xfs_fstrm_free_func() to be agnostic about
+ * whether it was called with the m_peraglock held in read mode, write mode or
+ * not held at all. The race condition this addresses is the following:
+ *
+ * - The work queue scheduler fires and pulls a filestream directory cache
+ * element off the LRU end of the cache for deletion, then gets pre-empted.
+ * - A growfs operation grabs the m_peraglock in write mode, flushes all the
+ * remaining items from the cache and reallocates the mount point's per-ag
+ * array, resetting all the counters to zero.
+ * - The work queue thread resumes and calls the free function for the element
+ * it started cleaning up earlier. In the process it decrements the
+ * filestreams counter for an AG that now has no references.
+ *
+ * With a shrinkfs feature, the above scenario could panic the system.
+ *
+ * All other uses of the following macros should be protected by either the
+ * m_peraglock held in read mode, or the cache's internal locking exposed by the
+ * interval between a call to xfs_mru_cache_lookup() and a call to
+ * xfs_mru_cache_done(). In addition, the m_peraglock must be held in read mode
+ * when new elements are added to the cache.
+ *
+ * Combined, these locking rules ensure that no associations will ever exist in
+ * the cache that reference per-ag array elements that have since been
+ * reallocated.
+ */
+static int
+xfs_filestream_peek_ag(
+ xfs_mount_t *mp,
+ xfs_agnumber_t agno)
+{
+ struct xfs_perag *pag;
+ int ret;
+
+ pag = xfs_perag_get(mp, agno);
+ ret = atomic_read(&pag->pagf_fstrms);
+ xfs_perag_put(pag);
+ return ret;
+}
+
+static int
+xfs_filestream_get_ag(
+ xfs_mount_t *mp,
+ xfs_agnumber_t agno)
+{
+ struct xfs_perag *pag;
+ int ret;
+
+ pag = xfs_perag_get(mp, agno);
+ ret = atomic_inc_return(&pag->pagf_fstrms);
+ xfs_perag_put(pag);
+ return ret;
+}
+
+static void
+xfs_filestream_put_ag(
+ xfs_mount_t *mp,
+ xfs_agnumber_t agno)
+{
+ struct xfs_perag *pag;
+
+ pag = xfs_perag_get(mp, agno);
+ atomic_dec(&pag->pagf_fstrms);
+ xfs_perag_put(pag);
+}
/*
* Scan the AGs starting at startag looking for an AG that isn't in use and has
@@ -140,6 +212,7 @@ _xfs_filestream_pick_ag(
int flags,
xfs_extlen_t minlen)
{
+ int streams, max_streams;
int err, trylock, nscan;
xfs_extlen_t longest, free, minfree, maxfree = 0;
xfs_agnumber_t ag, max_ag = NULLAGNUMBER;
@@ -155,15 +228,15 @@ _xfs_filestream_pick_ag(
trylock = XFS_ALLOC_FLAG_TRYLOCK;
for (nscan = 0; 1; nscan++) {
-
- TRACE_AG_SCAN(mp, ag, xfs_filestream_peek_ag(mp, ag));
-
- pag = mp->m_perag + ag;
+ pag = xfs_perag_get(mp, ag);
+ TRACE_AG_SCAN(mp, ag, atomic_read(&pag->pagf_fstrms));
if (!pag->pagf_init) {
err = xfs_alloc_pagf_init(mp, NULL, ag, trylock);
- if (err && !trylock)
+ if (err && !trylock) {
+ xfs_perag_put(pag);
return err;
+ }
}
/* Might fail sometimes during the 1st pass with trylock set. */
@@ -173,6 +246,7 @@ _xfs_filestream_pick_ag(
/* Keep track of the AG with the most free blocks. */
if (pag->pagf_freeblks > maxfree) {
maxfree = pag->pagf_freeblks;
+ max_streams = atomic_read(&pag->pagf_fstrms);
max_ag = ag;
}
@@ -195,6 +269,8 @@ _xfs_filestream_pick_ag(
/* Break out, retaining the reference on the AG. */
free = pag->pagf_freeblks;
+ streams = atomic_read(&pag->pagf_fstrms);
+ xfs_perag_put(pag);
*agp = ag;
break;
}
@@ -202,6 +278,7 @@ _xfs_filestream_pick_ag(
/* Drop the reference on this AG, it's not usable. */
xfs_filestream_put_ag(mp, ag);
next_ag:
+ xfs_perag_put(pag);
/* Move to the next AG, wrapping to AG 0 if necessary. */
if (++ag >= mp->m_sb.sb_agcount)
ag = 0;
@@ -229,6 +306,7 @@ next_ag:
if (max_ag != NULLAGNUMBER) {
xfs_filestream_get_ag(mp, max_ag);
TRACE_AG_PICK1(mp, max_ag, maxfree);
+ streams = max_streams;
free = maxfree;
*agp = max_ag;
break;
@@ -240,16 +318,14 @@ next_ag:
return 0;
}
- TRACE_AG_PICK2(mp, startag, *agp, xfs_filestream_peek_ag(mp, *agp),
- free, nscan, flags);
+ TRACE_AG_PICK2(mp, startag, *agp, streams, free, nscan, flags);
return 0;
}
/*
* Set the allocation group number for a file or a directory, updating inode
- * references and per-AG references as appropriate. Must be called with the
- * m_peraglock held in read mode.
+ * references and per-AG references as appropriate.
*/
static int
_xfs_filestream_update_ag(
@@ -351,16 +427,14 @@ xfs_fstrm_free_func(
{
fstrm_item_t *item = (fstrm_item_t *)data;
xfs_inode_t *ip = item->ip;
- int ref;
ASSERT(ip->i_ino == ino);
xfs_iflags_clear(ip, XFS_IFILESTREAM);
/* Drop the reference taken on the AG when the item was added. */
- ref = xfs_filestream_put_ag(ip->i_mount, item->ag);
+ xfs_filestream_put_ag(ip->i_mount, item->ag);
- ASSERT(ref >= 0);
TRACE_FREE(ip->i_mount, ip, item->pip, item->ag,
xfs_filestream_peek_ag(ip->i_mount, item->ag));
@@ -451,20 +525,6 @@ xfs_filestream_unmount(
}
/*
- * If the mount point's m_perag array is going to be reallocated, all
- * outstanding cache entries must be flushed to avoid accessing reference count
- * addresses that have been freed. The call to xfs_filestream_flush() must be
- * made inside the block that holds the m_peraglock in write mode to do the
- * reallocation.
- */
-void
-xfs_filestream_flush(
- xfs_mount_t *mp)
-{
- xfs_mru_cache_flush(mp->m_filestream);
-}
-
-/*
* Return the AG of the filestream the file or directory belongs to, or
* NULLAGNUMBER otherwise.
*/
@@ -526,7 +586,6 @@ xfs_filestream_associate(
mp = pip->i_mount;
cache = mp->m_filestream;
- down_read(&mp->m_peraglock);
/*
* We have a problem, Houston.
@@ -543,10 +602,8 @@ xfs_filestream_associate(
*
* So, if we can't get the iolock without sleeping then just give up
*/
- if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL)) {
- up_read(&mp->m_peraglock);
+ if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL))
return 1;
- }
/* If the parent directory is already in the cache, use its AG. */
item = xfs_mru_cache_lookup(cache, pip->i_ino);
@@ -601,7 +658,6 @@ exit_did_pick:
exit:
xfs_iunlock(pip, XFS_IOLOCK_EXCL);
- up_read(&mp->m_peraglock);
return -err;
}