summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Chinner <dgc@sgi.com>2008-03-27 18:00:38 +1100
committerLachlan McIlroy <lachlan@redback.melbourne.sgi.com>2008-04-18 11:42:09 +1000
commit75de2a91c98a6f486f261c1367fe59f5583e15a3 (patch)
tree599d1024b34a6649f8c589100f72adf0f30b40b5
parent535f6b3735db6ef6026537bfe55ae00c3d9cc1ee (diff)
[XFS] Account for inode cluster alignment in all allocations
At ENOSPC, we can get a filesystem shutdown due to cancelling a dirty transaction in xfs_mkdir or xfs_create. This is due to the initial allocation attempt not taking into account inode alignment and hence we can prepare the AGF freelist for allocation when it's not actually possible to do an allocation. This results in inode allocation returning ENOSPC with a dirty transaction, and hence we shut down the filesystem. Because the first allocation is an exact allocation attempt, we must tell the allocator that the alignment does not affect the allocation attempt. i.e. we will accept any extent alignment as long as the extent starts at the block we want. Unfortunately, this means that if the longest free extent is less than the length + alignment necessary for fallback allocation attempts but is long enough to attempt a non-aligned allocation, we will modify the free list. If we then have the exact allocation fail, all other allocation attempts will also fail due to the alignment constraint being taken into account. Hence the initial attempt needs to set the "alignment slop" field so that alignment, while not required, must be taken into account when determining if there is enough space left in the AG to do the allocation. That means if the exact allocation fails, we will not dirty the freelist if there is not enough space available for a subsequent allocation to succeed. Hence we get an ENOSPC error back to userspace without shutting down the filesystem. SGI-PV: 978886 SGI-Modid: xfs-linux-melb:xfs-kern:30699a Signed-off-by: David Chinner <dgc@sgi.com> Signed-off-by: Christoph Hellwig <hch@infradead.org> Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
-rw-r--r--fs/xfs/xfs_ialloc.c44
1 files changed, 29 insertions, 15 deletions
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 5a146cb2298..a64dfbd565a 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -107,6 +107,16 @@ xfs_ialloc_log_di(
/*
* Allocation group level functions.
*/
+static inline int
+xfs_ialloc_cluster_alignment(
+ xfs_alloc_arg_t *args)
+{
+ if (xfs_sb_version_hasalign(&args->mp->m_sb) &&
+ args->mp->m_sb.sb_inoalignmt >=
+ XFS_B_TO_FSBT(args->mp, XFS_INODE_CLUSTER_SIZE(args->mp)))
+ return args->mp->m_sb.sb_inoalignmt;
+ return 1;
+}
/*
* Allocate new inodes in the allocation group specified by agbp.
@@ -167,10 +177,24 @@ xfs_ialloc_ag_alloc(
args.mod = args.total = args.wasdel = args.isfl =
args.userdata = args.minalignslop = 0;
args.prod = 1;
- args.alignment = 1;
+
/*
- * Allow space for the inode btree to split.
+ * We need to take into account alignment here to ensure that
+ * we don't modify the free list if we fail to have an exact
+ * block. If we don't have an exact match, and every other
+ * allocation attempt fails, we'll end up cancelling
+ * a dirty transaction and shutting down.
+ *
+ * For an exact allocation, alignment must be 1,
+ * however we need to take cluster alignment into account when
+ * fixing up the freelist. Use the minalignslop field to
+ * indicate that extra blocks might be required for alignment,
+ * but not to use them in the actual exact allocation.
*/
+ args.alignment = 1;
+ args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1;
+
+ /* Allow space for the inode btree to split. */
args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
if ((error = xfs_alloc_vextent(&args)))
return error;
@@ -191,13 +215,8 @@ xfs_ialloc_ag_alloc(
ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
args.alignment = args.mp->m_dalign;
isaligned = 1;
- } else if (xfs_sb_version_hasalign(&args.mp->m_sb) &&
- args.mp->m_sb.sb_inoalignmt >=
- XFS_B_TO_FSBT(args.mp,
- XFS_INODE_CLUSTER_SIZE(args.mp)))
- args.alignment = args.mp->m_sb.sb_inoalignmt;
- else
- args.alignment = 1;
+ } else
+ args.alignment = xfs_ialloc_cluster_alignment(&args);
/*
* Need to figure out where to allocate the inode blocks.
* Ideally they should be spaced out through the a.g.
@@ -230,12 +249,7 @@ xfs_ialloc_ag_alloc(
args.agbno = be32_to_cpu(agi->agi_root);
args.fsbno = XFS_AGB_TO_FSB(args.mp,
be32_to_cpu(agi->agi_seqno), args.agbno);
- if (xfs_sb_version_hasalign(&args.mp->m_sb) &&
- args.mp->m_sb.sb_inoalignmt >=
- XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp)))
- args.alignment = args.mp->m_sb.sb_inoalignmt;
- else
- args.alignment = 1;
+ args.alignment = xfs_ialloc_cluster_alignment(&args);
if ((error = xfs_alloc_vextent(&args)))
return error;
}