summaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r--fs/xfs/xfs_inode.c326
1 files changed, 114 insertions, 212 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index bc46c0a133d..2778258fcfa 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -20,7 +20,6 @@
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
-#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
@@ -61,6 +60,20 @@ STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
+/*
+ * helper function to extract extent size hint from inode
+ */
+xfs_extlen_t
+xfs_get_extsz_hint(
+ struct xfs_inode *ip)
+{
+ if ((ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) && ip->i_d.di_extsize)
+ return ip->i_d.di_extsize;
+ if (XFS_IS_REALTIME_INODE(ip))
+ return ip->i_mount->m_sb.sb_rextsize;
+ return 0;
+}
+
#ifdef DEBUG
/*
* Make sure that the extents in the given memory buffer
@@ -119,24 +132,30 @@ xfs_inobp_check(
#endif
/*
- * Find the buffer associated with the given inode map
- * We do basic validation checks on the buffer once it has been
- * retrieved from disk.
+ * This routine is called to map an inode to the buffer containing the on-disk
+ * version of the inode. It returns a pointer to the buffer containing the
+ * on-disk inode in the bpp parameter, and in the dipp parameter it returns a
+ * pointer to the on-disk inode within that buffer.
+ *
+ * If a non-zero error is returned, then the contents of bpp and dipp are
+ * undefined.
*/
-STATIC int
+int
xfs_imap_to_bp(
- xfs_mount_t *mp,
- xfs_trans_t *tp,
- struct xfs_imap *imap,
- xfs_buf_t **bpp,
- uint buf_flags,
- uint iget_flags)
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ struct xfs_imap *imap,
+ struct xfs_dinode **dipp,
+ struct xfs_buf **bpp,
+ uint buf_flags,
+ uint iget_flags)
{
- int error;
- int i;
- int ni;
- xfs_buf_t *bp;
+ struct xfs_buf *bp;
+ int error;
+ int i;
+ int ni;
+ buf_flags |= XBF_UNMAPPED;
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
(int)imap->im_len, buf_flags, &bp);
if (error) {
@@ -175,8 +194,8 @@ xfs_imap_to_bp(
xfs_trans_brelse(tp, bp);
return XFS_ERROR(EINVAL);
}
- XFS_CORRUPTION_ERROR("xfs_imap_to_bp",
- XFS_ERRLEVEL_HIGH, mp, dip);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH,
+ mp, dip);
#ifdef DEBUG
xfs_emerg(mp,
"bad inode magic/vsn daddr %lld #%d (magic=%x)",
@@ -190,96 +209,9 @@ xfs_imap_to_bp(
}
xfs_inobp_check(mp, bp);
- *bpp = bp;
- return 0;
-}
-
-/*
- * This routine is called to map an inode number within a file
- * system to the buffer containing the on-disk version of the
- * inode. It returns a pointer to the buffer containing the
- * on-disk inode in the bpp parameter, and in the dip parameter
- * it returns a pointer to the on-disk inode within that buffer.
- *
- * If a non-zero error is returned, then the contents of bpp and
- * dipp are undefined.
- *
- * Use xfs_imap() to determine the size and location of the
- * buffer to read from disk.
- */
-int
-xfs_inotobp(
- xfs_mount_t *mp,
- xfs_trans_t *tp,
- xfs_ino_t ino,
- xfs_dinode_t **dipp,
- xfs_buf_t **bpp,
- int *offset,
- uint imap_flags)
-{
- struct xfs_imap imap;
- xfs_buf_t *bp;
- int error;
-
- imap.im_blkno = 0;
- error = xfs_imap(mp, tp, ino, &imap, imap_flags);
- if (error)
- return error;
- error = xfs_imap_to_bp(mp, tp, &imap, &bp, XBF_LOCK, imap_flags);
- if (error)
- return error;
-
- *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
- *bpp = bp;
- *offset = imap.im_boffset;
- return 0;
-}
-
-
-/*
- * This routine is called to map an inode to the buffer containing
- * the on-disk version of the inode. It returns a pointer to the
- * buffer containing the on-disk inode in the bpp parameter, and in
- * the dip parameter it returns a pointer to the on-disk inode within
- * that buffer.
- *
- * If a non-zero error is returned, then the contents of bpp and
- * dipp are undefined.
- *
- * The inode is expected to already been mapped to its buffer and read
- * in once, thus we can use the mapping information stored in the inode
- * rather than calling xfs_imap(). This allows us to avoid the overhead
- * of looking at the inode btree for small block file systems
- * (see xfs_imap()).
- */
-int
-xfs_itobp(
- xfs_mount_t *mp,
- xfs_trans_t *tp,
- xfs_inode_t *ip,
- xfs_dinode_t **dipp,
- xfs_buf_t **bpp,
- uint buf_flags)
-{
- xfs_buf_t *bp;
- int error;
-
- ASSERT(ip->i_imap.im_blkno != 0);
-
- error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, buf_flags, 0);
- if (error)
- return error;
-
- if (!bp) {
- ASSERT(buf_flags & XBF_TRYLOCK);
- ASSERT(tp == NULL);
- *bpp = NULL;
- return EAGAIN;
- }
-
- *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
*bpp = bp;
+ *dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset);
return 0;
}
@@ -782,11 +714,9 @@ xfs_iread(
/*
* Get pointers to the on-disk inode and the buffer containing it.
*/
- error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp,
- XBF_LOCK, iget_flags);
+ error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags);
if (error)
return error;
- dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
/*
* If we got something that isn't an inode it means someone
@@ -863,7 +793,7 @@ xfs_iread(
/*
* Use xfs_trans_brelse() to release the buffer containing the
* on-disk inode, because it was acquired with xfs_trans_read_buf()
- * in xfs_itobp() above. If tp is NULL, this is just a normal
+ * in xfs_imap_to_bp() above. If tp is NULL, this is just a normal
* brelse(). If we're within a transaction, then xfs_trans_brelse()
* will only release the buffer if it is not dirty within the
* transaction. It will be OK to release the buffer in this case,
@@ -957,7 +887,6 @@ xfs_ialloc(
prid_t prid,
int okalloc,
xfs_buf_t **ialloc_context,
- boolean_t *call_again,
xfs_inode_t **ipp)
{
xfs_ino_t ino;
@@ -972,10 +901,10 @@ xfs_ialloc(
* the on-disk inode to be allocated.
*/
error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
- ialloc_context, call_again, &ino);
+ ialloc_context, &ino);
if (error)
return error;
- if (*call_again || ino == NULLFSINO) {
+ if (*ialloc_context || ino == NULLFSINO) {
*ipp = NULL;
return 0;
}
@@ -1194,7 +1123,9 @@ xfs_itruncate_extents(
int error = 0;
int done = 0;
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+ ASSERT(!atomic_read(&VFS_I(ip)->i_count) ||
+ xfs_isilocked(ip, XFS_IOLOCK_EXCL));
ASSERT(new_size <= XFS_ISIZE(ip));
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
ASSERT(ip->i_itemp != NULL);
@@ -1213,7 +1144,7 @@ xfs_itruncate_extents(
* then there is nothing to do.
*/
first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
- last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
+ last_block = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
if (first_unmap_block == last_block)
return 0;
@@ -1342,7 +1273,8 @@ xfs_iunlink(
* Here we put the head pointer into our next pointer,
* and then we fall through to point the head at us.
*/
- error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
+ error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
+ 0, 0);
if (error)
return error;
@@ -1416,16 +1348,16 @@ xfs_iunlink_remove(
if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) {
/*
- * We're at the head of the list. Get the inode's
- * on-disk buffer to see if there is anyone after us
- * on the list. Only modify our next pointer if it
- * is not already NULLAGINO. This saves us the overhead
- * of dealing with the buffer when there is no need to
- * change it.
+ * We're at the head of the list. Get the inode's on-disk
+ * buffer to see if there is anyone after us on the list.
+ * Only modify our next pointer if it is not already NULLAGINO.
+ * This saves us the overhead of dealing with the buffer when
+ * there is no need to change it.
*/
- error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
+ error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
+ 0, 0);
if (error) {
- xfs_warn(mp, "%s: xfs_itobp() returned error %d.",
+ xfs_warn(mp, "%s: xfs_imap_to_bp returned error %d.",
__func__, error);
return error;
}
@@ -1459,34 +1391,45 @@ xfs_iunlink_remove(
next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
last_ibp = NULL;
while (next_agino != agino) {
- /*
- * If the last inode wasn't the one pointing to
- * us, then release its buffer since we're not
- * going to do anything with it.
- */
- if (last_ibp != NULL) {
+ struct xfs_imap imap;
+
+ if (last_ibp)
xfs_trans_brelse(tp, last_ibp);
- }
+
+ imap.im_blkno = 0;
next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino);
- error = xfs_inotobp(mp, tp, next_ino, &last_dip,
- &last_ibp, &last_offset, 0);
+
+ error = xfs_imap(mp, tp, next_ino, &imap, 0);
+ if (error) {
+ xfs_warn(mp,
+ "%s: xfs_imap returned error %d.",
+ __func__, error);
+ return error;
+ }
+
+ error = xfs_imap_to_bp(mp, tp, &imap, &last_dip,
+ &last_ibp, 0, 0);
if (error) {
xfs_warn(mp,
- "%s: xfs_inotobp() returned error %d.",
+ "%s: xfs_imap_to_bp returned error %d.",
__func__, error);
return error;
}
+
+ last_offset = imap.im_boffset;
next_agino = be32_to_cpu(last_dip->di_next_unlinked);
ASSERT(next_agino != NULLAGINO);
ASSERT(next_agino != 0);
}
+
/*
- * Now last_ibp points to the buffer previous to us on
- * the unlinked list. Pull us from the list.
+ * Now last_ibp points to the buffer previous to us on the
+ * unlinked list. Pull us from the list.
*/
- error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
+ error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
+ 0, 0);
if (error) {
- xfs_warn(mp, "%s: xfs_itobp(2) returned error %d.",
+ xfs_warn(mp, "%s: xfs_imap_to_bp(2) returned error %d.",
__func__, error);
return error;
}
@@ -1566,8 +1509,7 @@ xfs_ifree_cluster(
* to mark all the active inodes on the buffer stale.
*/
bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
- mp->m_bsize * blks_per_cluster,
- XBF_LOCK);
+ mp->m_bsize * blks_per_cluster, 0);
if (!bp)
return ENOMEM;
@@ -1737,7 +1679,8 @@ xfs_ifree(
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XBF_LOCK);
+ error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, &dip, &ibp,
+ 0, 0);
if (error)
return error;
@@ -2347,11 +2290,11 @@ cluster_corrupt_out:
*/
rcu_read_unlock();
/*
- * Clean up the buffer. If it was B_DELWRI, just release it --
+ * Clean up the buffer. If it was delwri, just release it --
* brelse can handle it with no problems. If not, shut down the
* filesystem before releasing the buffer.
*/
- bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp);
+ bufwasdelwri = (bp->b_flags & _XBF_DELWRI_Q);
if (bufwasdelwri)
xfs_buf_relse(bp);
@@ -2377,30 +2320,29 @@ cluster_corrupt_out:
/*
* Unlocks the flush lock
*/
- xfs_iflush_abort(iq);
+ xfs_iflush_abort(iq, false);
kmem_free(ilist);
xfs_perag_put(pag);
return XFS_ERROR(EFSCORRUPTED);
}
/*
- * xfs_iflush() will write a modified inode's changes out to the
- * inode's on disk home. The caller must have the inode lock held
- * in at least shared mode and the inode flush completion must be
- * active as well. The inode lock will still be held upon return from
- * the call and the caller is free to unlock it.
- * The inode flush will be completed when the inode reaches the disk.
- * The flags indicate how the inode's buffer should be written out.
+ * Flush dirty inode metadata into the backing buffer.
+ *
+ * The caller must have the inode lock and the inode flush lock held. The
+ * inode lock will still be held upon return to the caller, and the inode
+ * flush lock will be released after the inode has reached the disk.
+ *
+ * The caller must write out the buffer returned in *bpp and release it.
*/
int
xfs_iflush(
- xfs_inode_t *ip,
- uint flags)
+ struct xfs_inode *ip,
+ struct xfs_buf **bpp)
{
- xfs_inode_log_item_t *iip;
- xfs_buf_t *bp;
- xfs_dinode_t *dip;
- xfs_mount_t *mp;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_buf *bp;
+ struct xfs_dinode *dip;
int error;
XFS_STATS_INC(xs_iflush_count);
@@ -2410,31 +2352,14 @@ xfs_iflush(
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
- iip = ip->i_itemp;
- mp = ip->i_mount;
+ *bpp = NULL;
- /*
- * We can't flush the inode until it is unpinned, so wait for it if we
- * are allowed to block. We know no one new can pin it, because we are
- * holding the inode lock shared and you need to hold it exclusively to
- * pin the inode.
- *
- * If we are not allowed to block, force the log out asynchronously so
- * that when we come back the inode will be unpinned. If other inodes
- * in the same cluster are dirty, they will probably write the inode
- * out for us if they occur after the log force completes.
- */
- if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) {
- xfs_iunpin(ip);
- xfs_ifunlock(ip);
- return EAGAIN;
- }
xfs_iunpin_wait(ip);
/*
* For stale inodes we cannot rely on the backing buffer remaining
* stale in cache for the remaining life of the stale inode and so
- * xfs_itobp() below may give us a buffer that no longer contains
+ * xfs_imap_to_bp() below may give us a buffer that no longer contains
* inodes below. We have to check this after ensuring the inode is
* unpinned so that it is safe to reclaim the stale inode after the
* flush call.
@@ -2447,20 +2372,21 @@ xfs_iflush(
/*
* This may have been unpinned because the filesystem is shutting
* down forcibly. If that's the case we must not write this inode
- * to disk, because the log record didn't make it to disk!
+ * to disk, because the log record didn't make it to disk.
+ *
+ * We also have to remove the log item from the AIL in this case,
+ * as we wait for an empty AIL as part of the unmount process.
*/
if (XFS_FORCED_SHUTDOWN(mp)) {
- if (iip)
- iip->ili_fields = 0;
- xfs_ifunlock(ip);
- return XFS_ERROR(EIO);
+ error = XFS_ERROR(EIO);
+ goto abort_out;
}
/*
* Get the buffer containing the on-disk inode.
*/
- error = xfs_itobp(mp, NULL, ip, &dip, &bp,
- (flags & SYNC_TRYLOCK) ? XBF_TRYLOCK : XBF_LOCK);
+ error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK,
+ 0);
if (error || !bp) {
xfs_ifunlock(ip);
return error;
@@ -2488,23 +2414,20 @@ xfs_iflush(
if (error)
goto cluster_corrupt_out;
- if (flags & SYNC_WAIT)
- error = xfs_bwrite(bp);
- else
- xfs_buf_delwri_queue(bp);
-
- xfs_buf_relse(bp);
- return error;
+ *bpp = bp;
+ return 0;
corrupt_out:
xfs_buf_relse(bp);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
cluster_corrupt_out:
+ error = XFS_ERROR(EFSCORRUPTED);
+abort_out:
/*
* Unlocks the flush lock
*/
- xfs_iflush_abort(ip);
- return XFS_ERROR(EFSCORRUPTED);
+ xfs_iflush_abort(ip, false);
+ return error;
}
@@ -2706,27 +2629,6 @@ corrupt_out:
return XFS_ERROR(EFSCORRUPTED);
}
-void
-xfs_promote_inode(
- struct xfs_inode *ip)
-{
- struct xfs_buf *bp;
-
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
-
- bp = xfs_incore(ip->i_mount->m_ddev_targp, ip->i_imap.im_blkno,
- ip->i_imap.im_len, XBF_TRYLOCK);
- if (!bp)
- return;
-
- if (XFS_BUF_ISDELAYWRITE(bp)) {
- xfs_buf_delwri_promote(bp);
- wake_up_process(ip->i_mount->m_ddev_targp->bt_task);
- }
-
- xfs_buf_relse(bp);
-}
-
/*
* Return a pointer to the extent record at file index idx.
*/