From 43df2ee659d3d854d17686f8f38fc1071b25774b Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Mon, 24 Jun 2013 13:37:23 +0800 Subject: xfs: Remove the left function variable from xfs_ialloc_get_rec() This patch cleans out the left function variable as it is useless to xfs_ialloc_get_rec(). Signed-off-by: Jie Liu Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_ialloc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs/xfs/xfs_ialloc.c') diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index c8f5ae1debf..4345c53c729 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -615,8 +615,7 @@ xfs_ialloc_get_rec( struct xfs_btree_cur *cur, xfs_agino_t agino, xfs_inobt_rec_incore_t *rec, - int *done, - int left) + int *done) { int error; int i; @@ -724,12 +723,12 @@ xfs_dialloc_ag( pag->pagl_leftrec != NULLAGINO && pag->pagl_rightrec != NULLAGINO) { error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec, - &trec, &doneleft, 1); + &trec, &doneleft); if (error) goto error1; error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec, - &rec, &doneright, 0); + &rec, &doneright); if (error) goto error1; } else { -- cgit v1.2.3-70-g09d2 From 28c8e41af693e4b5cd2d68218f144cf40ce15781 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 27 Jun 2013 16:04:55 +1000 Subject: xfs: Inode create item recovery When we find an icreate transaction, we need to get and initialise the buffers in the range that has been passed. Extract and verify the information in the item record, then loop over the range, initialising the buffers and issuing them as delayed writes. Support an arbitrary size range to initialise so that in future when we allocate inodes in much larger chunks all kernels that understand this transaction can still recover them.
Signed-off-by: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_ialloc.c | 37 +++++++++++---- fs/xfs/xfs_ialloc.h | 8 ++++ fs/xfs/xfs_log_recover.c | 114 ++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 145 insertions(+), 14 deletions(-) (limited to 'fs/xfs/xfs_ialloc.c') diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 4345c53c729..4411565b718 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -150,12 +150,16 @@ xfs_check_agi_freecount( #endif /* - * Initialise a new set of inodes. + * Initialise a new set of inodes. When called without a transaction context + * (e.g. from recovery) we initiate a delayed write of the inode buffers rather + * than logging them (which in a transaction context puts them into the AIL + * for writeback rather than the xfsbufd queue). */ STATIC int xfs_ialloc_inode_init( struct xfs_mount *mp, struct xfs_trans *tp, + struct list_head *buffer_list, xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_agblock_t length, @@ -247,18 +251,33 @@ xfs_ialloc_inode_init( ino++; uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid); xfs_dinode_calc_crc(mp, free); - } else { + } else if (tp) { /* just log the inode core */ xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1); } } - if (version == 3) { - /* need to log the entire buffer */ - xfs_trans_log_buf(tp, fbuf, 0, - BBTOB(fbuf->b_length) - 1); + + if (tp) { + /* + * Mark the buffer as an inode allocation buffer so it + * sticks in AIL at the point of this allocation + * transaction. This ensures that they are on disk before + * the tail of the log can be moved past this + * transaction (i.e. by preventing relogging from moving + * it forward in the log).
+ */ + xfs_trans_inode_alloc_buf(tp, fbuf); + if (version == 3) { + /* need to log the entire buffer */ + xfs_trans_log_buf(tp, fbuf, 0, + BBTOB(fbuf->b_length) - 1); + } + } else { + fbuf->b_flags |= XBF_DONE; + xfs_buf_delwri_queue(fbuf, buffer_list); + xfs_buf_relse(fbuf); } - xfs_trans_inode_alloc_buf(tp, fbuf); } return 0; } @@ -303,7 +322,7 @@ xfs_ialloc_ag_alloc( * First try to allocate inodes contiguous with the last-allocated * chunk of inodes. If the filesystem is striped, this will fill * an entire stripe unit with inodes. - */ + */ agi = XFS_BUF_TO_AGI(agbp); newino = be32_to_cpu(agi->agi_newino); agno = be32_to_cpu(agi->agi_seqno); @@ -402,7 +421,7 @@ xfs_ialloc_ag_alloc( * rather than a linear progression to prevent the next generation * number from being easily guessable. */ - error = xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno, + error = xfs_ialloc_inode_init(args.mp, tp, NULL, agno, args.agbno, args.len, prandom_u32()); if (error) diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index c8da3df271e..68c07320f09 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h @@ -150,6 +150,14 @@ int xfs_inobt_lookup(struct xfs_btree_cur *cur, xfs_agino_t ino, int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_inobt_rec_incore_t *rec, int *stat); +/* + * Inode chunk initialisation routine + */ +int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp, + struct list_head *buffer_list, + xfs_agnumber_t agno, xfs_agblock_t agbno, + xfs_agblock_t length, unsigned int gen); + extern const struct xfs_buf_ops xfs_agi_buf_ops; #endif /* __XFS_IALLOC_H__ */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 7cf5e4eafe2..6fcc910a50b 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -45,6 +45,7 @@ #include "xfs_cksum.h" #include "xfs_trace.h" #include "xfs_icache.h" +#include "xfs_icreate_item.h" /* Need all the magic numbers and buffer ops structures from these headers */ #include "xfs_symlink.h" 
@@ -1617,7 +1618,10 @@ xlog_recover_add_to_trans( * form the cancelled buffer table. Hence they have tobe done last. * * 3. Inode allocation buffers must be replayed before inode items that - * read the buffer and replay changes into it. + * read the buffer and replay changes into it. For filesystems using the + * ICREATE transactions, this means XFS_LI_ICREATE objects need to get + * treated the same as inode allocation buffers as they create and + * initialise the buffers directly. * * 4. Inode unlink buffers must be replayed after inode items are replayed. * This ensures that inodes are completely flushed to the inode buffer @@ -1632,10 +1636,17 @@ xlog_recover_add_to_trans( * from all the other buffers and move them to last. * * Hence, 4 lists, in order from head to tail: - * - buffer_list for all buffers except cancelled/inode unlink buffers - * - item_list for all non-buffer items - * - inode_buffer_list for inode unlink buffers - * - cancel_list for the cancelled buffers + * - buffer_list for all buffers except cancelled/inode unlink buffers + * - item_list for all non-buffer items + * - inode_buffer_list for inode unlink buffers + * - cancel_list for the cancelled buffers + * + * Note that we add objects to the tail of the lists so that first-to-last + * ordering is preserved within the lists. Adding objects to the head of the + * list means when we traverse from the head we walk them in last-to-first + * order. For cancelled buffers and inode unlink buffers this doesn't matter, + * but for all other items there may be specific ordering that we need to + * preserve. 
 */ STATIC int xlog_recover_reorder_trans( @@ -1655,6 +1666,9 @@ xlog_recover_reorder_trans( xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; switch (ITEM_TYPE(item)) { + case XFS_LI_ICREATE: + list_move_tail(&item->ri_list, &buffer_list); + break; case XFS_LI_BUF: if (buf_f->blf_flags & XFS_BLF_CANCEL) { trace_xfs_log_recover_item_reorder_head(log, @@ -2981,6 +2995,93 @@ xlog_recover_efd_pass2( return 0; } +/* + * This routine is called when an inode create format structure is found in a + * committed transaction in the log. Its purpose is to initialise the inodes + * being allocated on disk. This requires us to get inode cluster buffers that + * match the range to be initialised, stamped with inode templates and written + * by delayed write so that subsequent modifications will hit the cached buffer + * and only need writing out at the end of recovery. + */ +STATIC int +xlog_recover_do_icreate_pass2( + struct xlog *log, + struct list_head *buffer_list, + xlog_recover_item_t *item) +{ + struct xfs_mount *mp = log->l_mp; + struct xfs_icreate_log *icl; + xfs_agnumber_t agno; + xfs_agblock_t agbno; + unsigned int count; + unsigned int isize; + xfs_agblock_t length; + + icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr; + if (icl->icl_type != XFS_LI_ICREATE) { + xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type"); + return EINVAL; + } + + if (icl->icl_size != 1) { + xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size"); + return EINVAL; + } + + agno = be32_to_cpu(icl->icl_ag); + if (agno >= mp->m_sb.sb_agcount) { + xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno"); + return EINVAL; + } + agbno = be32_to_cpu(icl->icl_agbno); + if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) { + xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno"); + return EINVAL; + } + isize = be32_to_cpu(icl->icl_isize); + if (isize != mp->m_sb.sb_inodesize) { + xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad 
isize"); + return EINVAL; + } + count = be32_to_cpu(icl->icl_count); + if (!count) { + xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count"); + return EINVAL; + } + length = be32_to_cpu(icl->icl_length); + if (!length || length >= mp->m_sb.sb_agblocks) { + xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length"); + return EINVAL; + } + + /* existing allocation is fixed value */ + ASSERT(count == XFS_IALLOC_INODES(mp)); + ASSERT(length == XFS_IALLOC_BLOCKS(mp)); + if (count != XFS_IALLOC_INODES(mp) || + length != XFS_IALLOC_BLOCKS(mp)) { + xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2"); + return EINVAL; + } + + /* + * Inode buffers can be freed. Do not replay the inode initialisation as + * we could be overwriting something written after this inode buffer was + * cancelled. + * + * XXX: we need to iterate all buffers and only init those that are not + * cancelled. I think that a more fine grained factoring of + * xfs_ialloc_inode_init may be appropriate here to enable this to be + * done easily. 
+ */ + if (xlog_check_buffer_cancelled(log, + XFS_AGB_TO_DADDR(mp, agno, agbno), length, 0)) + return 0; + + xfs_ialloc_inode_init(mp, NULL, buffer_list, agno, agbno, length, + be32_to_cpu(icl->icl_gen)); + return 0; +} + /* * Free up any resources allocated by the transaction * @@ -3023,6 +3124,7 @@ xlog_recover_commit_pass1( case XFS_LI_EFI: case XFS_LI_EFD: case XFS_LI_DQUOT: + case XFS_LI_ICREATE: /* nothing to do in pass 1 */ return 0; default: @@ -3053,6 +3155,8 @@ xlog_recover_commit_pass2( return xlog_recover_efd_pass2(log, item); case XFS_LI_DQUOT: return xlog_recover_dquot_pass2(log, buffer_list, item); + case XFS_LI_ICREATE: + return xlog_recover_do_icreate_pass2(log, buffer_list, item); case XFS_LI_QUOTAOFF: /* nothing to do in pass2 */ return 0; -- cgit v1.2.3-70-g09d2 From ddf6ad01434e72bfc8423e1619abdaa0af9394a8 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 27 Jun 2013 16:04:56 +1000 Subject: xfs: Use inode create transaction Replace the use of buffer based logging of inode initialisation with the new logical form, which describes the range to be initialised in recovery. We continue to "log" the inode buffers to push them into the AIL and ensure that the inode create transaction is not removed from the log before the inode buffers are written to disk. Update the transaction identifier and reservations to match the changed implementation. Signed-off-by: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_buf_item.c | 12 ++++++++++-- fs/xfs/xfs_ialloc.c | 32 +++++++++++++++++++++++--------- 2 files changed, 33 insertions(+), 11 deletions(-) (limited to 'fs/xfs/xfs_ialloc.c') diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 61f68768ee8..bfc4e0c26fd 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -310,13 +310,21 @@ xfs_buf_item_format( /* * If it is an inode buffer, transfer the in-memory state to the - * format flags and clear the in-memory state. 
We do not transfer + * format flags and clear the in-memory state. + * + * For buffer based inode allocation, we do not transfer * this state if the inode buffer allocation has not yet been committed * to the log as setting the XFS_BLI_INODE_BUF flag will prevent * correct replay of the inode allocation. + * + * For icreate item based inode allocation, the buffers aren't written + * to the journal during allocation, and hence we should always tag the + * buffer as an inode buffer so that the correct unlinked list replay + * occurs during recovery. */ if (bip->bli_flags & XFS_BLI_INODE_BUF) { - if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && + if (xfs_sb_version_hascrc(&lip->li_mountp->m_sb) || + !((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && xfs_log_item_in_current_chkpt(lip))) bip->__bli_format.blf_flags |= XFS_BLF_INODE_BUF; bip->bli_flags &= ~XFS_BLI_INODE_BUF; diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 4411565b718..7a0c17d7ec0 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -38,6 +38,7 @@ #include "xfs_bmap.h" #include "xfs_cksum.h" #include "xfs_buf_item.h" +#include "xfs_icreate_item.h" /* @@ -155,7 +156,7 @@ xfs_check_agi_freecount( * than logging them (which in a transaction context puts them into the AIL * for writeback rather than the xfsbufd queue). */ -STATIC int +int xfs_ialloc_inode_init( struct xfs_mount *mp, struct xfs_trans *tp, @@ -212,6 +213,18 @@ xfs_ialloc_inode_init( version = 3; ino = XFS_AGINO_TO_INO(mp, agno, XFS_OFFBNO_TO_AGINO(mp, agbno, 0)); + + /* + * log the initialisation that is about to take place as a + * logical operation. This means the transaction does not + * need to log the physical changes to the inode buffers as log + * recovery will know what initialisation is actually needed. + * Hence we only need to log the buffers as "ordered" buffers so + * they track in the AIL as if they were physically logged. 
+ */ + if (tp) + xfs_icreate_log(tp, agno, agbno, XFS_IALLOC_INODES(mp), + mp->m_sb.sb_inodesize, length, gen); } else if (xfs_sb_version_hasnlink(&mp->m_sb)) version = 2; else @@ -227,13 +240,8 @@ xfs_ialloc_inode_init( XBF_UNMAPPED); if (!fbuf) return ENOMEM; - /* - * Initialize all inodes in this buffer and then log them. - * - * XXX: It would be much better if we had just one transaction - * to log a whole cluster of inodes instead of all the - * individual transactions causing a lot of log traffic. - */ + + /* Initialize the inode buffers and log them appropriately. */ fbuf->b_ops = &xfs_inode_buf_ops; xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length)); for (i = 0; i < ninodes; i++) { @@ -269,7 +277,13 @@ xfs_ialloc_inode_init( */ xfs_trans_inode_alloc_buf(tp, fbuf); if (version == 3) { - /* need to log the entire buffer */ + /* + * Mark the buffer as ordered so that they are + * not physically logged in the transaction but + * still tracked in the AIL as part of the + * transaction and pin the log appropriately. + */ + xfs_trans_ordered_buf(tp, fbuf); xfs_trans_log_buf(tp, fbuf, 0, BBTOB(fbuf->b_length) - 1); } -- cgit v1.2.3-70-g09d2