From 1316d4da3f632d5843d5a446203e73067dc40f09 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 4 Jul 2011 05:27:36 +0000 Subject: xfs: unpin stale inodes directly in IOP_COMMITTED When inodes are marked stale in a transaction, they are treated specially when the inode log item is being inserted into the AIL. It tries to avoid moving the log item forward in the AIL due to a race condition with the writing the underlying buffer back to disk. The was "fixed" in commit de25c18 ("xfs: avoid moving stale inodes in the AIL"). To avoid moving the item forward, we return a LSN smaller than the commit_lsn of the completing transaction, thereby trying to trick the commit code into not moving the inode forward at all. I'm not sure this ever worked as intended - it assumes the inode is already in the AIL, but I don't think the returned LSN would have been small enough to prevent moving the inode. It appears that the reason it worked is that the lower LSN of the inodes meant they were inserted into the AIL and flushed before the inode buffer (which was moved to the commit_lsn of the transaction). The big problem is that with delayed logging, the returning of the different LSN means insertion takes the slow, non-bulk path. Worse yet is that insertion is to a position -before- the commit_lsn so it is doing a AIL traversal on every insertion, and has to walk over all the items that have already been inserted into the AIL. It's expensive. To compound the matter further, with delayed logging inodes are likely to go from clean to stale in a single checkpoint, which means they aren't even in the AIL at all when we come across them at AIL insertion time. Hence these were all getting inserted into the AIL when they simply do not need to be as inodes marked XFS_ISTALE are never written back. Transactional/recovery integrity is maintained in this case by the other items in the unlink transaction that were modified (e.g. the AGI btree blocks) and committed in the same checkpoint. So to fix this, simply unpin the stale inodes directly in xfs_inode_item_committed() and return -1 to indicate that the AIL insertion code does not need to do any further processing of these inodes. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/xfs_trans.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/xfs/xfs_trans.c') diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 7c7bc2b786b..c83f63b33aa 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1361,7 +1361,7 @@ xfs_trans_item_committed( lip->li_flags |= XFS_LI_ABORTED; item_lsn = IOP_COMMITTED(lip, commit_lsn); - /* If the committed routine returns -1, item has been freed. */ + /* item_lsn of -1 means the item needs no further processing */ if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) return; @@ -1474,7 +1474,7 @@ xfs_trans_committed_bulk( lip->li_flags |= XFS_LI_ABORTED; item_lsn = IOP_COMMITTED(lip, commit_lsn); - /* item_lsn of -1 means the item was freed */ + /* item_lsn of -1 means the item needs no further processing */ if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) continue; -- cgit v1.2.3-70-g09d2 From 7a249cf83da1813cfa71cfe1e265b40045eceb47 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 8 Jul 2011 14:34:42 +0200 Subject: xfs: fix filesystsem freeze race in xfs_trans_alloc As pointed out by Jan xfs_trans_alloc can race with a concurrent filesystem freeze when it sleeps during the memory allocation. Fix this by moving the wait_for_freeze call after the memory allocation. This means moving the freeze into the low-level _xfs_trans_alloc helper, which thus grows a new argument. Also fix up some comments in that area while at it. Signed-off-by: Christoph Hellwig Reviewed-by: Alex Elder Reviewed-by: Dave Chinner --- fs/xfs/xfs_fsops.c | 2 +- fs/xfs/xfs_iomap.c | 3 +-- fs/xfs/xfs_mount.c | 14 +++++++------- fs/xfs/xfs_trans.c | 27 ++++++++++----------------- fs/xfs/xfs_trans.h | 10 ++++++++-- 5 files changed, 27 insertions(+), 29 deletions(-) (limited to 'fs/xfs/xfs_trans.c') diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 9153d2c77ca..b7c492d293b 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -626,7 +626,7 @@ xfs_fs_log_dummy( xfs_trans_t *tp; int error; - tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP); + tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP, false); error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, XFS_DEFAULT_LOG_COUNT); if (error) { diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 091d82b94c4..982b71108f3 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -688,8 +688,7 @@ xfs_iomap_write_unwritten( * the same inode that we complete here and might deadlock * on the iolock. */ - xfs_wait_for_freeze(mp, SB_FREEZE_TRANS); - tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS); + tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS, true); tp->t_flags |= XFS_TRANS_RESERVE; error = xfs_trans_reserve(tp, resblks, XFS_WRITE_LOG_RES(mp), 0, diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index b49b82363d2..63659ee316b 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1566,15 +1566,9 @@ xfs_fs_writable(xfs_mount_t *mp) } /* - * xfs_log_sbcount - * * Called either periodically to keep the on disk superblock values * roughly up to date or from unmount to make sure the values are * correct on a clean unmount. - * - * Note this code can be called during the process of freezing, so - * we may need to use the transaction allocator which does not not - * block when the transaction subsystem is in its frozen state. */ int xfs_log_sbcount( @@ -1596,7 +1590,13 @@ xfs_log_sbcount( if (!xfs_sb_version_haslazysbcount(&mp->m_sb)) return 0; - tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP); + /* + * We can be called during the process of freezing, so make sure + * we go ahead even if the frozen for new transactions. We will + * always use a sync transaction in the freeze path to make sure + * the transaction has completed by the time we return. + */ + tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP, false); error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, XFS_DEFAULT_LOG_COUNT); if (error) { diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index c83f63b33aa..2837220ea5c 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -566,31 +566,24 @@ xfs_trans_init( /* * This routine is called to allocate a transaction structure. + * * The type parameter indicates the type of the transaction. These * are enumerated in xfs_trans.h. - * - * Dynamically allocate the transaction structure from the transaction - * zone, initialize it, and return it to the caller. */ -xfs_trans_t * -xfs_trans_alloc( - xfs_mount_t *mp, - uint type) -{ - xfs_wait_for_freeze(mp, SB_FREEZE_TRANS); - return _xfs_trans_alloc(mp, type, KM_SLEEP); -} - -xfs_trans_t * +struct xfs_trans * _xfs_trans_alloc( - xfs_mount_t *mp, - uint type, - uint memflags) + struct xfs_mount *mp, + uint type, + uint memflags, + bool wait_for_freeze) { - xfs_trans_t *tp; + struct xfs_trans *tp; atomic_inc(&mp->m_active_trans); + if (wait_for_freeze) + xfs_wait_for_freeze(mp, SB_FREEZE_TRANS); + tp = kmem_zone_zalloc(xfs_trans_zone, memflags); tp->t_magic = XFS_TRANS_MAGIC; tp->t_type = type; diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 06a9759b635..4e78432ce14 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -447,8 +447,14 @@ typedef struct xfs_trans { /* * XFS transaction mechanism exported interfaces. */ -xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint); -xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, uint); +xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, uint, bool); + +static inline struct xfs_trans * +xfs_trans_alloc(struct xfs_mount *mp, uint type) +{ + return _xfs_trans_alloc(mp, type, KM_SLEEP, true); +} + xfs_trans_t *xfs_trans_dup(xfs_trans_t *); int xfs_trans_reserve(xfs_trans_t *, uint, uint, uint, uint, uint); -- cgit v1.2.3-70-g09d2 From b2ce39740066604288876c752d8170b3b17a21aa Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 11 Jul 2011 09:51:44 -0500 Subject: Revert "xfs: fix filesystsem freeze race in xfs_trans_alloc" This reverts commit 7a249cf83da1813cfa71cfe1e265b40045eceb47. That commit created a situation that could lead to a filesystem hang. As Dave Chinner pointed out, xfs_trans_alloc() could hold a reference to m_active_trans (i.e., keep it non-zero) and then wait for SB_FREEZE_TRANS to complete. Meanwhile a filesystem freeze request could set SB_FREEZE_TRANS and then wait for m_active_trans to drop to zero. Nobody benefits from this sequence of events... Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/xfs_fsops.c | 2 +- fs/xfs/xfs_iomap.c | 3 ++- fs/xfs/xfs_mount.c | 14 +++++++------- fs/xfs/xfs_trans.c | 27 +++++++++++++++++---------- fs/xfs/xfs_trans.h | 10 ++-------- 5 files changed, 29 insertions(+), 27 deletions(-) (limited to 'fs/xfs/xfs_trans.c') diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index b7c492d293b..9153d2c77ca 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -626,7 +626,7 @@ xfs_fs_log_dummy( xfs_trans_t *tp; int error; - tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP, false); + tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP); error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, XFS_DEFAULT_LOG_COUNT); if (error) { diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 982b71108f3..091d82b94c4 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -688,7 +688,8 @@ xfs_iomap_write_unwritten( * the same inode that we complete here and might deadlock * on the iolock. */ - tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS, true); + xfs_wait_for_freeze(mp, SB_FREEZE_TRANS); + tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS); tp->t_flags |= XFS_TRANS_RESERVE; error = xfs_trans_reserve(tp, resblks, XFS_WRITE_LOG_RES(mp), 0, diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 2be5e5cf897..a19e92381f9 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1559,9 +1559,15 @@ xfs_fs_writable(xfs_mount_t *mp) } /* + * xfs_log_sbcount + * * Called either periodically to keep the on disk superblock values * roughly up to date or from unmount to make sure the values are * correct on a clean unmount. + * + * Note this code can be called during the process of freezing, so + * we may need to use the transaction allocator which does not not + * block when the transaction subsystem is in its frozen state. */ int xfs_log_sbcount( @@ -1583,13 +1589,7 @@ xfs_log_sbcount( if (!xfs_sb_version_haslazysbcount(&mp->m_sb)) return 0; - /* - * We can be called during the process of freezing, so make sure - * we go ahead even if the frozen for new transactions. We will - * always use a sync transaction in the freeze path to make sure - * the transaction has completed by the time we return. - */ - tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP, false); + tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP); error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, XFS_DEFAULT_LOG_COUNT); if (error) { diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 2837220ea5c..c83f63b33aa 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -566,24 +566,31 @@ xfs_trans_init( /* * This routine is called to allocate a transaction structure. - * * The type parameter indicates the type of the transaction. These * are enumerated in xfs_trans.h. + * + * Dynamically allocate the transaction structure from the transaction + * zone, initialize it, and return it to the caller. */ -struct xfs_trans * +xfs_trans_t * +xfs_trans_alloc( + xfs_mount_t *mp, + uint type) +{ + xfs_wait_for_freeze(mp, SB_FREEZE_TRANS); + return _xfs_trans_alloc(mp, type, KM_SLEEP); +} + +xfs_trans_t * _xfs_trans_alloc( - struct xfs_mount *mp, - uint type, - uint memflags, - bool wait_for_freeze) + xfs_mount_t *mp, + uint type, + uint memflags) { - struct xfs_trans *tp; + xfs_trans_t *tp; atomic_inc(&mp->m_active_trans); - if (wait_for_freeze) - xfs_wait_for_freeze(mp, SB_FREEZE_TRANS); - tp = kmem_zone_zalloc(xfs_trans_zone, memflags); tp->t_magic = XFS_TRANS_MAGIC; tp->t_type = type; diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 4e78432ce14..06a9759b635 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -447,14 +447,8 @@ typedef struct xfs_trans { /* * XFS transaction mechanism exported interfaces. */ -xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, uint, bool); - -static inline struct xfs_trans * -xfs_trans_alloc(struct xfs_mount *mp, uint type) -{ - return _xfs_trans_alloc(mp, type, KM_SLEEP, true); -} - +xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint); +xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, uint); xfs_trans_t *xfs_trans_dup(xfs_trans_t *); int xfs_trans_reserve(xfs_trans_t *, uint, uint, uint, uint, uint); -- cgit v1.2.3-70-g09d2 From 1d8c95a363bf8cd4d4182dd19c01693b635311c2 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 18 Jul 2011 03:40:16 +0000 Subject: xfs: use a cursor for bulk AIL insertion Delayed logging can insert tens of thousands of log items into the AIL at the same LSN. When the committing of log commit records occur, we can get insertions occurring at an LSN that is not at the end of the AIL. If there are thousands of items in the AIL on the tail LSN, each insertion has to walk the AIL to find the correct place to insert the new item into the AIL. This can consume large amounts of CPU time and block other operations from occurring while the traversals are in progress. To avoid this repeated walk, use a AIL cursor to record where we should be inserting the new items into the AIL without having to repeat the walk. The cursor infrastructure already provides this functionality for push walks, so is a simple extension of existing code. While this will not avoid the initial walk, it will avoid repeating it tens of thousands of times during a single checkpoint commit. This version includes logic improvements from Christoph Hellwig. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/xfs_trans.c | 27 ++++++++++-- fs/xfs/xfs_trans_ail.c | 109 ++++++++++++++++++++++++++++++++++++++---------- fs/xfs/xfs_trans_priv.h | 10 +++-- 3 files changed, 118 insertions(+), 28 deletions(-) (limited to 'fs/xfs/xfs_trans.c') diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index c83f63b33aa..efc147f0e9b 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1426,6 +1426,7 @@ xfs_trans_committed( static inline void xfs_log_item_batch_insert( struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, struct xfs_log_item **log_items, int nr_items, xfs_lsn_t commit_lsn) @@ -1434,7 +1435,7 @@ xfs_log_item_batch_insert( spin_lock(&ailp->xa_lock); /* xfs_trans_ail_update_bulk drops ailp->xa_lock */ - xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn); + xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn); for (i = 0; i < nr_items; i++) IOP_UNPIN(log_items[i], 0); @@ -1452,6 +1453,13 @@ xfs_log_item_batch_insert( * as an iclog write error even though we haven't started any IO yet. Hence in * this case all we need to do is IOP_COMMITTED processing, followed by an * IOP_UNPIN(aborted) call. + * + * The AIL cursor is used to optimise the insert process. If commit_lsn is not + * at the end of the AIL, the insert cursor avoids the need to walk + * the AIL to find the insertion point on every xfs_log_item_batch_insert() + * call. This saves a lot of needless list walking and is a net win, even + * though it slightly increases that amount of AIL lock traffic to set it up + * and tear it down. */ void xfs_trans_committed_bulk( @@ -1463,8 +1471,13 @@ xfs_trans_committed_bulk( #define LOG_ITEM_BATCH_SIZE 32 struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE]; struct xfs_log_vec *lv; + struct xfs_ail_cursor cur; int i = 0; + spin_lock(&ailp->xa_lock); + xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn); + spin_unlock(&ailp->xa_lock); + /* unpin all the log items */ for (lv = log_vector; lv; lv = lv->lv_next ) { struct xfs_log_item *lip = lv->lv_item; @@ -1493,7 +1506,9 @@ xfs_trans_committed_bulk( /* * Not a bulk update option due to unusual item_lsn. * Push into AIL immediately, rechecking the lsn once - * we have the ail lock. Then unpin the item. + * we have the ail lock. Then unpin the item. This does + * not affect the AIL cursor the bulk insert path is + * using. */ spin_lock(&ailp->xa_lock); if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) @@ -1507,7 +1522,7 @@ xfs_trans_committed_bulk( /* Item is a candidate for bulk AIL insert. */ log_items[i++] = lv->lv_item; if (i >= LOG_ITEM_BATCH_SIZE) { - xfs_log_item_batch_insert(ailp, log_items, + xfs_log_item_batch_insert(ailp, &cur, log_items, LOG_ITEM_BATCH_SIZE, commit_lsn); i = 0; } @@ -1515,7 +1530,11 @@ xfs_trans_committed_bulk( /* make sure we insert the remainder! */ if (i) - xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn); + xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn); + + spin_lock(&ailp->xa_lock); + xfs_trans_ail_cursor_done(ailp, &cur); + spin_unlock(&ailp->xa_lock); } /* diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 5fc2380092c..9a69dc06ea8 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -272,9 +272,9 @@ xfs_trans_ail_cursor_clear( } /* - * Return the item in the AIL with the current lsn. - * Return the current tree generation number for use - * in calls to xfs_trans_next_ail(). + * Initialise the cursor to the first item in the AIL with the given @lsn. + * This searches the list from lowest LSN to highest. Pass a @lsn of zero + * to initialise the cursor to the first item in the AIL. */ xfs_log_item_t * xfs_trans_ail_cursor_first( @@ -300,31 +300,97 @@ out: } /* - * splice the log item list into the AIL at the given LSN. + * Initialise the cursor to the last item in the AIL with the given @lsn. + * This searches the list from highest LSN to lowest. If there is no item with + * the value of @lsn, then it sets the cursor to the last item with an LSN lower + * than @lsn. + */ +static struct xfs_log_item * +__xfs_trans_ail_cursor_last( + struct xfs_ail *ailp, + xfs_lsn_t lsn) +{ + xfs_log_item_t *lip; + + list_for_each_entry_reverse(lip, &ailp->xa_ail, li_ail) { + if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0) + return lip; + } + return NULL; +} + +/* + * Initialise the cursor to the last item in the AIL with the given @lsn. + * This searches the list from highest LSN to lowest. + */ +struct xfs_log_item * +xfs_trans_ail_cursor_last( + struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, + xfs_lsn_t lsn) +{ + xfs_trans_ail_cursor_init(ailp, cur); + cur->item = __xfs_trans_ail_cursor_last(ailp, lsn); + return cur->item; +} + +/* + * splice the log item list into the AIL at the given LSN. We splice to the + * tail of the given LSN to maintain insert order for push traversals. The + * cursor is optional, allowing repeated updates to the same LSN to avoid + * repeated traversals. */ static void xfs_ail_splice( - struct xfs_ail *ailp, - struct list_head *list, - xfs_lsn_t lsn) + struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, + struct list_head *list, + xfs_lsn_t lsn) { - xfs_log_item_t *next_lip; + struct xfs_log_item *lip = cur ? cur->item : NULL; + struct xfs_log_item *next_lip; - /* If the list is empty, just insert the item. */ - if (list_empty(&ailp->xa_ail)) { - list_splice(list, &ailp->xa_ail); - return; + /* + * Get a new cursor if we don't have a placeholder or the existing one + * has been invalidated. + */ + if (!lip || (__psint_t)lip & 1) { + lip = __xfs_trans_ail_cursor_last(ailp, lsn); + + if (!lip) { + /* The list is empty, so just splice and return. */ + if (cur) + cur->item = NULL; + list_splice(list, &ailp->xa_ail); + return; + } } - list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { - if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0) - break; + /* + * Our cursor points to the item we want to insert _after_, so we have + * to update the cursor to point to the end of the list we are splicing + * in so that it points to the correct location for the next splice. + * i.e. before the splice + * + * lsn -> lsn -> lsn + x -> lsn + x ... + * ^ + * | cursor points here + * + * After the splice we have: + * + * lsn -> lsn -> lsn -> lsn -> .... -> lsn -> lsn + x -> lsn + x ... + * ^ ^ + * | cursor points here | needs to move here + * + * So we set the cursor to the last item in the list to be spliced + * before we execute the splice, resulting in the cursor pointing to + * the correct item after the splice occurs. + */ + if (cur) { + next_lip = list_entry(list->prev, struct xfs_log_item, li_ail); + cur->item = next_lip; } - - ASSERT(&next_lip->li_ail == &ailp->xa_ail || - XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0); - - list_splice_init(list, &next_lip->li_ail); + list_splice(list, &lip->li_ail); } /* @@ -645,6 +711,7 @@ xfs_trans_unlocked_item( void xfs_trans_ail_update_bulk( struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, struct xfs_log_item **log_items, int nr_items, xfs_lsn_t lsn) __releases(ailp->xa_lock) @@ -674,7 +741,7 @@ xfs_trans_ail_update_bulk( list_add(&lip->li_ail, &tmp); } - xfs_ail_splice(ailp, &tmp, lsn); + xfs_ail_splice(ailp, cur, &tmp, lsn); if (!mlip_changed) { spin_unlock(&ailp->xa_lock); diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 6b164e9e9a1..c0cb4089032 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -82,6 +82,7 @@ struct xfs_ail { extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, struct xfs_log_item **log_items, int nr_items, xfs_lsn_t lsn) __releases(ailp->xa_lock); static inline void @@ -90,7 +91,7 @@ xfs_trans_ail_update( struct xfs_log_item *lip, xfs_lsn_t lsn) __releases(ailp->xa_lock) { - xfs_trans_ail_update_bulk(ailp, &lip, 1, lsn); + xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn); } void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp, @@ -111,10 +112,13 @@ xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp); void xfs_trans_unlocked_item(struct xfs_ail *, xfs_log_item_t *); -struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp, +struct xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp, struct xfs_ail_cursor *cur, xfs_lsn_t lsn); -struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp, +struct xfs_log_item * xfs_trans_ail_cursor_last(struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, + xfs_lsn_t lsn); +struct xfs_log_item * xfs_trans_ail_cursor_next(struct xfs_ail *ailp, struct xfs_ail_cursor *cur); void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, struct xfs_ail_cursor *cur); -- cgit v1.2.3-70-g09d2