summaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_log_recover.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_log_recover.c')
-rw-r--r--fs/xfs/xfs_log_recover.c959
1 files changed, 502 insertions, 457 deletions
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 981af0f6504..00cd7f3a8f5 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -179,7 +179,7 @@ xlog_bread_noalign(
xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
nbblks);
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
}
blk_no = round_down(blk_no, log->l_sectBBsize);
@@ -193,12 +193,8 @@ xlog_bread_noalign(
bp->b_io_length = nbblks;
bp->b_error = 0;
- if (XFS_FORCED_SHUTDOWN(log->l_mp))
- return XFS_ERROR(EIO);
-
- xfs_buf_iorequest(bp);
- error = xfs_buf_iowait(bp);
- if (error)
+ error = xfs_buf_submit_wait(bp);
+ if (error && !XFS_FORCED_SHUTDOWN(log->l_mp))
xfs_buf_ioerror_alert(bp, __func__);
return error;
}
@@ -268,7 +264,7 @@ xlog_bwrite(
xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
nbblks);
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
}
blk_no = round_down(blk_no, log->l_sectBBsize);
@@ -330,14 +326,14 @@ xlog_header_check_recover(
xlog_header_check_dump(mp, head);
XFS_ERROR_REPORT("xlog_header_check_recover(1)",
XFS_ERRLEVEL_HIGH, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
} else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
xfs_warn(mp,
"dirty log entry has mismatched uuid - can't recover");
xlog_header_check_dump(mp, head);
XFS_ERROR_REPORT("xlog_header_check_recover(2)",
XFS_ERRLEVEL_HIGH, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
return 0;
}
@@ -364,7 +360,7 @@ xlog_header_check_mount(
xlog_header_check_dump(mp, head);
XFS_ERROR_REPORT("xlog_header_check_mount",
XFS_ERRLEVEL_HIGH, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
return 0;
}
@@ -378,12 +374,14 @@ xlog_recover_iodone(
* We're not going to bother about retrying
* this during recovery. One strike!
*/
- xfs_buf_ioerror_alert(bp, __func__);
- xfs_force_shutdown(bp->b_target->bt_mount,
- SHUTDOWN_META_IO_ERROR);
+ if (!XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
+ xfs_buf_ioerror_alert(bp, __func__);
+ xfs_force_shutdown(bp->b_target->bt_mount,
+ SHUTDOWN_META_IO_ERROR);
+ }
}
bp->b_iodone = NULL;
- xfs_buf_ioend(bp, 0);
+ xfs_buf_ioend(bp);
}
/*
@@ -462,7 +460,7 @@ xlog_find_verify_cycle(
while (!(bp = xlog_get_bp(log, bufblks))) {
bufblks >>= 1;
if (bufblks < log->l_sectBBsize)
- return ENOMEM;
+ return -ENOMEM;
}
for (i = start_blk; i < start_blk + nbblks; i += bufblks) {
@@ -524,7 +522,7 @@ xlog_find_verify_log_record(
if (!(bp = xlog_get_bp(log, num_blks))) {
if (!(bp = xlog_get_bp(log, 1)))
- return ENOMEM;
+ return -ENOMEM;
smallmem = 1;
} else {
error = xlog_bread(log, start_blk, num_blks, bp, &offset);
@@ -539,7 +537,7 @@ xlog_find_verify_log_record(
xfs_warn(log->l_mp,
"Log inconsistent (didn't find previous header)");
ASSERT(0);
- error = XFS_ERROR(EIO);
+ error = -EIO;
goto out;
}
@@ -564,7 +562,7 @@ xlog_find_verify_log_record(
* will be called again for the end of the physical log.
*/
if (i == -1) {
- error = -1;
+ error = 1;
goto out;
}
@@ -628,7 +626,12 @@ xlog_find_head(
int error, log_bbnum = log->l_logBBsize;
/* Is the end of the log device zeroed? */
- if ((error = xlog_find_zeroed(log, &first_blk)) == -1) {
+ error = xlog_find_zeroed(log, &first_blk);
+ if (error < 0) {
+ xfs_warn(log->l_mp, "empty log check failed");
+ return error;
+ }
+ if (error == 1) {
*return_head_blk = first_blk;
/* Is the whole lot zeroed? */
@@ -641,15 +644,12 @@ xlog_find_head(
}
return 0;
- } else if (error) {
- xfs_warn(log->l_mp, "empty log check failed");
- return error;
}
first_blk = 0; /* get cycle # of 1st block */
bp = xlog_get_bp(log, 1);
if (!bp)
- return ENOMEM;
+ return -ENOMEM;
error = xlog_bread(log, 0, 1, bp, &offset);
if (error)
@@ -818,29 +818,29 @@ validate_head:
start_blk = head_blk - num_scan_bblks; /* don't read head_blk */
/* start ptr at last block ptr before head_blk */
- if ((error = xlog_find_verify_log_record(log, start_blk,
- &head_blk, 0)) == -1) {
- error = XFS_ERROR(EIO);
- goto bp_err;
- } else if (error)
+ error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0);
+ if (error == 1)
+ error = -EIO;
+ if (error)
goto bp_err;
} else {
start_blk = 0;
ASSERT(head_blk <= INT_MAX);
- if ((error = xlog_find_verify_log_record(log, start_blk,
- &head_blk, 0)) == -1) {
+ error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0);
+ if (error < 0)
+ goto bp_err;
+ if (error == 1) {
/* We hit the beginning of the log during our search */
start_blk = log_bbnum - (num_scan_bblks - head_blk);
new_blk = log_bbnum;
ASSERT(start_blk <= INT_MAX &&
(xfs_daddr_t) log_bbnum-start_blk >= 0);
ASSERT(head_blk <= INT_MAX);
- if ((error = xlog_find_verify_log_record(log,
- start_blk, &new_blk,
- (int)head_blk)) == -1) {
- error = XFS_ERROR(EIO);
- goto bp_err;
- } else if (error)
+ error = xlog_find_verify_log_record(log, start_blk,
+ &new_blk, (int)head_blk);
+ if (error == 1)
+ error = -EIO;
+ if (error)
goto bp_err;
if (new_blk != log_bbnum)
head_blk = new_blk;
@@ -911,7 +911,7 @@ xlog_find_tail(
bp = xlog_get_bp(log, 1);
if (!bp)
- return ENOMEM;
+ return -ENOMEM;
if (*head_blk == 0) { /* special case */
error = xlog_bread(log, 0, 1, bp, &offset);
if (error)
@@ -961,7 +961,7 @@ xlog_find_tail(
xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
xlog_put_bp(bp);
ASSERT(0);
- return XFS_ERROR(EIO);
+ return -EIO;
}
/* find blk_no of tail of log */
@@ -1092,8 +1092,8 @@ done:
*
* Return:
* 0 => the log is completely written to
- * -1 => use *blk_no as the first block of the log
- * >0 => error has occurred
+ * 1 => use *blk_no as the first block of the log
+ * <0 => error has occurred
*/
STATIC int
xlog_find_zeroed(
@@ -1112,7 +1112,7 @@ xlog_find_zeroed(
/* check totally zeroed log */
bp = xlog_get_bp(log, 1);
if (!bp)
- return ENOMEM;
+ return -ENOMEM;
error = xlog_bread(log, 0, 1, bp, &offset);
if (error)
goto bp_err;
@@ -1121,7 +1121,7 @@ xlog_find_zeroed(
if (first_cycle == 0) { /* completely zeroed log */
*blk_no = 0;
xlog_put_bp(bp);
- return -1;
+ return 1;
}
/* check partially zeroed log */
@@ -1141,7 +1141,7 @@ xlog_find_zeroed(
*/
xfs_warn(log->l_mp,
"Log inconsistent or not a log (last==0, first!=1)");
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto bp_err;
}
@@ -1179,19 +1179,18 @@ xlog_find_zeroed(
* Potentially backup over partial log record write. We don't need
* to search the end of the log because we know it is zero.
*/
- if ((error = xlog_find_verify_log_record(log, start_blk,
- &last_blk, 0)) == -1) {
- error = XFS_ERROR(EIO);
- goto bp_err;
- } else if (error)
- goto bp_err;
+ error = xlog_find_verify_log_record(log, start_blk, &last_blk, 0);
+ if (error == 1)
+ error = -EIO;
+ if (error)
+ goto bp_err;
*blk_no = last_blk;
bp_err:
xlog_put_bp(bp);
if (error)
return error;
- return -1;
+ return 1;
}
/*
@@ -1251,7 +1250,7 @@ xlog_write_log_records(
while (!(bp = xlog_get_bp(log, bufblks))) {
bufblks >>= 1;
if (bufblks < sectbb)
- return ENOMEM;
+ return -ENOMEM;
}
/* We may need to do a read at the start to fill in part of
@@ -1354,7 +1353,7 @@ xlog_clear_stale_blocks(
if (unlikely(head_block < tail_block || head_block >= log->l_logBBsize)) {
XFS_ERROR_REPORT("xlog_clear_stale_blocks(1)",
XFS_ERRLEVEL_LOW, log->l_mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
tail_distance = tail_block + (log->l_logBBsize - head_block);
} else {
@@ -1366,7 +1365,7 @@ xlog_clear_stale_blocks(
if (unlikely(head_block >= tail_block || head_cycle != (tail_cycle + 1))){
XFS_ERROR_REPORT("xlog_clear_stale_blocks(2)",
XFS_ERRLEVEL_LOW, log->l_mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
tail_distance = tail_block - head_block;
}
@@ -1444,160 +1443,6 @@ xlog_clear_stale_blocks(
******************************************************************************
*/
-STATIC xlog_recover_t *
-xlog_recover_find_tid(
- struct hlist_head *head,
- xlog_tid_t tid)
-{
- xlog_recover_t *trans;
-
- hlist_for_each_entry(trans, head, r_list) {
- if (trans->r_log_tid == tid)
- return trans;
- }
- return NULL;
-}
-
-STATIC void
-xlog_recover_new_tid(
- struct hlist_head *head,
- xlog_tid_t tid,
- xfs_lsn_t lsn)
-{
- xlog_recover_t *trans;
-
- trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP);
- trans->r_log_tid = tid;
- trans->r_lsn = lsn;
- INIT_LIST_HEAD(&trans->r_itemq);
-
- INIT_HLIST_NODE(&trans->r_list);
- hlist_add_head(&trans->r_list, head);
-}
-
-STATIC void
-xlog_recover_add_item(
- struct list_head *head)
-{
- xlog_recover_item_t *item;
-
- item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP);
- INIT_LIST_HEAD(&item->ri_list);
- list_add_tail(&item->ri_list, head);
-}
-
-STATIC int
-xlog_recover_add_to_cont_trans(
- struct xlog *log,
- struct xlog_recover *trans,
- xfs_caddr_t dp,
- int len)
-{
- xlog_recover_item_t *item;
- xfs_caddr_t ptr, old_ptr;
- int old_len;
-
- if (list_empty(&trans->r_itemq)) {
- /* finish copying rest of trans header */
- xlog_recover_add_item(&trans->r_itemq);
- ptr = (xfs_caddr_t) &trans->r_theader +
- sizeof(xfs_trans_header_t) - len;
- memcpy(ptr, dp, len); /* d, s, l */
- return 0;
- }
- /* take the tail entry */
- item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
-
- old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
- old_len = item->ri_buf[item->ri_cnt-1].i_len;
-
- ptr = kmem_realloc(old_ptr, len+old_len, old_len, KM_SLEEP);
- memcpy(&ptr[old_len], dp, len); /* d, s, l */
- item->ri_buf[item->ri_cnt-1].i_len += len;
- item->ri_buf[item->ri_cnt-1].i_addr = ptr;
- trace_xfs_log_recover_item_add_cont(log, trans, item, 0);
- return 0;
-}
-
-/*
- * The next region to add is the start of a new region. It could be
- * a whole region or it could be the first part of a new region. Because
- * of this, the assumption here is that the type and size fields of all
- * format structures fit into the first 32 bits of the structure.
- *
- * This works because all regions must be 32 bit aligned. Therefore, we
- * either have both fields or we have neither field. In the case we have
- * neither field, the data part of the region is zero length. We only have
- * a log_op_header and can throw away the header since a new one will appear
- * later. If we have at least 4 bytes, then we can determine how many regions
- * will appear in the current log item.
- */
-STATIC int
-xlog_recover_add_to_trans(
- struct xlog *log,
- struct xlog_recover *trans,
- xfs_caddr_t dp,
- int len)
-{
- xfs_inode_log_format_t *in_f; /* any will do */
- xlog_recover_item_t *item;
- xfs_caddr_t ptr;
-
- if (!len)
- return 0;
- if (list_empty(&trans->r_itemq)) {
- /* we need to catch log corruptions here */
- if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) {
- xfs_warn(log->l_mp, "%s: bad header magic number",
- __func__);
- ASSERT(0);
- return XFS_ERROR(EIO);
- }
- if (len == sizeof(xfs_trans_header_t))
- xlog_recover_add_item(&trans->r_itemq);
- memcpy(&trans->r_theader, dp, len); /* d, s, l */
- return 0;
- }
-
- ptr = kmem_alloc(len, KM_SLEEP);
- memcpy(ptr, dp, len);
- in_f = (xfs_inode_log_format_t *)ptr;
-
- /* take the tail entry */
- item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
- if (item->ri_total != 0 &&
- item->ri_total == item->ri_cnt) {
- /* tail item is in use, get a new one */
- xlog_recover_add_item(&trans->r_itemq);
- item = list_entry(trans->r_itemq.prev,
- xlog_recover_item_t, ri_list);
- }
-
- if (item->ri_total == 0) { /* first region to be added */
- if (in_f->ilf_size == 0 ||
- in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) {
- xfs_warn(log->l_mp,
- "bad number of regions (%d) in inode log format",
- in_f->ilf_size);
- ASSERT(0);
- kmem_free(ptr);
- return XFS_ERROR(EIO);
- }
-
- item->ri_total = in_f->ilf_size;
- item->ri_buf =
- kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t),
- KM_SLEEP);
- }
- ASSERT(item->ri_total > item->ri_cnt);
- /* Description region is ri_buf[0] */
- item->ri_buf[item->ri_cnt].i_addr = ptr;
- item->ri_buf[item->ri_cnt].i_len = len;
- item->ri_cnt++;
- trace_xfs_log_recover_item_add(log, trans, item, 0);
- return 0;
-}
-
/*
* Sort the log items in the transaction.
*
@@ -1702,7 +1547,7 @@ xlog_recover_reorder_trans(
*/
if (!list_empty(&sort_list))
list_splice_init(&sort_list, &trans->r_itemq);
- error = XFS_ERROR(EIO);
+ error = -EIO;
goto out;
}
}
@@ -1943,7 +1788,7 @@ xlog_recover_do_inode_buffer(
item, bp);
XFS_ERROR_REPORT("xlog_recover_do_inode_buf",
XFS_ERRLEVEL_LOW, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp,
@@ -2125,6 +1970,17 @@ xlog_recover_validate_buf_type(
__uint16_t magic16;
__uint16_t magicda;
+ /*
+ * We can only do post recovery validation on items on CRC enabled
+ * fielsystems as we need to know when the buffer was written to be able
+ * to determine if we should have replayed the item. If we replay old
+ * metadata over a newer buffer, then it will enter a temporarily
+ * inconsistent state resulting in verification failures. Hence for now
+ * just avoid the verification stage for non-crc filesystems
+ */
+ if (!xfs_sb_version_hascrc(&mp->m_sb))
+ return;
+
magic32 = be32_to_cpu(*(__be32 *)bp->b_addr);
magic16 = be16_to_cpu(*(__be16*)bp->b_addr);
magicda = be16_to_cpu(info->magic);
@@ -2162,8 +2018,6 @@ xlog_recover_validate_buf_type(
bp->b_ops = &xfs_agf_buf_ops;
break;
case XFS_BLFT_AGFL_BUF:
- if (!xfs_sb_version_hascrc(&mp->m_sb))
- break;
if (magic32 != XFS_AGFL_MAGIC) {
xfs_warn(mp, "Bad AGFL block magic!");
ASSERT(0);
@@ -2196,10 +2050,6 @@ xlog_recover_validate_buf_type(
#endif
break;
case XFS_BLFT_DINO_BUF:
- /*
- * we get here with inode allocation buffers, not buffers that
- * track unlinked list changes.
- */
if (magic16 != XFS_DINODE_MAGIC) {
xfs_warn(mp, "Bad INODE block magic!");
ASSERT(0);
@@ -2279,8 +2129,6 @@ xlog_recover_validate_buf_type(
bp->b_ops = &xfs_attr3_leaf_buf_ops;
break;
case XFS_BLFT_ATTR_RMT_BUF:
- if (!xfs_sb_version_hascrc(&mp->m_sb))
- break;
if (magic32 != XFS_ATTR3_RMT_MAGIC) {
xfs_warn(mp, "Bad attr remote magic!");
ASSERT(0);
@@ -2387,16 +2235,7 @@ xlog_recover_do_reg_buffer(
/* Shouldn't be any more regions */
ASSERT(i == item->ri_total);
- /*
- * We can only do post recovery validation on items on CRC enabled
- * fielsystems as we need to know when the buffer was written to be able
- * to determine if we should have replayed the item. If we replay old
- * metadata over a newer buffer, then it will enter a temporarily
- * inconsistent state resulting in verification failures. Hence for now
- * just avoid the verification stage for non-crc filesystems
- */
- if (xfs_sb_version_hascrc(&mp->m_sb))
- xlog_recover_validate_buf_type(mp, bp, buf_f);
+ xlog_recover_validate_buf_type(mp, bp, buf_f);
}
/*
@@ -2404,8 +2243,11 @@ xlog_recover_do_reg_buffer(
* Simple algorithm: if we have found a QUOTAOFF log item of the same type
* (ie. USR or GRP), then just toss this buffer away; don't recover it.
* Else, treat it as a regular buffer and do recovery.
+ *
+ * Return false if the buffer was tossed and true if we recovered the buffer to
+ * indicate to the caller if the buffer needs writing.
*/
-STATIC void
+STATIC bool
xlog_recover_do_dquot_buffer(
struct xfs_mount *mp,
struct xlog *log,
@@ -2420,9 +2262,8 @@ xlog_recover_do_dquot_buffer(
/*
* Filesystems are required to send in quota flags at mount time.
*/
- if (mp->m_qflags == 0) {
- return;
- }
+ if (!mp->m_qflags)
+ return false;
type = 0;
if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF)
@@ -2435,9 +2276,10 @@ xlog_recover_do_dquot_buffer(
* This type of quotas was turned off, so ignore this buffer
*/
if (log->l_quotaoffs_flag & type)
- return;
+ return false;
xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
+ return true;
}
/*
@@ -2496,7 +2338,7 @@ xlog_recover_buffer_pass2(
bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
buf_flags, NULL);
if (!bp)
- return XFS_ERROR(ENOMEM);
+ return -ENOMEM;
error = bp->b_error;
if (error) {
xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#1)");
@@ -2504,23 +2346,44 @@ xlog_recover_buffer_pass2(
}
/*
- * recover the buffer only if we get an LSN from it and it's less than
+ * Recover the buffer only if we get an LSN from it and it's less than
* the lsn of the transaction we are replaying.
+ *
+ * Note that we have to be extremely careful of readahead here.
+ * Readahead does not attach verfiers to the buffers so if we don't
+ * actually do any replay after readahead because of the LSN we found
+ * in the buffer if more recent than that current transaction then we
+ * need to attach the verifier directly. Failure to do so can lead to
+ * future recovery actions (e.g. EFI and unlinked list recovery) can
+ * operate on the buffers and they won't get the verifier attached. This
+ * can lead to blocks on disk having the correct content but a stale
+ * CRC.
+ *
+ * It is safe to assume these clean buffers are currently up to date.
+ * If the buffer is dirtied by a later transaction being replayed, then
+ * the verifier will be reset to match whatever recover turns that
+ * buffer into.
*/
lsn = xlog_recover_get_buf_lsn(mp, bp);
- if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0)
+ if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
+ xlog_recover_validate_buf_type(mp, bp, buf_f);
goto out_release;
+ }
if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
+ if (error)
+ goto out_release;
} else if (buf_f->blf_flags &
(XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
- xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
+ bool dirty;
+
+ dirty = xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
+ if (!dirty)
+ goto out_release;
} else {
xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
}
- if (error)
- goto out_release;
/*
* Perform delayed write on the buffer. Asynchronous writes will be
@@ -2598,7 +2461,7 @@ xfs_recover_inode_owner_change(
ip = xfs_inode_alloc(mp, in_f->ilf_ino);
if (!ip)
- return ENOMEM;
+ return -ENOMEM;
/* instantiate the inode */
xfs_dinode_from_disk(&ip->i_d, dip);
@@ -2676,7 +2539,7 @@ xlog_recover_inode_pass2(
bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0,
&xfs_inode_buf_ops);
if (!bp) {
- error = ENOMEM;
+ error = -ENOMEM;
goto error;
}
error = bp->b_error;
@@ -2697,7 +2560,7 @@ xlog_recover_inode_pass2(
__func__, dip, bp, in_f->ilf_ino);
XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)",
XFS_ERRLEVEL_LOW, mp);
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
goto out_release;
}
dicp = item->ri_buf[1].i_addr;
@@ -2707,7 +2570,7 @@ xlog_recover_inode_pass2(
__func__, item, in_f->ilf_ino);
XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)",
XFS_ERRLEVEL_LOW, mp);
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
goto out_release;
}
@@ -2764,7 +2627,7 @@ xlog_recover_inode_pass2(
"%s: Bad regular inode log record, rec ptr 0x%p, "
"ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
__func__, item, dip, bp, in_f->ilf_ino);
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
goto out_release;
}
} else if (unlikely(S_ISDIR(dicp->di_mode))) {
@@ -2777,7 +2640,7 @@ xlog_recover_inode_pass2(
"%s: Bad dir inode log record, rec ptr 0x%p, "
"ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
__func__, item, dip, bp, in_f->ilf_ino);
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
goto out_release;
}
}
@@ -2790,7 +2653,7 @@ xlog_recover_inode_pass2(
__func__, item, dip, bp, in_f->ilf_ino,
dicp->di_nextents + dicp->di_anextents,
dicp->di_nblocks);
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
goto out_release;
}
if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) {
@@ -2800,7 +2663,7 @@ xlog_recover_inode_pass2(
"%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
"dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__,
item, dip, bp, in_f->ilf_ino, dicp->di_forkoff);
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
goto out_release;
}
isize = xfs_icdinode_size(dicp->di_version);
@@ -2810,7 +2673,7 @@ xlog_recover_inode_pass2(
xfs_alert(mp,
"%s: Bad inode log record length %d, rec ptr 0x%p",
__func__, item->ri_buf[1].i_len, item);
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
goto out_release;
}
@@ -2898,7 +2761,7 @@ xlog_recover_inode_pass2(
default:
xfs_warn(log->l_mp, "%s: Invalid flag", __func__);
ASSERT(0);
- error = EIO;
+ error = -EIO;
goto out_release;
}
}
@@ -2919,7 +2782,7 @@ out_release:
error:
if (need_free)
kmem_free(in_f);
- return XFS_ERROR(error);
+ return error;
}
/*
@@ -2946,7 +2809,7 @@ xlog_recover_quotaoff_pass1(
if (qoff_f->qf_flags & XFS_GQUOTA_ACCT)
log->l_quotaoffs_flag |= XFS_DQ_GROUP;
- return (0);
+ return 0;
}
/*
@@ -2971,17 +2834,17 @@ xlog_recover_dquot_pass2(
* Filesystems are required to send in quota flags at mount time.
*/
if (mp->m_qflags == 0)
- return (0);
+ return 0;
recddq = item->ri_buf[1].i_addr;
if (recddq == NULL) {
xfs_alert(log->l_mp, "NULL dquot in %s.", __func__);
- return XFS_ERROR(EIO);
+ return -EIO;
}
if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) {
xfs_alert(log->l_mp, "dquot too small (%d) in %s.",
item->ri_buf[1].i_len, __func__);
- return XFS_ERROR(EIO);
+ return -EIO;
}
/*
@@ -2990,7 +2853,7 @@ xlog_recover_dquot_pass2(
type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
ASSERT(type);
if (log->l_quotaoffs_flag & type)
- return (0);
+ return 0;
/*
* At this point we know that quota was _not_ turned off.
@@ -3007,12 +2870,19 @@ xlog_recover_dquot_pass2(
error = xfs_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
"xlog_recover_dquot_pass2 (log copy)");
if (error)
- return XFS_ERROR(EIO);
+ return -EIO;
ASSERT(dq_f->qlf_len == 1);
+ /*
+ * At this point we are assuming that the dquots have been allocated
+ * and hence the buffer has valid dquots stamped in it. It should,
+ * therefore, pass verifier validation. If the dquot is bad, then the
+ * we'll return an error here, so we don't need to specifically check
+ * the dquot in the buffer after the verifier has run.
+ */
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno,
XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp,
- NULL);
+ &xfs_dquot_buf_ops);
if (error)
return error;
@@ -3020,18 +2890,6 @@ xlog_recover_dquot_pass2(
ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset);
/*
- * At least the magic num portion should be on disk because this
- * was among a chunk of dquots created earlier, and we did some
- * minimal initialization then.
- */
- error = xfs_dqcheck(mp, ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
- "xlog_recover_dquot_pass2");
- if (error) {
- xfs_buf_relse(bp);
- return XFS_ERROR(EIO);
- }
-
- /*
* If the dquot has an LSN in it, recover the dquot only if it's less
* than the lsn of the transaction we are replaying.
*/
@@ -3178,38 +3036,38 @@ xlog_recover_do_icreate_pass2(
icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr;
if (icl->icl_type != XFS_LI_ICREATE) {
xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type");
- return EINVAL;
+ return -EINVAL;
}
if (icl->icl_size != 1) {
xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size");
- return EINVAL;
+ return -EINVAL;
}
agno = be32_to_cpu(icl->icl_ag);
if (agno >= mp->m_sb.sb_agcount) {
xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno");
- return EINVAL;
+ return -EINVAL;
}
agbno = be32_to_cpu(icl->icl_agbno);
if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) {
xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno");
- return EINVAL;
+ return -EINVAL;
}
isize = be32_to_cpu(icl->icl_isize);
if (isize != mp->m_sb.sb_inodesize) {
xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize");
- return EINVAL;
+ return -EINVAL;
}
count = be32_to_cpu(icl->icl_count);
if (!count) {
xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count");
- return EINVAL;
+ return -EINVAL;
}
length = be32_to_cpu(icl->icl_length);
if (!length || length >= mp->m_sb.sb_agblocks) {
xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length");
- return EINVAL;
+ return -EINVAL;
}
/* existing allocation is fixed value */
@@ -3218,7 +3076,7 @@ xlog_recover_do_icreate_pass2(
if (count != mp->m_ialloc_inos ||
length != mp->m_ialloc_blks) {
xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2");
- return EINVAL;
+ return -EINVAL;
}
/*
@@ -3240,31 +3098,6 @@ xlog_recover_do_icreate_pass2(
return 0;
}
-/*
- * Free up any resources allocated by the transaction
- *
- * Remember that EFIs, EFDs, and IUNLINKs are handled later.
- */
-STATIC void
-xlog_recover_free_trans(
- struct xlog_recover *trans)
-{
- xlog_recover_item_t *item, *n;
- int i;
-
- list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) {
- /* Free the regions in the item. */
- list_del(&item->ri_list);
- for (i = 0; i < item->ri_cnt; i++)
- kmem_free(item->ri_buf[i].i_addr);
- /* Free the item itself */
- kmem_free(item->ri_buf);
- kmem_free(item);
- }
- /* Free the transaction recover structure */
- kmem_free(trans);
-}
-
STATIC void
xlog_recover_buffer_ra_pass2(
struct xlog *log,
@@ -3389,7 +3222,7 @@ xlog_recover_commit_pass1(
xfs_warn(log->l_mp, "%s: invalid item type (%d)",
__func__, ITEM_TYPE(item));
ASSERT(0);
- return XFS_ERROR(EIO);
+ return -EIO;
}
}
@@ -3425,7 +3258,7 @@ xlog_recover_commit_pass2(
xfs_warn(log->l_mp, "%s: invalid item type (%d)",
__func__, ITEM_TYPE(item));
ASSERT(0);
- return XFS_ERROR(EIO);
+ return -EIO;
}
}
@@ -3514,22 +3347,309 @@ out:
if (!list_empty(&done_list))
list_splice_init(&done_list, &trans->r_itemq);
- xlog_recover_free_trans(trans);
-
error2 = xfs_buf_delwri_submit(&buffer_list);
return error ? error : error2;
}
+STATIC void
+xlog_recover_add_item(
+ struct list_head *head)
+{
+ xlog_recover_item_t *item;
+
+ item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP);
+ INIT_LIST_HEAD(&item->ri_list);
+ list_add_tail(&item->ri_list, head);
+}
+
STATIC int
-xlog_recover_unmount_trans(
- struct xlog *log)
+xlog_recover_add_to_cont_trans(
+ struct xlog *log,
+ struct xlog_recover *trans,
+ xfs_caddr_t dp,
+ int len)
{
- /* Do nothing now */
- xfs_warn(log->l_mp, "%s: Unmount LR", __func__);
+ xlog_recover_item_t *item;
+ xfs_caddr_t ptr, old_ptr;
+ int old_len;
+
+ if (list_empty(&trans->r_itemq)) {
+ /* finish copying rest of trans header */
+ xlog_recover_add_item(&trans->r_itemq);
+ ptr = (xfs_caddr_t) &trans->r_theader +
+ sizeof(xfs_trans_header_t) - len;
+ memcpy(ptr, dp, len);
+ return 0;
+ }
+ /* take the tail entry */
+ item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
+
+ old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
+ old_len = item->ri_buf[item->ri_cnt-1].i_len;
+
+ ptr = kmem_realloc(old_ptr, len+old_len, old_len, KM_SLEEP);
+ memcpy(&ptr[old_len], dp, len);
+ item->ri_buf[item->ri_cnt-1].i_len += len;
+ item->ri_buf[item->ri_cnt-1].i_addr = ptr;
+ trace_xfs_log_recover_item_add_cont(log, trans, item, 0);
+ return 0;
+}
+
+/*
+ * The next region to add is the start of a new region. It could be
+ * a whole region or it could be the first part of a new region. Because
+ * of this, the assumption here is that the type and size fields of all
+ * format structures fit into the first 32 bits of the structure.
+ *
+ * This works because all regions must be 32 bit aligned. Therefore, we
+ * either have both fields or we have neither field. In the case we have
+ * neither field, the data part of the region is zero length. We only have
+ * a log_op_header and can throw away the header since a new one will appear
+ * later. If we have at least 4 bytes, then we can determine how many regions
+ * will appear in the current log item.
+ */
+STATIC int
+xlog_recover_add_to_trans(
+ struct xlog *log,
+ struct xlog_recover *trans,
+ xfs_caddr_t dp,
+ int len)
+{
+ xfs_inode_log_format_t *in_f; /* any will do */
+ xlog_recover_item_t *item;
+ xfs_caddr_t ptr;
+
+ if (!len)
+ return 0;
+ if (list_empty(&trans->r_itemq)) {
+ /* we need to catch log corruptions here */
+ if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) {
+ xfs_warn(log->l_mp, "%s: bad header magic number",
+ __func__);
+ ASSERT(0);
+ return -EIO;
+ }
+ if (len == sizeof(xfs_trans_header_t))
+ xlog_recover_add_item(&trans->r_itemq);
+ memcpy(&trans->r_theader, dp, len);
+ return 0;
+ }
+
+ ptr = kmem_alloc(len, KM_SLEEP);
+ memcpy(ptr, dp, len);
+ in_f = (xfs_inode_log_format_t *)ptr;
+
+ /* take the tail entry */
+ item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
+ if (item->ri_total != 0 &&
+ item->ri_total == item->ri_cnt) {
+ /* tail item is in use, get a new one */
+ xlog_recover_add_item(&trans->r_itemq);
+ item = list_entry(trans->r_itemq.prev,
+ xlog_recover_item_t, ri_list);
+ }
+
+ if (item->ri_total == 0) { /* first region to be added */
+ if (in_f->ilf_size == 0 ||
+ in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) {
+ xfs_warn(log->l_mp,
+ "bad number of regions (%d) in inode log format",
+ in_f->ilf_size);
+ ASSERT(0);
+ kmem_free(ptr);
+ return -EIO;
+ }
+
+ item->ri_total = in_f->ilf_size;
+ item->ri_buf =
+ kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t),
+ KM_SLEEP);
+ }
+ ASSERT(item->ri_total > item->ri_cnt);
+ /* Description region is ri_buf[0] */
+ item->ri_buf[item->ri_cnt].i_addr = ptr;
+ item->ri_buf[item->ri_cnt].i_len = len;
+ item->ri_cnt++;
+ trace_xfs_log_recover_item_add(log, trans, item, 0);
return 0;
}
/*
+ * Free up any resources allocated by the transaction
+ *
+ * Remember that EFIs, EFDs, and IUNLINKs are handled later.
+ */
+STATIC void
+xlog_recover_free_trans(
+ struct xlog_recover *trans)
+{
+ xlog_recover_item_t *item, *n;
+ int i;
+
+ list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) {
+ /* Free the regions in the item. */
+ list_del(&item->ri_list);
+ for (i = 0; i < item->ri_cnt; i++)
+ kmem_free(item->ri_buf[i].i_addr);
+ /* Free the item itself */
+ kmem_free(item->ri_buf);
+ kmem_free(item);
+ }
+ /* Free the transaction recover structure */
+ kmem_free(trans);
+}
+
+/*
+ * On error or completion, trans is freed.
+ */
+STATIC int
+xlog_recovery_process_trans(
+ struct xlog *log,
+ struct xlog_recover *trans,
+ xfs_caddr_t dp,
+ unsigned int len,
+ unsigned int flags,
+ int pass)
+{
+ int error = 0;
+ bool freeit = false;
+
+ /* mask off ophdr transaction container flags */
+ flags &= ~XLOG_END_TRANS;
+ if (flags & XLOG_WAS_CONT_TRANS)
+ flags &= ~XLOG_CONTINUE_TRANS;
+
+ /*
+ * Callees must not free the trans structure. We'll decide if we need to
+ * free it or not based on the operation being done and it's result.
+ */
+ switch (flags) {
+ /* expected flag values */
+ case 0:
+ case XLOG_CONTINUE_TRANS:
+ error = xlog_recover_add_to_trans(log, trans, dp, len);
+ break;
+ case XLOG_WAS_CONT_TRANS:
+ error = xlog_recover_add_to_cont_trans(log, trans, dp, len);
+ break;
+ case XLOG_COMMIT_TRANS:
+ error = xlog_recover_commit_trans(log, trans, pass);
+ /* success or fail, we are now done with this transaction. */
+ freeit = true;
+ break;
+
+ /* unexpected flag values */
+ case XLOG_UNMOUNT_TRANS:
+ /* just skip trans */
+ xfs_warn(log->l_mp, "%s: Unmount LR", __func__);
+ freeit = true;
+ break;
+ case XLOG_START_TRANS:
+ default:
+ xfs_warn(log->l_mp, "%s: bad flag 0x%x", __func__, flags);
+ ASSERT(0);
+ error = -EIO;
+ break;
+ }
+ if (error || freeit)
+ xlog_recover_free_trans(trans);
+ return error;
+}
+
+/*
+ * Lookup the transaction recovery structure associated with the ID in the
+ * current ophdr. If the transaction doesn't exist and the start flag is set in
+ * the ophdr, then allocate a new transaction for future ID matches to find.
+ * Either way, return what we found during the lookup - an existing transaction
+ * or nothing.
+ */
+STATIC struct xlog_recover *
+xlog_recover_ophdr_to_trans(
+ struct hlist_head rhash[],
+ struct xlog_rec_header *rhead,
+ struct xlog_op_header *ohead)
+{
+ struct xlog_recover *trans;
+ xlog_tid_t tid;
+ struct hlist_head *rhp;
+
+ tid = be32_to_cpu(ohead->oh_tid);
+ rhp = &rhash[XLOG_RHASH(tid)];
+ hlist_for_each_entry(trans, rhp, r_list) {
+ if (trans->r_log_tid == tid)
+ return trans;
+ }
+
+ /*
+ * skip over non-start transaction headers - we could be
+ * processing slack space before the next transaction starts
+ */
+ if (!(ohead->oh_flags & XLOG_START_TRANS))
+ return NULL;
+
+ ASSERT(be32_to_cpu(ohead->oh_len) == 0);
+
+ /*
+ * This is a new transaction so allocate a new recovery container to
+ * hold the recovery ops that will follow.
+ */
+ trans = kmem_zalloc(sizeof(struct xlog_recover), KM_SLEEP);
+ trans->r_log_tid = tid;
+ trans->r_lsn = be64_to_cpu(rhead->h_lsn);
+ INIT_LIST_HEAD(&trans->r_itemq);
+ INIT_HLIST_NODE(&trans->r_list);
+ hlist_add_head(&trans->r_list, rhp);
+
+ /*
+ * Nothing more to do for this ophdr. Items to be added to this new
+ * transaction will be in subsequent ophdr containers.
+ */
+ return NULL;
+}
+
+STATIC int
+xlog_recover_process_ophdr(
+ struct xlog *log,
+ struct hlist_head rhash[],
+ struct xlog_rec_header *rhead,
+ struct xlog_op_header *ohead,
+ xfs_caddr_t dp,
+ xfs_caddr_t end,
+ int pass)
+{
+ struct xlog_recover *trans;
+ unsigned int len;
+
+ /* Do we understand who wrote this op? */
+ if (ohead->oh_clientid != XFS_TRANSACTION &&
+ ohead->oh_clientid != XFS_LOG) {
+ xfs_warn(log->l_mp, "%s: bad clientid 0x%x",
+ __func__, ohead->oh_clientid);
+ ASSERT(0);
+ return -EIO;
+ }
+
+ /*
+ * Check the ophdr contains all the data it is supposed to contain.
+ */
+ len = be32_to_cpu(ohead->oh_len);
+ if (dp + len > end) {
+ xfs_warn(log->l_mp, "%s: bad length 0x%x", __func__, len);
+ WARN_ON(1);
+ return -EIO;
+ }
+
+ trans = xlog_recover_ophdr_to_trans(rhash, rhead, ohead);
+ if (!trans) {
+ /* nothing to do, so skip over this ophdr */
+ return 0;
+ }
+
+ return xlog_recovery_process_trans(log, trans, dp, len,
+ ohead->oh_flags, pass);
+}
+
+/*
* There are two valid states of the r_state field. 0 indicates that the
* transaction structure is in a normal state. We have either seen the
* start of the transaction or the last operation we added was not a partial
@@ -3546,86 +3666,30 @@ xlog_recover_process_data(
xfs_caddr_t dp,
int pass)
{
- xfs_caddr_t lp;
+ struct xlog_op_header *ohead;
+ xfs_caddr_t end;
int num_logops;
- xlog_op_header_t *ohead;
- xlog_recover_t *trans;
- xlog_tid_t tid;
int error;
- unsigned long hash;
- uint flags;
- lp = dp + be32_to_cpu(rhead->h_len);
+ end = dp + be32_to_cpu(rhead->h_len);
num_logops = be32_to_cpu(rhead->h_num_logops);
/* check the log format matches our own - else we can't recover */
if (xlog_header_check_recover(log->l_mp, rhead))
- return (XFS_ERROR(EIO));
-
- while ((dp < lp) && num_logops) {
- ASSERT(dp + sizeof(xlog_op_header_t) <= lp);
- ohead = (xlog_op_header_t *)dp;
- dp += sizeof(xlog_op_header_t);
- if (ohead->oh_clientid != XFS_TRANSACTION &&
- ohead->oh_clientid != XFS_LOG) {
- xfs_warn(log->l_mp, "%s: bad clientid 0x%x",
- __func__, ohead->oh_clientid);
- ASSERT(0);
- return (XFS_ERROR(EIO));
- }
- tid = be32_to_cpu(ohead->oh_tid);
- hash = XLOG_RHASH(tid);
- trans = xlog_recover_find_tid(&rhash[hash], tid);
- if (trans == NULL) { /* not found; add new tid */
- if (ohead->oh_flags & XLOG_START_TRANS)
- xlog_recover_new_tid(&rhash[hash], tid,
- be64_to_cpu(rhead->h_lsn));
- } else {
- if (dp + be32_to_cpu(ohead->oh_len) > lp) {
- xfs_warn(log->l_mp, "%s: bad length 0x%x",
- __func__, be32_to_cpu(ohead->oh_len));
- WARN_ON(1);
- return (XFS_ERROR(EIO));
- }
- flags = ohead->oh_flags & ~XLOG_END_TRANS;
- if (flags & XLOG_WAS_CONT_TRANS)
- flags &= ~XLOG_CONTINUE_TRANS;
- switch (flags) {
- case XLOG_COMMIT_TRANS:
- error = xlog_recover_commit_trans(log,
- trans, pass);
- break;
- case XLOG_UNMOUNT_TRANS:
- error = xlog_recover_unmount_trans(log);
- break;
- case XLOG_WAS_CONT_TRANS:
- error = xlog_recover_add_to_cont_trans(log,
- trans, dp,
- be32_to_cpu(ohead->oh_len));
- break;
- case XLOG_START_TRANS:
- xfs_warn(log->l_mp, "%s: bad transaction",
- __func__);
- ASSERT(0);
- error = XFS_ERROR(EIO);
- break;
- case 0:
- case XLOG_CONTINUE_TRANS:
- error = xlog_recover_add_to_trans(log, trans,
- dp, be32_to_cpu(ohead->oh_len));
- break;
- default:
- xfs_warn(log->l_mp, "%s: bad flag 0x%x",
- __func__, flags);
- ASSERT(0);
- error = XFS_ERROR(EIO);
- break;
- }
- if (error) {
- xlog_recover_free_trans(trans);
- return error;
- }
- }
+ return -EIO;
+
+ while ((dp < end) && num_logops) {
+
+ ohead = (struct xlog_op_header *)dp;
+ dp += sizeof(*ohead);
+ ASSERT(dp <= end);
+
+ /* errors will abort recovery */
+ error = xlog_recover_process_ophdr(log, rhash, rhead, ohead,
+ dp, end, pass);
+ if (error)
+ return error;
+
dp += be32_to_cpu(ohead->oh_len);
num_logops--;
}
@@ -3669,7 +3733,7 @@ xlog_recover_process_efi(
*/
set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
xfs_efi_release(efip, efip->efi_format.efi_nextents);
- return XFS_ERROR(EIO);
+ return -EIO;
}
}
@@ -3969,7 +4033,7 @@ xlog_unpack_data_crc(
* CRC protection by punting an error back up the stack.
*/
if (xfs_sb_version_hascrc(&log->l_mp->m_sb))
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
}
return 0;
@@ -4018,14 +4082,14 @@ xlog_valid_rec_header(
if (unlikely(rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) {
XFS_ERROR_REPORT("xlog_valid_rec_header(1)",
XFS_ERRLEVEL_LOW, log->l_mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
if (unlikely(
(!rhead->h_version ||
(be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) {
xfs_warn(log->l_mp, "%s: unrecognised log version (%d).",
__func__, be32_to_cpu(rhead->h_version));
- return XFS_ERROR(EIO);
+ return -EIO;
}
/* LR body must have data or it wouldn't have been written */
@@ -4033,12 +4097,12 @@ xlog_valid_rec_header(
if (unlikely( hlen <= 0 || hlen > INT_MAX )) {
XFS_ERROR_REPORT("xlog_valid_rec_header(2)",
XFS_ERRLEVEL_LOW, log->l_mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
if (unlikely( blkno > log->l_logBBsize || blkno > INT_MAX )) {
XFS_ERROR_REPORT("xlog_valid_rec_header(3)",
XFS_ERRLEVEL_LOW, log->l_mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
return 0;
}
@@ -4081,7 +4145,7 @@ xlog_do_recovery_pass(
*/
hbp = xlog_get_bp(log, 1);
if (!hbp)
- return ENOMEM;
+ return -ENOMEM;
error = xlog_bread(log, tail_blk, 1, hbp, &offset);
if (error)
@@ -4110,49 +4174,21 @@ xlog_do_recovery_pass(
}
if (!hbp)
- return ENOMEM;
+ return -ENOMEM;
dbp = xlog_get_bp(log, BTOBB(h_size));
if (!dbp) {
xlog_put_bp(hbp);
- return ENOMEM;
+ return -ENOMEM;
}
memset(rhash, 0, sizeof(rhash));
- if (tail_blk <= head_blk) {
- for (blk_no = tail_blk; blk_no < head_blk; ) {
- error = xlog_bread(log, blk_no, hblks, hbp, &offset);
- if (error)
- goto bread_err2;
-
- rhead = (xlog_rec_header_t *)offset;
- error = xlog_valid_rec_header(log, rhead, blk_no);
- if (error)
- goto bread_err2;
-
- /* blocks in data section */
- bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
- error = xlog_bread(log, blk_no + hblks, bblks, dbp,
- &offset);
- if (error)
- goto bread_err2;
-
- error = xlog_unpack_data(rhead, offset, log);
- if (error)
- goto bread_err2;
-
- error = xlog_recover_process_data(log,
- rhash, rhead, offset, pass);
- if (error)
- goto bread_err2;
- blk_no += bblks + hblks;
- }
- } else {
+ blk_no = tail_blk;
+ if (tail_blk > head_blk) {
/*
* Perform recovery around the end of the physical log.
* When the head is not on the same cycle number as the tail,
- * we can't do a sequential recovery as above.
+ * we can't do a sequential recovery.
*/
- blk_no = tail_blk;
while (blk_no < log->l_logBBsize) {
/*
* Check for header wrapping around physical end-of-log
@@ -4266,34 +4302,35 @@ xlog_do_recovery_pass(
ASSERT(blk_no >= log->l_logBBsize);
blk_no -= log->l_logBBsize;
+ }
- /* read first part of physical log */
- while (blk_no < head_blk) {
- error = xlog_bread(log, blk_no, hblks, hbp, &offset);
- if (error)
- goto bread_err2;
+ /* read first part of physical log */
+ while (blk_no < head_blk) {
+ error = xlog_bread(log, blk_no, hblks, hbp, &offset);
+ if (error)
+ goto bread_err2;
- rhead = (xlog_rec_header_t *)offset;
- error = xlog_valid_rec_header(log, rhead, blk_no);
- if (error)
- goto bread_err2;
+ rhead = (xlog_rec_header_t *)offset;
+ error = xlog_valid_rec_header(log, rhead, blk_no);
+ if (error)
+ goto bread_err2;
- bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
- error = xlog_bread(log, blk_no+hblks, bblks, dbp,
- &offset);
- if (error)
- goto bread_err2;
+ /* blocks in data section */
+ bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
+ error = xlog_bread(log, blk_no+hblks, bblks, dbp,
+ &offset);
+ if (error)
+ goto bread_err2;
- error = xlog_unpack_data(rhead, offset, log);
- if (error)
- goto bread_err2;
+ error = xlog_unpack_data(rhead, offset, log);
+ if (error)
+ goto bread_err2;
- error = xlog_recover_process_data(log, rhash,
- rhead, offset, pass);
- if (error)
- goto bread_err2;
- blk_no += bblks + hblks;
- }
+ error = xlog_recover_process_data(log, rhash,
+ rhead, offset, pass);
+ if (error)
+ goto bread_err2;
+ blk_no += bblks + hblks;
}
bread_err2:
@@ -4388,7 +4425,7 @@ xlog_do_recover(
* If IO errors happened during recovery, bail out.
*/
if (XFS_FORCED_SHUTDOWN(log->l_mp)) {
- return (EIO);
+ return -EIO;
}
/*
@@ -4413,16 +4450,12 @@ xlog_do_recover(
XFS_BUF_UNASYNC(bp);
bp->b_ops = &xfs_sb_buf_ops;
- if (XFS_FORCED_SHUTDOWN(log->l_mp)) {
- xfs_buf_relse(bp);
- return XFS_ERROR(EIO);
- }
-
- xfs_buf_iorequest(bp);
- error = xfs_buf_iowait(bp);
+ error = xfs_buf_submit_wait(bp);
if (error) {
- xfs_buf_ioerror_alert(bp, __func__);
- ASSERT(0);
+ if (!XFS_FORCED_SHUTDOWN(log->l_mp)) {
+ xfs_buf_ioerror_alert(bp, __func__);
+ ASSERT(0);
+ }
xfs_buf_relse(bp);
return error;
}
@@ -4492,7 +4525,19 @@ xlog_recover(
"Please recover the log on a kernel that supports the unknown features.",
(log->l_mp->m_sb.sb_features_log_incompat &
XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN));
- return EINVAL;
+ return -EINVAL;
+ }
+
+ /*
+ * Delay log recovery if the debug hook is set. This is debug
+ * instrumention to coordinate simulation of I/O failures with
+ * log recovery.
+ */
+ if (xfs_globals.log_recovery_delay) {
+ xfs_notice(log->l_mp,
+ "Delaying log recovery for %d seconds.",
+ xfs_globals.log_recovery_delay);
+ msleep(xfs_globals.log_recovery_delay * 1000);
}
xfs_notice(log->l_mp, "Starting recovery (logdev: %s)",