diff options
author | Bryan O'Sullivan <bos@pathscale.com> | 2007-03-15 14:45:07 -0700 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2007-04-18 20:20:58 -0700 |
commit | 9783ab405844202b452ac673677e6c8f8c9a6a99 (patch) | |
tree | 32aac9ac3ff1089a7ecb05c4ef0b825a95227694 /drivers/infiniband/hw/ipath/ipath_iba6120.c | |
parent | 820054b7ca7a54ba94d89db4b3c53a24d2d66633 (diff) |
IB/ipath: Improve handling and reporting of parity errors
Mostly cleanup.
Signed-off-by: Dave Olson <dave.olson@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/ipath/ipath_iba6120.c')
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_iba6120.c | 58 |
1 files changed, 39 insertions, 19 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index 5c50383880f..aa2b5194433 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -321,6 +321,12 @@ static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = {
 	INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
 };
 
+#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
+		INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \
+		<< INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
+
+static int ipath_pe_txe_recover(struct ipath_devdata *);
+
 /**
  * ipath_pe_handle_hwerrors - display hardware errors.
  * @dd: the infinipath device
@@ -394,25 +400,8 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 	 * occur if a processor speculative read is done to the PIO
 	 * buffer while we are sending a packet, for example.
 	 */
-	if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
-		       INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
-		      << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
-		ipath_stats.sps_txeparity++;
-		ipath_dbg("Recovering from TXE parity error (%llu), "
-			  "hwerrstatus=%llx\n",
-			  (unsigned long long) ipath_stats.sps_txeparity,
-			  (unsigned long long) hwerrs);
-		ipath_disarm_senderrbufs(dd);
-		hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
-			     INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
-			    << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
-		if (!hwerrs) { /* else leave in freeze mode */
-			ipath_write_kreg(dd,
-					 dd->ipath_kregs->kr_control,
-					 dd->ipath_control);
-			return;
-		}
-	}
+	if ((hwerrs & TXE_PIO_PARITY) && ipath_pe_txe_recover(dd))
+		hwerrs &= ~TXE_PIO_PARITY;
 	if (hwerrs) {
 		/*
 		 * if any set that we aren't ignoring only make the
@@ -581,6 +570,8 @@ static void ipath_pe_init_hwerrors(struct ipath_devdata *dd)
 
 	if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
 		ipath_dev_err(dd, "MemBIST did not complete!\n");
+	if (extsval & INFINIPATH_EXTS_MEMBIST_FOUND)
+		ipath_dbg("MemBIST corrected\n");
 
 	val = ~0ULL;	/* barring bugs, all hwerrors become interrupts, */
 
@@ -1330,6 +1321,35 @@ static void ipath_pe_free_irq(struct ipath_devdata *dd)
 	dd->ipath_irq = 0;
 }
 
+/*
+ * On platforms using this chip, and not having ordered WC stores, we
+ * can get TXE parity errors due to speculative reads to the PIO buffers,
+ * and this, due to a chip bug can result in (many) false parity error
+ * reports. So it's a debug print on those, and an info print on systems
+ * where the speculative reads don't occur.
+ * Because we can get lots of false errors, we have no upper limit
+ * on recovery attempts on those platforms.
+ */
+static int ipath_pe_txe_recover(struct ipath_devdata *dd)
+{
+	if (ipath_unordered_wc())
+		ipath_dbg("Recovering from TXE PIO parity error\n");
+	else {
+		int cnt = ++ipath_stats.sps_txeparity;
+		if (cnt >= IPATH_MAX_PARITY_ATTEMPTS) {
+			if (cnt == IPATH_MAX_PARITY_ATTEMPTS)
+				ipath_dev_err(dd,
+					"Too many attempts to recover from "
+					"TXE parity, giving up\n");
+			return 0;
+		}
+		dev_info(&dd->pcidev->dev,
+			"Recovering from TXE PIO parity error\n");
+	}
+	ipath_disarm_senderrbufs(dd, 1);
+	return 1;
+}
+
 /**
  * ipath_init_iba6120_funcs - set up the chip-specific function pointers
  * @dd: the infinipath device