diff options
-rw-r--r-- | drivers/edac/amd64_edac.c | 137 |
1 files changed, 92 insertions, 45 deletions
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 1ceb8e27637..c431544519b 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1039,6 +1039,37 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, int channel, csrow; u32 page, offset; + error_address_to_page_and_offset(sys_addr, &page, &offset); + + /* + * Find out which node the error address belongs to. This may be + * different from the node that detected the error. + */ + src_mci = find_mc_by_sys_addr(mci, sys_addr); + if (!src_mci) { + amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n", + (unsigned long)sys_addr); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, + page, offset, syndrome, + -1, -1, -1, + EDAC_MOD_STR, + "failed to map error addr to a node", + NULL); + return; + } + + /* Now map the sys_addr to a CSROW */ + csrow = sys_addr_to_csrow(src_mci, sys_addr); + if (csrow < 0) { + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, + page, offset, syndrome, + -1, -1, -1, + EDAC_MOD_STR, + "failed to map error addr to a csrow", + NULL); + return; + } + /* CHIPKILL enabled */ if (pvt->nbcfg & NBCFG_CHIPKILL) { channel = get_channel_from_ecc_syndrome(mci, syndrome); @@ -1048,9 +1079,15 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, * 2 DIMMs is in error. So we need to ID 'both' of them * as suspect. */ - amd64_mc_warn(mci, "unknown syndrome 0x%04x - possible " - "error reporting race\n", syndrome); - edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR); + amd64_mc_warn(src_mci, "unknown syndrome 0x%04x - " + "possible error reporting race\n", + syndrome); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, + page, offset, syndrome, + csrow, -1, -1, + EDAC_MOD_STR, + "unknown syndrome - possible error reporting race", + NULL); return; } } else { @@ -1065,28 +1102,10 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, channel = ((sys_addr & BIT(3)) != 0); } - /* - * Find out which node the error address belongs to. This may be - * different from the node that detected the error. - */ - src_mci = find_mc_by_sys_addr(mci, sys_addr); - if (!src_mci) { - amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n", - (unsigned long)sys_addr); - edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR); - return; - } - - /* Now map the sys_addr to a CSROW */ - csrow = sys_addr_to_csrow(src_mci, sys_addr); - if (csrow < 0) { - edac_mc_handle_ce_no_info(src_mci, EDAC_MOD_STR); - } else { - error_address_to_page_and_offset(sys_addr, &page, &offset); - - edac_mc_handle_ce(src_mci, page, offset, syndrome, csrow, - channel, EDAC_MOD_STR); - } + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, src_mci, + page, offset, syndrome, + csrow, channel, -1, + EDAC_MOD_STR, "", NULL); } static int ddr2_cs_size(unsigned i, bool dct_width) @@ -1592,15 +1611,20 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, u32 page, offset; int nid, csrow, chan = 0; + error_address_to_page_and_offset(sys_addr, &page, &offset); + csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan); if (csrow < 0) { - edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, + page, offset, syndrome, + -1, -1, -1, + EDAC_MOD_STR, + "failed to map error addr to a csrow", + NULL); return; } - error_address_to_page_and_offset(sys_addr, &page, &offset); - /* * We need the syndromes for channel detection only when we're * ganged. Otherwise @chan should already contain the channel at @@ -1609,16 +1633,10 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, if (dct_ganging_enabled(pvt)) chan = get_channel_from_ecc_syndrome(mci, syndrome); - if (chan >= 0) - edac_mc_handle_ce(mci, page, offset, syndrome, csrow, chan, - EDAC_MOD_STR); - else - /* - * Channel unknown, report all channels on this CSROW as failed. - */ - for (chan = 0; chan < mci->csrows[csrow].nr_channels; chan++) - edac_mc_handle_ce(mci, page, offset, syndrome, - csrow, chan, EDAC_MOD_STR); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, + page, offset, syndrome, + csrow, chan, -1, + EDAC_MOD_STR, "", NULL); } /* @@ -1899,7 +1917,12 @@ static void amd64_handle_ce(struct mem_ctl_info *mci, struct mce *m) /* Ensure that the Error Address is VALID */ if (!(m->status & MCI_STATUS_ADDRV)) { amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n"); - edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, + 0, 0, 0, + -1, -1, -1, + EDAC_MOD_STR, + "HW has no ERROR_ADDRESS available", + NULL); return; } @@ -1923,11 +1946,17 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m) if (!(m->status & MCI_STATUS_ADDRV)) { amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n"); - edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, + 0, 0, 0, + -1, -1, -1, + EDAC_MOD_STR, + "HW has no ERROR_ADDRESS available", + NULL); return; } sys_addr = get_error_address(m); + error_address_to_page_and_offset(sys_addr, &page, &offset); /* * Find out which node the error address belongs to. This may be @@ -1937,7 +1966,11 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m) if (!src_mci) { amd64_mc_err(mci, "ERROR ADDRESS (0x%lx) NOT mapped to a MC\n", (unsigned long)sys_addr); - edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, + page, offset, 0, + -1, -1, -1, + EDAC_MOD_STR, + "ERROR ADDRESS NOT mapped to a MC", NULL); return; } @@ -1947,10 +1980,17 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m) if (csrow < 0) { amd64_mc_err(mci, "ERROR_ADDRESS (0x%lx) NOT mapped to CS\n", (unsigned long)sys_addr); - edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, + page, offset, 0, + -1, -1, -1, + EDAC_MOD_STR, + "ERROR ADDRESS NOT mapped to CS", + NULL); } else { - error_address_to_page_and_offset(sys_addr, &page, &offset); - edac_mc_handle_ue(log_mci, page, offset, csrow, EDAC_MOD_STR); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, + page, offset, 0, + csrow, -1, -1, + EDAC_MOD_STR, "", NULL); } } @@ -2515,6 +2555,7 @@ static int amd64_init_one_instance(struct pci_dev *F2) struct amd64_pvt *pvt = NULL; struct amd64_family_type *fam_type = NULL; struct mem_ctl_info *mci = NULL; + struct edac_mc_layer layers[2]; int err = 0, ret; u8 nid = get_node_id(F2); @@ -2549,7 +2590,13 @@ static int amd64_init_one_instance(struct pci_dev *F2) goto err_siblings; ret = -ENOMEM; - mci = edac_mc_alloc(0, pvt->csels[0].b_cnt, pvt->channel_count, nid); + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = pvt->csels[0].b_cnt; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = pvt->channel_count; + layers[1].is_virt_csrow = false; + mci = new_edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0); if (!mci) goto err_siblings; |