diff options
Diffstat (limited to 'drivers/edac')
-rw-r--r-- | drivers/edac/Kconfig | 8 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.c | 831 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.h | 86 | ||||
-rw-r--r-- | drivers/edac/amd64_edac_inj.c | 25 | ||||
-rw-r--r-- | drivers/edac/amd8131_edac.h | 2 | ||||
-rw-r--r-- | drivers/edac/cell_edac.c | 4 | ||||
-rw-r--r-- | drivers/edac/cpc925_edac.c | 9 | ||||
-rw-r--r-- | drivers/edac/e752x_edac.c | 8 | ||||
-rw-r--r-- | drivers/edac/edac_core.h | 13 | ||||
-rw-r--r-- | drivers/edac/edac_mc.c | 14 | ||||
-rw-r--r-- | drivers/edac/edac_mc_sysfs.c | 57 | ||||
-rw-r--r-- | drivers/edac/i5100_edac.c | 9 | ||||
-rw-r--r-- | drivers/edac/i7core_edac.c | 6 | ||||
-rw-r--r-- | drivers/edac/mce_amd.c | 450 | ||||
-rw-r--r-- | drivers/edac/mce_amd.h | 14 | ||||
-rw-r--r-- | drivers/edac/mce_amd_inj.c | 9 | ||||
-rw-r--r-- | drivers/edac/ppc4xx_edac.c | 6 |
17 files changed, 816 insertions, 735 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index f436a2fa9f3..fe70a341bd8 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -75,11 +75,11 @@ config EDAC_MCE bool config EDAC_AMD64 - tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h" - depends on EDAC_MM_EDAC && AMD_NB && X86_64 && PCI && EDAC_DECODE_MCE + tristate "AMD64 (Opteron, Athlon64) K8, F10h" + depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE help - Support for error detection and correction on the AMD 64 - Families of Memory Controllers (K8, F10h and F11h) + Support for error detection and correction of DRAM ECC errors on + the AMD64 families of memory controllers (K8 and F10h) config EDAC_AMD64_ERROR_INJECTION bool "Sysfs HW Error injection facilities" diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 8521401bbd7..4a5ecc58025 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -15,10 +15,14 @@ module_param(ecc_enable_override, int, 0644); static struct msr __percpu *msrs; -/* Lookup table for all possible MC control instances */ -struct amd64_pvt; -static struct mem_ctl_info *mci_lookup[EDAC_MAX_NUMNODES]; -static struct amd64_pvt *pvt_lookup[EDAC_MAX_NUMNODES]; +/* + * count successfully initialized driver instances for setup_pci_device() + */ +static atomic_t drv_instances = ATOMIC_INIT(0); + +/* Per-node driver instances */ +static struct mem_ctl_info **mcis; +static struct ecc_settings **ecc_stngs; /* * Address to DRAM bank mapping: see F2x80 for K8 and F2x[1,0]80 for Fam10 and @@ -62,7 +66,7 @@ static int ddr3_dbam[] = { [0] = -1, [5 ... 6] = 1024, [7 ... 8] = 2048, [9 ... 10] = 4096, - [11] = 8192, + [11] = 8192, }; /* @@ -73,7 +77,11 @@ static int ddr3_dbam[] = { [0] = -1, *FIXME: Produce a better mapping/linearisation. */ -struct scrubrate scrubrates[] = { + +struct scrubrate { + u32 scrubval; /* bit pattern for scrub rate */ + u32 bandwidth; /* bandwidth consumed (bytes/sec) */ +} scrubrates[] = { { 0x01, 1600000000UL}, { 0x02, 800000000UL}, { 0x03, 400000000UL}, @@ -117,8 +125,7 @@ struct scrubrate scrubrates[] = { * scan the scrub rate mapping table for a close or matching bandwidth value to * issue. If requested is too big, then use last maximum value found. */ -static int amd64_search_set_scrub_rate(struct pci_dev *ctl, u32 new_bw, - u32 min_scrubrate) +static int __amd64_set_scrub_rate(struct pci_dev *ctl, u32 new_bw, u32 min_rate) { u32 scrubval; int i; @@ -134,7 +141,7 @@ static int amd64_search_set_scrub_rate(struct pci_dev *ctl, u32 new_bw, * skip scrub rates which aren't recommended * (see F10 BKDG, F3x58) */ - if (scrubrates[i].scrubval < min_scrubrate) + if (scrubrates[i].scrubval < min_rate) continue; if (scrubrates[i].bandwidth <= new_bw) @@ -148,64 +155,41 @@ static int amd64_search_set_scrub_rate(struct pci_dev *ctl, u32 new_bw, } scrubval = scrubrates[i].scrubval; - if (scrubval) - edac_printk(KERN_DEBUG, EDAC_MC, - "Setting scrub rate bandwidth: %u\n", - scrubrates[i].bandwidth); - else - edac_printk(KERN_DEBUG, EDAC_MC, "Turning scrubbing off.\n"); pci_write_bits32(ctl, K8_SCRCTRL, scrubval, 0x001F); + if (scrubval) + return scrubrates[i].bandwidth; + return 0; } -static int amd64_set_scrub_rate(struct mem_ctl_info *mci, u32 bandwidth) +static int amd64_set_scrub_rate(struct mem_ctl_info *mci, u32 bw) { struct amd64_pvt *pvt = mci->pvt_info; - u32 min_scrubrate = 0x0; - - switch (boot_cpu_data.x86) { - case 0xf: - min_scrubrate = K8_MIN_SCRUB_RATE_BITS; - break; - case 0x10: - min_scrubrate = F10_MIN_SCRUB_RATE_BITS; - break; - case 0x11: - min_scrubrate = F11_MIN_SCRUB_RATE_BITS; - break; - default: - amd64_printk(KERN_ERR, "Unsupported family!\n"); - return -EINVAL; - } - return amd64_search_set_scrub_rate(pvt->misc_f3_ctl, bandwidth, - min_scrubrate); + return __amd64_set_scrub_rate(pvt->F3, bw, pvt->min_scrubrate); } -static int amd64_get_scrub_rate(struct mem_ctl_info *mci, u32 *bw) +static int amd64_get_scrub_rate(struct mem_ctl_info *mci) { struct amd64_pvt *pvt = mci->pvt_info; u32 scrubval = 0; - int status = -1, i; + int i, retval = -EINVAL; - amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_SCRCTRL, &scrubval); + amd64_read_pci_cfg(pvt->F3, K8_SCRCTRL, &scrubval); scrubval = scrubval & 0x001F; - edac_printk(KERN_DEBUG, EDAC_MC, - "pci-read, sdram scrub control value: %d \n", scrubval); + amd64_debug("pci-read, sdram scrub control value: %d\n", scrubval); for (i = 0; i < ARRAY_SIZE(scrubrates); i++) { if (scrubrates[i].scrubval == scrubval) { - *bw = scrubrates[i].bandwidth; - status = 0; + retval = scrubrates[i].bandwidth; break; } } - - return status; + return retval; } /* Map from a CSROW entry to the mask entry that operates on it */ @@ -314,9 +298,7 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci, if (unlikely((intlv_en != 0x01) && (intlv_en != 0x03) && (intlv_en != 0x07))) { - amd64_printk(KERN_WARNING, "junk value of 0x%x extracted from " - "IntlvEn field of DRAM Base Register for node 0: " - "this probably indicates a BIOS bug.\n", intlv_en); + amd64_warn("DRAM Base[IntlvEn] junk value: 0x%x, BIOS bug?\n", intlv_en); return NULL; } @@ -332,11 +314,9 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci, /* sanity test for sys_addr */ if (unlikely(!amd64_base_limit_match(pvt, sys_addr, node_id))) { - amd64_printk(KERN_WARNING, - "%s(): sys_addr 0x%llx falls outside base/limit " - "address range for node %d with node interleaving " - "enabled.\n", - __func__, sys_addr, node_id); + amd64_warn("%s: sys_addr 0x%llx falls outside base/limit address" + "range for node %d with node interleaving enabled.\n", + __func__, sys_addr, node_id); return NULL; } @@ -788,9 +768,8 @@ static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr) csrow = input_addr_to_csrow(mci, sys_addr_to_input_addr(mci, sys_addr)); if (csrow == -1) - amd64_mc_printk(mci, KERN_ERR, - "Failed to translate InputAddr to csrow for " - "address 0x%lx\n", (unsigned long)sys_addr); + amd64_mc_err(mci, "Failed to translate InputAddr to csrow for " + "address 0x%lx\n", (unsigned long)sys_addr); return csrow; } @@ -801,21 +780,6 @@ static u16 extract_syndrome(struct err_regs *err) return ((err->nbsh >> 15) & 0xff) | ((err->nbsl >> 16) & 0xff00); } -static void amd64_cpu_display_info(struct amd64_pvt *pvt) -{ - if (boot_cpu_data.x86 == 0x11) - edac_printk(KERN_DEBUG, EDAC_MC, "F11h CPU detected\n"); - else if (boot_cpu_data.x86 == 0x10) - edac_printk(KERN_DEBUG, EDAC_MC, "F10h CPU detected\n"); - else if (boot_cpu_data.x86 == 0xf) - edac_printk(KERN_DEBUG, EDAC_MC, "%s detected\n", - (pvt->ext_model >= K8_REV_F) ? - "Rev F or later" : "Rev E or earlier"); - else - /* we'll hardly ever ever get here */ - edac_printk(KERN_ERR, EDAC_MC, "Unknown cpu!\n"); -} - /* * Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs * are ECC capable. @@ -893,8 +857,7 @@ static void amd64_dump_misc_regs(struct amd64_pvt *pvt) return; } - amd64_printk(KERN_INFO, "using %s syndromes.\n", - ((pvt->syn_type == 8) ? "x8" : "x4")); + amd64_info("using %s syndromes.\n", ((pvt->syn_type == 8) ? "x8" : "x4")); /* Only if NOT ganged does dclr1 have valid info */ if (!dct_ganging_enabled(pvt)) @@ -915,10 +878,10 @@ static void amd64_dump_misc_regs(struct amd64_pvt *pvt) /* Read in both of DBAM registers */ static void amd64_read_dbam_reg(struct amd64_pvt *pvt) { - amd64_read_pci_cfg(pvt->dram_f2_ctl, DBAM0, &pvt->dbam0); + amd64_read_pci_cfg(pvt->F2, DBAM0, &pvt->dbam0); if (boot_cpu_data.x86 >= 0x10) - amd64_read_pci_cfg(pvt->dram_f2_ctl, DBAM1, &pvt->dbam1); + amd64_read_pci_cfg(pvt->F2, DBAM1, &pvt->dbam1); } /* @@ -965,14 +928,8 @@ static void amd64_set_dct_base_and_mask(struct amd64_pvt *pvt) pvt->dcsm_mask = REV_F_F1Xh_DCSM_MASK_BITS; pvt->dcs_mask_notused = REV_F_F1Xh_DCS_NOTUSED_BITS; pvt->dcs_shift = REV_F_F1Xh_DCS_SHIFT; - - if (boot_cpu_data.x86 == 0x11) { - pvt->cs_count = 4; - pvt->num_dcsm = 2; - } else { - pvt->cs_count = 8; - pvt->num_dcsm = 4; - } + pvt->cs_count = 8; + pvt->num_dcsm = 4; } } @@ -987,14 +944,14 @@ static void amd64_read_dct_base_mask(struct amd64_pvt *pvt) for (cs = 0; cs < pvt->cs_count; cs++) { reg = K8_DCSB0 + (cs * 4); - if (!amd64_read_pci_cfg(pvt->dram_f2_ctl, reg, &pvt->dcsb0[cs])) + if (!amd64_read_pci_cfg(pvt->F2, reg, &pvt->dcsb0[cs])) debugf0(" DCSB0[%d]=0x%08x reg: F2x%x\n", cs, pvt->dcsb0[cs], reg); /* If DCT are NOT ganged, then read in DCT1's base */ if (boot_cpu_data.x86 >= 0x10 && !dct_ganging_enabled(pvt)) { reg = F10_DCSB1 + (cs * 4); - if (!amd64_read_pci_cfg(pvt->dram_f2_ctl, reg, + if (!amd64_read_pci_cfg(pvt->F2, reg, &pvt->dcsb1[cs])) debugf0(" DCSB1[%d]=0x%08x reg: F2x%x\n", cs, pvt->dcsb1[cs], reg); @@ -1005,14 +962,14 @@ static void amd64_read_dct_base_mask(struct amd64_pvt *pvt) for (cs = 0; cs < pvt->num_dcsm; cs++) { reg = K8_DCSM0 + (cs * 4); - if (!amd64_read_pci_cfg(pvt->dram_f2_ctl, reg, &pvt->dcsm0[cs])) + if (!amd64_read_pci_cfg(pvt->F2, reg, &pvt->dcsm0[cs])) debugf0(" DCSM0[%d]=0x%08x reg: F2x%x\n", cs, pvt->dcsm0[cs], reg); /* If DCT are NOT ganged, then read in DCT1's mask */ if (boot_cpu_data.x86 >= 0x10 && !dct_ganging_enabled(pvt)) { reg = F10_DCSM1 + (cs * 4); - if (!amd64_read_pci_cfg(pvt->dram_f2_ctl, reg, + if (!amd64_read_pci_cfg(pvt->F2, reg, &pvt->dcsm1[cs])) debugf0(" DCSM1[%d]=0x%08x reg: F2x%x\n", cs, pvt->dcsm1[cs], reg); @@ -1022,7 +979,7 @@ static void amd64_read_dct_base_mask(struct amd64_pvt *pvt) } } -static enum mem_type amd64_determine_memory_type(struct amd64_pvt *pvt) +static enum mem_type amd64_determine_memory_type(struct amd64_pvt *pvt, int cs) { enum mem_type type; @@ -1035,7 +992,7 @@ static enum mem_type amd64_determine_memory_type(struct amd64_pvt *pvt) type = (pvt->dclr0 & BIT(18)) ? MEM_DDR : MEM_RDDR; } - debugf1(" Memory type is: %s\n", edac_mem_types[type]); + amd64_info("CS%d: %s\n", cs, edac_mem_types[type]); return type; } @@ -1053,17 +1010,16 @@ static int k8_early_channel_count(struct amd64_pvt *pvt) { int flag, err = 0; - err = amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0); + err = amd64_read_pci_cfg(pvt->F2, F10_DCLR_0, &pvt->dclr0); if (err) return err; - if ((boot_cpu_data.x86_model >> 4) >= K8_REV_F) { + if (pvt->ext_model >= K8_REV_F) /* RevF (NPT) and later */ flag = pvt->dclr0 & F10_WIDTH_128; - } else { + else /* RevE and earlier */ flag = pvt->dclr0 & REVE_WIDTH_128; - } /* not used */ pvt->dclr1 = 0; @@ -1090,14 +1046,14 @@ static void k8_read_dram_base_limit(struct amd64_pvt *pvt, int dram) u32 low; u32 off = dram << 3; /* 8 bytes between DRAM entries */ - amd64_read_pci_cfg(pvt->addr_f1_ctl, K8_DRAM_BASE_LOW + off, &low); + amd64_read_pci_cfg(pvt->F1, K8_DRAM_BASE_LOW + off, &low); /* Extract parts into separate data entries */ pvt->dram_base[dram] = ((u64) low & 0xFFFF0000) << 8; pvt->dram_IntlvEn[dram] = (low >> 8) & 0x7; pvt->dram_rw_en[dram] = (low & 0x3); - amd64_read_pci_cfg(pvt->addr_f1_ctl, K8_DRAM_LIMIT_LOW + off, &low); + amd64_read_pci_cfg(pvt->F1, K8_DRAM_LIMIT_LOW + off, &low); /* * Extract parts into separate data entries. Limit is the HIGHEST memory @@ -1127,9 +1083,8 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, * 2 DIMMs is in error. So we need to ID 'both' of them * as suspect. */ - amd64_mc_printk(mci, KERN_WARNING, - "unknown syndrome 0x%04x - possible " - "error reporting race\n", syndrome); + amd64_mc_warn(mci, "unknown syndrome 0x%04x - possible " + "error reporting race\n", syndrome); edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR); return; } @@ -1151,8 +1106,7 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, */ src_mci = find_mc_by_sys_addr(mci, sys_addr); if (!src_mci) { - amd64_mc_printk(mci, KERN_ERR, - "failed to map error address 0x%lx to a node\n", + amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n", (unsigned long)sys_addr); edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR); return; @@ -1220,7 +1174,7 @@ static int f10_early_channel_count(struct amd64_pvt *pvt) * both controllers since DIMMs can be placed in either one. */ for (i = 0; i < ARRAY_SIZE(dbams); i++) { - if (amd64_read_pci_cfg(pvt->dram_f2_ctl, dbams[i], &dbam)) + if (amd64_read_pci_cfg(pvt->F2, dbams[i], &dbam)) goto err_reg; for (j = 0; j < 4; j++) { @@ -1234,7 +1188,7 @@ static int f10_early_channel_count(struct amd64_pvt *pvt) if (channels > 2) channels = 2; - debugf0("MCT channel count: %d\n", channels); + amd64_info("MCT channel count: %d\n", channels); return channels; @@ -1255,31 +1209,6 @@ static int f10_dbam_to_chip_select(struct amd64_pvt *pvt, int cs_mode) return dbam_map[cs_mode]; } -/* Enable extended configuration access via 0xCF8 feature */ -static void amd64_setup(struct amd64_pvt *pvt) -{ - u32 reg; - - amd64_read_pci_cfg(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, ®); - - pvt->flags.cf8_extcfg = !!(reg & F10_NB_CFG_LOW_ENABLE_EXT_CFG); - reg |= F10_NB_CFG_LOW_ENABLE_EXT_CFG; - pci_write_config_dword(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, reg); -} - -/* Restore the extended configuration access via 0xCF8 feature */ -static void amd64_teardown(struct amd64_pvt *pvt) -{ - u32 reg; - - amd64_read_pci_cfg(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, ®); - - reg &= ~F10_NB_CFG_LOW_ENABLE_EXT_CFG; - if (pvt->flags.cf8_extcfg) - reg |= F10_NB_CFG_LOW_ENABLE_EXT_CFG; - pci_write_config_dword(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, reg); -} - static u64 f10_get_error_address(struct mem_ctl_info *mci, struct err_regs *info) { @@ -1301,10 +1230,8 @@ static void f10_read_dram_base_limit(struct amd64_pvt *pvt, int dram) high_offset = F10_DRAM_BASE_HIGH + (dram << 3); /* read the 'raw' DRAM BASE Address register */ - amd64_read_pci_cfg(pvt->addr_f1_ctl, low_offset, &low_base); - - /* Read from the ECS data register */ - amd64_read_pci_cfg(pvt->addr_f1_ctl, high_offset, &high_base); + amd64_read_pci_cfg(pvt->F1, low_offset, &low_base); + amd64_read_pci_cfg(pvt->F1, high_offset, &high_base); /* Extract parts into separate data entries */ pvt->dram_rw_en[dram] = (low_base & 0x3); @@ -1321,10 +1248,8 @@ static void f10_read_dram_base_limit(struct amd64_pvt *pvt, int dram) high_offset = F10_DRAM_LIMIT_HIGH + (dram << 3); /* read the 'raw' LIMIT registers */ - amd64_read_pci_cfg(pvt->addr_f1_ctl, low_offset, &low_limit); - - /* Read from the ECS data register for the HIGH portion */ - amd64_read_pci_cfg(pvt->addr_f1_ctl, high_offset, &high_limit); + amd64_read_pci_cfg(pvt->F1, low_offset, &low_limit); + amd64_read_pci_cfg(pvt->F1, high_offset, &high_limit); pvt->dram_DstNode[dram] = (low_limit & 0x7); pvt->dram_IntlvSel[dram] = (low_limit >> 8) & 0x7; @@ -1341,7 +1266,7 @@ static void f10_read_dram_base_limit(struct amd64_pvt *pvt, int dram) static void f10_read_dram_ctl_register(struct amd64_pvt *pvt) { - if (!amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCTL_SEL_LOW, + if (!amd64_read_pci_cfg(pvt->F2, F10_DCTL_SEL_LOW, &pvt->dram_ctl_select_low)) { debugf0("F2x110 (DCTL Sel. Low): 0x%08x, " "High range addresses at: 0x%x\n", @@ -1367,7 +1292,7 @@ static void f10_read_dram_ctl_register(struct amd64_pvt *pvt) dct_sel_interleave_addr(pvt)); } - amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCTL_SEL_HIGH, + amd64_read_pci_cfg(pvt->F2, F10_DCTL_SEL_HIGH, &pvt->dram_ctl_select_high); } @@ -1496,7 +1421,7 @@ static int f10_lookup_addr_in_dct(u32 in_addr, u32 nid, u32 cs) int cs_found = -EINVAL; int csrow; - mci = mci_lookup[nid]; + mci = mcis[nid]; if (!mci) return cs_found; @@ -1572,7 +1497,7 @@ static int f10_match_to_this_node(struct amd64_pvt *pvt, int dram_range, debugf1(" HoleOffset=0x%x HoleValid=0x%x IntlvSel=0x%x\n", hole_off, hole_valid, intlv_sel); - if (intlv_en || + if (intlv_en && (intlv_sel != ((sys_addr >> 12) & intlv_en))) return -EINVAL; @@ -1738,28 +1663,17 @@ static void amd64_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt) if (dcsb[dimm*2 + 1] & K8_DCSB_CS_ENABLE) size1 = pvt->ops->dbam_to_cs(pvt, DBAM_DIMM(dimm, dbam)); - edac_printk(KERN_DEBUG, EDAC_MC, " %d: %5dMB %d: %5dMB\n", - dimm * 2, size0 << factor, - dimm * 2 + 1, size1 << factor); + amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n", + dimm * 2, size0 << factor, + dimm * 2 + 1, size1 << factor); } } -/* - * There currently are 3 types type of MC devices for AMD Athlon/Opterons - * (as per PCI DEVICE_IDs): - * - * Family K8: That is the Athlon64 and Opteron CPUs. They all have the same PCI - * DEVICE ID, even though there is differences between the different Revisions - * (CG,D,E,F). - * - * Family F10h and F11h. - * - */ static struct amd64_family_type amd64_family_types[] = { [K8_CPUS] = { - .ctl_name = "RevF", - .addr_f1_ctl = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP, - .misc_f3_ctl = PCI_DEVICE_ID_AMD_K8_NB_MISC, + .ctl_name = "K8", + .f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP, + .f3_id = PCI_DEVICE_ID_AMD_K8_NB_MISC, .ops = { .early_channel_count = k8_early_channel_count, .get_error_address = k8_get_error_address, @@ -1769,22 +1683,9 @@ static struct amd64_family_type amd64_family_types[] = { } }, [F10_CPUS] = { - .ctl_name = "Family 10h", - .addr_f1_ctl = PCI_DEVICE_ID_AMD_10H_NB_MAP, - .misc_f3_ctl = PCI_DEVICE_ID_AMD_10H_NB_MISC, - .ops = { - .early_channel_count = f10_early_channel_count, - .get_error_address = f10_get_error_address, - .read_dram_base_limit = f10_read_dram_base_limit, - .read_dram_ctl_register = f10_read_dram_ctl_register, - .map_sysaddr_to_csrow = f10_map_sysaddr_to_csrow, - .dbam_to_cs = f10_dbam_to_chip_select, - } - }, - [F11_CPUS] = { - .ctl_name = "Family 11h", - .addr_f1_ctl = PCI_DEVICE_ID_AMD_11H_NB_MAP, - .misc_f3_ctl = PCI_DEVICE_ID_AMD_11H_NB_MISC, + .ctl_name = "F10h", + .f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP, + .f3_id = PCI_DEVICE_ID_AMD_10H_NB_MISC, .ops = { .early_channel_count = f10_early_channel_count, .get_error_address = f10_get_error_address, @@ -1970,8 +1871,7 @@ static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome) ARRAY_SIZE(x4_vectors), pvt->syn_type); else { - amd64_printk(KERN_WARNING, "%s: Illegal syndrome type: %u\n", - __func__, pvt->syn_type); + amd64_warn("Illegal syndrome type: %u\n", pvt->syn_type); return err_sym; } @@ -1989,17 +1889,15 @@ static void amd64_handle_ce(struct mem_ctl_info *mci, u64 sys_addr; /* Ensure that the Error Address is VALID */ - if ((info->nbsh & K8_NBSH_VALID_ERROR_ADDR) == 0) { - amd64_mc_printk(mci, KERN_ERR, - "HW has no ERROR_ADDRESS available\n"); + if (!(info->nbsh & K8_NBSH_VALID_ERROR_ADDR)) { + amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n"); edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR); return; } sys_addr = pvt->ops->get_error_address(mci, info); - amd64_mc_printk(mci, KERN_ERR, - "CE ERROR_ADDRESS= 0x%llx\n", sys_addr); + amd64_mc_err(mci, "CE ERROR_ADDRESS= 0x%llx\n", sys_addr); pvt->ops->map_sysaddr_to_csrow(mci, info, sys_addr); } @@ -2016,9 +1914,8 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, log_mci = mci; - if ((info->nbsh & K8_NBSH_VALID_ERROR_ADDR) == 0) { - amd64_mc_printk(mci, KERN_CRIT, - "HW has no ERROR_ADDRESS available\n"); + if (!(info->nbsh & K8_NBSH_VALID_ERROR_ADDR)) { + amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n"); edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR); return; } @@ -2031,9 +1928,8 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, */ src_mci = find_mc_by_sys_addr(mci, sys_addr); if (!src_mci) { - amd64_mc_printk(mci, KERN_CRIT, - "ERROR ADDRESS (0x%lx) value NOT mapped to a MC\n", - (unsigned long)sys_addr); + amd64_mc_err(mci, "ERROR ADDRESS (0x%lx) NOT mapped to a MC\n", + (unsigned long)sys_addr); edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR); return; } @@ -2042,9 +1938,8 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, csrow = sys_addr_to_csrow(log_mci, sys_addr); if (csrow < 0) { - amd64_mc_printk(mci, KERN_CRIT, - "ERROR_ADDRESS (0x%lx) value NOT mapped to 'csrow'\n", - (unsigned long)sys_addr); + amd64_mc_err(mci, "ERROR_ADDRESS (0x%lx) NOT mapped to CS\n", + (unsigned long)sys_addr); edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR); } else { error_address_to_page_and_offset(sys_addr, &page, &offset); @@ -2055,8 +1950,8 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci, struct err_regs *info) { - u32 ec = ERROR_CODE(info->nbsl); - u32 xec = EXT_ERROR_CODE(info->nbsl); + u16 ec = EC(info->nbsl); + u8 xec = XEC(info->nbsl, 0x1f); int ecc_type = (info->nbsh >> 13) & 0x3; /* Bail early out if this was an 'observed' error */ @@ -2075,7 +1970,7 @@ static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci, void amd64_decode_bus_error(int node_id, struct mce *m, u32 nbcfg) { - struct mem_ctl_info *mci = mci_lookup[node_id]; + struct mem_ctl_info *mci = mcis[node_id]; struct err_regs regs; regs.nbsl = (u32) m->status; @@ -2099,75 +1994,50 @@ void amd64_decode_bus_error(int node_id, struct mce *m, u32 nbcfg) } /* - * Input: - * 1) struct amd64_pvt which contains pvt->dram_f2_ctl pointer - * 2) AMD Family index value - * - * Ouput: - * Upon return of 0, the following filled in: - * - * struct pvt->addr_f1_ctl - * struct pvt->misc_f3_ctl - * - * Filled in with related device funcitions of 'dram_f2_ctl' - * These devices are "reserved" via the pci_get_device() - * - * Upon return of 1 (error status): - * - * Nothing reserved + * Use pvt->F2 which contains the F2 CPU PCI device to get the related + * F1 (AddrMap) and F3 (Misc) devices. Return negative value on error. */ -static int amd64_reserve_mc_sibling_devices(struct amd64_pvt *pvt, int mc_idx) +static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f3_id) { - const struct amd64_family_type *amd64_dev = &amd64_family_types[mc_idx]; - /* Reserve the ADDRESS MAP Device */ - pvt->addr_f1_ctl = pci_get_related_function(pvt->dram_f2_ctl->vendor, - amd64_dev->addr_f1_ctl, - pvt->dram_f2_ctl); - - if (!pvt->addr_f1_ctl) { - amd64_printk(KERN_ERR, "error address map device not found: " - "vendor %x device 0x%x (broken BIOS?)\n", - PCI_VENDOR_ID_AMD, amd64_dev->addr_f1_ctl); - return 1; + pvt->F1 = pci_get_related_function(pvt->F2->vendor, f1_id, pvt->F2); + if (!pvt->F1) { + amd64_err("error address map device not found: " + "vendor %x device 0x%x (broken BIOS?)\n", + PCI_VENDOR_ID_AMD, f1_id); + return -ENODEV; } /* Reserve the MISC Device */ - pvt->misc_f3_ctl = pci_get_related_function(pvt->dram_f2_ctl->vendor, - amd64_dev->misc_f3_ctl, - pvt->dram_f2_ctl); + pvt->F3 = pci_get_related_function(pvt->F2->vendor, f3_id, pvt->F2); + if (!pvt->F3) { + pci_dev_put(pvt->F1); + pvt->F1 = NULL; - if (!pvt->misc_f3_ctl) { - pci_dev_put(pvt->addr_f1_ctl); - pvt->addr_f1_ctl = NULL; + amd64_err("error F3 device not found: " + "vendor %x device 0x%x (broken BIOS?)\n", + PCI_VENDOR_ID_AMD, f3_id); - amd64_printk(KERN_ERR, "error miscellaneous device not found: " - "vendor %x device 0x%x (broken BIOS?)\n", - PCI_VENDOR_ID_AMD, amd64_dev->misc_f3_ctl); - return 1; + return -ENODEV; } - - debugf1(" Addr Map device PCI Bus ID:\t%s\n", - pci_name(pvt->addr_f1_ctl)); - debugf1(" DRAM MEM-CTL PCI Bus ID:\t%s\n", - pci_name(pvt->dram_f2_ctl)); - debugf1(" Misc device PCI Bus ID:\t%s\n", - pci_name(pvt->misc_f3_ctl)); + debugf1("F1: %s\n", pci_name(pvt->F1)); + debugf1("F2: %s\n", pci_name(pvt->F2)); + debugf1("F3: %s\n", pci_name(pvt->F3)); return 0; } -static void amd64_free_mc_sibling_devices(struct amd64_pvt *pvt) +static void free_mc_sibling_devs(struct amd64_pvt *pvt) { - pci_dev_put(pvt->addr_f1_ctl); - pci_dev_put(pvt->misc_f3_ctl); + pci_dev_put(pvt->F1); + pci_dev_put(pvt->F3); } /* * Retrieve the hardware registers of the memory controller (this includes the * 'Address Map' and 'Misc' device regs) */ -static void amd64_read_mc_registers(struct amd64_pvt *pvt) +static void read_mc_regs(struct amd64_pvt *pvt) { u64 msr_val; u32 tmp; @@ -2188,9 +2058,7 @@ static void amd64_read_mc_registers(struct amd64_pvt *pvt) } else debugf0(" TOP_MEM2 disabled.\n"); - amd64_cpu_display_info(pvt); - - amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCAP, &pvt->nbcap); + amd64_read_pci_cfg(pvt->F3, K8_NBCAP, &pvt->nbcap); if (pvt->ops->read_dram_ctl_register) pvt->ops->read_dram_ctl_register(pvt); @@ -2227,21 +2095,20 @@ static void amd64_read_mc_registers(struct amd64_pvt *pvt) amd64_read_dct_base_mask(pvt); - amd64_read_pci_cfg(pvt->addr_f1_ctl, K8_DHAR, &pvt->dhar); + amd64_read_pci_cfg(pvt->F1, K8_DHAR, &pvt->dhar); amd64_read_dbam_reg(pvt); - amd64_read_pci_cfg(pvt->misc_f3_ctl, - F10_ONLINE_SPARE, &pvt->online_spare); + amd64_read_pci_cfg(pvt->F3, F10_ONLINE_SPARE, &pvt->online_spare); - amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0); - amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCHR_0, &pvt->dchr0); + amd64_read_pci_cfg(pvt->F2, F10_DCLR_0, &pvt->dclr0); + amd64_read_pci_cfg(pvt->F2, F10_DCHR_0, &pvt->dchr0); if (boot_cpu_data.x86 >= 0x10) { if (!dct_ganging_enabled(pvt)) { - amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCLR_1, &pvt->dclr1); - amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCHR_1, &pvt->dchr1); + amd64_read_pci_cfg(pvt->F2, F10_DCLR_1, &pvt->dclr1); + amd64_read_pci_cfg(pvt->F2, F10_DCHR_1, &pvt->dchr1); } - amd64_read_pci_cfg(pvt->misc_f3_ctl, EXT_NB_MCA_CFG, &tmp); + amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp); } if (boot_cpu_data.x86 == 0x10 && @@ -2321,21 +2188,22 @@ static u32 amd64_csrow_nr_pages(int csrow_nr, struct amd64_pvt *pvt) * Initialize the array of csrow attribute instances, based on the values * from pci config hardware registers. */ -static int amd64_init_csrows(struct mem_ctl_info *mci) +static int init_csrows(struct mem_ctl_info *mci) { struct csrow_info *csrow; - struct amd64_pvt *pvt; + struct amd64_pvt *pvt = mci->pvt_info; u64 input_addr_min, input_addr_max, sys_addr; + u32 val; int i, empty = 1; - pvt = mci->pvt_info; + amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &val); - amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCFG, &pvt->nbcfg); + pvt->nbcfg = val; + pvt->ctl_error_info.nbcfg = val; - debugf0("NBCFG= 0x%x CHIPKILL= %s DRAM ECC= %s\n", pvt->nbcfg, - (pvt->nbcfg & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled", - (pvt->nbcfg & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled" - ); + debugf0("node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n", + pvt->mc_node_id, val, + !!(val & K8_NBCFG_CHIPKILL), !!(val & K8_NBCFG_ECC_ENABLE)); for (i = 0; i < pvt->cs_count; i++) { csrow = &mci->csrows[i]; @@ -2359,7 +2227,7 @@ static int amd64_init_csrows(struct mem_ctl_info *mci) csrow->page_mask = ~mask_from_dct_mask(pvt, i); /* 8 bytes of resolution */ - csrow->mtype = amd64_determine_memory_type(pvt); + csrow->mtype = amd64_determine_memory_type(pvt, i); debugf1(" for MC node %d csrow %d:\n", pvt->mc_node_id, i); debugf1(" input_addr_min: 0x%lx input_addr_max: 0x%lx\n", @@ -2404,8 +2272,7 @@ static bool amd64_nb_mce_bank_enabled_on_node(int nid) bool ret = false; if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { - amd64_printk(KERN_WARNING, "%s: error allocating mask\n", - __func__); + amd64_warn("%s: Error allocating mask\n", __func__); return false; } @@ -2431,18 +2298,17 @@ out: return ret; } -static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on) +static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on) { cpumask_var_t cmask; int cpu; if (!zalloc_cpumask_var(&cmask, GFP_KERNEL)) { - amd64_printk(KERN_WARNING, "%s: error allocating mask\n", - __func__); + amd64_warn("%s: error allocating mask\n", __func__); return false; } - get_cpus_on_this_dct_cpumask(cmask, pvt->mc_node_id); + get_cpus_on_this_dct_cpumask(cmask, nid); rdmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs); @@ -2452,14 +2318,14 @@ static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on) if (on) { if (reg->l & K8_MSR_MCGCTL_NBE) - pvt->flags.nb_mce_enable = 1; + s->flags.nb_mce_enable = 1; reg->l |= K8_MSR_MCGCTL_NBE; } else { /* * Turn off NB MCE reporting only when it was off before */ - if (!pvt->flags.nb_mce_enable) + if (!s->flags.nb_mce_enable) reg->l &= ~K8_MSR_MCGCTL_NBE; } } @@ -2470,92 +2336,92 @@ static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on) return 0; } -static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci) +static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid, + struct pci_dev *F3) { - struct amd64_pvt *pvt = mci->pvt_info; + bool ret = true; u32 value, mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn; - amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCTL, &value); + if (toggle_ecc_err_reporting(s, nid, ON)) { + amd64_warn("Error enabling ECC reporting over MCGCTL!\n"); + return false; + } + + amd64_read_pci_cfg(F3, K8_NBCTL, &value); - /* turn on UECCn and CECCEn bits */ - pvt->old_nbctl = value & mask; - pvt->nbctl_mcgctl_saved = 1; + /* turn on UECCEn and CECCEn bits */ + s->old_nbctl = value & mask; + s->nbctl_valid = true; value |= mask; - pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value); - - if (amd64_toggle_ecc_err_reporting(pvt, ON)) - amd64_printk(KERN_WARNING, "Error enabling ECC reporting over " - "MCGCTL!\n"); + pci_write_config_dword(F3, K8_NBCTL, value); - amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCFG, &value); + amd64_read_pci_cfg(F3, K8_NBCFG, &value); - debugf0("NBCFG(1)= 0x%x CHIPKILL= %s ECC_ENABLE= %s\n", value, - (value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled", - (value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled"); + debugf0("1: node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n", + nid, value, + !!(value & K8_NBCFG_CHIPKILL), !!(value & K8_NBCFG_ECC_ENABLE)); if (!(value & K8_NBCFG_ECC_ENABLE)) { - amd64_printk(KERN_WARNING, - "This node reports that DRAM ECC is " - "currently Disabled; ENABLING now\n"); + amd64_warn("DRAM ECC disabled on this node, enabling...\n"); - pvt->flags.nb_ecc_prev = 0; + s->flags.nb_ecc_prev = 0; /* Attempt to turn on DRAM ECC Enable */ value |= K8_NBCFG_ECC_ENABLE; - pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCFG, value); + pci_write_config_dword(F3, K8_NBCFG, value); - amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCFG, &value); + amd64_read_pci_cfg(F3, K8_NBCFG, &value); if (!(value & K8_NBCFG_ECC_ENABLE)) { - amd64_printk(KERN_WARNING, - "Hardware rejects Enabling DRAM ECC checking\n" - "Check memory DIMM configuration\n"); + amd64_warn("Hardware rejected DRAM ECC enable," + "check memory DIMM configuration.\n"); + ret = false; } else { - amd64_printk(KERN_DEBUG, - "Hardware accepted DRAM ECC Enable\n"); + amd64_info("Hardware accepted DRAM ECC Enable\n"); } } else { - pvt->flags.nb_ecc_prev = 1; + s->flags.nb_ecc_prev = 1; } - debugf0("NBCFG(2)= 0x%x CHIPKILL= %s ECC_ENABLE= %s\n", value, - (value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled", - (value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled"); + debugf0("2: node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n", + nid, value, + !!(value & K8_NBCFG_CHIPKILL), !!(value & K8_NBCFG_ECC_ENABLE)); - pvt->ctl_error_info.nbcfg = value; + return ret; } -static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt) +static void restore_ecc_error_reporting(struct ecc_settings *s, u8 nid, + struct pci_dev *F3) { u32 value, mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn; - if (!pvt->nbctl_mcgctl_saved) + if (!s->nbctl_valid) return; - amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCTL, &value); + amd64_read_pci_cfg(F3, K8_NBCTL, &value); value &= ~mask; - value |= pvt->old_nbctl; + value |= s->old_nbctl; - pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value); + pci_write_config_dword(F3, K8_NBCTL, value); - /* restore previous BIOS DRAM ECC "off" setting which we force-enabled */ - if (!pvt->flags.nb_ecc_prev) { - amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCFG, &value); + /* restore previous BIOS DRAM ECC "off" setting we force-enabled */ + if (!s->flags.nb_ecc_prev) { + amd64_read_pci_cfg(F3, K8_NBCFG, &value); value &= ~K8_NBCFG_ECC_ENABLE; - pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCFG, value); + pci_write_config_dword(F3, K8_NBCFG, value); } /* restore the NB Enable MCGCTL bit */ - if (amd64_toggle_ecc_err_reporting(pvt, OFF)) - amd64_printk(KERN_WARNING, "Error restoring NB MCGCTL settings!\n"); + if (toggle_ecc_err_reporting(s, nid, OFF)) + amd64_warn("Error restoring NB MCGCTL settings!\n"); } /* - * EDAC requires that the BIOS have ECC enabled before taking over the - * processing of ECC errors. This is because the BIOS can properly initialize - * the memory system completely. A command line option allows to force-enable - * hardware ECC later in amd64_enable_ecc_error_reporting(). + * EDAC requires that the BIOS have ECC enabled before + * taking over the processing of ECC errors. A command line + * option allows to force-enable hardware ECC later in + * enable_ecc_error_reporting(). */ static const char *ecc_msg = "ECC disabled in the BIOS or no ECC capability, module will not load.\n" @@ -2563,38 +2429,28 @@ static const char *ecc_msg = "'ecc_enable_override'.\n" " (Note that use of the override may cause unknown side effects.)\n"; -static int amd64_check_ecc_enabled(struct amd64_pvt *pvt) +static bool ecc_enabled(struct pci_dev *F3, u8 nid) { u32 value; - u8 ecc_enabled = 0; + u8 ecc_en = 0; bool nb_mce_en = false; - amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCFG, &value); + amd64_read_pci_cfg(F3, K8_NBCFG, &value); - ecc_enabled = !!(value & K8_NBCFG_ECC_ENABLE); - if (!ecc_enabled) - amd64_printk(KERN_NOTICE, "This node reports that Memory ECC " - "is currently disabled, set F3x%x[22] (%s).\n", - K8_NBCFG, pci_name(pvt->misc_f3_ctl)); - else - amd64_printk(KERN_INFO, "ECC is enabled by BIOS.\n"); + ecc_en = !!(value & K8_NBCFG_ECC_ENABLE); + amd64_info("DRAM ECC %s.\n", (ecc_en ? "enabled" : "disabled")); - nb_mce_en = amd64_nb_mce_bank_enabled_on_node(pvt->mc_node_id); + nb_mce_en = amd64_nb_mce_bank_enabled_on_node(nid); if (!nb_mce_en) - amd64_printk(KERN_NOTICE, "NB MCE bank disabled, set MSR " + amd64_notice("NB MCE bank disabled, set MSR " "0x%08x[4] on node %d to enable.\n", - MSR_IA32_MCG_CTL, pvt->mc_node_id); + MSR_IA32_MCG_CTL, nid); - if (!ecc_enabled || !nb_mce_en) { - if (!ecc_enable_override) { - amd64_printk(KERN_NOTICE, "%s", ecc_msg); - return -ENODEV; - } else { - amd64_printk(KERN_WARNING, "Forcing ECC checking on!\n"); - } + if (!ecc_en || !nb_mce_en) { + amd64_notice("%s", ecc_msg); + return false; } - - return 0; + return true; } struct mcidev_sysfs_attribute sysfs_attrs[ARRAY_SIZE(amd64_dbg_attrs) + @@ -2603,22 +2459,23 @@ struct mcidev_sysfs_attribute sysfs_attrs[ARRAY_SIZE(amd64_dbg_attrs) + struct mcidev_sysfs_attribute terminator = { .attr = { .name = NULL } }; -static void amd64_set_mc_sysfs_attributes(struct mem_ctl_info *mci) +static void set_mc_sysfs_attrs(struct mem_ctl_info *mci) { unsigned int i = 0, j = 0; for (; i < ARRAY_SIZE(amd64_dbg_attrs); i++) sysfs_attrs[i] = amd64_dbg_attrs[i]; - for (j = 0; j < ARRAY_SIZE(amd64_inj_attrs); j++, i++) - sysfs_attrs[i] = amd64_inj_attrs[j]; + if (boot_cpu_data.x86 >= 0x10) + for (j = 0; j < ARRAY_SIZE(amd64_inj_attrs); j++, i++) + sysfs_attrs[i] = amd64_inj_attrs[j]; sysfs_attrs[i] = terminator; mci->mc_driver_sysfs_attributes = sysfs_attrs; } -static void amd64_setup_mci_misc_attributes(struct mem_ctl_info *mci) +static void setup_mci_misc_attrs(struct mem_ctl_info *mci) { struct amd64_pvt *pvt = mci->pvt_info; @@ -2634,8 +2491,8 @@ static void amd64_setup_mci_misc_attributes(struct mem_ctl_info *mci) mci->edac_cap = amd64_determine_edac_cap(pvt); mci->mod_name = EDAC_MOD_STR; mci->mod_ver = EDAC_AMD64_VERSION; - mci->ctl_name = get_amd_family_name(pvt->mc_type_index); - mci->dev_name = pci_name(pvt->dram_f2_ctl); + mci->ctl_name = pvt->ctl_name; + mci->dev_name = pci_name(pvt->F2); mci->ctl_page_to_phys = NULL; /* memory scrubber interface */ @@ -2644,111 +2501,94 @@ static void amd64_setup_mci_misc_attributes(struct mem_ctl_info *mci) } /* - * Init stuff for this DRAM Controller device. - * - * Due to a hardware feature on Fam10h CPUs, the Enable Extended Configuration - * Space feature MUST be enabled on ALL Processors prior to actually reading - * from the ECS registers. Since the loading of the module can occur on any - * 'core', and cores don't 'see' all the other processors ECS data when the - * others are NOT enabled. Our solution is to first enable ECS access in this - * routine on all processors, gather some data in a amd64_pvt structure and - * later come back in a finish-setup function to perform that final - * initialization. See also amd64_init_2nd_stage() for that. + * returns a pointer to the family descriptor on success, NULL otherwise. */ -static int amd64_probe_one_instance(struct pci_dev *dram_f2_ctl, - int mc_type_index) +static struct amd64_family_type *amd64_per_family_init(struct amd64_pvt *pvt) +{ + u8 fam = boot_cpu_data.x86; + struct amd64_family_type *fam_type = NULL; + + switch (fam) { + case 0xf: + fam_type = &amd64_family_types[K8_CPUS]; + pvt->ops = &amd64_family_types[K8_CPUS].ops; + pvt->ctl_name = fam_type->ctl_name; + pvt->min_scrubrate = K8_MIN_SCRUB_RATE_BITS; + break; + case 0x10: + fam_type = &amd64_family_types[F10_CPUS]; + pvt->ops = &amd64_family_types[F10_CPUS].ops; + pvt->ctl_name = fam_type->ctl_name; + pvt->min_scrubrate = F10_MIN_SCRUB_RATE_BITS; + break; + + default: + amd64_err("Unsupported family!\n"); + return NULL; + } + + pvt->ext_model = boot_cpu_data.x86_model >> 4; + + amd64_info("%s %sdetected (node %d).\n", pvt->ctl_name, + (fam == 0xf ? + (pvt->ext_model >= K8_REV_F ? "revF or later " + : "revE or earlier ") + : ""), pvt->mc_node_id); + return fam_type; +} + +static int amd64_init_one_instance(struct pci_dev *F2) { struct amd64_pvt *pvt = NULL; + struct amd64_family_type *fam_type = NULL; + struct mem_ctl_info *mci = NULL; int err = 0, ret; + u8 nid = get_node_id(F2); ret = -ENOMEM; pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); if (!pvt) - goto err_exit; + goto err_ret; - pvt->mc_node_id = get_node_id(dram_f2_ctl); + pvt->mc_node_id = nid; + pvt->F2 = F2; - pvt->dram_f2_ctl = dram_f2_ctl; - pvt->ext_model = boot_cpu_data.x86_model >> 4; - pvt->mc_type_index = mc_type_index; - pvt->ops = family_ops(mc_type_index); + ret = -EINVAL; + fam_type = amd64_per_family_init(pvt); + if (!fam_type) + goto err_free; - /* - * We have the dram_f2_ctl device as an argument, now go reserve its - * sibling devices from the PCI system. - */ ret = -ENODEV; - err = amd64_reserve_mc_sibling_devices(pvt, mc_type_index); + err = reserve_mc_sibling_devs(pvt, fam_type->f1_id, fam_type->f3_id); if (err) goto err_free; - ret = -EINVAL; - err = amd64_check_ecc_enabled(pvt); - if (err) - goto err_put; - - /* - * Key operation here: setup of HW prior to performing ops on it. Some - * setup is required to access ECS data. After this is performed, the - * 'teardown' function must be called upon error and normal exit paths. - */ - if (boot_cpu_data.x86 >= 0x10) - amd64_setup(pvt); - - /* - * Save the pointer to the private data for use in 2nd initialization - * stage - */ - pvt_lookup[pvt->mc_node_id] = pvt; - - return 0; - -err_put: - amd64_free_mc_sibling_devices(pvt); - -err_free: - kfree(pvt); - -err_exit: - return ret; -} - -/* - * This is the finishing stage of the init code. Needs to be performed after all - * MCs' hardware have been prepped for accessing extended config space. - */ -static int amd64_init_2nd_stage(struct amd64_pvt *pvt) -{ - int node_id = pvt->mc_node_id; - struct mem_ctl_info *mci; - int ret = -ENODEV; - - amd64_read_mc_registers(pvt); + read_mc_regs(pvt); /* * We need to determine how many memory channels there are. Then use * that information for calculating the size of the dynamic instance - * tables in the 'mci' structure + * tables in the 'mci' structure. */ + ret = -EINVAL; pvt->channel_count = pvt->ops->early_channel_count(pvt); if (pvt->channel_count < 0) - goto err_exit; + goto err_siblings; ret = -ENOMEM; - mci = edac_mc_alloc(0, pvt->cs_count, pvt->channel_count, node_id); + mci = edac_mc_alloc(0, pvt->cs_count, pvt->channel_count, nid); if (!mci) - goto err_exit; + goto err_siblings; mci->pvt_info = pvt; + mci->dev = &pvt->F2->dev; - mci->dev = &pvt->dram_f2_ctl->dev; - amd64_setup_mci_misc_attributes(mci); + setup_mci_misc_attrs(mci); - if (amd64_init_csrows(mci)) + if (init_csrows(mci)) mci->edac_cap = EDAC_FLAG_NONE; - amd64_enable_ecc_error_reporting(mci); - amd64_set_mc_sysfs_attributes(mci); + set_mc_sysfs_attrs(mci); ret = -ENODEV; if (edac_mc_add_mc(mci)) { @@ -2756,54 +2596,77 @@ static int amd64_init_2nd_stage(struct amd64_pvt *pvt) goto err_add_mc; } - mci_lookup[node_id] = mci; - pvt_lookup[node_id] = NULL; - /* register stuff with EDAC MCE */ if (report_gart_errors) amd_report_gart_errors(true); amd_register_ecc_decoder(amd64_decode_bus_error); + mcis[nid] = mci; + + atomic_inc(&drv_instances); + return 0; err_add_mc: edac_mc_free(mci); -err_exit: - debugf0("failure to init 2nd stage: ret=%d\n", ret); - - amd64_restore_ecc_error_reporting(pvt); - - if (boot_cpu_data.x86 > 0xf) - amd64_teardown(pvt); +err_siblings: + free_mc_sibling_devs(pvt); - amd64_free_mc_sibling_devices(pvt); - - kfree(pvt_lookup[pvt->mc_node_id]); - pvt_lookup[node_id] = NULL; +err_free: + kfree(pvt); +err_ret: return ret; } - -static int __devinit amd64_init_one_instance(struct pci_dev *pdev, - const struct pci_device_id *mc_type) +static int __devinit amd64_probe_one_instance(struct pci_dev *pdev, + const struct pci_device_id *mc_type) { + u8 nid = get_node_id(pdev); + struct pci_dev *F3 = node_to_amd_nb(nid)->misc; + struct ecc_settings *s; int ret = 0; - debugf0("(MC node=%d,mc_type='%s')\n", get_node_id(pdev), - get_amd_family_name(mc_type->driver_data)); - ret = pci_enable_device(pdev); - if (ret < 0) - ret = -EIO; - else - ret = amd64_probe_one_instance(pdev, mc_type->driver_data); - - if (ret < 0) + if (ret < 0) { debugf0("ret=%d\n", ret); + return -EIO; + } + + ret = -ENOMEM; + s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL); + if (!s) + goto err_out; + + ecc_stngs[nid] = s; + + if (!ecc_enabled(F3, nid)) { + ret = -ENODEV; + + if (!ecc_enable_override) + goto err_enable; + + amd64_warn("Forcing ECC on!\n"); + + if (!enable_ecc_error_reporting(s, nid, F3)) + goto err_enable; + } + ret = amd64_init_one_instance(pdev); + if (ret < 0) { + amd64_err("Error probing instance: %d\n", nid); + restore_ecc_error_reporting(s, nid, F3); + } + + return ret; + +err_enable: + kfree(s); + ecc_stngs[nid] = NULL; + +err_out: return ret; } @@ -2811,6 +2674,9 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev) { struct mem_ctl_info *mci; struct amd64_pvt *pvt; + u8 nid = get_node_id(pdev); + struct pci_dev *F3 = node_to_amd_nb(nid)->misc; + struct ecc_settings *s = ecc_stngs[nid]; /* Remove from EDAC CORE tracking list */ mci = edac_mc_del_mc(&pdev->dev); @@ -2819,20 +2685,20 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev) pvt = mci->pvt_info; - amd64_restore_ecc_error_reporting(pvt); + restore_ecc_error_reporting(s, nid, F3); - if (boot_cpu_data.x86 > 0xf) - amd64_teardown(pvt); - - amd64_free_mc_sibling_devices(pvt); + free_mc_sibling_devs(pvt); /* unregister from EDAC MCE */ amd_report_gart_errors(false); amd_unregister_ecc_decoder(amd64_decode_bus_error); + kfree(ecc_stngs[nid]); + ecc_stngs[nid] = NULL; + /* Free the EDAC CORE resources */ mci->pvt_info = NULL; - mci_lookup[pvt->mc_node_id] = NULL; + mcis[nid] = NULL; kfree(pvt); edac_mc_free(mci); @@ -2851,7 +2717,6 @@ static const struct pci_device_id amd64_pci_table[] __devinitdata = { .subdevice = PCI_ANY_ID, .class = 0, .class_mask = 0, - .driver_data = K8_CPUS }, { .vendor = PCI_VENDOR_ID_AMD, @@ -2860,16 +2725,6 @@ static const struct pci_device_id amd64_pci_table[] __devinitdata = { .subdevice = PCI_ANY_ID, .class = 0, .class_mask = 0, - .driver_data = F10_CPUS - }, - { - .vendor = PCI_VENDOR_ID_AMD, - .device = PCI_DEVICE_ID_AMD_11H_NB_DRAM, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .class = 0, - .class_mask = 0, - .driver_data = F11_CPUS }, {0, } }; @@ -2877,12 +2732,12 @@ MODULE_DEVICE_TABLE(pci, amd64_pci_table); static struct pci_driver amd64_pci_driver = { .name = EDAC_MOD_STR, - .probe = amd64_init_one_instance, + .probe = amd64_probe_one_instance, .remove = __devexit_p(amd64_remove_one_instance), .id_table = amd64_pci_table, }; -static void amd64_setup_pci_device(void) +static void setup_pci_device(void) { struct mem_ctl_info *mci; struct amd64_pvt *pvt; @@ -2890,13 +2745,12 @@ static void amd64_setup_pci_device(void) if (amd64_ctl_pci) return; - mci = mci_lookup[0]; + mci = mcis[0]; if (mci) { pvt = mci->pvt_info; amd64_ctl_pci = - edac_pci_create_generic_ctl(&pvt->dram_f2_ctl->dev, - EDAC_MOD_STR); + edac_pci_create_generic_ctl(&pvt->F2->dev, EDAC_MOD_STR); if (!amd64_ctl_pci) { pr_warning("%s(): Unable to create PCI control\n", @@ -2910,51 +2764,50 @@ static void amd64_setup_pci_device(void) static int __init amd64_edac_init(void) { - int nb, err = -ENODEV; - bool load_ok = false; + int err = -ENODEV; edac_printk(KERN_INFO, EDAC_MOD_STR, EDAC_AMD64_VERSION "\n"); opstate_init(); - if (cache_k8_northbridges() < 0) + if (amd_cache_northbridges() < 0) + goto err_ret; + + err = -ENOMEM; + mcis = kzalloc(amd_nb_num() * sizeof(mcis[0]), GFP_KERNEL); + ecc_stngs = kzalloc(amd_nb_num() * sizeof(ecc_stngs[0]), GFP_KERNEL); + if (!(mcis && ecc_stngs)) goto err_ret; msrs = msrs_alloc(); if (!msrs) - goto err_ret; + goto err_free; err = pci_register_driver(&amd64_pci_driver); if (err) goto err_pci; - /* - * At this point, the array 'pvt_lookup[]' contains pointers to alloc'd - * amd64_pvt structs. These will be used in the 2nd stage init function - * to finish initialization of the MC instances. - */ err = -ENODEV; - for (nb = 0; nb < k8_northbridges.num; nb++) { - if (!pvt_lookup[nb]) - continue; - - err = amd64_init_2nd_stage(pvt_lookup[nb]); - if (err) - goto err_2nd_stage; + if (!atomic_read(&drv_instances)) + goto err_no_instances; - load_ok = true; - } - - if (load_ok) { - amd64_setup_pci_device(); - return 0; - } + setup_pci_device(); + return 0; -err_2nd_stage: +err_no_instances: pci_unregister_driver(&amd64_pci_driver); + err_pci: msrs_free(msrs); msrs = NULL; + +err_free: + kfree(mcis); + mcis = NULL; + + kfree(ecc_stngs); + ecc_stngs = NULL; + err_ret: return err; } @@ -2966,6 +2819,12 @@ static void __exit amd64_edac_exit(void) pci_unregister_driver(&amd64_pci_driver); + kfree(ecc_stngs); + ecc_stngs = NULL; + + kfree(mcis); + mcis = NULL; + msrs_free(msrs); msrs = NULL; } diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 044aee4f944..613ec72b0f6 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -74,11 +74,26 @@ #include "edac_core.h" #include "mce_amd.h" -#define amd64_printk(level, fmt, arg...) \ - edac_printk(level, "amd64", fmt, ##arg) +#define amd64_debug(fmt, arg...) \ + edac_printk(KERN_DEBUG, "amd64", fmt, ##arg) -#define amd64_mc_printk(mci, level, fmt, arg...) \ - edac_mc_chipset_printk(mci, level, "amd64", fmt, ##arg) +#define amd64_info(fmt, arg...) \ + edac_printk(KERN_INFO, "amd64", fmt, ##arg) + +#define amd64_notice(fmt, arg...) \ + edac_printk(KERN_NOTICE, "amd64", fmt, ##arg) + +#define amd64_warn(fmt, arg...) \ + edac_printk(KERN_WARNING, "amd64", fmt, ##arg) + +#define amd64_err(fmt, arg...) \ + edac_printk(KERN_ERR, "amd64", fmt, ##arg) + +#define amd64_mc_warn(mci, fmt, arg...) \ + edac_mc_chipset_printk(mci, KERN_WARNING, "amd64", fmt, ##arg) + +#define amd64_mc_err(mci, fmt, arg...) \ + edac_mc_chipset_printk(mci, KERN_ERR, "amd64", fmt, ##arg) /* * Throughout the comments in this code, the following terms are used: @@ -129,11 +144,9 @@ * sections 3.5.4 and 3.5.5 for more information. */ -#define EDAC_AMD64_VERSION " Ver: 3.3.0 " __DATE__ +#define EDAC_AMD64_VERSION "v3.3.0" #define EDAC_MOD_STR "amd64_edac" -#define EDAC_MAX_NUMNODES 8 - /* Extended Model from CPUID, for CPU Revision numbers */ #define K8_REV_D 1 #define K8_REV_E 2 @@ -322,9 +335,6 @@ #define K8_SCRCTRL 0x58 #define F10_NB_CFG_LOW 0x88 -#define F10_NB_CFG_LOW_ENABLE_EXT_CFG BIT(14) - -#define F10_NB_CFG_HIGH 0x8C #define F10_ONLINE_SPARE 0xB0 #define F10_ONLINE_SPARE_SWAPDONE0(x) ((x) & BIT(1)) @@ -373,7 +383,6 @@ static inline int get_node_id(struct pci_dev *pdev) enum amd64_chipset_families { K8_CPUS = 0, F10_CPUS, - F11_CPUS, }; /* Error injection control structure */ @@ -384,16 +393,13 @@ struct error_injection { }; struct amd64_pvt { + struct low_ops *ops; + /* pci_device handles which we utilize */ - struct pci_dev *addr_f1_ctl; - struct pci_dev *dram_f2_ctl; - struct pci_dev *misc_f3_ctl; + struct pci_dev *F1, *F2, *F3; int mc_node_id; /* MC index of this MC node */ int ext_model; /* extended model value of this node */ - - struct low_ops *ops; /* pointer to per PCI Device ID func table */ - int channel_count; /* Raw registers */ @@ -455,27 +461,27 @@ struct amd64_pvt { /* place to store error injection parameters prior to issue */ struct error_injection injection; - /* Save old hw registers' values before we modified them */ - u32 nbctl_mcgctl_saved; /* When true, following 2 are valid */ - u32 old_nbctl; + /* DCT per-family scrubrate setting */ + u32 min_scrubrate; - /* MC Type Index value: socket F vs Family 10h */ - u32 mc_type_index; + /* family name this instance is running on */ + const char *ctl_name; + +}; + +/* + * per-node ECC settings descriptor + */ +struct ecc_settings { + u32 old_nbctl; + bool nbctl_valid; - /* misc settings */ struct flags { - unsigned long cf8_extcfg:1; unsigned long nb_mce_enable:1; unsigned long nb_ecc_prev:1; } flags; }; -struct scrubrate { - u32 scrubval; /* bit pattern for scrub rate */ - u32 bandwidth; /* bandwidth consumed (bytes/sec) */ -}; - -extern struct scrubrate scrubrates[23]; extern const char *tt_msgs[4]; extern const char *ll_msgs[4]; extern const char *rrrr_msgs[16]; @@ -517,23 +523,10 @@ struct low_ops { struct amd64_family_type { const char *ctl_name; - u16 addr_f1_ctl; - u16 misc_f3_ctl; + u16 f1_id, f3_id; struct low_ops ops; }; -static struct amd64_family_type amd64_family_types[]; - -static inline const char *get_amd_family_name(int index) -{ - return amd64_family_types[index].ctl_name; -} - -static inline struct low_ops *family_ops(int index) -{ - return &amd64_family_types[index].ops; -} - static inline int amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, u32 *val, const char *func) { @@ -541,8 +534,8 @@ static inline int amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, err = pci_read_config_dword(pdev, offset, val); if (err) - amd64_printk(KERN_WARNING, "%s: error reading F%dx%x.\n", - func, PCI_FUNC(pdev->devfn), offset); + amd64_warn("%s: error reading F%dx%x.\n", + func, PCI_FUNC(pdev->devfn), offset); return err; } @@ -556,7 +549,6 @@ static inline int amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, */ #define K8_MIN_SCRUB_RATE_BITS 0x0 #define F10_MIN_SCRUB_RATE_BITS 0x5 -#define F11_MIN_SCRUB_RATE_BITS 0x6 int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base, u64 *hole_offset, u64 *hole_size); diff --git a/drivers/edac/amd64_edac_inj.c b/drivers/edac/amd64_edac_inj.c index 29f1f7a612d..688478de1cb 100644 --- a/drivers/edac/amd64_edac_inj.c +++ b/drivers/edac/amd64_edac_inj.c @@ -23,9 +23,7 @@ static ssize_t amd64_inject_section_store(struct mem_ctl_info *mci, if (ret != -EINVAL) { if (value > 3) { - amd64_printk(KERN_WARNING, - "%s: invalid section 0x%lx\n", - __func__, value); + amd64_warn("%s: invalid section 0x%lx\n", __func__, value); return -EINVAL; } @@ -58,9 +56,7 @@ static ssize_t amd64_inject_word_store(struct mem_ctl_info *mci, if (ret != -EINVAL) { if (value > 8) { - amd64_printk(KERN_WARNING, - "%s: invalid word 0x%lx\n", - __func__, value); + amd64_warn("%s: invalid word 0x%lx\n", __func__, value); return -EINVAL; } @@ -92,9 +88,8 @@ static ssize_t amd64_inject_ecc_vector_store(struct mem_ctl_info *mci, if (ret != -EINVAL) { if (value & 0xFFFF0000) { - amd64_printk(KERN_WARNING, - "%s: invalid EccVector: 0x%lx\n", - __func__, value); + amd64_warn("%s: invalid EccVector: 0x%lx\n", + __func__, value); return -EINVAL; } @@ -122,15 +117,13 @@ static ssize_t amd64_inject_read_store(struct mem_ctl_info *mci, /* Form value to choose 16-byte section of cacheline */ section = F10_NB_ARRAY_DRAM_ECC | SET_NB_ARRAY_ADDRESS(pvt->injection.section); - pci_write_config_dword(pvt->misc_f3_ctl, - F10_NB_ARRAY_ADDR, section); + pci_write_config_dword(pvt->F3, F10_NB_ARRAY_ADDR, section); word_bits = SET_NB_DRAM_INJECTION_READ(pvt->injection.word, pvt->injection.bit_map); /* Issue 'word' and 'bit' along with the READ request */ - pci_write_config_dword(pvt->misc_f3_ctl, - F10_NB_ARRAY_DATA, word_bits); + pci_write_config_dword(pvt->F3, F10_NB_ARRAY_DATA, word_bits); debugf0("section=0x%x word_bits=0x%x\n", section, word_bits); @@ -157,15 +150,13 @@ static ssize_t amd64_inject_write_store(struct mem_ctl_info *mci, /* Form value to choose 16-byte section of cacheline */ section = F10_NB_ARRAY_DRAM_ECC | SET_NB_ARRAY_ADDRESS(pvt->injection.section); - pci_write_config_dword(pvt->misc_f3_ctl, - F10_NB_ARRAY_ADDR, section); + pci_write_config_dword(pvt->F3, F10_NB_ARRAY_ADDR, section); word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection.word, pvt->injection.bit_map); /* Issue 'word' and 'bit' along with the READ request */ - pci_write_config_dword(pvt->misc_f3_ctl, - F10_NB_ARRAY_DATA, word_bits); + pci_write_config_dword(pvt->F3, F10_NB_ARRAY_DATA, word_bits); debugf0("section=0x%x word_bits=0x%x\n", section, word_bits); diff --git a/drivers/edac/amd8131_edac.h b/drivers/edac/amd8131_edac.h index 60e0d1c72de..6f8b07131ec 100644 --- a/drivers/edac/amd8131_edac.h +++ b/drivers/edac/amd8131_edac.h @@ -99,7 +99,7 @@ struct amd8131_dev_info { /* * AMD8131 chipset has two pairs of PCIX Bridge and related IOAPIC - * Controler, and ATCA-6101 has two AMD8131 chipsets, so there are + * Controller, and ATCA-6101 has two AMD8131 chipsets, so there are * four PCIX Bridges on ATCA-6101 altogether. * * These PCIX Bridges share the same PCI Device ID and are all of diff --git a/drivers/edac/cell_edac.c b/drivers/edac/cell_edac.c index c973004c002..db1df59ae2b 100644 --- a/drivers/edac/cell_edac.c +++ b/drivers/edac/cell_edac.c @@ -47,7 +47,7 @@ static void cell_edac_count_ce(struct mem_ctl_info *mci, int chan, u64 ar) offset = address & ~PAGE_MASK; syndrome = (ar & 0x000000001fe00000ul) >> 21; - /* TODO: Decoding of the error addresss */ + /* TODO: Decoding of the error address */ edac_mc_handle_ce(mci, csrow->first_page + pfn, offset, syndrome, 0, chan, ""); } @@ -68,7 +68,7 @@ static void cell_edac_count_ue(struct mem_ctl_info *mci, int chan, u64 ar) pfn = address >> PAGE_SHIFT; offset = address & ~PAGE_MASK; - /* TODO: Decoding of the error addresss */ + /* TODO: Decoding of the error address */ edac_mc_handle_ue(mci, csrow->first_page + pfn, offset, 0, ""); } diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c index 1609a19df49..b9a781c47e3 100644 --- a/drivers/edac/cpc925_edac.c +++ b/drivers/edac/cpc925_edac.c @@ -818,9 +818,10 @@ static void cpc925_del_edac_devices(void) } /* Convert current back-ground scrub rate into byte/sec bandwith */ -static int cpc925_get_sdram_scrub_rate(struct mem_ctl_info *mci, u32 *bw) +static int cpc925_get_sdram_scrub_rate(struct mem_ctl_info *mci) { struct cpc925_mc_pdata *pdata = mci->pvt_info; + int bw; u32 mscr; u8 si; @@ -832,11 +833,11 @@ static int cpc925_get_sdram_scrub_rate(struct mem_ctl_info *mci, u32 *bw) if (((mscr & MSCR_SCRUB_MOD_MASK) != MSCR_BACKGR_SCRUB) || (si == 0)) { cpc925_mc_printk(mci, KERN_INFO, "Scrub mode not enabled\n"); - *bw = 0; + bw = 0; } else - *bw = CPC925_SCRUB_BLOCK_SIZE * 0xFA67 / si; + bw = CPC925_SCRUB_BLOCK_SIZE * 0xFA67 / si; - return 0; + return bw; } /* Return 0 for single channel; 1 for dual channel */ diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c index 073f5a06d23..ec302d42658 100644 --- a/drivers/edac/e752x_edac.c +++ b/drivers/edac/e752x_edac.c @@ -983,11 +983,11 @@ static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw) pci_write_config_word(pdev, E752X_MCHSCRB, scrubrates[i].scrubval); - return 0; + return scrubrates[i].bandwidth; } /* Convert current scrub rate value into byte/sec bandwidth */ -static int get_sdram_scrub_rate(struct mem_ctl_info *mci, u32 *bw) +static int get_sdram_scrub_rate(struct mem_ctl_info *mci) { const struct scrubrate *scrubrates; struct e752x_pvt *pvt = (struct e752x_pvt *) mci->pvt_info; @@ -1013,10 +1013,8 @@ static int get_sdram_scrub_rate(struct mem_ctl_info *mci, u32 *bw) "Invalid sdram scrub control value: 0x%x\n", scrubval); return -1; } + return scrubrates[i].bandwidth; - *bw = scrubrates[i].bandwidth; - - return 0; } /* Return 1 if dual channel mode is active. Else return 0. */ diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h index d7ca43a828b..3d965347a67 100644 --- a/drivers/edac/edac_core.h +++ b/drivers/edac/edac_core.h @@ -41,10 +41,10 @@ #define MC_PROC_NAME_MAX_LEN 7 #if PAGE_SHIFT < 20 -#define PAGES_TO_MiB( pages ) ( ( pages ) >> ( 20 - PAGE_SHIFT ) ) -#define MiB_TO_PAGES(mb) ((mb) >> (20 - PAGE_SHIFT)) +#define PAGES_TO_MiB(pages) ((pages) >> (20 - PAGE_SHIFT)) +#define MiB_TO_PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) #else /* PAGE_SHIFT > 20 */ -#define PAGES_TO_MiB( pages ) ( ( pages ) << ( PAGE_SHIFT - 20 ) ) +#define PAGES_TO_MiB(pages) ((pages) << (PAGE_SHIFT - 20)) #define MiB_TO_PAGES(mb) ((mb) >> (PAGE_SHIFT - 20)) #endif @@ -68,9 +68,10 @@ #define EDAC_PCI "PCI" #define EDAC_DEBUG "DEBUG" +extern const char *edac_mem_types[]; + #ifdef CONFIG_EDAC_DEBUG extern int edac_debug_level; -extern const char *edac_mem_types[]; #define edac_debug_printk(level, fmt, arg...) \ do { \ @@ -258,7 +259,7 @@ enum scrub_type { * for single channel are 64 bits, for dual channel 128 * bits. * - * Single-Ranked stick: A Single-ranked stick has 1 chip-select row of memmory. + * Single-Ranked stick: A Single-ranked stick has 1 chip-select row of memory. * Motherboards commonly drive two chip-select pins to * a memory stick. A single-ranked stick, will occupy * only one of those rows. The other will be unused. @@ -386,7 +387,7 @@ struct mem_ctl_info { representation and converts it to the closest matching bandwith in bytes/sec. */ - int (*get_sdram_scrub_rate) (struct mem_ctl_info * mci, u32 * bw); + int (*get_sdram_scrub_rate) (struct mem_ctl_info * mci); /* pointer to edac checking routine */ diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index ba6586a69cc..a4e9db2d652 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -76,6 +76,8 @@ static void edac_mc_dump_mci(struct mem_ctl_info *mci) debugf3("\tpvt_info = %p\n\n", mci->pvt_info); } +#endif /* CONFIG_EDAC_DEBUG */ + /* * keep those in sync with the enum mem_type */ @@ -100,8 +102,6 @@ const char *edac_mem_types[] = { }; EXPORT_SYMBOL_GPL(edac_mem_types); -#endif /* CONFIG_EDAC_DEBUG */ - /* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'. * Adjust 'ptr' so that its alignment is at least as stringent as what the * compiler would provide for X and return the aligned result. @@ -586,14 +586,16 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev) return NULL; } - /* marking MCI offline */ - mci->op_state = OP_OFFLINE; - del_mc_from_global_list(mci); mutex_unlock(&mem_ctls_mutex); - /* flush workq processes and remove sysfs */ + /* flush workq processes */ edac_mc_workq_teardown(mci); + + /* marking MCI offline */ + mci->op_state = OP_OFFLINE; + + /* remove from sysfs */ edac_remove_sysfs_mci_device(mci); edac_printk(KERN_INFO, EDAC_MC, diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index dce61f7ba38..39d97cfdf58 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -436,56 +436,55 @@ static ssize_t mci_reset_counters_store(struct mem_ctl_info *mci, return count; } -/* memory scrubbing */ +/* Memory scrubbing interface: + * + * A MC driver can limit the scrubbing bandwidth based on the CPU type. + * Therefore, ->set_sdram_scrub_rate should be made to return the actual + * bandwidth that is accepted or 0 when scrubbing is to be disabled. + * + * Negative value still means that an error has occurred while setting + * the scrub rate. + */ static ssize_t mci_sdram_scrub_rate_store(struct mem_ctl_info *mci, const char *data, size_t count) { unsigned long bandwidth = 0; - int err; + int new_bw = 0; - if (!mci->set_sdram_scrub_rate) { - edac_printk(KERN_WARNING, EDAC_MC, - "Memory scrub rate setting not implemented!\n"); + if (!mci->set_sdram_scrub_rate) return -EINVAL; - } if (strict_strtoul(data, 10, &bandwidth) < 0) return -EINVAL; - err = mci->set_sdram_scrub_rate(mci, (u32)bandwidth); - if (err) { - edac_printk(KERN_DEBUG, EDAC_MC, - "Failed setting scrub rate to %lu\n", bandwidth); - return -EINVAL; - } - else { - edac_printk(KERN_DEBUG, EDAC_MC, - "Scrub rate set to: %lu\n", bandwidth); + new_bw = mci->set_sdram_scrub_rate(mci, bandwidth); + if (new_bw >= 0) { + edac_printk(KERN_DEBUG, EDAC_MC, "Scrub rate set to %d\n", new_bw); return count; } + + edac_printk(KERN_DEBUG, EDAC_MC, "Error setting scrub rate to: %lu\n", bandwidth); + return -EINVAL; } +/* + * ->get_sdram_scrub_rate() return value semantics same as above. + */ static ssize_t mci_sdram_scrub_rate_show(struct mem_ctl_info *mci, char *data) { - u32 bandwidth = 0; - int err; + int bandwidth = 0; - if (!mci->get_sdram_scrub_rate) { - edac_printk(KERN_WARNING, EDAC_MC, - "Memory scrub rate reading not implemented\n"); + if (!mci->get_sdram_scrub_rate) return -EINVAL; - } - err = mci->get_sdram_scrub_rate(mci, &bandwidth); - if (err) { + bandwidth = mci->get_sdram_scrub_rate(mci); + if (bandwidth < 0) { edac_printk(KERN_DEBUG, EDAC_MC, "Error reading scrub rate\n"); - return err; - } - else { - edac_printk(KERN_DEBUG, EDAC_MC, - "Read scrub rate: %d\n", bandwidth); - return sprintf(data, "%d\n", bandwidth); + return bandwidth; } + + edac_printk(KERN_DEBUG, EDAC_MC, "Read scrub rate: %d\n", bandwidth); + return sprintf(data, "%d\n", bandwidth); } /* default attribute files for the MCI object */ diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c index f459a6c0886..0448da0af75 100644 --- a/drivers/edac/i5100_edac.c +++ b/drivers/edac/i5100_edac.c @@ -611,20 +611,17 @@ static int i5100_set_scrub_rate(struct mem_ctl_info *mci, u32 bandwidth) bandwidth = 5900000 * i5100_mc_scrben(dw); - return 0; + return bandwidth; } -static int i5100_get_scrub_rate(struct mem_ctl_info *mci, - u32 *bandwidth) +static int i5100_get_scrub_rate(struct mem_ctl_info *mci) { struct i5100_priv *priv = mci->pvt_info; u32 dw; pci_read_config_dword(priv->mc, I5100_MC, &dw); - *bandwidth = 5900000 * i5100_mc_scrben(dw); - - return 0; + return 5900000 * i5100_mc_scrben(dw); } static struct pci_dev *pci_get_device_func(unsigned vendor, diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 362861c1577..81154ab296b 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -1,6 +1,6 @@ /* Intel i7 core/Nehalem Memory Controller kernel module * - * This driver supports yhe memory controllers found on the Intel + * This driver supports the memory controllers found on the Intel * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx, * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield * and Westmere-EP. @@ -1271,7 +1271,7 @@ static void __init i7core_xeon_pci_fixup(const struct pci_id_table *table) int i; /* - * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core pci buses + * On Xeon 55xx, the Intel Quick Path Arch Generic Non-core pci buses * aren't announced by acpi. So, we need to use a legacy scan probing * to detect them */ @@ -1864,7 +1864,7 @@ static int i7core_mce_check_error(void *priv, struct mce *mce) if (mce->mcgstatus & 1) i7core_check_error(mci); - /* Advice mcelog that the error were handled */ + /* Advise mcelog that the errors were handled */ return 1; } diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index c0181093b49..f6cf73d9335 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -5,6 +5,7 @@ static struct amd_decoder_ops *fam_ops; +static u8 xec_mask = 0xf; static u8 nb_err_cpumask = 0xf; static bool report_gart_errors; @@ -74,57 +75,104 @@ static const char *f10h_nb_mce_desc[] = { "ECC Error in the Probe Filter directory" }; -static bool f12h_dc_mce(u16 ec) +static const char * const f15h_ic_mce_desc[] = { + "UC during a demand linefill from L2", + "Parity error during data load from IC", + "Parity error for IC valid bit", + "Main tag parity error", + "Parity error in prediction queue", + "PFB data/address parity error", + "Parity error in the branch status reg", + "PFB promotion address error", + "Tag error during probe/victimization", + "Parity error for IC probe tag valid bit", + "PFB non-cacheable bit parity error", + "PFB valid bit parity error", /* xec = 0xd */ + "patch RAM", /* xec = 010 */ + "uop queue", + "insn buffer", + "predecode buffer", + "fetch address FIFO" +}; + +static const char * const f15h_cu_mce_desc[] = { + "Fill ECC error on data fills", /* xec = 0x4 */ + "Fill parity error on insn fills", + "Prefetcher request FIFO parity error", + "PRQ address parity error", + "PRQ data parity error", + "WCC Tag ECC error", + "WCC Data ECC error", + "WCB Data parity error", + "VB Data/ECC error", + "L2 Tag ECC error", /* xec = 0x10 */ + "Hard L2 Tag ECC error", + "Multiple hits on L2 tag", + "XAB parity error", + "PRB address parity error" +}; + +static const char * const fr_ex_mce_desc[] = { + "CPU Watchdog timer expire", + "Wakeup array dest tag", + "AG payload array", + "EX payload array", + "IDRF array", + "Retire dispatch queue", + "Mapper checkpoint array", + "Physical register file EX0 port", + "Physical register file EX1 port", + "Physical register file AG0 port", + "Physical register file AG1 port", + "Flag register file", + "DE correctable error could not be corrected" +}; + +static bool f12h_dc_mce(u16 ec, u8 xec) { bool ret = false; if (MEM_ERROR(ec)) { - u8 ll = ec & 0x3; + u8 ll = LL(ec); ret = true; if (ll == LL_L2) pr_cont("during L1 linefill from L2.\n"); else if (ll == LL_L1) - pr_cont("Data/Tag %s error.\n", RRRR_MSG(ec)); + pr_cont("Data/Tag %s error.\n", R4_MSG(ec)); else ret = false; } return ret; } -static bool f10h_dc_mce(u16 ec) +static bool f10h_dc_mce(u16 ec, u8 xec) { - u8 r4 = (ec >> 4) & 0xf; - u8 ll = ec & 0x3; - - if (r4 == R4_GEN && ll == LL_L1) { + if (R4(ec) == R4_GEN && LL(ec) == LL_L1) { pr_cont("during data scrub.\n"); return true; } - return f12h_dc_mce(ec); + return f12h_dc_mce(ec, xec); } -static bool k8_dc_mce(u16 ec) +static bool k8_dc_mce(u16 ec, u8 xec) { if (BUS_ERROR(ec)) { pr_cont("during system linefill.\n"); return true; } - return f10h_dc_mce(ec); + return f10h_dc_mce(ec, xec); } -static bool f14h_dc_mce(u16 ec) +static bool f14h_dc_mce(u16 ec, u8 xec) { - u8 r4 = (ec >> 4) & 0xf; - u8 ll = ec & 0x3; - u8 tt = (ec >> 2) & 0x3; - u8 ii = tt; + u8 r4 = R4(ec); bool ret = true; if (MEM_ERROR(ec)) { - if (tt != TT_DATA || ll != LL_L1) + if (TT(ec) != TT_DATA || LL(ec) != LL_L1) return false; switch (r4) { @@ -144,7 +192,7 @@ static bool f14h_dc_mce(u16 ec) } } else if (BUS_ERROR(ec)) { - if ((ii != II_MEM && ii != II_IO) || ll != LL_LG) + if ((II(ec) != II_MEM && II(ec) != II_IO) || LL(ec) != LL_LG) return false; pr_cont("System read data error on a "); @@ -169,39 +217,78 @@ static bool f14h_dc_mce(u16 ec) return ret; } +static bool f15h_dc_mce(u16 ec, u8 xec) +{ + bool ret = true; + + if (MEM_ERROR(ec)) { + + switch (xec) { + case 0x0: + pr_cont("Data Array access error.\n"); + break; + + case 0x1: + pr_cont("UC error during a linefill from L2/NB.\n"); + break; + + case 0x2: + case 0x11: + pr_cont("STQ access error.\n"); + break; + + case 0x3: + pr_cont("SCB access error.\n"); + break; + + case 0x10: + pr_cont("Tag error.\n"); + break; + + case 0x12: + pr_cont("LDQ access error.\n"); + break; + + default: + ret = false; + } + } else if (BUS_ERROR(ec)) { + + if (!xec) + pr_cont("during system linefill.\n"); + else + pr_cont(" Internal %s condition.\n", + ((xec == 1) ? "livelock" : "deadlock")); + } else + ret = false; + + return ret; +} + static void amd_decode_dc_mce(struct mce *m) { - u16 ec = m->status & 0xffff; - u8 xec = (m->status >> 16) & 0xf; + u16 ec = EC(m->status); + u8 xec = XEC(m->status, xec_mask); pr_emerg(HW_ERR "Data Cache Error: "); /* TLB error signatures are the same across families */ if (TLB_ERROR(ec)) { - u8 tt = (ec >> 2) & 0x3; - - if (tt == TT_DATA) { + if (TT(ec) == TT_DATA) { pr_cont("%s TLB %s.\n", LL_MSG(ec), - (xec ? "multimatch" : "parity error")); + ((xec == 2) ? "locked miss" + : (xec ? "multimatch" : "parity"))); return; } - else - goto wrong_dc_mce; - } - - if (!fam_ops->dc_mce(ec)) - goto wrong_dc_mce; - - return; - -wrong_dc_mce: - pr_emerg(HW_ERR "Corrupted DC MCE info?\n"); + } else if (fam_ops->dc_mce(ec, xec)) + ; + else + pr_emerg(HW_ERR "Corrupted DC MCE info?\n"); } -static bool k8_ic_mce(u16 ec) +static bool k8_ic_mce(u16 ec, u8 xec) { - u8 ll = ec & 0x3; - u8 r4 = (ec >> 4) & 0xf; + u8 ll = LL(ec); bool ret = true; if (!MEM_ERROR(ec)) @@ -210,7 +297,7 @@ static bool k8_ic_mce(u16 ec) if (ll == 0x2) pr_cont("during a linefill from L2.\n"); else if (ll == 0x1) { - switch (r4) { + switch (R4(ec)) { case R4_IRD: pr_cont("Parity error during data load.\n"); break; @@ -233,15 +320,13 @@ static bool k8_ic_mce(u16 ec) return ret; } -static bool f14h_ic_mce(u16 ec) +static bool f14h_ic_mce(u16 ec, u8 xec) { - u8 ll = ec & 0x3; - u8 tt = (ec >> 2) & 0x3; - u8 r4 = (ec >> 4) & 0xf; + u8 r4 = R4(ec); bool ret = true; if (MEM_ERROR(ec)) { - if (tt != 0 || ll != 1) + if (TT(ec) != 0 || LL(ec) != 1) ret = false; if (r4 == R4_IRD) @@ -254,10 +339,36 @@ static bool f14h_ic_mce(u16 ec) return ret; } +static bool f15h_ic_mce(u16 ec, u8 xec) +{ + bool ret = true; + + if (!MEM_ERROR(ec)) + return false; + + switch (xec) { + case 0x0 ... 0xa: + pr_cont("%s.\n", f15h_ic_mce_desc[xec]); + break; + + case 0xd: + pr_cont("%s.\n", f15h_ic_mce_desc[xec-2]); + break; + + case 0x10 ... 0x14: + pr_cont("Decoder %s parity error.\n", f15h_ic_mce_desc[xec-4]); + break; + + default: + ret = false; + } + return ret; +} + static void amd_decode_ic_mce(struct mce *m) { - u16 ec = m->status & 0xffff; - u8 xec = (m->status >> 16) & 0xf; + u16 ec = EC(m->status); + u8 xec = XEC(m->status, xec_mask); pr_emerg(HW_ERR "Instruction Cache Error: "); @@ -268,7 +379,7 @@ static void amd_decode_ic_mce(struct mce *m) bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58))); pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read")); - } else if (fam_ops->ic_mce(ec)) + } else if (fam_ops->ic_mce(ec, xec)) ; else pr_emerg(HW_ERR "Corrupted IC MCE info?\n"); @@ -276,8 +387,8 @@ static void amd_decode_ic_mce(struct mce *m) static void amd_decode_bu_mce(struct mce *m) { - u32 ec = m->status & 0xffff; - u32 xec = (m->status >> 16) & 0xf; + u16 ec = EC(m->status); + u8 xec = XEC(m->status, xec_mask); pr_emerg(HW_ERR "Bus Unit Error"); @@ -286,23 +397,23 @@ static void amd_decode_bu_mce(struct mce *m) else if (xec == 0x3) pr_cont(" in the victim data buffers.\n"); else if (xec == 0x2 && MEM_ERROR(ec)) - pr_cont(": %s error in the L2 cache tags.\n", RRRR_MSG(ec)); + pr_cont(": %s error in the L2 cache tags.\n", R4_MSG(ec)); else if (xec == 0x0) { if (TLB_ERROR(ec)) pr_cont(": %s error in a Page Descriptor Cache or " "Guest TLB.\n", TT_MSG(ec)); else if (BUS_ERROR(ec)) pr_cont(": %s/ECC error in data read from NB: %s.\n", - RRRR_MSG(ec), PP_MSG(ec)); + R4_MSG(ec), PP_MSG(ec)); else if (MEM_ERROR(ec)) { - u8 rrrr = (ec >> 4) & 0xf; + u8 r4 = R4(ec); - if (rrrr >= 0x7) + if (r4 >= 0x7) pr_cont(": %s error during data copyback.\n", - RRRR_MSG(ec)); - else if (rrrr <= 0x1) + R4_MSG(ec)); + else if (r4 <= 0x1) pr_cont(": %s parity/ECC error during data " - "access from L2.\n", RRRR_MSG(ec)); + "access from L2.\n", R4_MSG(ec)); else goto wrong_bu_mce; } else @@ -316,12 +427,52 @@ wrong_bu_mce: pr_emerg(HW_ERR "Corrupted BU MCE info?\n"); } +static void amd_decode_cu_mce(struct mce *m) +{ + u16 ec = EC(m->status); + u8 xec = XEC(m->status, xec_mask); + + pr_emerg(HW_ERR "Combined Unit Error: "); + + if (TLB_ERROR(ec)) { + if (xec == 0x0) + pr_cont("Data parity TLB read error.\n"); + else if (xec == 0x1) + pr_cont("Poison data provided for TLB fill.\n"); + else + goto wrong_cu_mce; + } else if (BUS_ERROR(ec)) { + if (xec > 2) + goto wrong_cu_mce; + + pr_cont("Error during attempted NB data read.\n"); + } else if (MEM_ERROR(ec)) { + switch (xec) { + case 0x4 ... 0xc: + pr_cont("%s.\n", f15h_cu_mce_desc[xec - 0x4]); + break; + + case 0x10 ... 0x14: + pr_cont("%s.\n", f15h_cu_mce_desc[xec - 0x7]); + break; + + default: + goto wrong_cu_mce; + } + } + + return; + +wrong_cu_mce: + pr_emerg(HW_ERR "Corrupted CU MCE info?\n"); +} + static void amd_decode_ls_mce(struct mce *m) { - u16 ec = m->status & 0xffff; - u8 xec = (m->status >> 16) & 0xf; + u16 ec = EC(m->status); + u8 xec = XEC(m->status, xec_mask); - if (boot_cpu_data.x86 == 0x14) { + if (boot_cpu_data.x86 >= 0x14) { pr_emerg("You shouldn't be seeing an LS MCE on this cpu family," " please report on LKML.\n"); return; @@ -330,12 +481,12 @@ static void amd_decode_ls_mce(struct mce *m) pr_emerg(HW_ERR "Load Store Error"); if (xec == 0x0) { - u8 r4 = (ec >> 4) & 0xf; + u8 r4 = R4(ec); if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR)) goto wrong_ls_mce; - pr_cont(" during %s.\n", RRRR_MSG(ec)); + pr_cont(" during %s.\n", R4_MSG(ec)); } else goto wrong_ls_mce; @@ -410,6 +561,15 @@ static bool f10h_nb_mce(u16 ec, u8 xec) goto out; break; + case 0x19: + if (boot_cpu_data.x86 == 0x15) + pr_cont("Compute Unit Data Error.\n"); + else + ret = false; + + goto out; + break; + case 0x1c ... 0x1f: offset = 24; break; @@ -434,27 +594,30 @@ static bool nb_noop_mce(u16 ec, u8 xec) void amd_decode_nb_mce(int node_id, struct mce *m, u32 nbcfg) { - u8 xec = (m->status >> 16) & 0x1f; - u16 ec = m->status & 0xffff; + u16 ec = EC(m->status); + u8 xec = XEC(m->status, 0x1f); u32 nbsh = (u32)(m->status >> 32); + int core = -1; - pr_emerg(HW_ERR "Northbridge Error, node %d: ", node_id); + pr_emerg(HW_ERR "Northbridge Error (node %d", node_id); - /* - * F10h, revD can disable ErrCpu[3:0] so check that first and also the - * value encoding has changed so interpret those differently - */ + /* F10h, revD can disable ErrCpu[3:0] through ErrCpuVal */ if ((boot_cpu_data.x86 == 0x10) && (boot_cpu_data.x86_model > 7)) { if (nbsh & K8_NBSH_ERR_CPU_VAL) - pr_cont(", core: %u", (u8)(nbsh & nb_err_cpumask)); + core = nbsh & nb_err_cpumask; } else { u8 assoc_cpus = nbsh & nb_err_cpumask; if (assoc_cpus > 0) - pr_cont(", core: %d", fls(assoc_cpus) - 1); + core = fls(assoc_cpus) - 1; } + if (core >= 0) + pr_cont(", core %d): ", core); + else + pr_cont("): "); + switch (xec) { case 0x2: pr_cont("Sync error (sync packets on HT link detected).\n"); @@ -496,35 +659,89 @@ EXPORT_SYMBOL_GPL(amd_decode_nb_mce); static void amd_decode_fr_mce(struct mce *m) { - if (boot_cpu_data.x86 == 0xf || - boot_cpu_data.x86 == 0x11) + struct cpuinfo_x86 *c = &boot_cpu_data; + u8 xec = XEC(m->status, xec_mask); + + if (c->x86 == 0xf || c->x86 == 0x11) goto wrong_fr_mce; - /* we have only one error signature so match all fields at once. */ - if ((m->status & 0xffff) == 0x0f0f) { - pr_emerg(HW_ERR "FR Error: CPU Watchdog timer expire.\n"); - return; - } + if (c->x86 != 0x15 && xec != 0x0) + goto wrong_fr_mce; + + pr_emerg(HW_ERR "%s Error: ", + (c->x86 == 0x15 ? "Execution Unit" : "FIROB")); + + if (xec == 0x0 || xec == 0xc) + pr_cont("%s.\n", fr_ex_mce_desc[xec]); + else if (xec < 0xd) + pr_cont("%s parity error.\n", fr_ex_mce_desc[xec]); + else + goto wrong_fr_mce; + + return; wrong_fr_mce: pr_emerg(HW_ERR "Corrupted FR MCE info?\n"); } +static void amd_decode_fp_mce(struct mce *m) +{ + u8 xec = XEC(m->status, xec_mask); + + pr_emerg(HW_ERR "Floating Point Unit Error: "); + + switch (xec) { + case 0x1: + pr_cont("Free List"); + break; + + case 0x2: + pr_cont("Physical Register File"); + break; + + case 0x3: + pr_cont("Retire Queue"); + break; + + case 0x4: + pr_cont("Scheduler table"); + break; + + case 0x5: + pr_cont("Status Register File"); + break; + + default: + goto wrong_fp_mce; + break; + } + + pr_cont(" parity error.\n"); + + return; + +wrong_fp_mce: + pr_emerg(HW_ERR "Corrupted FP MCE info?\n"); +} + static inline void amd_decode_err_code(u16 ec) { - if (TLB_ERROR(ec)) { - pr_emerg(HW_ERR "Transaction: %s, Cache Level: %s\n", - TT_MSG(ec), LL_MSG(ec)); - } else if (MEM_ERROR(ec)) { - pr_emerg(HW_ERR "Transaction: %s, Type: %s, Cache Level: %s\n", - RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec)); - } else if (BUS_ERROR(ec)) { - pr_emerg(HW_ERR "Transaction: %s (%s), %s, Cache Level: %s, " - "Participating Processor: %s\n", - RRRR_MSG(ec), II_MSG(ec), TO_MSG(ec), LL_MSG(ec), - PP_MSG(ec)); - } else - pr_emerg(HW_ERR "Huh? Unknown MCE error 0x%x\n", ec); + + pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec)); + + if (BUS_ERROR(ec)) + pr_cont(", mem/io: %s", II_MSG(ec)); + else + pr_cont(", tx: %s", TT_MSG(ec)); + + if (MEM_ERROR(ec) || BUS_ERROR(ec)) { + pr_cont(", mem-tx: %s", R4_MSG(ec)); + + if (BUS_ERROR(ec)) + pr_cont(", part-proc: %s (%s)", PP_MSG(ec), TO_MSG(ec)); + } + + pr_cont("\n"); } /* @@ -546,25 +763,32 @@ static bool amd_filter_mce(struct mce *m) int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) { struct mce *m = (struct mce *)data; + struct cpuinfo_x86 *c = &boot_cpu_data; int node, ecc; if (amd_filter_mce(m)) return NOTIFY_STOP; - pr_emerg(HW_ERR "MC%d_STATUS: ", m->bank); + pr_emerg(HW_ERR "MC%d_STATUS[%s|%s|%s|%s|%s", + m->bank, + ((m->status & MCI_STATUS_OVER) ? "Over" : "-"), + ((m->status & MCI_STATUS_UC) ? "UE" : "CE"), + ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"), + ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"), + ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-")); - pr_cont("%sorrected error, other errors lost: %s, " - "CPU context corrupt: %s", - ((m->status & MCI_STATUS_UC) ? "Unc" : "C"), - ((m->status & MCI_STATUS_OVER) ? "yes" : "no"), - ((m->status & MCI_STATUS_PCC) ? "yes" : "no")); + if (c->x86 == 0x15) + pr_cont("|%s|%s", + ((m->status & BIT_64(44)) ? "Deferred" : "-"), + ((m->status & BIT_64(43)) ? "Poison" : "-")); /* do the two bits[14:13] together */ ecc = (m->status >> 45) & 0x3; if (ecc) - pr_cont(", %sECC Error", ((ecc == 2) ? "C" : "U")); + pr_cont("|%sECC", ((ecc == 2) ? "C" : "U")); + + pr_cont("]: 0x%016llx\n", m->status); - pr_cont("\n"); switch (m->bank) { case 0: @@ -576,7 +800,10 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) break; case 2: - amd_decode_bu_mce(m); + if (c->x86 == 0x15) + amd_decode_cu_mce(m); + else + amd_decode_bu_mce(m); break; case 3: @@ -592,6 +819,10 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) amd_decode_fr_mce(m); break; + case 6: + amd_decode_fp_mce(m); + break; + default: break; } @@ -608,18 +839,21 @@ static struct notifier_block amd_mce_dec_nb = { static int __init mce_amd_init(void) { - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + struct cpuinfo_x86 *c = &boot_cpu_data; + + if (c->x86_vendor != X86_VENDOR_AMD) return 0; - if ((boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x12) && - (boot_cpu_data.x86 != 0x14 || boot_cpu_data.x86_model > 0xf)) + if ((c->x86 < 0xf || c->x86 > 0x12) && + (c->x86 != 0x14 || c->x86_model > 0xf) && + (c->x86 != 0x15 || c->x86_model > 0xf)) return 0; fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL); if (!fam_ops) return -ENOMEM; - switch (boot_cpu_data.x86) { + switch (c->x86) { case 0xf: fam_ops->dc_mce = k8_dc_mce; fam_ops->ic_mce = k8_ic_mce; @@ -651,9 +885,15 @@ static int __init mce_amd_init(void) fam_ops->nb_mce = nb_noop_mce; break; + case 0x15: + xec_mask = 0x1f; + fam_ops->dc_mce = f15h_dc_mce; + fam_ops->ic_mce = f15h_ic_mce; + fam_ops->nb_mce = f10h_nb_mce; + break; + default: - printk(KERN_WARNING "Huh? What family is that: %d?!\n", - boot_cpu_data.x86); + printk(KERN_WARNING "Huh? What family is that: %d?!\n", c->x86); kfree(fam_ops); return -EINVAL; } diff --git a/drivers/edac/mce_amd.h b/drivers/edac/mce_amd.h index 35f6e0e3b29..45dda47173f 100644 --- a/drivers/edac/mce_amd.h +++ b/drivers/edac/mce_amd.h @@ -7,8 +7,8 @@ #define BIT_64(n) (U64_C(1) << (n)) -#define ERROR_CODE(x) ((x) & 0xffff) -#define EXT_ERROR_CODE(x) (((x) >> 16) & 0x1f) +#define EC(x) ((x) & 0xffff) +#define XEC(x, mask) (((x) >> 16) & mask) #define LOW_SYNDROME(x) (((x) >> 15) & 0xff) #define HIGH_SYNDROME(x) (((x) >> 24) & 0xff) @@ -21,15 +21,15 @@ #define TT_MSG(x) tt_msgs[TT(x)] #define II(x) (((x) >> 2) & 0x3) #define II_MSG(x) ii_msgs[II(x)] -#define LL(x) (((x) >> 0) & 0x3) +#define LL(x) ((x) & 0x3) #define LL_MSG(x) ll_msgs[LL(x)] #define TO(x) (((x) >> 8) & 0x1) #define TO_MSG(x) to_msgs[TO(x)] #define PP(x) (((x) >> 9) & 0x3) #define PP_MSG(x) pp_msgs[PP(x)] -#define RRRR(x) (((x) >> 4) & 0xf) -#define RRRR_MSG(x) ((RRRR(x) < 9) ? rrrr_msgs[RRRR(x)] : "Wrong R4!") +#define R4(x) (((x) >> 4) & 0xf) +#define R4_MSG(x) ((R4(x) < 9) ? rrrr_msgs[R4(x)] : "Wrong R4!") #define K8_NBSH 0x4C @@ -100,8 +100,8 @@ struct err_regs { * per-family decoder ops */ struct amd_decoder_ops { - bool (*dc_mce)(u16); - bool (*ic_mce)(u16); + bool (*dc_mce)(u16, u8); + bool (*ic_mce)(u16, u8); bool (*nb_mce)(u16, u8); }; diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c index 39faded3cad..733a7e7a8d6 100644 --- a/drivers/edac/mce_amd_inj.c +++ b/drivers/edac/mce_amd_inj.c @@ -88,10 +88,11 @@ static ssize_t edac_inject_bank_store(struct kobject *kobj, return -EINVAL; } - if (value > 5) { - printk(KERN_ERR "Non-existant MCE bank: %lu\n", value); - return -EINVAL; - } + if (value > 5) + if (boot_cpu_data.x86 != 0x15 || value > 6) { + printk(KERN_ERR "Non-existant MCE bank: %lu\n", value); + return -EINVAL; + } i_mce.bank = value; diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c index 070cea41b66..b9f0c20df1a 100644 --- a/drivers/edac/ppc4xx_edac.c +++ b/drivers/edac/ppc4xx_edac.c @@ -873,7 +873,7 @@ ppc4xx_edac_get_mtype(u32 mcopt1) } /** - * ppc4xx_edac_init_csrows - intialize driver instance rows + * ppc4xx_edac_init_csrows - initialize driver instance rows * @mci: A pointer to the EDAC memory controller instance * associated with the ibm,sdram-4xx-ddr2 controller for which * the csrows (i.e. banks/ranks) are being initialized. @@ -881,7 +881,7 @@ ppc4xx_edac_get_mtype(u32 mcopt1) * currently set for the controller, from which bank width * and memory typ information is derived. * - * This routine intializes the virtual "chip select rows" associated + * This routine initializes the virtual "chip select rows" associated * with the EDAC memory controller instance. An ibm,sdram-4xx-ddr2 * controller bank/rank is mapped to a row. * @@ -992,7 +992,7 @@ ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1) } /** - * ppc4xx_edac_mc_init - intialize driver instance + * ppc4xx_edac_mc_init - initialize driver instance * @mci: A pointer to the EDAC memory controller instance being * initialized. * @op: A pointer to the OpenFirmware device tree node associated |