summaryrefslogtreecommitdiffstats
path: root/drivers/edac
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/edac')
-rw-r--r--drivers/edac/Kconfig41
-rw-r--r--drivers/edac/Makefile5
-rw-r--r--drivers/edac/amd64_edac.c297
-rw-r--r--drivers/edac/amd64_edac.h59
-rw-r--r--drivers/edac/amd64_edac_inj.c128
-rw-r--r--drivers/edac/edac_mc.c65
-rw-r--r--drivers/edac/edac_mc_sysfs.c39
-rw-r--r--drivers/edac/edac_module.c27
-rw-r--r--drivers/edac/edac_pci.c3
-rw-r--r--drivers/edac/edac_pci_sysfs.c12
-rw-r--r--drivers/edac/highbank_mc_edac.c8
-rw-r--r--drivers/edac/i7300_edac.c8
-rw-r--r--drivers/edac/i7core_edac.c6
-rw-r--r--drivers/edac/i82975x_edac.c11
-rw-r--r--drivers/edac/mce_amd.c254
-rw-r--r--drivers/edac/mce_amd.h11
-rw-r--r--drivers/edac/octeon_edac-l2c.c208
-rw-r--r--drivers/edac/octeon_edac-lmc.c186
-rw-r--r--drivers/edac/octeon_edac-pc.c143
-rw-r--r--drivers/edac/octeon_edac-pci.c111
20 files changed, 1158 insertions, 464 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 409b92b8d34..4c6c876d9dc 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -7,7 +7,7 @@
menuconfig EDAC
bool "EDAC (Error Detection And Correction) reporting"
depends on HAS_IOMEM
- depends on X86 || PPC || TILE || ARM
+ depends on X86 || PPC || TILE || ARM || EDAC_SUPPORT
help
EDAC is designed to report errors in the core system.
These are low-level errors that are reported in the CPU or
@@ -27,6 +27,9 @@ menuconfig EDAC
There is also a mailing list for the EDAC project, which can
be found via the sourceforge page.
+config EDAC_SUPPORT
+ bool
+
if EDAC
comment "Reporting subsystems"
@@ -42,10 +45,10 @@ config EDAC_LEGACY_SYSFS
config EDAC_DEBUG
bool "Debugging"
help
- This turns on debugging information for the entire EDAC
- sub-system. You can insert module with "debug_level=x", current
- there're four debug levels (x=0,1,2,3 from low to high).
- Usually you should select 'N'.
+ This turns on debugging information for the entire EDAC subsystem.
+ You do so by inserting edac_module with "edac_debug_level=x." Valid
+ levels are 0-4 (from low to high) and by default it is set to 2.
+ Usually you should select 'N' here.
config EDAC_DECODE_MCE
tristate "Decode MCEs in human-readable form (only on AMD for now)"
@@ -316,4 +319,32 @@ config EDAC_HIGHBANK_L2
Support for error detection and correction on the
Calxeda Highbank memory controller.
+config EDAC_OCTEON_PC
+ tristate "Cavium Octeon Primary Caches"
+ depends on EDAC_MM_EDAC && CPU_CAVIUM_OCTEON
+ help
+ Support for error detection and correction on the primary caches of
+ the cnMIPS cores of Cavium Octeon family SOCs.
+
+config EDAC_OCTEON_L2C
+ tristate "Cavium Octeon Secondary Caches (L2C)"
+ depends on EDAC_MM_EDAC && CPU_CAVIUM_OCTEON
+ help
+ Support for error detection and correction on the
+ Cavium Octeon family of SOCs.
+
+config EDAC_OCTEON_LMC
+ tristate "Cavium Octeon DRAM Memory Controller (LMC)"
+ depends on EDAC_MM_EDAC && CPU_CAVIUM_OCTEON
+ help
+ Support for error detection and correction on the
+ Cavium Octeon family of SOCs.
+
+config EDAC_OCTEON_PCI
+ tristate "Cavium Octeon PCI Controller"
+ depends on EDAC_MM_EDAC && PCI && CPU_CAVIUM_OCTEON
+ help
+ Support for error detection and correction on the
+ Cavium Octeon family of SOCs.
+
endif # EDAC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 7e5129a733f..5608a9ba61b 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -58,3 +58,8 @@ obj-$(CONFIG_EDAC_TILE) += tile_edac.o
obj-$(CONFIG_EDAC_HIGHBANK_MC) += highbank_mc_edac.o
obj-$(CONFIG_EDAC_HIGHBANK_L2) += highbank_l2_edac.o
+
+obj-$(CONFIG_EDAC_OCTEON_PC) += octeon_edac-pc.o
+obj-$(CONFIG_EDAC_OCTEON_L2C) += octeon_edac-l2c.o
+obj-$(CONFIG_EDAC_OCTEON_LMC) += octeon_edac-lmc.o
+obj-$(CONFIG_EDAC_OCTEON_PCI) += octeon_edac-pci.o
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index cc8e7c78a23..f74a684269f 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -60,8 +60,8 @@ struct scrubrate {
{ 0x00, 0UL}, /* scrubbing off */
};
-static int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
- u32 *val, const char *func)
+int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
+ u32 *val, const char *func)
{
int err = 0;
@@ -423,7 +423,6 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
u64 *hole_offset, u64 *hole_size)
{
struct amd64_pvt *pvt = mci->pvt_info;
- u64 base;
/* only revE and later have the DRAM Hole Address Register */
if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_E) {
@@ -462,10 +461,8 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
* addresses in the hole so that they start at 0x100000000.
*/
- base = dhar_base(pvt);
-
- *hole_base = base;
- *hole_size = (0x1ull << 32) - base;
+ *hole_base = dhar_base(pvt);
+ *hole_size = (1ULL << 32) - *hole_base;
if (boot_cpu_data.x86 > 0xf)
*hole_offset = f10_dhar_offset(pvt);
@@ -513,15 +510,15 @@ static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr)
{
struct amd64_pvt *pvt = mci->pvt_info;
u64 dram_base, hole_base, hole_offset, hole_size, dram_addr;
- int ret = 0;
+ int ret;
dram_base = get_dram_base(pvt, pvt->mc_node_id);
ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset,
&hole_size);
if (!ret) {
- if ((sys_addr >= (1ull << 32)) &&
- (sys_addr < ((1ull << 32) + hole_size))) {
+ if ((sys_addr >= (1ULL << 32)) &&
+ (sys_addr < ((1ULL << 32) + hole_size))) {
/* use DHAR to translate SysAddr to DramAddr */
dram_addr = sys_addr - hole_offset;
@@ -712,10 +709,10 @@ static inline u64 input_addr_to_sys_addr(struct mem_ctl_info *mci,
/* Map the Error address to a PAGE and PAGE OFFSET. */
static inline void error_address_to_page_and_offset(u64 error_address,
- u32 *page, u32 *offset)
+ struct err_info *err)
{
- *page = (u32) (error_address >> PAGE_SHIFT);
- *offset = ((u32) error_address) & ~PAGE_MASK;
+ err->page = (u32) (error_address >> PAGE_SHIFT);
+ err->offset = ((u32) error_address) & ~PAGE_MASK;
}
/*
@@ -1026,59 +1023,44 @@ static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range)
}
static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
- u16 syndrome)
+ struct err_info *err)
{
- struct mem_ctl_info *src_mci;
struct amd64_pvt *pvt = mci->pvt_info;
- int channel, csrow;
- u32 page, offset;
- error_address_to_page_and_offset(sys_addr, &page, &offset);
+ error_address_to_page_and_offset(sys_addr, err);
/*
* Find out which node the error address belongs to. This may be
* different from the node that detected the error.
*/
- src_mci = find_mc_by_sys_addr(mci, sys_addr);
- if (!src_mci) {
+ err->src_mci = find_mc_by_sys_addr(mci, sys_addr);
+ if (!err->src_mci) {
amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n",
(unsigned long)sys_addr);
- edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
- page, offset, syndrome,
- -1, -1, -1,
- "failed to map error addr to a node",
- "");
+ err->err_code = ERR_NODE;
return;
}
/* Now map the sys_addr to a CSROW */
- csrow = sys_addr_to_csrow(src_mci, sys_addr);
- if (csrow < 0) {
- edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
- page, offset, syndrome,
- -1, -1, -1,
- "failed to map error addr to a csrow",
- "");
+ err->csrow = sys_addr_to_csrow(err->src_mci, sys_addr);
+ if (err->csrow < 0) {
+ err->err_code = ERR_CSROW;
return;
}
/* CHIPKILL enabled */
if (pvt->nbcfg & NBCFG_CHIPKILL) {
- channel = get_channel_from_ecc_syndrome(mci, syndrome);
- if (channel < 0) {
+ err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome);
+ if (err->channel < 0) {
/*
* Syndrome didn't map, so we don't know which of the
* 2 DIMMs is in error. So we need to ID 'both' of them
* as suspect.
*/
- amd64_mc_warn(src_mci, "unknown syndrome 0x%04x - "
+ amd64_mc_warn(err->src_mci, "unknown syndrome 0x%04x - "
"possible error reporting race\n",
- syndrome);
- edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
- page, offset, syndrome,
- csrow, -1, -1,
- "unknown syndrome - possible error reporting race",
- "");
+ err->syndrome);
+ err->err_code = ERR_CHANNEL;
return;
}
} else {
@@ -1090,13 +1072,8 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
* was obtained from email communication with someone at AMD.
* (Wish the email was placed in this comment - norsk)
*/
- channel = ((sys_addr & BIT(3)) != 0);
+ err->channel = ((sys_addr & BIT(3)) != 0);
}
-
- edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, src_mci, 1,
- page, offset, syndrome,
- csrow, channel, -1,
- "", "");
}
static int ddr2_cs_size(unsigned i, bool dct_width)
@@ -1482,7 +1459,7 @@ static u64 f1x_swap_interleaved_region(struct amd64_pvt *pvt, u64 sys_addr)
/* For a given @dram_range, check if @sys_addr falls within it. */
static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range,
- u64 sys_addr, int *nid, int *chan_sel)
+ u64 sys_addr, int *chan_sel)
{
int cs_found = -EINVAL;
u64 chan_addr;
@@ -1555,15 +1532,14 @@ static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range,
cs_found = f1x_lookup_addr_in_dct(chan_addr, node_id, channel);
- if (cs_found >= 0) {
- *nid = node_id;
+ if (cs_found >= 0)
*chan_sel = channel;
- }
+
return cs_found;
}
static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
- int *node, int *chan_sel)
+ int *chan_sel)
{
int cs_found = -EINVAL;
unsigned range;
@@ -1577,8 +1553,7 @@ static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
(get_dram_limit(pvt, range) >= sys_addr)) {
cs_found = f1x_match_to_this_node(pvt, range,
- sys_addr, node,
- chan_sel);
+ sys_addr, chan_sel);
if (cs_found >= 0)
break;
}
@@ -1594,22 +1569,15 @@ static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
* (MCX_ADDR).
*/
static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
- u16 syndrome)
+ struct err_info *err)
{
struct amd64_pvt *pvt = mci->pvt_info;
- u32 page, offset;
- int nid, csrow, chan = 0;
- error_address_to_page_and_offset(sys_addr, &page, &offset);
+ error_address_to_page_and_offset(sys_addr, err);
- csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan);
-
- if (csrow < 0) {
- edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
- page, offset, syndrome,
- -1, -1, -1,
- "failed to map error addr to a csrow",
- "");
+ err->csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &err->channel);
+ if (err->csrow < 0) {
+ err->err_code = ERR_CSROW;
return;
}
@@ -1619,12 +1587,7 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
* this point.
*/
if (dct_ganging_enabled(pvt))
- chan = get_channel_from_ecc_syndrome(mci, syndrome);
-
- edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
- page, offset, syndrome,
- csrow, chan, -1,
- "", "");
+ err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome);
}
/*
@@ -1633,14 +1596,11 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
*/
static void amd64_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
{
- int dimm, size0, size1, factor = 0;
+ int dimm, size0, size1;
u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases;
u32 dbam = ctrl ? pvt->dbam1 : pvt->dbam0;
if (boot_cpu_data.x86 == 0xf) {
- if (pvt->dclr0 & WIDTH_128)
- factor = 1;
-
/* K8 families < revF not supported yet */
if (pvt->ext_model < K8_REV_F)
return;
@@ -1671,8 +1631,8 @@ static void amd64_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
DBAM_DIMM(dimm, dbam));
amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
- dimm * 2, size0 << factor,
- dimm * 2 + 1, size1 << factor);
+ dimm * 2, size0,
+ dimm * 2 + 1, size1);
}
}
@@ -1893,101 +1853,56 @@ static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome)
return map_err_sym_to_channel(err_sym, pvt->ecc_sym_sz);
}
-/*
- * Handle any Correctable Errors (CEs) that have occurred. Check for valid ERROR
- * ADDRESS and process.
- */
-static void amd64_handle_ce(struct mem_ctl_info *mci, struct mce *m)
-{
- struct amd64_pvt *pvt = mci->pvt_info;
- u64 sys_addr;
- u16 syndrome;
-
- /* Ensure that the Error Address is VALID */
- if (!(m->status & MCI_STATUS_ADDRV)) {
- amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
- edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
- 0, 0, 0,
- -1, -1, -1,
- "HW has no ERROR_ADDRESS available",
- "");
- return;
- }
-
- sys_addr = get_error_address(m);
- syndrome = extract_syndrome(m->status);
-
- amd64_mc_err(mci, "CE ERROR_ADDRESS= 0x%llx\n", sys_addr);
-
- pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, syndrome);
-}
-
-/* Handle any Un-correctable Errors (UEs) */
-static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m)
+static void __log_bus_error(struct mem_ctl_info *mci, struct err_info *err,
+ u8 ecc_type)
{
- struct mem_ctl_info *log_mci, *src_mci = NULL;
- int csrow;
- u64 sys_addr;
- u32 page, offset;
-
- log_mci = mci;
+ enum hw_event_mc_err_type err_type;
+ const char *string;
- if (!(m->status & MCI_STATUS_ADDRV)) {
- amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
- edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
- 0, 0, 0,
- -1, -1, -1,
- "HW has no ERROR_ADDRESS available",
- "");
+ if (ecc_type == 2)
+ err_type = HW_EVENT_ERR_CORRECTED;
+ else if (ecc_type == 1)
+ err_type = HW_EVENT_ERR_UNCORRECTED;
+ else {
+ WARN(1, "Something is rotten in the state of Denmark.\n");
return;
}
- sys_addr = get_error_address(m);
- error_address_to_page_and_offset(sys_addr, &page, &offset);
-
- /*
- * Find out which node the error address belongs to. This may be
- * different from the node that detected the error.
- */
- src_mci = find_mc_by_sys_addr(mci, sys_addr);
- if (!src_mci) {
- amd64_mc_err(mci, "ERROR ADDRESS (0x%lx) NOT mapped to a MC\n",
- (unsigned long)sys_addr);
- edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
- page, offset, 0,
- -1, -1, -1,
- "ERROR ADDRESS NOT mapped to a MC",
- "");
- return;
+ switch (err->err_code) {
+ case DECODE_OK:
+ string = "";
+ break;
+ case ERR_NODE:
+ string = "Failed to map error addr to a node";
+ break;
+ case ERR_CSROW:
+ string = "Failed to map error addr to a csrow";
+ break;
+ case ERR_CHANNEL:
+ string = "unknown syndrome - possible error reporting race";
+ break;
+ default:
+ string = "WTF error";
+ break;
}
- log_mci = src_mci;
-
- csrow = sys_addr_to_csrow(log_mci, sys_addr);
- if (csrow < 0) {
- amd64_mc_err(mci, "ERROR_ADDRESS (0x%lx) NOT mapped to CS\n",
- (unsigned long)sys_addr);
- edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
- page, offset, 0,
- -1, -1, -1,
- "ERROR ADDRESS NOT mapped to CS",
- "");
- } else {
- edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
- page, offset, 0,
- csrow, -1, -1,
- "", "");
- }
+ edac_mc_handle_error(err_type, mci, 1,
+ err->page, err->offset, err->syndrome,
+ err->csrow, err->channel, -1,
+ string, "");
}
static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci,
struct mce *m)
{
- u16 ec = EC(m->status);
- u8 xec = XEC(m->status, 0x1f);
+ struct amd64_pvt *pvt = mci->pvt_info;
u8 ecc_type = (m->status >> 45) & 0x3;
+ u8 xec = XEC(m->status, 0x1f);
+ u16 ec = EC(m->status);
+ u64 sys_addr;
+ struct err_info err;
- /* Bail early out if this was an 'observed' error */
+ /* Bail out early if this was an 'observed' error */
if (PP(ec) == NBSL_PP_OBS)
return;
@@ -1995,10 +1910,16 @@ static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci,
if (xec && xec != F10_NBSL_EXT_ERR_ECC)
return;
+ memset(&err, 0, sizeof(err));
+
+ sys_addr = get_error_address(m);
+
if (ecc_type == 2)
- amd64_handle_ce(mci, m);
- else if (ecc_type == 1)
- amd64_handle_ue(mci, m);
+ err.syndrome = extract_syndrome(m->status);
+
+ pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, &err);
+
+ __log_bus_error(mci, &err, ecc_type);
}
void amd64_decode_bus_error(int node_id, struct mce *m)
@@ -2166,6 +2087,7 @@ static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
u32 cs_mode, nr_pages;
u32 dbam = dct ? pvt->dbam1 : pvt->dbam0;
+
/*
* The math on this doesn't look right on the surface because x/2*4 can
* be simplified to x*2 but this expression makes use of the fact that
@@ -2173,13 +2095,13 @@ static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
* number of bits to shift the DBAM register to extract the proper CSROW
* field.
*/
- cs_mode = (dbam >> ((csrow_nr / 2) * 4)) & 0xF;
+ cs_mode = DBAM_DIMM(csrow_nr / 2, dbam);
nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode) << (20 - PAGE_SHIFT);
- edac_dbg(0, " (csrow=%d) DBAM map index= %d\n", csrow_nr, cs_mode);
- edac_dbg(0, " nr_pages/channel= %u channel-count = %d\n",
- nr_pages, pvt->channel_count);
+ edac_dbg(0, "csrow: %d, channel: %d, DBAM idx: %d\n",
+ csrow_nr, dct, cs_mode);
+ edac_dbg(0, "nr_pages/channel: %u\n", nr_pages);
return nr_pages;
}
@@ -2190,15 +2112,14 @@ static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
*/
static int init_csrows(struct mem_ctl_info *mci)
{
+ struct amd64_pvt *pvt = mci->pvt_info;
struct csrow_info *csrow;
struct dimm_info *dimm;
- struct amd64_pvt *pvt = mci->pvt_info;
- u64 base, mask;
- u32 val;
- int i, j, empty = 1;
- enum mem_type mtype;
enum edac_type edac_mode;
+ enum mem_type mtype;
+ int i, j, empty = 1;
int nr_pages = 0;
+ u32 val;
amd64_read_pci_cfg(pvt->F3, NBCFG, &val);
@@ -2208,29 +2129,35 @@ static int init_csrows(struct mem_ctl_info *mci)
pvt->mc_node_id, val,
!!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE));
+ /*
+ * We iterate over DCT0 here but we look at DCT1 in parallel, if needed.
+ */
for_each_chip_select(i, 0, pvt) {
- csrow = mci->csrows[i];
+ bool row_dct0 = !!csrow_enabled(i, 0, pvt);
+ bool row_dct1 = false;
+
+ if (boot_cpu_data.x86 != 0xf)
+ row_dct1 = !!csrow_enabled(i, 1, pvt);
- if (!csrow_enabled(i, 0, pvt) && !csrow_enabled(i, 1, pvt)) {
- edac_dbg(1, "----CSROW %d VALID for MC node %d\n",
- i, pvt->mc_node_id);
+ if (!row_dct0 && !row_dct1)
continue;
- }
+ csrow = mci->csrows[i];
empty = 0;
- if (csrow_enabled(i, 0, pvt))
+
+ edac_dbg(1, "MC node: %d, csrow: %d\n",
+ pvt->mc_node_id, i);
+
+ if (row_dct0)
nr_pages = amd64_csrow_nr_pages(pvt, 0, i);
- if (csrow_enabled(i, 1, pvt))
- nr_pages += amd64_csrow_nr_pages(pvt, 1, i);
- get_cs_base_and_mask(pvt, i, 0, &base, &mask);
- /* 8 bytes of resolution */
+ /* K8 has only one DCT */
+ if (boot_cpu_data.x86 != 0xf && row_dct1)
+ nr_pages += amd64_csrow_nr_pages(pvt, 1, i);
mtype = amd64_determine_memory_type(pvt, i);
- edac_dbg(1, " for MC node %d csrow %d:\n", pvt->mc_node_id, i);
- edac_dbg(1, " nr_pages: %u\n",
- nr_pages * pvt->channel_count);
+ edac_dbg(1, "Total csrow%d pages: %u\n", i, nr_pages);
/*
* determine whether CHIPKILL or JUST ECC or NO ECC is operating
@@ -2247,6 +2174,7 @@ static int init_csrows(struct mem_ctl_info *mci)
dimm->edac_mode = edac_mode;
dimm->nr_pages = nr_pages;
}
+ csrow->nr_pages = nr_pages;
}
return empty;
@@ -2591,6 +2519,7 @@ static int amd64_init_one_instance(struct pci_dev *F2)
mci->pvt_info = pvt;
mci->pdev = &pvt->F2->dev;
+ mci->csbased = 1;
setup_mci_misc_attrs(mci, fam_type);
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 8c4139647ef..e864f407806 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -219,7 +219,7 @@
#define DBAM1 0x180
/* Extract the DIMM 'type' on the i'th DIMM from the DBAM reg value passed */
-#define DBAM_DIMM(i, reg) ((((reg) >> (4*i))) & 0xF)
+#define DBAM_DIMM(i, reg) ((((reg) >> (4*(i)))) & 0xF)
#define DBAM_MAX_VALUE 11
@@ -267,18 +267,20 @@
#define online_spare_bad_dramcs(pvt, c) (((pvt)->online_spare >> (4 + 4 * (c))) & 0x7)
#define F10_NB_ARRAY_ADDR 0xB8
-#define F10_NB_ARRAY_DRAM_ECC BIT(31)
+#define F10_NB_ARRAY_DRAM BIT(31)
/* Bits [2:1] are used to select 16-byte section within a 64-byte cacheline */
-#define SET_NB_ARRAY_ADDRESS(section) (((section) & 0x3) << 1)
+#define SET_NB_ARRAY_ADDR(section) (((section) & 0x3) << 1)
#define F10_NB_ARRAY_DATA 0xBC
-#define SET_NB_DRAM_INJECTION_WRITE(word, bits) \
- (BIT(((word) & 0xF) + 20) | \
- BIT(17) | bits)
-#define SET_NB_DRAM_INJECTION_READ(word, bits) \
- (BIT(((word) & 0xF) + 20) | \
- BIT(16) | bits)
+#define F10_NB_ARR_ECC_WR_REQ BIT(17)
+#define SET_NB_DRAM_INJECTION_WRITE(inj) \
+ (BIT(((inj.word) & 0xF) + 20) | \
+ F10_NB_ARR_ECC_WR_REQ | inj.bit_map)
+#define SET_NB_DRAM_INJECTION_READ(inj) \
+ (BIT(((inj.word) & 0xF) + 20) | \
+ BIT(16) | inj.bit_map)
+
#define NBCAP 0xE8
#define NBCAP_CHIPKILL BIT(4)
@@ -305,9 +307,9 @@ enum amd_families {
/* Error injection control structure */
struct error_injection {
- u32 section;
- u32 word;
- u32 bit_map;
+ u32 section;
+ u32 word;
+ u32 bit_map;
};
/* low and high part of PCI config space regs */
@@ -374,6 +376,23 @@ struct amd64_pvt {
struct error_injection injection;
};
+enum err_codes {
+ DECODE_OK = 0,
+ ERR_NODE = -1,
+ ERR_CSROW = -2,
+ ERR_CHANNEL = -3,
+};
+
+struct err_info {
+ int err_code;
+ struct mem_ctl_info *src_mci;
+ int csrow;
+ int channel;
+ u16 syndrome;
+ u32 page;
+ u32 offset;
+};
+
static inline u64 get_dram_base(struct amd64_pvt *pvt, unsigned i)
{
u64 addr = ((u64)pvt->ranges[i].base.lo & 0xffff0000) << 8;
@@ -447,7 +466,7 @@ static inline void amd64_remove_sysfs_inject_files(struct mem_ctl_info *mci)
struct low_ops {
int (*early_channel_count) (struct amd64_pvt *pvt);
void (*map_sysaddr_to_csrow) (struct mem_ctl_info *mci, u64 sys_addr,
- u16 syndrome);
+ struct err_info *);
int (*dbam_to_cs) (struct amd64_pvt *pvt, u8 dct, unsigned cs_mode);
int (*read_dct_pci_cfg) (struct amd64_pvt *pvt, int offset,
u32 *val, const char *func);
@@ -459,6 +478,8 @@ struct amd64_family_type {
struct low_ops ops;
};
+int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
+ u32 *val, const char *func);
int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset,
u32 val, const char *func);
@@ -475,3 +496,15 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
u64 *hole_offset, u64 *hole_size);
#define to_mci(k) container_of(k, struct mem_ctl_info, dev)
+
+/* Injection helpers */
+static inline void disable_caches(void *dummy)
+{
+ write_cr0(read_cr0() | X86_CR0_CD);
+ wbinvd();
+}
+
+static inline void enable_caches(void *dummy)
+{
+ write_cr0(read_cr0() & ~X86_CR0_CD);
+}
diff --git a/drivers/edac/amd64_edac_inj.c b/drivers/edac/amd64_edac_inj.c
index 53d972e00df..8c171fa1cb9 100644
--- a/drivers/edac/amd64_edac_inj.c
+++ b/drivers/edac/amd64_edac_inj.c
@@ -22,20 +22,19 @@ static ssize_t amd64_inject_section_store(struct device *dev,
struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info;
unsigned long value;
- int ret = 0;
+ int ret;
ret = strict_strtoul(data, 10, &value);
- if (ret != -EINVAL) {
+ if (ret < 0)
+ return ret;
- if (value > 3) {
- amd64_warn("%s: invalid section 0x%lx\n", __func__, value);
- return -EINVAL;
- }
-
- pvt->injection.section = (u32) value;
- return count;
+ if (value > 3) {
+ amd64_warn("%s: invalid section 0x%lx\n", __func__, value);
+ return -EINVAL;
}
- return ret;
+
+ pvt->injection.section = (u32) value;
+ return count;
}
static ssize_t amd64_inject_word_show(struct device *dev,
@@ -60,20 +59,19 @@ static ssize_t amd64_inject_word_store(struct device *dev,
struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info;
unsigned long value;
- int ret = 0;
+ int ret;
ret = strict_strtoul(data, 10, &value);
- if (ret != -EINVAL) {
+ if (ret < 0)
+ return ret;
- if (value > 8) {
- amd64_warn("%s: invalid word 0x%lx\n", __func__, value);
- return -EINVAL;
- }
-
- pvt->injection.word = (u32) value;
- return count;
+ if (value > 8) {
+ amd64_warn("%s: invalid word 0x%lx\n", __func__, value);
+ return -EINVAL;
}
- return ret;
+
+ pvt->injection.word = (u32) value;
+ return count;
}
static ssize_t amd64_inject_ecc_vector_show(struct device *dev,
@@ -97,21 +95,19 @@ static ssize_t amd64_inject_ecc_vector_store(struct device *dev,
struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info;
unsigned long value;
- int ret = 0;
+ int ret;
ret = strict_strtoul(data, 16, &value);
- if (ret != -EINVAL) {
+ if (ret < 0)
+ return ret;
- if (value & 0xFFFF0000) {
- amd64_warn("%s: invalid EccVector: 0x%lx\n",
- __func__, value);
- return -EINVAL;
- }
-
- pvt->injection.bit_map = (u32) value;
- return count;
+ if (value & 0xFFFF0000) {
+ amd64_warn("%s: invalid EccVector: 0x%lx\n", __func__, value);
+ return -EINVAL;
}
- return ret;
+
+ pvt->injection.bit_map = (u32) value;
+ return count;
}
/*
@@ -126,28 +122,25 @@ static ssize_t amd64_inject_read_store(struct device *dev,
struct amd64_pvt *pvt = mci->pvt_info;
unsigned long value;
u32 section, word_bits;
- int ret = 0;
+ int ret;
ret = strict_strtoul(data, 10, &value);
- if (ret != -EINVAL) {
+ if (ret < 0)
+ return ret;
- /* Form value to choose 16-byte section of cacheline */
- section = F10_NB_ARRAY_DRAM_ECC |
- SET_NB_ARRAY_ADDRESS(pvt->injection.section);
- amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section);
+ /* Form value to choose 16-byte section of cacheline */
+ section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section);
- word_bits = SET_NB_DRAM_INJECTION_READ(pvt->injection.word,
- pvt->injection.bit_map);
+ amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section);
- /* Issue 'word' and 'bit' along with the READ request */
- amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
+ word_bits = SET_NB_DRAM_INJECTION_READ(pvt->injection);
- edac_dbg(0, "section=0x%x word_bits=0x%x\n",
- section, word_bits);
+ /* Issue 'word' and 'bit' along with the READ request */
+ amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
- return count;
- }
- return ret;
+ edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits);
+
+ return count;
}
/*
@@ -160,30 +153,43 @@ static ssize_t amd64_inject_write_store(struct device *dev,
{
struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info;
+ u32 section, word_bits, tmp;
unsigned long value;
- u32 section, word_bits;
- int ret = 0;
+ int ret;
ret = strict_strtoul(data, 10, &value);
- if (ret != -EINVAL) {
+ if (ret < 0)
+ return ret;
+
+ /* Form value to choose 16-byte section of cacheline */
+ section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section);
+
+ amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section);
- /* Form value to choose 16-byte section of cacheline */
- section = F10_NB_ARRAY_DRAM_ECC |
- SET_NB_ARRAY_ADDRESS(pvt->injection.section);
- amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section);
+ word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection);
- word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection.word,
- pvt->injection.bit_map);
+ pr_notice_once("Don't forget to decrease MCE polling interval in\n"
+ "/sys/bus/machinecheck/devices/machinecheck<CPUNUM>/check_interval\n"
+ "so that you can get the error report faster.\n");
- /* Issue 'word' and 'bit' along with the READ request */
- amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
+ on_each_cpu(disable_caches, NULL, 1);
- edac_dbg(0, "section=0x%x word_bits=0x%x\n",
- section, word_bits);
+ /* Issue 'word' and 'bit' along with the READ request */
+ amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
- return count;
+ retry:
+ /* wait until injection happens */
+ amd64_read_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, &tmp);
+ if (tmp & F10_NB_ARR_ECC_WR_REQ) {
+ cpu_relax();
+ goto retry;
}
- return ret;
+
+ on_each_cpu(enable_caches, NULL, 1);
+
+ edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits);
+
+ return count;
}
/*
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 90f0b730e9b..281f566a551 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -416,10 +416,18 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
dimm->cschannel = chn;
/* Increment csrow location */
- row++;
- if (row == tot_csrows) {
- row = 0;
+ if (layers[0].is_virt_csrow) {
chn++;
+ if (chn == tot_channels) {
+ chn = 0;
+ row++;
+ }
+ } else {
+ row++;
+ if (row == tot_csrows) {
+ row = 0;
+ chn++;
+ }
}
/* Increment dimm location */
@@ -966,20 +974,22 @@ static void edac_ce_error(struct mem_ctl_info *mci,
long grain)
{
unsigned long remapped_page;
+ char *msg_aux = "";
+
+ if (*msg)
+ msg_aux = " ";
if (edac_mc_get_log_ce()) {
if (other_detail && *other_detail)
edac_mc_printk(mci, KERN_WARNING,
- "%d CE %s on %s (%s %s - %s)\n",
- error_count,
- msg, label, location,
- detail, other_detail);
+ "%d CE %s%son %s (%s %s - %s)\n",
+ error_count, msg, msg_aux, label,
+ location, detail, other_detail);
else
edac_mc_printk(mci, KERN_WARNING,
- "%d CE %s on %s (%s %s)\n",
- error_count,
- msg, label, location,
- detail);
+ "%d CE %s%son %s (%s %s)\n",
+ error_count, msg, msg_aux, label,
+ location, detail);
}
edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);
@@ -1014,27 +1024,31 @@ static void edac_ue_error(struct mem_ctl_info *mci,
const char *other_detail,
const bool enable_per_layer_report)
{
+ char *msg_aux = "";
+
+ if (*msg)
+ msg_aux = " ";
+
if (edac_mc_get_log_ue()) {
if (other_detail && *other_detail)
edac_mc_printk(mci, KERN_WARNING,
- "%d UE %s on %s (%s %s - %s)\n",
- error_count,
- msg, label, location, detail,
- other_detail);
+ "%d UE %s%son %s (%s %s - %s)\n",
+ error_count, msg, msg_aux, label,
+ location, detail, other_detail);
else
edac_mc_printk(mci, KERN_WARNING,
- "%d UE %s on %s (%s %s)\n",
- error_count,
- msg, label, location, detail);
+ "%d UE %s%son %s (%s %s)\n",
+ error_count, msg, msg_aux, label,
+ location, detail);
}
if (edac_mc_get_panic_on_ue()) {
if (other_detail && *other_detail)
- panic("UE %s on %s (%s%s - %s)\n",
- msg, label, location, detail, other_detail);
+ panic("UE %s%son %s (%s%s - %s)\n",
+ msg, msg_aux, label, location, detail, other_detail);
else
- panic("UE %s on %s (%s%s)\n",
- msg, label, location, detail);
+ panic("UE %s%son %s (%s%s)\n",
+ msg, msg_aux, label, location, detail);
}
edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
@@ -1093,10 +1107,6 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
*/
for (i = 0; i < mci->n_layers; i++) {
if (pos[i] >= (int)mci->layers[i].size) {
- if (type == HW_EVENT_ERR_CORRECTED)
- p = "CE";
- else
- p = "UE";
edac_mc_printk(mci, KERN_ERR,
"INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
@@ -1128,6 +1138,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
grain = 0;
p = label;
*p = '\0';
+
for (i = 0; i < mci->tot_dimms; i++) {
struct dimm_info *dimm = mci->dimms[i];
@@ -1195,6 +1206,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
/* Fill the RAM location data */
p = location;
+
for (i = 0; i < mci->n_layers; i++) {
if (pos[i] < 0)
continue;
@@ -1207,7 +1219,6 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
*(p - 1) = '\0';
/* Report the error via the trace interface */
-
grain_bits = fls_long(grain) + 1;
trace_mc_event(type, msg, label, error_count,
mci->mc_idx, top_layer, mid_layer, low_layer,
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index ed0bc07b850..de2df92f9c7 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -180,6 +180,9 @@ static ssize_t csrow_size_show(struct device *dev,
int i;
u32 nr_pages = 0;
+ if (csrow->mci->csbased)
+ return sprintf(data, "%u\n", PAGES_TO_MiB(csrow->nr_pages));
+
for (i = 0; i < csrow->nr_channels; i++)
nr_pages += csrow->channels[i]->dimm->nr_pages;
return sprintf(data, "%u\n", PAGES_TO_MiB(nr_pages));
@@ -373,6 +376,7 @@ static int edac_create_csrow_object(struct mem_ctl_info *mci,
csrow->dev.bus = &mci->bus;
device_initialize(&csrow->dev);
csrow->dev.parent = &mci->dev;
+ csrow->mci = mci;
dev_set_name(&csrow->dev, "csrow%d", index);
dev_set_drvdata(&csrow->dev, csrow);
@@ -777,10 +781,14 @@ static ssize_t mci_size_mb_show(struct device *dev,
for (csrow_idx = 0; csrow_idx < mci->nr_csrows; csrow_idx++) {
struct csrow_info *csrow = mci->csrows[csrow_idx];
- for (j = 0; j < csrow->nr_channels; j++) {
- struct dimm_info *dimm = csrow->channels[j]->dimm;
+ if (csrow->mci->csbased) {
+ total_pages += csrow->nr_pages;
+ } else {
+ for (j = 0; j < csrow->nr_channels; j++) {
+ struct dimm_info *dimm = csrow->channels[j]->dimm;
- total_pages += dimm->nr_pages;
+ total_pages += dimm->nr_pages;
+ }
}
}
@@ -838,14 +846,8 @@ static ssize_t edac_fake_inject_write(struct file *file,
return count;
}
-static int debugfs_open(struct inode *inode, struct file *file)
-{
- file->private_data = inode->i_private;
- return 0;
-}
-
static const struct file_operations debug_fake_inject_fops = {
- .open = debugfs_open,
+ .open = simple_open,
.write = edac_fake_inject_write,
.llseek = generic_file_llseek,
};
@@ -1124,10 +1126,15 @@ int __init edac_mc_sysfs_init(void)
edac_subsys = edac_get_sysfs_subsys();
if (edac_subsys == NULL) {
edac_dbg(1, "no edac_subsys\n");
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
mci_pdev = kzalloc(sizeof(*mci_pdev), GFP_KERNEL);
+ if (!mci_pdev) {
+ err = -ENOMEM;
+ goto out_put_sysfs;
+ }
mci_pdev->bus = edac_subsys;
mci_pdev->type = &mc_attr_type;
@@ -1136,11 +1143,18 @@ int __init edac_mc_sysfs_init(void)
err = device_add(mci_pdev);
if (err < 0)
- return err;
+ goto out_dev_free;
edac_dbg(0, "device %s created\n", dev_name(mci_pdev));
return 0;
+
+ out_dev_free:
+ kfree(mci_pdev);
+ out_put_sysfs:
+ edac_put_sysfs_subsys();
+ out:
+ return err;
}
void __exit edac_mc_sysfs_exit(void)
@@ -1148,4 +1162,5 @@ void __exit edac_mc_sysfs_exit(void)
put_device(mci_pdev);
device_del(mci_pdev);
edac_put_sysfs_subsys();
+ kfree(mci_pdev);
}
diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c
index 58a28d838f3..12c951a2c33 100644
--- a/drivers/edac/edac_module.c
+++ b/drivers/edac/edac_module.c
@@ -18,9 +18,29 @@
#define EDAC_VERSION "Ver: 3.0.0"
#ifdef CONFIG_EDAC_DEBUG
+
+static int edac_set_debug_level(const char *buf, struct kernel_param *kp)
+{
+ unsigned long val;
+ int ret;
+
+ ret = kstrtoul(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ if (val < 0 || val > 4)
+ return -EINVAL;
+
+ return param_set_int(buf, kp);
+}
+
/* Values of 0 to 4 will generate output */
int edac_debug_level = 2;
EXPORT_SYMBOL_GPL(edac_debug_level);
+
+module_param_call(edac_debug_level, edac_set_debug_level, param_get_int,
+ &edac_debug_level, 0644);
+MODULE_PARM_DESC(edac_debug_level, "EDAC debug level: [0-4], default: 2");
#endif
/* scope is to module level only */
@@ -132,10 +152,3 @@ module_exit(edac_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Doug Thompson www.softwarebitmaker.com, et al");
MODULE_DESCRIPTION("Core library routines for EDAC reporting");
-
-/* refer to *_sysfs.c files for parameters that are exported via sysfs */
-
-#ifdef CONFIG_EDAC_DEBUG
-module_param(edac_debug_level, int, 0644);
-MODULE_PARM_DESC(edac_debug_level, "Debug level");
-#endif
diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c
index ee87ef972ea..dd370f92ace 100644
--- a/drivers/edac/edac_pci.c
+++ b/drivers/edac/edac_pci.c
@@ -470,7 +470,8 @@ struct edac_pci_ctl_info *edac_pci_create_generic_ctl(struct device *dev,
pci->mod_name = mod_name;
pci->ctl_name = EDAC_PCI_GENCTL_NAME;
- pci->edac_check = edac_pci_generic_check;
+ if (edac_op_state == EDAC_OPSTATE_POLL)
+ pci->edac_check = edac_pci_generic_check;
pdata->edac_idx = edac_pci_idx++;
diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c
index e164c555a33..dc6e905ee1a 100644
--- a/drivers/edac/edac_pci_sysfs.c
+++ b/drivers/edac/edac_pci_sysfs.c
@@ -645,20 +645,16 @@ typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev);
/*
* pci_dev parity list iterator
- * Scan the PCI device list for one pass, looking for SERRORs
- * Master Parity ERRORS or Parity ERRORs on primary or secondary devices
+ *
+ * Scan the PCI device list looking for SERRORs, Master Parity ERRORS or
+ * Parity ERRORs on primary or secondary devices.
*/
static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn)
{
struct pci_dev *dev = NULL;
- /* request for kernel access to the next PCI device, if any,
- * and while we are looking at it have its reference count
- * bumped until we are done with it
- */
- while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+ for_each_pci_dev(dev)
fn(dev);
- }
}
/*
diff --git a/drivers/edac/highbank_mc_edac.c b/drivers/edac/highbank_mc_edac.c
index c769f477fd2..7ea4cc2e8bd 100644
--- a/drivers/edac/highbank_mc_edac.c
+++ b/drivers/edac/highbank_mc_edac.c
@@ -113,14 +113,8 @@ static ssize_t highbank_mc_err_inject_write(struct file *file,
return count;
}
-static int debugfs_open(struct inode *inode, struct file *file)
-{
- file->private_data = inode->i_private;
- return 0;
-}
-
static const struct file_operations highbank_mc_debug_inject_fops = {
- .open = debugfs_open,
+ .open = simple_open,
.write = highbank_mc_err_inject_write,
.llseek = generic_file_llseek,
};
diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c
index a09d0667f72..9d669cd4361 100644
--- a/drivers/edac/i7300_edac.c
+++ b/drivers/edac/i7300_edac.c
@@ -197,8 +197,8 @@ static const char *ferr_fat_fbd_name[] = {
[0] = "Memory Write error on non-redundant retry or "
"FBD configuration Write error on retry",
};
-#define GET_FBD_FAT_IDX(fbderr) (fbderr & (3 << 28))
-#define FERR_FAT_FBD_ERR_MASK ((1 << 0) | (1 << 1) | (1 << 2) | (1 << 3))
+#define GET_FBD_FAT_IDX(fbderr) (((fbderr) >> 28) & 3)
+#define FERR_FAT_FBD_ERR_MASK ((1 << 0) | (1 << 1) | (1 << 2) | (1 << 22))
#define FERR_NF_FBD 0xa0
static const char *ferr_nf_fbd_name[] = {
@@ -225,7 +225,7 @@ static const char *ferr_nf_fbd_name[] = {
[1] = "Aliased Uncorrectable Non-Mirrored Demand Data ECC",
[0] = "Uncorrectable Data ECC on Replay",
};
-#define GET_FBD_NF_IDX(fbderr) (fbderr & (3 << 28))
+#define GET_FBD_NF_IDX(fbderr) (((fbderr) >> 28) & 3)
#define FERR_NF_FBD_ERR_MASK ((1 << 24) | (1 << 23) | (1 << 22) | (1 << 21) |\
(1 << 18) | (1 << 17) | (1 << 16) | (1 << 15) |\
(1 << 14) | (1 << 13) | (1 << 11) | (1 << 10) |\
@@ -464,7 +464,7 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci)
errnum = find_first_bit(&errors,
ARRAY_SIZE(ferr_nf_fbd_name));
specific = GET_ERR_FROM_TABLE(ferr_nf_fbd_name, errnum);
- branch = (GET_FBD_FAT_IDX(error_reg) == 2) ? 1 : 0;
+ branch = (GET_FBD_NF_IDX(error_reg) == 2) ? 1 : 0;
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
REDMEMA, &syndrome);
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index 3672101023b..10c8c00d646 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -816,7 +816,7 @@ static ssize_t i7core_inject_store_##param( \
struct device_attribute *mattr, \
const char *data, size_t count) \
{ \
- struct mem_ctl_info *mci = to_mci(dev); \
+ struct mem_ctl_info *mci = dev_get_drvdata(dev); \
struct i7core_pvt *pvt; \
long value; \
int rc; \
@@ -845,7 +845,7 @@ static ssize_t i7core_inject_show_##param( \
struct device_attribute *mattr, \
char *data) \
{ \
- struct mem_ctl_info *mci = to_mci(dev); \
+ struct mem_ctl_info *mci = dev_get_drvdata(dev); \
struct i7core_pvt *pvt; \
\
pvt = mci->pvt_info; \
@@ -1052,7 +1052,7 @@ static ssize_t i7core_show_counter_##param( \
struct device_attribute *mattr, \
char *data) \
{ \
- struct mem_ctl_info *mci = to_mci(dev); \
+ struct mem_ctl_info *mci = dev_get_drvdata(dev); \
struct i7core_pvt *pvt = mci->pvt_info; \
\
edac_dbg(1, "\n"); \
diff --git a/drivers/edac/i82975x_edac.c b/drivers/edac/i82975x_edac.c
index 069e26c11c4..a98020409fa 100644
--- a/drivers/edac/i82975x_edac.c
+++ b/drivers/edac/i82975x_edac.c
@@ -370,10 +370,6 @@ static enum dev_type i82975x_dram_type(void __iomem *mch_window, int rank)
static void i82975x_init_csrows(struct mem_ctl_info *mci,
struct pci_dev *pdev, void __iomem *mch_window)
{
- static const char *labels[4] = {
- "DIMM A1", "DIMM A2",
- "DIMM B1", "DIMM B2"
- };
struct csrow_info *csrow;
unsigned long last_cumul_size;
u8 value;
@@ -423,9 +419,10 @@ static void i82975x_init_csrows(struct mem_ctl_info *mci,
dimm = mci->csrows[index]->channels[chan]->dimm;
dimm->nr_pages = nr_pages / csrow->nr_channels;
- strncpy(csrow->channels[chan]->dimm->label,
- labels[(index >> 1) + (chan * 2)],
- EDAC_MC_LABEL_LEN);
+
+ snprintf(csrow->channels[chan]->dimm->label, EDAC_MC_LABEL_LEN, "DIMM %c%d",
+ (chan == 0) ? 'A' : 'B',
+ index);
dimm->grain = 1 << 7; /* 128Byte cache-line resolution */
dimm->dtype = i82975x_dram_type(mch_window, index);
dimm->mtype = MEM_DDR2; /* I82975x supports only DDR2 */
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index d0c372e30de..ad637572d8c 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -64,7 +64,7 @@ EXPORT_SYMBOL_GPL(to_msgs);
const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" };
EXPORT_SYMBOL_GPL(ii_msgs);
-static const char * const f15h_ic_mce_desc[] = {
+static const char * const f15h_mc1_mce_desc[] = {
"UC during a demand linefill from L2",
"Parity error during data load from IC",
"Parity error for IC valid bit",
@@ -84,7 +84,7 @@ static const char * const f15h_ic_mce_desc[] = {
"fetch address FIFO"
};
-static const char * const f15h_cu_mce_desc[] = {
+static const char * const f15h_mc2_mce_desc[] = {
"Fill ECC error on data fills", /* xec = 0x4 */
"Fill parity error on insn fills",
"Prefetcher request FIFO parity error",
@@ -101,7 +101,7 @@ static const char * const f15h_cu_mce_desc[] = {
"PRB address parity error"
};
-static const char * const nb_mce_desc[] = {
+static const char * const mc4_mce_desc[] = {
"DRAM ECC error detected on the NB",
"CRC error detected on HT link",
"Link-defined sync error packets detected on HT link",
@@ -123,7 +123,7 @@ static const char * const nb_mce_desc[] = {
"ECC Error in the Probe Filter directory"
};
-static const char * const fr_ex_mce_desc[] = {
+static const char * const mc5_mce_desc[] = {
"CPU Watchdog timer expire",
"Wakeup array dest tag",
"AG payload array",
@@ -139,7 +139,7 @@ static const char * const fr_ex_mce_desc[] = {
"DE error occurred"
};
-static bool f12h_dc_mce(u16 ec, u8 xec)
+static bool f12h_mc0_mce(u16 ec, u8 xec)
{
bool ret = false;
@@ -157,26 +157,26 @@ static bool f12h_dc_mce(u16 ec, u8 xec)
return ret;
}
-static bool f10h_dc_mce(u16 ec, u8 xec)
+static bool f10h_mc0_mce(u16 ec, u8 xec)
{
if (R4(ec) == R4_GEN && LL(ec) == LL_L1) {
pr_cont("during data scrub.\n");
return true;
}
- return f12h_dc_mce(ec, xec);
+ return f12h_mc0_mce(ec, xec);
}
-static bool k8_dc_mce(u16 ec, u8 xec)
+static bool k8_mc0_mce(u16 ec, u8 xec)
{
if (BUS_ERROR(ec)) {
pr_cont("during system linefill.\n");
return true;
}
- return f10h_dc_mce(ec, xec);
+ return f10h_mc0_mce(ec, xec);
}
-static bool f14h_dc_mce(u16 ec, u8 xec)
+static bool f14h_mc0_mce(u16 ec, u8 xec)
{
u8 r4 = R4(ec);
bool ret = true;
@@ -228,7 +228,7 @@ static bool f14h_dc_mce(u16 ec, u8 xec)
return ret;
}
-static bool f15h_dc_mce(u16 ec, u8 xec)
+static bool f15h_mc0_mce(u16 ec, u8 xec)
{
bool ret = true;
@@ -275,12 +275,12 @@ static bool f15h_dc_mce(u16 ec, u8 xec)
return ret;
}
-static void amd_decode_dc_mce(struct mce *m)
+static void decode_mc0_mce(struct mce *m)
{
u16 ec = EC(m->status);
u8 xec = XEC(m->status, xec_mask);
- pr_emerg(HW_ERR "Data Cache Error: ");
+ pr_emerg(HW_ERR "MC0 Error: ");
/* TLB error signatures are the same across families */
if (TLB_ERROR(ec)) {
@@ -290,13 +290,13 @@ static void amd_decode_dc_mce(struct mce *m)
: (xec ? "multimatch" : "parity")));
return;
}
- } else if (fam_ops->dc_mce(ec, xec))
+ } else if (fam_ops->mc0_mce(ec, xec))
;
else
- pr_emerg(HW_ERR "Corrupted DC MCE info?\n");
+ pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n");
}
-static bool k8_ic_mce(u16 ec, u8 xec)
+static bool k8_mc1_mce(u16 ec, u8 xec)
{
u8 ll = LL(ec);
bool ret = true;
@@ -330,7 +330,7 @@ static bool k8_ic_mce(u16 ec, u8 xec)
return ret;
}
-static bool f14h_ic_mce(u16 ec, u8 xec)
+static bool f14h_mc1_mce(u16 ec, u8 xec)
{
u8 r4 = R4(ec);
bool ret = true;
@@ -349,7 +349,7 @@ static bool f14h_ic_mce(u16 ec, u8 xec)
return ret;
}
-static bool f15h_ic_mce(u16 ec, u8 xec)
+static bool f15h_mc1_mce(u16 ec, u8 xec)
{
bool ret = true;
@@ -358,19 +358,19 @@ static bool f15h_ic_mce(u16 ec, u8 xec)
switch (xec) {
case 0x0 ... 0xa:
- pr_cont("%s.\n", f15h_ic_mce_desc[xec]);
+ pr_cont("%s.\n", f15h_mc1_mce_desc[xec]);
break;
case 0xd:
- pr_cont("%s.\n", f15h_ic_mce_desc[xec-2]);
+ pr_cont("%s.\n", f15h_mc1_mce_desc[xec-2]);
break;
case 0x10:
- pr_cont("%s.\n", f15h_ic_mce_desc[xec-4]);
+ pr_cont("%s.\n", f15h_mc1_mce_desc[xec-4]);
break;
case 0x11 ... 0x14:
- pr_cont("Decoder %s parity error.\n", f15h_ic_mce_desc[xec-4]);
+ pr_cont("Decoder %s parity error.\n", f15h_mc1_mce_desc[xec-4]);
break;
default:
@@ -379,12 +379,12 @@ static bool f15h_ic_mce(u16 ec, u8 xec)
return ret;
}
-static void amd_decode_ic_mce(struct mce *m)
+static void decode_mc1_mce(struct mce *m)
{
u16 ec = EC(m->status);
u8 xec = XEC(m->status, xec_mask);
- pr_emerg(HW_ERR "Instruction Cache Error: ");
+ pr_emerg(HW_ERR "MC1 Error: ");
if (TLB_ERROR(ec))
pr_cont("%s TLB %s.\n", LL_MSG(ec),
@@ -393,18 +393,18 @@ static void amd_decode_ic_mce(struct mce *m)
bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58)));
pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
- } else if (fam_ops->ic_mce(ec, xec))
+ } else if (fam_ops->mc1_mce(ec, xec))
;
else
- pr_emerg(HW_ERR "Corrupted IC MCE info?\n");
+ pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n");
}
-static void amd_decode_bu_mce(struct mce *m)
+static void decode_mc2_mce(struct mce *m)
{
u16 ec = EC(m->status);
u8 xec = XEC(m->status, xec_mask);
- pr_emerg(HW_ERR "Bus Unit Error");
+ pr_emerg(HW_ERR "MC2 Error");
if (xec == 0x1)
pr_cont(" in the write data buffers.\n");
@@ -429,24 +429,24 @@ static void amd_decode_bu_mce(struct mce *m)
pr_cont(": %s parity/ECC error during data "
"access from L2.\n", R4_MSG(ec));
else
- goto wrong_bu_mce;
+ goto wrong_mc2_mce;
} else
- goto wrong_bu_mce;
+ goto wrong_mc2_mce;
} else
- goto wrong_bu_mce;
+ goto wrong_mc2_mce;
return;
-wrong_bu_mce:
- pr_emerg(HW_ERR "Corrupted BU MCE info?\n");
+ wrong_mc2_mce:
+ pr_emerg(HW_ERR "Corrupted MC2 MCE info?\n");
}
-static void amd_decode_cu_mce(struct mce *m)
+static void decode_f15_mc2_mce(struct mce *m)
{
u16 ec = EC(m->status);
u8 xec = XEC(m->status, xec_mask);
- pr_emerg(HW_ERR "Combined Unit Error: ");
+ pr_emerg(HW_ERR "MC2 Error: ");
if (TLB_ERROR(ec)) {
if (xec == 0x0)
@@ -454,63 +454,63 @@ static void amd_decode_cu_mce(struct mce *m)
else if (xec == 0x1)
pr_cont("Poison data provided for TLB fill.\n");
else
- goto wrong_cu_mce;
+ goto wrong_f15_mc2_mce;
} else if (BUS_ERROR(ec)) {
if (xec > 2)
- goto wrong_cu_mce;
+ goto wrong_f15_mc2_mce;
pr_cont("Error during attempted NB data read.\n");
} else if (MEM_ERROR(ec)) {
switch (xec) {
case 0x4 ... 0xc:
- pr_cont("%s.\n", f15h_cu_mce_desc[xec - 0x4]);
+ pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x4]);
break;
case 0x10 ... 0x14:
- pr_cont("%s.\n", f15h_cu_mce_desc[xec - 0x7]);
+ pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x7]);
break;
default:
- goto wrong_cu_mce;
+ goto wrong_f15_mc2_mce;
}
}
return;
-wrong_cu_mce:
- pr_emerg(HW_ERR "Corrupted CU MCE info?\n");
+ wrong_f15_mc2_mce:
+ pr_emerg(HW_ERR "Corrupted MC2 MCE info?\n");
}
-static void amd_decode_ls_mce(struct mce *m)
+static void decode_mc3_mce(struct mce *m)
{
u16 ec = EC(m->status);
u8 xec = XEC(m->status, xec_mask);
if (boot_cpu_data.x86 >= 0x14) {
- pr_emerg("You shouldn't be seeing an LS MCE on this cpu family,"
+ pr_emerg("You shouldn't be seeing MC3 MCE on this cpu family,"
" please report on LKML.\n");
return;
}
- pr_emerg(HW_ERR "Load Store Error");
+ pr_emerg(HW_ERR "MC3 Error");
if (xec == 0x0) {
u8 r4 = R4(ec);
if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
- goto wrong_ls_mce;
+ goto wrong_mc3_mce;
pr_cont(" during %s.\n", R4_MSG(ec));
} else
- goto wrong_ls_mce;
+ goto wrong_mc3_mce;
return;
-wrong_ls_mce:
- pr_emerg(HW_ERR "Corrupted LS MCE info?\n");
+ wrong_mc3_mce:
+ pr_emerg(HW_ERR "Corrupted MC3 MCE info?\n");
}
-void amd_decode_nb_mce(struct mce *m)
+static void decode_mc4_mce(struct mce *m)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
int node_id = amd_get_nb_id(m->extcpu);
@@ -518,7 +518,7 @@ void amd_decode_nb_mce(struct mce *m)
u8 xec = XEC(m->status, 0x1f);
u8 offset = 0;
- pr_emerg(HW_ERR "Northbridge Error (node %d): ", node_id);
+ pr_emerg(HW_ERR "MC4 Error (node %d): ", node_id);
switch (xec) {
case 0x0 ... 0xe:
@@ -527,9 +527,9 @@ void amd_decode_nb_mce(struct mce *m)
if (xec == 0x0 || xec == 0x8) {
/* no ECCs on F11h */
if (c->x86 == 0x11)
- goto wrong_nb_mce;
+ goto wrong_mc4_mce;
- pr_cont("%s.\n", nb_mce_desc[xec]);
+ pr_cont("%s.\n", mc4_mce_desc[xec]);
if (nb_bus_decoder)
nb_bus_decoder(node_id, m);
@@ -543,14 +543,14 @@ void amd_decode_nb_mce(struct mce *m)
else if (BUS_ERROR(ec))
pr_cont("DMA Exclusion Vector Table Walk error.\n");
else
- goto wrong_nb_mce;
+ goto wrong_mc4_mce;
return;
case 0x19:
if (boot_cpu_data.x86 == 0x15)
pr_cont("Compute Unit Data Error.\n");
else
- goto wrong_nb_mce;
+ goto wrong_mc4_mce;
return;
case 0x1c ... 0x1f:
@@ -558,46 +558,44 @@ void amd_decode_nb_mce(struct mce *m)
break;
default:
- goto wrong_nb_mce;
+ goto wrong_mc4_mce;
}
- pr_cont("%s.\n", nb_mce_desc[xec - offset]);
+ pr_cont("%s.\n", mc4_mce_desc[xec - offset]);
return;
-wrong_nb_mce:
- pr_emerg(HW_ERR "Corrupted NB MCE info?\n");
+ wrong_mc4_mce:
+ pr_emerg(HW_ERR "Corrupted MC4 MCE info?\n");
}
-EXPORT_SYMBOL_GPL(amd_decode_nb_mce);
-static void amd_decode_fr_mce(struct mce *m)
+static void decode_mc5_mce(struct mce *m)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
u8 xec = XEC(m->status, xec_mask);
if (c->x86 == 0xf || c->x86 == 0x11)
- goto wrong_fr_mce;
+ goto wrong_mc5_mce;
- pr_emerg(HW_ERR "%s Error: ",
- (c->x86 == 0x15 ? "Execution Unit" : "FIROB"));
+ pr_emerg(HW_ERR "MC5 Error: ");
if (xec == 0x0 || xec == 0xc)
- pr_cont("%s.\n", fr_ex_mce_desc[xec]);
+ pr_cont("%s.\n", mc5_mce_desc[xec]);
else if (xec < 0xd)
- pr_cont("%s parity error.\n", fr_ex_mce_desc[xec]);
+ pr_cont("%s parity error.\n", mc5_mce_desc[xec]);
else
- goto wrong_fr_mce;
+ goto wrong_mc5_mce;
return;
-wrong_fr_mce:
- pr_emerg(HW_ERR "Corrupted FR MCE info?\n");
+ wrong_mc5_mce:
+ pr_emerg(HW_ERR "Corrupted MC5 MCE info?\n");
}
-static void amd_decode_fp_mce(struct mce *m)
+static void decode_mc6_mce(struct mce *m)
{
u8 xec = XEC(m->status, xec_mask);
- pr_emerg(HW_ERR "Floating Point Unit Error: ");
+ pr_emerg(HW_ERR "MC6 Error: ");
switch (xec) {
case 0x1:
@@ -621,7 +619,7 @@ static void amd_decode_fp_mce(struct mce *m)
break;
default:
- goto wrong_fp_mce;
+ goto wrong_mc6_mce;
break;
}
@@ -629,8 +627,8 @@ static void amd_decode_fp_mce(struct mce *m)
return;
-wrong_fp_mce:
- pr_emerg(HW_ERR "Corrupted FP MCE info?\n");
+ wrong_mc6_mce:
+ pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
}
static inline void amd_decode_err_code(u16 ec)
@@ -669,74 +667,94 @@ static bool amd_filter_mce(struct mce *m)
return false;
}
+static const char *decode_error_status(struct mce *m)
+{
+ if (m->status & MCI_STATUS_UC) {
+ if (m->status & MCI_STATUS_PCC)
+ return "System Fatal error.";
+ if (m->mcgstatus & MCG_STATUS_RIPV)
+ return "Uncorrected, software restartable error.";
+ return "Uncorrected, software containable error.";
+ }
+
+ if (m->status & MCI_STATUS_DEFERRED)
+ return "Deferred error.";
+
+ return "Corrected error, no action required.";
+}
+
int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
{
struct mce *m = (struct mce *)data;
- struct cpuinfo_x86 *c = &boot_cpu_data;
+ struct cpuinfo_x86 *c = &cpu_data(m->extcpu);
int ecc;
if (amd_filter_mce(m))
return NOTIFY_STOP;
- pr_emerg(HW_ERR "CPU:%d\tMC%d_STATUS[%s|%s|%s|%s|%s",
- m->extcpu, m->bank,
- ((m->status & MCI_STATUS_OVER) ? "Over" : "-"),
- ((m->status & MCI_STATUS_UC) ? "UE" : "CE"),
- ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"),
- ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"),
- ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"));
-
- if (c->x86 == 0x15)
- pr_cont("|%s|%s",
- ((m->status & BIT_64(44)) ? "Deferred" : "-"),
- ((m->status & BIT_64(43)) ? "Poison" : "-"));
-
- /* do the two bits[14:13] together */
- ecc = (m->status >> 45) & 0x3;
- if (ecc)
- pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
-
- pr_cont("]: 0x%016llx\n", m->status);
-
- if (m->status & MCI_STATUS_ADDRV)
- pr_emerg(HW_ERR "\tMC%d_ADDR: 0x%016llx\n", m->bank, m->addr);
-
switch (m->bank) {
case 0:
- amd_decode_dc_mce(m);
+ decode_mc0_mce(m);
break;
case 1:
- amd_decode_ic_mce(m);
+ decode_mc1_mce(m);
break;
case 2:
if (c->x86 == 0x15)
- amd_decode_cu_mce(m);
+ decode_f15_mc2_mce(m);
else
- amd_decode_bu_mce(m);
+ decode_mc2_mce(m);
break;
case 3:
- amd_decode_ls_mce(m);
+ decode_mc3_mce(m);
break;
case 4:
- amd_decode_nb_mce(m);
+ decode_mc4_mce(m);
break;
case 5:
- amd_decode_fr_mce(m);
+ decode_mc5_mce(m);
break;
case 6:
- amd_decode_fp_mce(m);
+ decode_mc6_mce(m);
break;
default:
break;
}
+ pr_emerg(HW_ERR "Error Status: %s\n", decode_error_status(m));
+
+ pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s",
+ m->extcpu,
+ c->x86, c->x86_model, c->x86_mask,
+ m->bank,
+ ((m->status & MCI_STATUS_OVER) ? "Over" : "-"),
+ ((m->status & MCI_STATUS_UC) ? "UE" : "CE"),
+ ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"),
+ ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"),
+ ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"));
+
+ if (c->x86 == 0x15)
+ pr_cont("|%s|%s",
+ ((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
+ ((m->status & MCI_STATUS_POISON) ? "Poison" : "-"));
+
+ /* do the two bits[14:13] together */
+ ecc = (m->status >> 45) & 0x3;
+ if (ecc)
+ pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
+
+ pr_cont("]: 0x%016llx\n", m->status);
+
+ if (m->status & MCI_STATUS_ADDRV)
+ pr_emerg(HW_ERR "MC%d_ADDR: 0x%016llx\n", m->bank, m->addr);
+
amd_decode_err_code(m->status & 0xffff);
return NOTIFY_STOP;
@@ -763,35 +781,35 @@ static int __init mce_amd_init(void)
switch (c->x86) {
case 0xf:
- fam_ops->dc_mce = k8_dc_mce;
- fam_ops->ic_mce = k8_ic_mce;
+ fam_ops->mc0_mce = k8_mc0_mce;
+ fam_ops->mc1_mce = k8_mc1_mce;
break;
case 0x10:
- fam_ops->dc_mce = f10h_dc_mce;
- fam_ops->ic_mce = k8_ic_mce;
+ fam_ops->mc0_mce = f10h_mc0_mce;
+ fam_ops->mc1_mce = k8_mc1_mce;
break;
case 0x11:
- fam_ops->dc_mce = k8_dc_mce;
- fam_ops->ic_mce = k8_ic_mce;
+ fam_ops->mc0_mce = k8_mc0_mce;
+ fam_ops->mc1_mce = k8_mc1_mce;
break;
case 0x12:
- fam_ops->dc_mce = f12h_dc_mce;
- fam_ops->ic_mce = k8_ic_mce;
+ fam_ops->mc0_mce = f12h_mc0_mce;
+ fam_ops->mc1_mce = k8_mc1_mce;
break;
case 0x14:
nb_err_cpumask = 0x3;
- fam_ops->dc_mce = f14h_dc_mce;
- fam_ops->ic_mce = f14h_ic_mce;
+ fam_ops->mc0_mce = f14h_mc0_mce;
+ fam_ops->mc1_mce = f14h_mc1_mce;
break;
case 0x15:
xec_mask = 0x1f;
- fam_ops->dc_mce = f15h_dc_mce;
- fam_ops->ic_mce = f15h_ic_mce;
+ fam_ops->mc0_mce = f15h_mc0_mce;
+ fam_ops->mc1_mce = f15h_mc1_mce;
break;
default:
diff --git a/drivers/edac/mce_amd.h b/drivers/edac/mce_amd.h
index 8c87a5e8705..679679951e2 100644
--- a/drivers/edac/mce_amd.h
+++ b/drivers/edac/mce_amd.h
@@ -29,10 +29,8 @@
#define R4(x) (((x) >> 4) & 0xf)
#define R4_MSG(x) ((R4(x) < 9) ? rrrr_msgs[R4(x)] : "Wrong R4!")
-/*
- * F3x4C bits (MCi_STATUS' high half)
- */
-#define NBSH_ERR_CPU_VAL BIT(24)
+#define MCI_STATUS_DEFERRED BIT_64(44)
+#define MCI_STATUS_POISON BIT_64(43)
enum tt_ids {
TT_INSTR = 0,
@@ -78,14 +76,13 @@ extern const char * const ii_msgs[];
* per-family decoder ops
*/
struct amd_decoder_ops {
- bool (*dc_mce)(u16, u8);
- bool (*ic_mce)(u16, u8);
+ bool (*mc0_mce)(u16, u8);
+ bool (*mc1_mce)(u16, u8);
};
void amd_report_gart_errors(bool);
void amd_register_ecc_decoder(void (*f)(int, struct mce *));
void amd_unregister_ecc_decoder(void (*f)(int, struct mce *));
-void amd_decode_nb_mce(struct mce *);
int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data);
#endif /* _EDAC_MCE_AMD_H */
diff --git a/drivers/edac/octeon_edac-l2c.c b/drivers/edac/octeon_edac-l2c.c
new file mode 100644
index 00000000000..40fde6a51ed
--- /dev/null
+++ b/drivers/edac/octeon_edac-l2c.c
@@ -0,0 +1,208 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2012 Cavium, Inc.
+ *
+ * Copyright (C) 2009 Wind River Systems,
+ * written by Ralf Baechle <ralf@linux-mips.org>
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/edac.h>
+
+#include <asm/octeon/cvmx.h>
+
+#include "edac_core.h"
+#include "edac_module.h"
+
+#define EDAC_MOD_STR "octeon-l2c"
+
+static void octeon_l2c_poll_oct1(struct edac_device_ctl_info *l2c)
+{
+ union cvmx_l2t_err l2t_err, l2t_err_reset;
+ union cvmx_l2d_err l2d_err, l2d_err_reset;
+
+ l2t_err_reset.u64 = 0;
+ l2t_err.u64 = cvmx_read_csr(CVMX_L2T_ERR);
+ if (l2t_err.s.sec_err) {
+ edac_device_handle_ce(l2c, 0, 0,
+ "Tag Single bit error (corrected)");
+ l2t_err_reset.s.sec_err = 1;
+ }
+ if (l2t_err.s.ded_err) {
+ edac_device_handle_ue(l2c, 0, 0,
+ "Tag Double bit error (detected)");
+ l2t_err_reset.s.ded_err = 1;
+ }
+ if (l2t_err_reset.u64)
+ cvmx_write_csr(CVMX_L2T_ERR, l2t_err_reset.u64);
+
+ l2d_err_reset.u64 = 0;
+ l2d_err.u64 = cvmx_read_csr(CVMX_L2D_ERR);
+ if (l2d_err.s.sec_err) {
+ edac_device_handle_ce(l2c, 0, 1,
+ "Data Single bit error (corrected)");
+ l2d_err_reset.s.sec_err = 1;
+ }
+ if (l2d_err.s.ded_err) {
+ edac_device_handle_ue(l2c, 0, 1,
+ "Data Double bit error (detected)");
+ l2d_err_reset.s.ded_err = 1;
+ }
+ if (l2d_err_reset.u64)
+ cvmx_write_csr(CVMX_L2D_ERR, l2d_err_reset.u64);
+
+}
+
+static void _octeon_l2c_poll_oct2(struct edac_device_ctl_info *l2c, int tad)
+{
+ union cvmx_l2c_err_tdtx err_tdtx, err_tdtx_reset;
+ union cvmx_l2c_err_ttgx err_ttgx, err_ttgx_reset;
+ char buf1[64];
+ char buf2[80];
+
+ err_tdtx_reset.u64 = 0;
+ err_tdtx.u64 = cvmx_read_csr(CVMX_L2C_ERR_TDTX(tad));
+ if (err_tdtx.s.dbe || err_tdtx.s.sbe ||
+ err_tdtx.s.vdbe || err_tdtx.s.vsbe)
+ snprintf(buf1, sizeof(buf1),
+ "type:%d, syn:0x%x, way:%d",
+ err_tdtx.s.type, err_tdtx.s.syn, err_tdtx.s.wayidx);
+
+ if (err_tdtx.s.dbe) {
+ snprintf(buf2, sizeof(buf2),
+ "L2D Double bit error (detected):%s", buf1);
+ err_tdtx_reset.s.dbe = 1;
+ edac_device_handle_ue(l2c, tad, 1, buf2);
+ }
+ if (err_tdtx.s.sbe) {
+ snprintf(buf2, sizeof(buf2),
+ "L2D Single bit error (corrected):%s", buf1);
+ err_tdtx_reset.s.sbe = 1;
+ edac_device_handle_ce(l2c, tad, 1, buf2);
+ }
+ if (err_tdtx.s.vdbe) {
+ snprintf(buf2, sizeof(buf2),
+ "VBF Double bit error (detected):%s", buf1);
+ err_tdtx_reset.s.vdbe = 1;
+ edac_device_handle_ue(l2c, tad, 1, buf2);
+ }
+ if (err_tdtx.s.vsbe) {
+ snprintf(buf2, sizeof(buf2),
+ "VBF Single bit error (corrected):%s", buf1);
+ err_tdtx_reset.s.vsbe = 1;
+ edac_device_handle_ce(l2c, tad, 1, buf2);
+ }
+ if (err_tdtx_reset.u64)
+ cvmx_write_csr(CVMX_L2C_ERR_TDTX(tad), err_tdtx_reset.u64);
+
+ err_ttgx_reset.u64 = 0;
+ err_ttgx.u64 = cvmx_read_csr(CVMX_L2C_ERR_TTGX(tad));
+
+ if (err_ttgx.s.dbe || err_ttgx.s.sbe)
+ snprintf(buf1, sizeof(buf1),
+ "type:%d, syn:0x%x, way:%d",
+ err_ttgx.s.type, err_ttgx.s.syn, err_ttgx.s.wayidx);
+
+ if (err_ttgx.s.dbe) {
+ snprintf(buf2, sizeof(buf2),
+ "Tag Double bit error (detected):%s", buf1);
+ err_ttgx_reset.s.dbe = 1;
+ edac_device_handle_ue(l2c, tad, 0, buf2);
+ }
+ if (err_ttgx.s.sbe) {
+ snprintf(buf2, sizeof(buf2),
+ "Tag Single bit error (corrected):%s", buf1);
+ err_ttgx_reset.s.sbe = 1;
+ edac_device_handle_ce(l2c, tad, 0, buf2);
+ }
+ if (err_ttgx_reset.u64)
+ cvmx_write_csr(CVMX_L2C_ERR_TTGX(tad), err_ttgx_reset.u64);
+}
+
+static void octeon_l2c_poll_oct2(struct edac_device_ctl_info *l2c)
+{
+ int i;
+ for (i = 0; i < l2c->nr_instances; i++)
+ _octeon_l2c_poll_oct2(l2c, i);
+}
+
+static int __devinit octeon_l2c_probe(struct platform_device *pdev)
+{
+ struct edac_device_ctl_info *l2c;
+
+ int num_tads = OCTEON_IS_MODEL(OCTEON_CN68XX) ? 4 : 1;
+
+ /* 'Tags' are block 0, 'Data' is block 1*/
+ l2c = edac_device_alloc_ctl_info(0, "l2c", num_tads, "l2c", 2, 0,
+ NULL, 0, edac_device_alloc_index());
+ if (!l2c)
+ return -ENOMEM;
+
+ l2c->dev = &pdev->dev;
+ platform_set_drvdata(pdev, l2c);
+ l2c->dev_name = dev_name(&pdev->dev);
+
+ l2c->mod_name = "octeon-l2c";
+ l2c->ctl_name = "octeon_l2c_err";
+
+
+ if (OCTEON_IS_MODEL(OCTEON_FAM_1_PLUS)) {
+ union cvmx_l2t_err l2t_err;
+ union cvmx_l2d_err l2d_err;
+
+ l2t_err.u64 = cvmx_read_csr(CVMX_L2T_ERR);
+ l2t_err.s.sec_intena = 0; /* We poll */
+ l2t_err.s.ded_intena = 0;
+ cvmx_write_csr(CVMX_L2T_ERR, l2t_err.u64);
+
+ l2d_err.u64 = cvmx_read_csr(CVMX_L2D_ERR);
+ l2d_err.s.sec_intena = 0; /* We poll */
+ l2d_err.s.ded_intena = 0;
+ cvmx_write_csr(CVMX_L2T_ERR, l2d_err.u64);
+
+ l2c->edac_check = octeon_l2c_poll_oct1;
+ } else {
+ /* OCTEON II */
+ l2c->edac_check = octeon_l2c_poll_oct2;
+ }
+
+ if (edac_device_add_device(l2c) > 0) {
+ pr_err("%s: edac_device_add_device() failed\n", __func__);
+ goto err;
+ }
+
+
+ return 0;
+
+err:
+ edac_device_free_ctl_info(l2c);
+
+ return -ENXIO;
+}
+
+static int octeon_l2c_remove(struct platform_device *pdev)
+{
+ struct edac_device_ctl_info *l2c = platform_get_drvdata(pdev);
+
+ edac_device_del_device(&pdev->dev);
+ edac_device_free_ctl_info(l2c);
+
+ return 0;
+}
+
+static struct platform_driver octeon_l2c_driver = {
+ .probe = octeon_l2c_probe,
+ .remove = octeon_l2c_remove,
+ .driver = {
+ .name = "octeon_l2c_edac",
+ }
+};
+module_platform_driver(octeon_l2c_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>");
diff --git a/drivers/edac/octeon_edac-lmc.c b/drivers/edac/octeon_edac-lmc.c
new file mode 100644
index 00000000000..33bca766e37
--- /dev/null
+++ b/drivers/edac/octeon_edac-lmc.c
@@ -0,0 +1,186 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2009 Wind River Systems,
+ * written by Ralf Baechle <ralf@linux-mips.org>
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/edac.h>
+
+#include <asm/octeon/octeon.h>
+#include <asm/octeon/cvmx-lmcx-defs.h>
+
+#include "edac_core.h"
+#include "edac_module.h"
+
+#define OCTEON_MAX_MC 4
+
+static void octeon_lmc_edac_poll(struct mem_ctl_info *mci)
+{
+ union cvmx_lmcx_mem_cfg0 cfg0;
+ bool do_clear = false;
+ char msg[64];
+
+ cfg0.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(mci->mc_idx));
+ if (cfg0.s.sec_err || cfg0.s.ded_err) {
+ union cvmx_lmcx_fadr fadr;
+ fadr.u64 = cvmx_read_csr(CVMX_LMCX_FADR(mci->mc_idx));
+ snprintf(msg, sizeof(msg),
+ "DIMM %d rank %d bank %d row %d col %d",
+ fadr.cn30xx.fdimm, fadr.cn30xx.fbunk,
+ fadr.cn30xx.fbank, fadr.cn30xx.frow, fadr.cn30xx.fcol);
+ }
+
+ if (cfg0.s.sec_err) {
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0,
+ -1, -1, -1, msg, "");
+ cfg0.s.sec_err = -1; /* Done, re-arm */
+ do_clear = true;
+ }
+
+ if (cfg0.s.ded_err) {
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
+ -1, -1, -1, msg, "");
+ cfg0.s.ded_err = -1; /* Done, re-arm */
+ do_clear = true;
+ }
+ if (do_clear)
+ cvmx_write_csr(CVMX_LMCX_MEM_CFG0(mci->mc_idx), cfg0.u64);
+}
+
+static void octeon_lmc_edac_poll_o2(struct mem_ctl_info *mci)
+{
+ union cvmx_lmcx_int int_reg;
+ bool do_clear = false;
+ char msg[64];
+
+ int_reg.u64 = cvmx_read_csr(CVMX_LMCX_INT(mci->mc_idx));
+ if (int_reg.s.sec_err || int_reg.s.ded_err) {
+ union cvmx_lmcx_fadr fadr;
+ fadr.u64 = cvmx_read_csr(CVMX_LMCX_FADR(mci->mc_idx));
+ snprintf(msg, sizeof(msg),
+ "DIMM %d rank %d bank %d row %d col %d",
+ fadr.cn61xx.fdimm, fadr.cn61xx.fbunk,
+ fadr.cn61xx.fbank, fadr.cn61xx.frow, fadr.cn61xx.fcol);
+ }
+
+ if (int_reg.s.sec_err) {
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0,
+ -1, -1, -1, msg, "");
+ int_reg.s.sec_err = -1; /* Done, re-arm */
+ do_clear = true;
+ }
+
+ if (int_reg.s.ded_err) {
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
+ -1, -1, -1, msg, "");
+ int_reg.s.ded_err = -1; /* Done, re-arm */
+ do_clear = true;
+ }
+ if (do_clear)
+ cvmx_write_csr(CVMX_LMCX_INT(mci->mc_idx), int_reg.u64);
+}
+
+static int __devinit octeon_lmc_edac_probe(struct platform_device *pdev)
+{
+ struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[1];
+ int mc = pdev->id;
+
+ layers[0].type = EDAC_MC_LAYER_CHANNEL;
+ layers[0].size = 1;
+ layers[0].is_virt_csrow = false;
+
+ if (OCTEON_IS_MODEL(OCTEON_FAM_1_PLUS)) {
+ union cvmx_lmcx_mem_cfg0 cfg0;
+
+ cfg0.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(0));
+ if (!cfg0.s.ecc_ena) {
+ dev_info(&pdev->dev, "Disabled (ECC not enabled)\n");
+ return 0;
+ }
+
+ mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, 0);
+ if (!mci)
+ return -ENXIO;
+
+ mci->pdev = &pdev->dev;
+ mci->dev_name = dev_name(&pdev->dev);
+
+ mci->mod_name = "octeon-lmc";
+ mci->ctl_name = "octeon-lmc-err";
+ mci->edac_check = octeon_lmc_edac_poll;
+
+ if (edac_mc_add_mc(mci)) {
+ dev_err(&pdev->dev, "edac_mc_add_mc() failed\n");
+ edac_mc_free(mci);
+ return -ENXIO;
+ }
+
+ cfg0.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(mc));
+ cfg0.s.intr_ded_ena = 0; /* We poll */
+ cfg0.s.intr_sec_ena = 0;
+ cvmx_write_csr(CVMX_LMCX_MEM_CFG0(mc), cfg0.u64);
+ } else {
+ /* OCTEON II */
+ union cvmx_lmcx_int_en en;
+ union cvmx_lmcx_config config;
+
+ config.u64 = cvmx_read_csr(CVMX_LMCX_CONFIG(0));
+ if (!config.s.ecc_ena) {
+ dev_info(&pdev->dev, "Disabled (ECC not enabled)\n");
+ return 0;
+ }
+
+ mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, 0);
+ if (!mci)
+ return -ENXIO;
+
+ mci->pdev = &pdev->dev;
+ mci->dev_name = dev_name(&pdev->dev);
+
+ mci->mod_name = "octeon-lmc";
+ mci->ctl_name = "co_lmc_err";
+ mci->edac_check = octeon_lmc_edac_poll_o2;
+
+ if (edac_mc_add_mc(mci)) {
+ dev_err(&pdev->dev, "edac_mc_add_mc() failed\n");
+ edac_mc_free(mci);
+ return -ENXIO;
+ }
+
+ en.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(mc));
+ en.s.intr_ded_ena = 0; /* We poll */
+ en.s.intr_sec_ena = 0;
+ cvmx_write_csr(CVMX_LMCX_MEM_CFG0(mc), en.u64);
+ }
+ platform_set_drvdata(pdev, mci);
+
+ return 0;
+}
+
+static int octeon_lmc_edac_remove(struct platform_device *pdev)
+{
+ struct mem_ctl_info *mci = platform_get_drvdata(pdev);
+
+ edac_mc_del_mc(&pdev->dev);
+ edac_mc_free(mci);
+ return 0;
+}
+
+static struct platform_driver octeon_lmc_edac_driver = {
+ .probe = octeon_lmc_edac_probe,
+ .remove = octeon_lmc_edac_remove,
+ .driver = {
+ .name = "octeon_lmc_edac",
+ }
+};
+module_platform_driver(octeon_lmc_edac_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>");
diff --git a/drivers/edac/octeon_edac-pc.c b/drivers/edac/octeon_edac-pc.c
new file mode 100644
index 00000000000..14a5e57f2b3
--- /dev/null
+++ b/drivers/edac/octeon_edac-pc.c
@@ -0,0 +1,143 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2012 Cavium, Inc.
+ *
+ * Copyright (C) 2009 Wind River Systems,
+ * written by Ralf Baechle <ralf@linux-mips.org>
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/edac.h>
+
+#include "edac_core.h"
+#include "edac_module.h"
+
+#include <asm/octeon/cvmx.h>
+#include <asm/mipsregs.h>
+
+extern int register_co_cache_error_notifier(struct notifier_block *nb);
+extern int unregister_co_cache_error_notifier(struct notifier_block *nb);
+
+extern unsigned long long cache_err_dcache[NR_CPUS];
+
+struct co_cache_error {
+ struct notifier_block notifier;
+ struct edac_device_ctl_info *ed;
+};
+
+/**
+ * EDAC CPU cache error callback
+ *
+ * @event: non-zero if unrecoverable.
+ */
+static int co_cache_error_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct co_cache_error *p = container_of(this, struct co_cache_error,
+ notifier);
+
+ unsigned int core = cvmx_get_core_num();
+ unsigned int cpu = smp_processor_id();
+ u64 icache_err = read_octeon_c0_icacheerr();
+ u64 dcache_err;
+
+ if (event) {
+ dcache_err = cache_err_dcache[core];
+ cache_err_dcache[core] = 0;
+ } else {
+ dcache_err = read_octeon_c0_dcacheerr();
+ }
+
+ if (icache_err & 1) {
+ edac_device_printk(p->ed, KERN_ERR,
+ "CacheErr (Icache):%llx, core %d/cpu %d, cp0_errorepc == %lx\n",
+ (unsigned long long)icache_err, core, cpu,
+ read_c0_errorepc());
+ write_octeon_c0_icacheerr(0);
+ edac_device_handle_ce(p->ed, cpu, 1, "icache");
+ }
+ if (dcache_err & 1) {
+ edac_device_printk(p->ed, KERN_ERR,
+ "CacheErr (Dcache):%llx, core %d/cpu %d, cp0_errorepc == %lx\n",
+ (unsigned long long)dcache_err, core, cpu,
+ read_c0_errorepc());
+ if (event)
+ edac_device_handle_ue(p->ed, cpu, 0, "dcache");
+ else
+ edac_device_handle_ce(p->ed, cpu, 0, "dcache");
+
+ /* Clear the error indication */
+ if (OCTEON_IS_MODEL(OCTEON_FAM_2))
+ write_octeon_c0_dcacheerr(1);
+ else
+ write_octeon_c0_dcacheerr(0);
+ }
+
+ return NOTIFY_STOP;
+}
+
+static int __devinit co_cache_error_probe(struct platform_device *pdev)
+{
+ struct co_cache_error *p = devm_kzalloc(&pdev->dev, sizeof(*p),
+ GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ p->notifier.notifier_call = co_cache_error_event;
+ platform_set_drvdata(pdev, p);
+
+ p->ed = edac_device_alloc_ctl_info(0, "cpu", num_possible_cpus(),
+ "cache", 2, 0, NULL, 0,
+ edac_device_alloc_index());
+ if (!p->ed)
+ goto err;
+
+ p->ed->dev = &pdev->dev;
+
+ p->ed->dev_name = dev_name(&pdev->dev);
+
+ p->ed->mod_name = "octeon-cpu";
+ p->ed->ctl_name = "cache";
+
+ if (edac_device_add_device(p->ed)) {
+ pr_err("%s: edac_device_add_device() failed\n", __func__);
+ goto err1;
+ }
+
+ register_co_cache_error_notifier(&p->notifier);
+
+ return 0;
+
+err1:
+ edac_device_free_ctl_info(p->ed);
+err:
+ return -ENXIO;
+}
+
+static int co_cache_error_remove(struct platform_device *pdev)
+{
+ struct co_cache_error *p = platform_get_drvdata(pdev);
+
+ unregister_co_cache_error_notifier(&p->notifier);
+ edac_device_del_device(&pdev->dev);
+ edac_device_free_ctl_info(p->ed);
+ return 0;
+}
+
+static struct platform_driver co_cache_error_driver = {
+ .probe = co_cache_error_probe,
+ .remove = co_cache_error_remove,
+ .driver = {
+ .name = "octeon_pc_edac",
+ }
+};
+module_platform_driver(co_cache_error_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>");
diff --git a/drivers/edac/octeon_edac-pci.c b/drivers/edac/octeon_edac-pci.c
new file mode 100644
index 00000000000..758c1ef5fc9
--- /dev/null
+++ b/drivers/edac/octeon_edac-pci.c
@@ -0,0 +1,111 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2012 Cavium, Inc.
+ * Copyright (C) 2009 Wind River Systems,
+ * written by Ralf Baechle <ralf@linux-mips.org>
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/edac.h>
+
+#include <asm/octeon/cvmx.h>
+#include <asm/octeon/cvmx-npi-defs.h>
+#include <asm/octeon/cvmx-pci-defs.h>
+#include <asm/octeon/octeon.h>
+
+#include "edac_core.h"
+#include "edac_module.h"
+
+static void octeon_pci_poll(struct edac_pci_ctl_info *pci)
+{
+ union cvmx_pci_cfg01 cfg01;
+
+ cfg01.u32 = octeon_npi_read32(CVMX_NPI_PCI_CFG01);
+ if (cfg01.s.dpe) { /* Detected parity error */
+ edac_pci_handle_pe(pci, pci->ctl_name);
+ cfg01.s.dpe = 1; /* Reset */
+ octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32);
+ }
+ if (cfg01.s.sse) {
+ edac_pci_handle_npe(pci, "Signaled System Error");
+ cfg01.s.sse = 1; /* Reset */
+ octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32);
+ }
+ if (cfg01.s.rma) {
+ edac_pci_handle_npe(pci, "Received Master Abort");
+ cfg01.s.rma = 1; /* Reset */
+ octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32);
+ }
+ if (cfg01.s.rta) {
+ edac_pci_handle_npe(pci, "Received Target Abort");
+ cfg01.s.rta = 1; /* Reset */
+ octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32);
+ }
+ if (cfg01.s.sta) {
+ edac_pci_handle_npe(pci, "Signaled Target Abort");
+ cfg01.s.sta = 1; /* Reset */
+ octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32);
+ }
+ if (cfg01.s.mdpe) {
+ edac_pci_handle_npe(pci, "Master Data Parity Error");
+ cfg01.s.mdpe = 1; /* Reset */
+ octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32);
+ }
+}
+
+static int __devinit octeon_pci_probe(struct platform_device *pdev)
+{
+ struct edac_pci_ctl_info *pci;
+ int res = 0;
+
+ pci = edac_pci_alloc_ctl_info(0, "octeon_pci_err");
+ if (!pci)
+ return -ENOMEM;
+
+ pci->dev = &pdev->dev;
+ platform_set_drvdata(pdev, pci);
+ pci->dev_name = dev_name(&pdev->dev);
+
+ pci->mod_name = "octeon-pci";
+ pci->ctl_name = "octeon_pci_err";
+ pci->edac_check = octeon_pci_poll;
+
+ if (edac_pci_add_device(pci, 0) > 0) {
+ pr_err("%s: edac_pci_add_device() failed\n", __func__);
+ goto err;
+ }
+
+ return 0;
+
+err:
+ edac_pci_free_ctl_info(pci);
+
+ return res;
+}
+
+static int octeon_pci_remove(struct platform_device *pdev)
+{
+ struct edac_pci_ctl_info *pci = platform_get_drvdata(pdev);
+
+ edac_pci_del_device(&pdev->dev);
+ edac_pci_free_ctl_info(pci);
+
+ return 0;
+}
+
+static struct platform_driver octeon_pci_driver = {
+ .probe = octeon_pci_probe,
+ .remove = octeon_pci_remove,
+ .driver = {
+ .name = "octeon_pci_edac",
+ }
+};
+module_platform_driver(octeon_pci_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>");